summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/DAC960.c8
-rw-r--r--drivers/block/Kconfig4
-rw-r--r--drivers/block/aoe/aoechr.c3
-rw-r--r--drivers/block/aoe/aoecmd.c3
-rw-r--r--drivers/block/cciss.c4
-rw-r--r--drivers/block/cciss_scsi.c96
-rw-r--r--drivers/block/cpqarray.c2
-rw-r--r--drivers/block/drbd/drbd_proc.c2
-rw-r--r--drivers/block/drbd/drbd_receiver.c5
-rw-r--r--drivers/block/loop.c12
-rw-r--r--drivers/block/mg_disk.c4
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c331
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h18
-rw-r--r--drivers/block/nbd.c2
-rw-r--r--drivers/block/nvme.c33
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/ps3vram.c2
-rw-r--r--drivers/block/rbd.c50
-rw-r--r--drivers/block/rsxx/Makefile2
-rw-r--r--drivers/block/rsxx/config.c8
-rw-r--r--drivers/block/rsxx/core.c237
-rw-r--r--drivers/block/rsxx/cregs.c112
-rw-r--r--drivers/block/rsxx/dma.c239
-rw-r--r--drivers/block/rsxx/rsxx.h6
-rw-r--r--drivers/block/rsxx/rsxx_cfg.h2
-rw-r--r--drivers/block/rsxx/rsxx_priv.h34
-rw-r--r--drivers/block/xen-blkback/blkback.c68
-rw-r--r--drivers/block/xen-blkback/common.h40
-rw-r--r--drivers/block/xen-blkback/xenbus.c14
-rw-r--r--drivers/block/xen-blkfront.c154
30 files changed, 1050 insertions, 447 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 5b5ee79ff236..eb3950113e42 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6473,7 +6473,7 @@ static int dac960_initial_status_proc_show(struct seq_file *m, void *v)
static int dac960_initial_status_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, dac960_initial_status_proc_show, PDE(inode)->data);
+ return single_open(file, dac960_initial_status_proc_show, PDE_DATA(inode));
}
static const struct file_operations dac960_initial_status_proc_fops = {
@@ -6519,7 +6519,7 @@ static int dac960_current_status_proc_show(struct seq_file *m, void *v)
static int dac960_current_status_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, dac960_current_status_proc_show, PDE(inode)->data);
+ return single_open(file, dac960_current_status_proc_show, PDE_DATA(inode));
}
static const struct file_operations dac960_current_status_proc_fops = {
@@ -6540,14 +6540,14 @@ static int dac960_user_command_proc_show(struct seq_file *m, void *v)
static int dac960_user_command_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, dac960_user_command_proc_show, PDE(inode)->data);
+ return single_open(file, dac960_user_command_proc_show, PDE_DATA(inode));
}
static ssize_t dac960_user_command_proc_write(struct file *file,
const char __user *Buffer,
size_t Count, loff_t *pos)
{
- DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file_inode(file))->data;
+ DAC960_Controller_T *Controller = PDE_DATA(file_inode(file));
unsigned char CommandBuffer[80];
int Length;
if (Count > sizeof(CommandBuffer)-1) return -EINVAL;
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 5dc0daed8fac..b81ddfea1da0 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -532,11 +532,11 @@ config BLK_DEV_RBD
If unsure, say N.
config BLK_DEV_RSXX
- tristate "RamSam PCIe Flash SSD Device Driver"
+ tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver"
depends on PCI
help
Device driver for IBM's high speed PCIe SSD
- storage devices: RamSan-70 and RamSan-80.
+ storage devices: FlashSystem-70 and FlashSystem-80.
To compile this driver as a module, choose M here: the
module will be called rsxx.
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 42e67ad6bd20..ab41be625a53 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -139,13 +139,12 @@ bail: spin_unlock_irqrestore(&emsgs_lock, flags);
return;
}
- mp = kmalloc(n, GFP_ATOMIC);
+ mp = kmemdup(msg, n, GFP_ATOMIC);
if (mp == NULL) {
printk(KERN_ERR "aoe: allocation failure, len=%ld\n", n);
goto bail;
}
- memcpy(mp, msg, n);
em->msg = mp;
em->flags |= EMFL_VALID;
em->len = n;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 25ef5c014fca..92b6d7c51e39 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -51,8 +51,9 @@ new_skb(ulong len)
{
struct sk_buff *skb;
- skb = alloc_skb(len, GFP_ATOMIC);
+ skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC);
if (skb) {
+ skb_reserve(skb, MAX_HEADER);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb->protocol = __constant_htons(ETH_P_AOE);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ade58bc8f3c4..e18c99140c0a 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -493,7 +493,7 @@ static int cciss_seq_open(struct inode *inode, struct file *file)
struct seq_file *seq = file->private_data;
if (!ret)
- seq->private = PDE(inode)->data;
+ seq->private = PDE_DATA(inode);
return ret;
}
@@ -4206,7 +4206,7 @@ static int cciss_find_cfgtables(ctlr_info_t *h)
if (rc)
return rc;
h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev,
- cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable));
+ cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable));
if (!h->cfgtable)
return -ENOMEM;
rc = write_driver_ver_to_cfgtable(h->cfgtable);
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index da3311129a0c..ecd845cd28d8 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -54,13 +54,11 @@ static CommandList_struct *cmd_special_alloc(ctlr_info_t *h);
static void cmd_free(ctlr_info_t *h, CommandList_struct *c);
static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c);
-static int cciss_scsi_proc_info(
- struct Scsi_Host *sh,
+static int cciss_scsi_write_info(struct Scsi_Host *sh,
char *buffer, /* data buffer */
- char **start, /* where data in buffer starts */
- off_t offset, /* offset from start of imaginary file */
- int length, /* length of data in buffer */
- int func); /* 0 == read, 1 == write */
+ int length); /* length of data in buffer */
+static int cciss_scsi_show_info(struct seq_file *m,
+ struct Scsi_Host *sh);
static int cciss_scsi_queue_command (struct Scsi_Host *h,
struct scsi_cmnd *cmd);
@@ -82,7 +80,8 @@ static struct scsi_host_template cciss_driver_template = {
.module = THIS_MODULE,
.name = "cciss",
.proc_name = "cciss",
- .proc_info = cciss_scsi_proc_info,
+ .write_info = cciss_scsi_write_info,
+ .show_info = cciss_scsi_show_info,
.queuecommand = cciss_scsi_queue_command,
.this_id = 7,
.cmd_per_lun = 1,
@@ -1302,59 +1301,54 @@ cciss_scsi_user_command(ctlr_info_t *h, int hostno, char *buffer, int length)
return length;
}
-
static int
-cciss_scsi_proc_info(struct Scsi_Host *sh,
+cciss_scsi_write_info(struct Scsi_Host *sh,
char *buffer, /* data buffer */
- char **start, /* where data in buffer starts */
- off_t offset, /* offset from start of imaginary file */
- int length, /* length of data in buffer */
- int func) /* 0 == read, 1 == write */
+ int length) /* length of data in buffer */
{
+ ctlr_info_t *h = (ctlr_info_t *) sh->hostdata[0];
+ if (h == NULL) /* This really shouldn't ever happen. */
+ return -EINVAL;
- int buflen, datalen;
- ctlr_info_t *h;
+ return cciss_scsi_user_command(h, sh->host_no,
+ buffer, length);
+}
+
+static int
+cciss_scsi_show_info(struct seq_file *m, struct Scsi_Host *sh)
+{
+
+ ctlr_info_t *h = (ctlr_info_t *) sh->hostdata[0];
int i;
- h = (ctlr_info_t *) sh->hostdata[0];
if (h == NULL) /* This really shouldn't ever happen. */
return -EINVAL;
- if (func == 0) { /* User is reading from /proc/scsi/ciss*?/?* */
- buflen = sprintf(buffer, "cciss%d: SCSI host: %d\n",
- h->ctlr, sh->host_no);
-
- /* this information is needed by apps to know which cciss
- device corresponds to which scsi host number without
- having to open a scsi target device node. The device
- information is not a duplicate of /proc/scsi/scsi because
- the two may be out of sync due to scsi hotplug, rather
- this info is for an app to be able to use to know how to
- get them back in sync. */
-
- for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
- struct cciss_scsi_dev_t *sd =
- &ccissscsi[h->ctlr].dev[i];
- buflen += sprintf(&buffer[buflen], "c%db%dt%dl%d %02d "
- "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
- sh->host_no, sd->bus, sd->target, sd->lun,
- sd->devtype,
- sd->scsi3addr[0], sd->scsi3addr[1],
- sd->scsi3addr[2], sd->scsi3addr[3],
- sd->scsi3addr[4], sd->scsi3addr[5],
- sd->scsi3addr[6], sd->scsi3addr[7]);
- }
- datalen = buflen - offset;
- if (datalen < 0) { /* they're reading past EOF. */
- datalen = 0;
- *start = buffer+buflen;
- } else
- *start = buffer + offset;
- return(datalen);
- } else /* User is writing to /proc/scsi/cciss*?/?* ... */
- return cciss_scsi_user_command(h, sh->host_no,
- buffer, length);
-}
+ seq_printf(m, "cciss%d: SCSI host: %d\n",
+ h->ctlr, sh->host_no);
+
+ /* this information is needed by apps to know which cciss
+ device corresponds to which scsi host number without
+ having to open a scsi target device node. The device
+ information is not a duplicate of /proc/scsi/scsi because
+ the two may be out of sync due to scsi hotplug, rather
+ this info is for an app to be able to use to know how to
+ get them back in sync. */
+
+ for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
+ struct cciss_scsi_dev_t *sd =
+ &ccissscsi[h->ctlr].dev[i];
+ seq_printf(m, "c%db%dt%dl%d %02d "
+ "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+ sh->host_no, sd->bus, sd->target, sd->lun,
+ sd->devtype,
+ sd->scsi3addr[0], sd->scsi3addr[1],
+ sd->scsi3addr[2], sd->scsi3addr[3],
+ sd->scsi3addr[4], sd->scsi3addr[5],
+ sd->scsi3addr[6], sd->scsi3addr[7]);
+ }
+ return 0;
+}
/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
dma mapping and fills in the scatter gather entries of the
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 3f087133a25a..3b9e8ebcb96b 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -296,7 +296,7 @@ static int ida_proc_show(struct seq_file *m, void *v)
static int ida_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, ida_proc_show, PDE(inode)->data);
+ return single_open(file, ida_proc_show, PDE_DATA(inode));
}
static const struct file_operations ida_proc_fops = {
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 56672a61eb94..928adb815b09 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -314,7 +314,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
static int drbd_proc_open(struct inode *inode, struct file *file)
{
if (try_module_get(THIS_MODULE))
- return single_open(file, drbd_seq_show, PDE(inode)->data);
+ return single_open(file, drbd_seq_show, PDE_DATA(inode));
return -ENODEV;
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index a9eccfc6079b..83c5ae0ed56b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -757,7 +757,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct acc
rcu_read_unlock();
timeo = connect_int * HZ;
- timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
+ /* 28.5% random jitter */
+ timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
if (err <= 0)
@@ -953,7 +954,7 @@ retry:
conn_warn(tconn, "Error receiving initial packet\n");
sock_release(s);
randomize:
- if (random32() & 1)
+ if (prandom_u32() & 1)
goto retry;
}
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 747bb2af69dc..b2955b3f2cbc 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -230,9 +230,11 @@ static int __do_lo_send_write(struct file *file,
ssize_t bw;
mm_segment_t old_fs = get_fs();
+ file_start_write(file);
set_fs(get_ds());
bw = file->f_op->write(file, buf, len, &pos);
set_fs(old_fs);
+ file_end_write(file);
if (likely(bw == len))
return 0;
printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
@@ -922,6 +924,11 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
lo->lo_flags |= LO_FLAGS_PARTSCAN;
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
ioctl_by_bdev(bdev, BLKRRPART, 0);
+
+ /* Grab the block_device to prevent its destruction after we
+ * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
+ */
+ bdgrab(bdev);
return 0;
out_clr:
@@ -1031,8 +1038,10 @@ static int loop_clr_fd(struct loop_device *lo)
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
memset(lo->lo_file_name, 0, LO_NAME_SIZE);
- if (bdev)
+ if (bdev) {
+ bdput(bdev);
invalidate_bdev(bdev);
+ }
set_capacity(lo->lo_disk, 0);
loop_sysfs_exit(lo);
if (bdev) {
@@ -1623,6 +1632,7 @@ static int loop_add(struct loop_device **l, int i)
goto out_free_dev;
i = err;
+ err = -ENOMEM;
lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
if (!lo->lo_queue)
goto out_free_dev;
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 1788f491e0fb..076ae7f1b781 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -890,8 +890,10 @@ static int mg_probe(struct platform_device *plat_dev)
gpio_direction_output(host->rst, 1);
/* reset out pin */
- if (!(prv_data->dev_attr & MG_DEV_MASK))
+ if (!(prv_data->dev_attr & MG_DEV_MASK)) {
+ err = -EINVAL;
goto probe_err_3a;
+ }
if (prv_data->dev_attr != MG_BOOT_DEV) {
rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO,
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 11cc9522cdd4..32c678028e53 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -81,12 +81,17 @@
/* Device instance number, incremented each time a device is probed. */
static int instance;
+struct list_head online_list;
+struct list_head removing_list;
+spinlock_t dev_lock;
+
/*
* Global variable used to hold the major block device number
* allocated in mtip_init().
*/
static int mtip_major;
static struct dentry *dfs_parent;
+static struct dentry *dfs_device_status;
static u32 cpu_use[NR_CPUS];
@@ -243,40 +248,31 @@ static inline void release_slot(struct mtip_port *port, int tag)
/*
* Reset the HBA (without sleeping)
*
- * Just like hba_reset, except does not call sleep, so can be
- * run from interrupt/tasklet context.
- *
* @dd Pointer to the driver data structure.
*
* return value
* 0 The reset was successful.
* -1 The HBA Reset bit did not clear.
*/
-static int hba_reset_nosleep(struct driver_data *dd)
+static int mtip_hba_reset(struct driver_data *dd)
{
unsigned long timeout;
- /* Chip quirk: quiesce any chip function */
- mdelay(10);
-
/* Set the reset bit */
writel(HOST_RESET, dd->mmio + HOST_CTL);
/* Flush */
readl(dd->mmio + HOST_CTL);
- /*
- * Wait 10ms then spin for up to 1 second
- * waiting for reset acknowledgement
- */
- timeout = jiffies + msecs_to_jiffies(1000);
- mdelay(10);
- while ((readl(dd->mmio + HOST_CTL) & HOST_RESET)
- && time_before(jiffies, timeout))
- mdelay(1);
+ /* Spin for up to 2 seconds, waiting for reset acknowledgement */
+ timeout = jiffies + msecs_to_jiffies(2000);
+ do {
+ mdelay(10);
+ if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
+ return -1;
- if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
- return -1;
+ } while ((readl(dd->mmio + HOST_CTL) & HOST_RESET)
+ && time_before(jiffies, timeout));
if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
return -1;
@@ -481,7 +477,7 @@ static void mtip_restart_port(struct mtip_port *port)
dev_warn(&port->dd->pdev->dev,
"PxCMD.CR not clear, escalating reset\n");
- if (hba_reset_nosleep(port->dd))
+ if (mtip_hba_reset(port->dd))
dev_err(&port->dd->pdev->dev,
"HBA reset escalation failed.\n");
@@ -527,6 +523,26 @@ static void mtip_restart_port(struct mtip_port *port)
}
+static int mtip_device_reset(struct driver_data *dd)
+{
+ int rv = 0;
+
+ if (mtip_check_surprise_removal(dd->pdev))
+ return 0;
+
+ if (mtip_hba_reset(dd) < 0)
+ rv = -EFAULT;
+
+ mdelay(1);
+ mtip_init_port(dd->port);
+ mtip_start_port(dd->port);
+
+ /* Enable interrupts on the HBA. */
+ writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
+ dd->mmio + HOST_CTL);
+ return rv;
+}
+
/*
* Helper function for tag logging
*/
@@ -632,7 +648,7 @@ static void mtip_timeout_function(unsigned long int data)
if (cmdto_cnt) {
print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
- mtip_restart_port(port);
+ mtip_device_reset(port->dd);
wake_up_interruptible(&port->svc_wait);
}
clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
@@ -1283,11 +1299,11 @@ static int mtip_exec_internal_command(struct mtip_port *port,
int rv = 0, ready2go = 1;
struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
unsigned long to;
+ struct driver_data *dd = port->dd;
/* Make sure the buffer is 8 byte aligned. This is asic specific. */
if (buffer & 0x00000007) {
- dev_err(&port->dd->pdev->dev,
- "SG buffer is not 8 byte aligned\n");
+ dev_err(&dd->pdev->dev, "SG buffer is not 8 byte aligned\n");
return -EFAULT;
}
@@ -1300,23 +1316,21 @@ static int mtip_exec_internal_command(struct mtip_port *port,
mdelay(100);
} while (time_before(jiffies, to));
if (!ready2go) {
- dev_warn(&port->dd->pdev->dev,
+ dev_warn(&dd->pdev->dev,
"Internal cmd active. new cmd [%02X]\n", fis->command);
return -EBUSY;
}
set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
port->ic_pause_timer = 0;
- if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
- clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
- else if (fis->command == ATA_CMD_DOWNLOAD_MICRO)
- clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
+ clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+ clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
if (atomic == GFP_KERNEL) {
if (fis->command != ATA_CMD_STANDBYNOW1) {
/* wait for io to complete if non atomic */
if (mtip_quiesce_io(port, 5000) < 0) {
- dev_warn(&port->dd->pdev->dev,
+ dev_warn(&dd->pdev->dev,
"Failed to quiesce IO\n");
release_slot(port, MTIP_TAG_INTERNAL);
clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
@@ -1361,58 +1375,84 @@ static int mtip_exec_internal_command(struct mtip_port *port,
/* Issue the command to the hardware */
mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
- /* Poll if atomic, wait_for_completion otherwise */
if (atomic == GFP_KERNEL) {
/* Wait for the command to complete or timeout. */
- if (wait_for_completion_timeout(
+ if (wait_for_completion_interruptible_timeout(
&wait,
- msecs_to_jiffies(timeout)) == 0) {
- dev_err(&port->dd->pdev->dev,
- "Internal command did not complete [%d] "
- "within timeout of %lu ms\n",
- atomic, timeout);
- if (mtip_check_surprise_removal(port->dd->pdev) ||
+ msecs_to_jiffies(timeout)) <= 0) {
+ if (rv == -ERESTARTSYS) { /* interrupted */
+ dev_err(&dd->pdev->dev,
+ "Internal command [%02X] was interrupted after %lu ms\n",
+ fis->command, timeout);
+ rv = -EINTR;
+ goto exec_ic_exit;
+ } else if (rv == 0) /* timeout */
+ dev_err(&dd->pdev->dev,
+ "Internal command did not complete [%02X] within timeout of %lu ms\n",
+ fis->command, timeout);
+ else
+ dev_err(&dd->pdev->dev,
+ "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
+ fis->command, rv, timeout);
+
+ if (mtip_check_surprise_removal(dd->pdev) ||
test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
- &port->dd->dd_flag)) {
+ &dd->dd_flag)) {
+ dev_err(&dd->pdev->dev,
+ "Internal command [%02X] wait returned due to SR\n",
+ fis->command);
rv = -ENXIO;
goto exec_ic_exit;
}
+ mtip_device_reset(dd); /* recover from timeout issue */
rv = -EAGAIN;
+ goto exec_ic_exit;
}
} else {
+ u32 hba_stat, port_stat;
+
/* Spin for <timeout> checking if command still outstanding */
timeout = jiffies + msecs_to_jiffies(timeout);
while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL])
& (1 << MTIP_TAG_INTERNAL))
&& time_before(jiffies, timeout)) {
- if (mtip_check_surprise_removal(port->dd->pdev)) {
+ if (mtip_check_surprise_removal(dd->pdev)) {
rv = -ENXIO;
goto exec_ic_exit;
}
if ((fis->command != ATA_CMD_STANDBYNOW1) &&
test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
- &port->dd->dd_flag)) {
+ &dd->dd_flag)) {
rv = -ENXIO;
goto exec_ic_exit;
}
- if (readl(port->mmio + PORT_IRQ_STAT) & PORT_IRQ_ERR) {
- atomic_inc(&int_cmd->active); /* error */
- break;
+ port_stat = readl(port->mmio + PORT_IRQ_STAT);
+ if (!port_stat)
+ continue;
+
+ if (port_stat & PORT_IRQ_ERR) {
+ dev_err(&dd->pdev->dev,
+ "Internal command [%02X] failed\n",
+ fis->command);
+ mtip_device_reset(dd);
+ rv = -EIO;
+ goto exec_ic_exit;
+ } else {
+ writel(port_stat, port->mmio + PORT_IRQ_STAT);
+ hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
+ if (hba_stat)
+ writel(hba_stat,
+ dd->mmio + HOST_IRQ_STAT);
}
+ break;
}
}
- if (atomic_read(&int_cmd->active) > 1) {
- dev_err(&port->dd->pdev->dev,
- "Internal command [%02X] failed\n", fis->command);
- rv = -EIO;
- }
if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
& (1 << MTIP_TAG_INTERNAL)) {
rv = -ENXIO;
- if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
- &port->dd->dd_flag)) {
- mtip_restart_port(port);
+ if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
+ mtip_device_reset(dd);
rv = -EAGAIN;
}
}
@@ -1724,7 +1764,8 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
* -EINVAL Invalid parameters passed in, trim not supported
* -EIO Error submitting trim request to hw
*/
-static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len)
+static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
+ unsigned int len)
{
int i, rv = 0;
u64 tlba, tlen, sect_left;
@@ -1811,45 +1852,6 @@ static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors)
}
/*
- * Reset the HBA.
- *
- * Resets the HBA by setting the HBA Reset bit in the Global
- * HBA Control register. After setting the HBA Reset bit the
- * function waits for 1 second before reading the HBA Reset
- * bit to make sure it has cleared. If HBA Reset is not clear
- * an error is returned. Cannot be used in non-blockable
- * context.
- *
- * @dd Pointer to the driver data structure.
- *
- * return value
- * 0 The reset was successful.
- * -1 The HBA Reset bit did not clear.
- */
-static int mtip_hba_reset(struct driver_data *dd)
-{
- mtip_deinit_port(dd->port);
-
- /* Set the reset bit */
- writel(HOST_RESET, dd->mmio + HOST_CTL);
-
- /* Flush */
- readl(dd->mmio + HOST_CTL);
-
- /* Wait for reset to clear */
- ssleep(1);
-
- /* Check the bit has cleared */
- if (readl(dd->mmio + HOST_CTL) & HOST_RESET) {
- dev_err(&dd->pdev->dev,
- "Reset bit did not clear.\n");
- return -1;
- }
-
- return 0;
-}
-
-/*
* Display the identify command data.
*
* @port Pointer to the port data structure.
@@ -2710,6 +2712,100 @@ static ssize_t mtip_hw_show_status(struct device *dev,
static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
+/* debugsfs entries */
+
+static ssize_t show_device_status(struct device_driver *drv, char *buf)
+{
+ int size = 0;
+ struct driver_data *dd, *tmp;
+ unsigned long flags;
+ char id_buf[42];
+ u16 status = 0;
+
+ spin_lock_irqsave(&dev_lock, flags);
+ size += sprintf(&buf[size], "Devices Present:\n");
+ list_for_each_entry_safe(dd, tmp, &online_list, online_list) {
+ if (dd->pdev) {
+ if (dd->port &&
+ dd->port->identify &&
+ dd->port->identify_valid) {
+ strlcpy(id_buf,
+ (char *) (dd->port->identify + 10), 21);
+ status = *(dd->port->identify + 141);
+ } else {
+ memset(id_buf, 0, 42);
+ status = 0;
+ }
+
+ if (dd->port &&
+ test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
+ size += sprintf(&buf[size],
+ " device %s %s (ftl rebuild %d %%)\n",
+ dev_name(&dd->pdev->dev),
+ id_buf,
+ status);
+ } else {
+ size += sprintf(&buf[size],
+ " device %s %s\n",
+ dev_name(&dd->pdev->dev),
+ id_buf);
+ }
+ }
+ }
+
+ size += sprintf(&buf[size], "Devices Being Removed:\n");
+ list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) {
+ if (dd->pdev) {
+ if (dd->port &&
+ dd->port->identify &&
+ dd->port->identify_valid) {
+ strlcpy(id_buf,
+ (char *) (dd->port->identify+10), 21);
+ status = *(dd->port->identify + 141);
+ } else {
+ memset(id_buf, 0, 42);
+ status = 0;
+ }
+
+ if (dd->port &&
+ test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
+ size += sprintf(&buf[size],
+ " device %s %s (ftl rebuild %d %%)\n",
+ dev_name(&dd->pdev->dev),
+ id_buf,
+ status);
+ } else {
+ size += sprintf(&buf[size],
+ " device %s %s\n",
+ dev_name(&dd->pdev->dev),
+ id_buf);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&dev_lock, flags);
+
+ return size;
+}
+
+static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf,
+ size_t len, loff_t *offset)
+{
+ int size = *offset;
+ char buf[MTIP_DFS_MAX_BUF_SIZE];
+
+ if (!len || *offset)
+ return 0;
+
+ size += show_device_status(NULL, buf);
+
+ *offset = size <= len ? size : len;
+ size = copy_to_user(ubuf, buf, *offset);
+ if (size)
+ return -EFAULT;
+
+ return *offset;
+}
+
static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
size_t len, loff_t *offset)
{
@@ -2804,6 +2900,13 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
return *offset;
}
+static const struct file_operations mtip_device_status_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = mtip_hw_read_device_status,
+ .llseek = no_llseek,
+};
+
static const struct file_operations mtip_regs_fops = {
.owner = THIS_MODULE,
.open = simple_open,
@@ -4161,6 +4264,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
const struct cpumask *node_mask;
int cpu, i = 0, j = 0;
int my_node = NUMA_NO_NODE;
+ unsigned long flags;
/* Allocate memory for this devices private data. */
my_node = pcibus_to_node(pdev->bus);
@@ -4218,12 +4322,16 @@ static int mtip_pci_probe(struct pci_dev *pdev,
dd->pdev = pdev;
dd->numa_node = my_node;
+ INIT_LIST_HEAD(&dd->online_list);
+ INIT_LIST_HEAD(&dd->remove_list);
+
memset(dd->workq_name, 0, 32);
snprintf(dd->workq_name, 31, "mtipq%d", dd->instance);
dd->isr_workq = create_workqueue(dd->workq_name);
if (!dd->isr_workq) {
dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
+ rv = -ENOMEM;
goto block_initialize_err;
}
@@ -4282,7 +4390,8 @@ static int mtip_pci_probe(struct pci_dev *pdev,
INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7);
pci_set_master(pdev);
- if (pci_enable_msi(pdev)) {
+ rv = pci_enable_msi(pdev);
+ if (rv) {
dev_warn(&pdev->dev,
"Unable to enable MSI interrupt.\n");
goto block_initialize_err;
@@ -4303,6 +4412,14 @@ static int mtip_pci_probe(struct pci_dev *pdev,
instance++;
if (rv != MTIP_FTL_REBUILD_MAGIC)
set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
+ else
+ rv = 0; /* device in rebuild state, return 0 from probe */
+
+ /* Add to online list even if in ftl rebuild */
+ spin_lock_irqsave(&dev_lock, flags);
+ list_add(&dd->online_list, &online_list);
+ spin_unlock_irqrestore(&dev_lock, flags);
+
goto done;
block_initialize_err:
@@ -4336,9 +4453,15 @@ static void mtip_pci_remove(struct pci_dev *pdev)
{
struct driver_data *dd = pci_get_drvdata(pdev);
int counter = 0;
+ unsigned long flags;
set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
+ spin_lock_irqsave(&dev_lock, flags);
+ list_del_init(&dd->online_list);
+ list_add(&dd->remove_list, &removing_list);
+ spin_unlock_irqrestore(&dev_lock, flags);
+
if (mtip_check_surprise_removal(pdev)) {
while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) {
counter++;
@@ -4364,6 +4487,10 @@ static void mtip_pci_remove(struct pci_dev *pdev)
pci_disable_msi(pdev);
+ spin_lock_irqsave(&dev_lock, flags);
+ list_del_init(&dd->remove_list);
+ spin_unlock_irqrestore(&dev_lock, flags);
+
kfree(dd);
pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
}
@@ -4511,6 +4638,11 @@ static int __init mtip_init(void)
pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
+ spin_lock_init(&dev_lock);
+
+ INIT_LIST_HEAD(&online_list);
+ INIT_LIST_HEAD(&removing_list);
+
/* Allocate a major block device number to use with this driver. */
error = register_blkdev(0, MTIP_DRV_NAME);
if (error <= 0) {
@@ -4520,11 +4652,18 @@ static int __init mtip_init(void)
}
mtip_major = error;
- if (!dfs_parent) {
- dfs_parent = debugfs_create_dir("rssd", NULL);
- if (IS_ERR_OR_NULL(dfs_parent)) {
- pr_warn("Error creating debugfs parent\n");
- dfs_parent = NULL;
+ dfs_parent = debugfs_create_dir("rssd", NULL);
+ if (IS_ERR_OR_NULL(dfs_parent)) {
+ pr_warn("Error creating debugfs parent\n");
+ dfs_parent = NULL;
+ }
+ if (dfs_parent) {
+ dfs_device_status = debugfs_create_file("device_status",
+ S_IRUGO, dfs_parent, NULL,
+ &mtip_device_status_fops);
+ if (IS_ERR_OR_NULL(dfs_device_status)) {
+ pr_err("Error creating device_status node\n");
+ dfs_device_status = NULL;
}
}
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 3bffff5f670c..8e8334c9dd0f 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -129,9 +129,9 @@ enum {
MTIP_PF_EH_ACTIVE_BIT = 1, /* error handling */
MTIP_PF_SE_ACTIVE_BIT = 2, /* secure erase */
MTIP_PF_DM_ACTIVE_BIT = 3, /* download microcde */
- MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) | \
- (1 << MTIP_PF_EH_ACTIVE_BIT) | \
- (1 << MTIP_PF_SE_ACTIVE_BIT) | \
+ MTIP_PF_PAUSE_IO = ((1 << MTIP_PF_IC_ACTIVE_BIT) |
+ (1 << MTIP_PF_EH_ACTIVE_BIT) |
+ (1 << MTIP_PF_SE_ACTIVE_BIT) |
(1 << MTIP_PF_DM_ACTIVE_BIT)),
MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
@@ -144,9 +144,9 @@ enum {
MTIP_DDF_REMOVE_PENDING_BIT = 1,
MTIP_DDF_OVER_TEMP_BIT = 2,
MTIP_DDF_WRITE_PROTECT_BIT = 3,
- MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | \
- (1 << MTIP_DDF_SEC_LOCK_BIT) | \
- (1 << MTIP_DDF_OVER_TEMP_BIT) | \
+ MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) |
+ (1 << MTIP_DDF_SEC_LOCK_BIT) |
+ (1 << MTIP_DDF_OVER_TEMP_BIT) |
(1 << MTIP_DDF_WRITE_PROTECT_BIT)),
MTIP_DDF_CLEANUP_BIT = 5,
@@ -180,7 +180,7 @@ struct mtip_work {
#define MTIP_TRIM_TIMEOUT_MS 240000
#define MTIP_MAX_TRIM_ENTRIES 8
-#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8
+#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8
struct mtip_trim_entry {
u32 lba; /* starting lba of region */
@@ -501,6 +501,10 @@ struct driver_data {
atomic_t irq_workers_active;
int isr_binding;
+
+ struct list_head online_list; /* linkage for online list */
+
+ struct list_head remove_list; /* linkage for removing list */
};
#endif
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7fecc784be01..037288e7874d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -856,6 +856,8 @@ static int __init nbd_init(void)
disk->queue->limits.discard_granularity = 512;
disk->queue->limits.max_discard_sectors = UINT_MAX;
disk->queue->limits.discard_zeroes_data = 0;
+ blk_queue_max_hw_sectors(disk->queue, 65536);
+ disk->queue->limits.max_sectors = 256;
}
if (register_blkdev(NBD_MAJOR, "nbd")) {
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 07fb2dfaae13..9dcefe40380b 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -135,6 +135,7 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
}
typedef void (*nvme_completion_fn)(struct nvme_dev *, void *,
@@ -237,7 +238,8 @@ static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid,
*fn = special_completion;
return CMD_CTX_INVALID;
}
- *fn = info[cmdid].fn;
+ if (fn)
+ *fn = info[cmdid].fn;
ctx = info[cmdid].ctx;
info[cmdid].fn = special_completion;
info[cmdid].ctx = CMD_CTX_COMPLETED;
@@ -335,6 +337,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
iod->offset = offsetof(struct nvme_iod, sg[nseg]);
iod->npages = -1;
iod->length = nbytes;
+ iod->nents = 0;
}
return iod;
@@ -375,7 +378,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
struct bio *bio = iod->private;
u16 status = le16_to_cpup(&cqe->status) >> 1;
- dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
+ if (iod->nents)
+ dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
nvme_free_iod(dev, iod);
if (status) {
@@ -589,7 +593,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
result = nvme_map_bio(nvmeq->q_dmadev, iod, bio, dma_dir, psegs);
if (result < 0)
- goto free_iod;
+ goto free_cmdid;
length = result;
cmnd->rw.command_id = cmdid;
@@ -609,6 +613,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
return 0;
+ free_cmdid:
+ free_cmdid(nvmeq, cmdid, NULL);
free_iod:
nvme_free_iod(nvmeq->dev, iod);
nomem:
@@ -835,8 +841,8 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
return nvme_submit_admin_cmd(dev, &c, NULL);
}
-static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
- unsigned nsid, dma_addr_t dma_addr)
+static int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+ dma_addr_t dma_addr, u32 *result)
{
struct nvme_command c;
@@ -846,7 +852,7 @@ static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
c.features.prp1 = cpu_to_le64(dma_addr);
c.features.fid = cpu_to_le32(fid);
- return nvme_submit_admin_cmd(dev, &c, NULL);
+ return nvme_submit_admin_cmd(dev, &c, result);
}
static int nvme_set_features(struct nvme_dev *dev, unsigned fid,
@@ -906,6 +912,10 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
spin_lock_irq(&nvmeq->q_lock);
nvme_cancel_ios(nvmeq, false);
+ while (bio_list_peek(&nvmeq->sq_cong)) {
+ struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
+ bio_endio(bio, -EIO);
+ }
spin_unlock_irq(&nvmeq->q_lock);
irq_set_affinity_hint(vector, NULL);
@@ -1230,12 +1240,17 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev,
if (length != cmd.data_len)
status = -ENOMEM;
else
- status = nvme_submit_admin_cmd(dev, &c, NULL);
+ status = nvme_submit_admin_cmd(dev, &c, &cmd.result);
if (cmd.data_len) {
nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
nvme_free_iod(dev, iod);
}
+
+ if (!status && copy_to_user(&ucmd->result, &cmd.result,
+ sizeof(cmd.result)))
+ status = -EFAULT;
+
return status;
}
@@ -1523,9 +1538,9 @@ static int nvme_dev_add(struct nvme_dev *dev)
continue;
res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i,
- dma_addr + 4096);
+ dma_addr + 4096, NULL);
if (res)
- continue;
+ memset(mem + 4096, 0, 4096);
ns = nvme_alloc_ns(dev, i, mem, mem + 4096);
if (ns)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 2e7de7a59bfc..e0588c6dd86f 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2648,7 +2648,7 @@ static int pkt_seq_show(struct seq_file *m, void *p)
static int pkt_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, pkt_seq_show, PDE(inode)->data);
+ return single_open(file, pkt_seq_show, PDE_DATA(inode));
}
static const struct file_operations pkt_proc_fops = {
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 75e112d66006..06a2e53e5f37 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -525,7 +525,7 @@ static int ps3vram_proc_show(struct seq_file *m, void *v)
static int ps3vram_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, ps3vram_proc_show, PDE(inode)->data);
+ return single_open(file, ps3vram_proc_show, PDE_DATA(inode));
}
static const struct file_operations ps3vram_proc_fops = {
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 6c81a4c040b9..b7b7a88d9f68 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1264,6 +1264,32 @@ static bool obj_request_done_test(struct rbd_obj_request *obj_request)
return atomic_read(&obj_request->done) != 0;
}
+static void
+rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
+{
+ dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
+ obj_request, obj_request->img_request, obj_request->result,
+ obj_request->xferred, obj_request->length);
+ /*
+ * ENOENT means a hole in the image. We zero-fill the
+ * entire length of the request. A short read also implies
+ * zero-fill to the end of the request. Either way we
+ * update the xferred count to indicate the whole request
+ * was satisfied.
+ */
+ BUG_ON(obj_request->type != OBJ_REQUEST_BIO);
+ if (obj_request->result == -ENOENT) {
+ zero_bio_chain(obj_request->bio_list, 0);
+ obj_request->result = 0;
+ obj_request->xferred = obj_request->length;
+ } else if (obj_request->xferred < obj_request->length &&
+ !obj_request->result) {
+ zero_bio_chain(obj_request->bio_list, obj_request->xferred);
+ obj_request->xferred = obj_request->length;
+ }
+ obj_request_done_set(obj_request);
+}
+
static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
{
dout("%s: obj %p cb %p\n", __func__, obj_request,
@@ -1284,23 +1310,10 @@ static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
{
dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
obj_request->result, obj_request->xferred, obj_request->length);
- /*
- * ENOENT means a hole in the object. We zero-fill the
- * entire length of the request. A short read also implies
- * zero-fill to the end of the request. Either way we
- * update the xferred count to indicate the whole request
- * was satisfied.
- */
- if (obj_request->result == -ENOENT) {
- zero_bio_chain(obj_request->bio_list, 0);
- obj_request->result = 0;
- obj_request->xferred = obj_request->length;
- } else if (obj_request->xferred < obj_request->length &&
- !obj_request->result) {
- zero_bio_chain(obj_request->bio_list, obj_request->xferred);
- obj_request->xferred = obj_request->length;
- }
- obj_request_done_set(obj_request);
+ if (obj_request->img_request)
+ rbd_img_obj_request_read_callback(obj_request);
+ else
+ obj_request_done_set(obj_request);
}
static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
@@ -1729,9 +1742,10 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request)
struct rbd_device *rbd_dev = img_request->rbd_dev;
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
struct rbd_obj_request *obj_request;
+ struct rbd_obj_request *next_obj_request;
dout("%s: img %p\n", __func__, img_request);
- for_each_obj_request(img_request, obj_request) {
+ for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
int ret;
obj_request->callback = rbd_img_obj_callback;
diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile
index f35cd0b71f7b..b1c53c0aa450 100644
--- a/drivers/block/rsxx/Makefile
+++ b/drivers/block/rsxx/Makefile
@@ -1,2 +1,2 @@
obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o
-rsxx-y := config.o core.o cregs.o dev.o dma.o
+rsxx-objs := config.o core.o cregs.o dev.o dma.o
diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c
index a295e7e9ee41..10cd530d3e10 100644
--- a/drivers/block/rsxx/config.c
+++ b/drivers/block/rsxx/config.c
@@ -29,15 +29,13 @@
#include "rsxx_priv.h"
#include "rsxx_cfg.h"
-static void initialize_config(void *config)
+static void initialize_config(struct rsxx_card_cfg *cfg)
{
- struct rsxx_card_cfg *cfg = config;
-
cfg->hdr.version = RSXX_CFG_VERSION;
cfg->data.block_size = RSXX_HW_BLK_SIZE;
cfg->data.stripe_size = RSXX_HW_BLK_SIZE;
- cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM;
+ cfg->data.vendor_id = RSXX_VENDOR_ID_IBM;
cfg->data.cache_order = (-1);
cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED;
cfg->data.intr_coal.count = 0;
@@ -181,7 +179,7 @@ int rsxx_load_config(struct rsxx_cardinfo *card)
} else {
dev_info(CARD_TO_DEV(card),
"Initializing card configuration.\n");
- initialize_config(card);
+ initialize_config(&card->config);
st = rsxx_save_config(card);
if (st)
return st;
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index e5162487686a..5af21f2db29c 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -30,6 +30,7 @@
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/bitops.h>
+#include <linux/delay.h>
#include <linux/genhd.h>
#include <linux/idr.h>
@@ -39,8 +40,8 @@
#define NO_LEGACY 0
-MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver");
-MODULE_AUTHOR("IBM <support@ramsan.com>");
+MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver");
+MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRIVER_VERSION);
@@ -52,6 +53,13 @@ static DEFINE_IDA(rsxx_disk_ida);
static DEFINE_SPINLOCK(rsxx_ida_lock);
/*----------------- Interrupt Control & Handling -------------------*/
+
+static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
+{
+ card->isr_mask = 0;
+ card->ier_mask = 0;
+}
+
static void __enable_intr(unsigned int *mask, unsigned int intr)
{
*mask |= intr;
@@ -71,7 +79,8 @@ static void __disable_intr(unsigned int *mask, unsigned int intr)
*/
void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)
{
- if (unlikely(card->halt))
+ if (unlikely(card->halt) ||
+ unlikely(card->eeh_state))
return;
__enable_intr(&card->ier_mask, intr);
@@ -80,6 +89,9 @@ void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)
void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)
{
+ if (unlikely(card->eeh_state))
+ return;
+
__disable_intr(&card->ier_mask, intr);
iowrite32(card->ier_mask, card->regmap + IER);
}
@@ -87,7 +99,8 @@ void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)
void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr)
{
- if (unlikely(card->halt))
+ if (unlikely(card->halt) ||
+ unlikely(card->eeh_state))
return;
__enable_intr(&card->isr_mask, intr);
@@ -97,6 +110,9 @@ void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr)
{
+ if (unlikely(card->eeh_state))
+ return;
+
__disable_intr(&card->isr_mask, intr);
__disable_intr(&card->ier_mask, intr);
iowrite32(card->ier_mask, card->regmap + IER);
@@ -115,6 +131,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
do {
reread_isr = 0;
+ if (unlikely(card->eeh_state))
+ break;
+
isr = ioread32(card->regmap + ISR);
if (isr == 0xffffffff) {
/*
@@ -161,9 +180,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
}
/*----------------- Card Event Handler -------------------*/
-static char *rsxx_card_state_to_str(unsigned int state)
+static const char * const rsxx_card_state_to_str(unsigned int state)
{
- static char *state_strings[] = {
+ static const char * const state_strings[] = {
"Unknown", "Shutdown", "Starting", "Formatting",
"Uninitialized", "Good", "Shutting Down",
"Fault", "Read Only Fault", "dStroying"
@@ -304,6 +323,192 @@ static int card_shutdown(struct rsxx_cardinfo *card)
return 0;
}
+static int rsxx_eeh_frozen(struct pci_dev *dev)
+{
+ struct rsxx_cardinfo *card = pci_get_drvdata(dev);
+ int i;
+ int st;
+
+ dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n");
+
+ card->eeh_state = 1;
+ rsxx_mask_interrupts(card);
+
+ /*
+ * We need to guarantee that the write for eeh_state and masking
+ * interrupts does not become reordered. This will prevent a possible
+ * race condition with the EEH code.
+ */
+ wmb();
+
+ pci_disable_device(dev);
+
+ st = rsxx_eeh_save_issued_dmas(card);
+ if (st)
+ return st;
+
+ rsxx_eeh_save_issued_creg(card);
+
+ for (i = 0; i < card->n_targets; i++) {
+ if (card->ctrl[i].status.buf)
+ pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
+ card->ctrl[i].status.buf,
+ card->ctrl[i].status.dma_addr);
+ if (card->ctrl[i].cmd.buf)
+ pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
+ card->ctrl[i].cmd.buf,
+ card->ctrl[i].cmd.dma_addr);
+ }
+
+ return 0;
+}
+
+static void rsxx_eeh_failure(struct pci_dev *dev)
+{
+ struct rsxx_cardinfo *card = pci_get_drvdata(dev);
+ int i;
+
+ dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");
+
+ card->eeh_state = 1;
+
+ for (i = 0; i < card->n_targets; i++)
+ del_timer_sync(&card->ctrl[i].activity_timer);
+
+ rsxx_eeh_cancel_dmas(card);
+}
+
+static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
+{
+ unsigned int status;
+ int iter = 0;
+
+ /* We need to wait for the hardware to reset */
+ while (iter++ < 10) {
+ status = ioread32(card->regmap + PCI_RECONFIG);
+
+ if (status & RSXX_FLUSH_BUSY) {
+ ssleep(1);
+ continue;
+ }
+
+ if (status & RSXX_FLUSH_TIMEOUT)
+ dev_warn(CARD_TO_DEV(card), "HW: flash controller timeout\n");
+ return 0;
+ }
+
+ /* Hardware failed resetting itself. */
+ return -1;
+}
+
+static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev,
+ enum pci_channel_state error)
+{
+ int st;
+
+ if (dev->revision < RSXX_EEH_SUPPORT)
+ return PCI_ERS_RESULT_NONE;
+
+ if (error == pci_channel_io_perm_failure) {
+ rsxx_eeh_failure(dev);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ st = rsxx_eeh_frozen(dev);
+ if (st) {
+ dev_err(&dev->dev, "Slot reset setup failed\n");
+ rsxx_eeh_failure(dev);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
+{
+ struct rsxx_cardinfo *card = pci_get_drvdata(dev);
+ unsigned long flags;
+ int i;
+ int st;
+
+ dev_warn(&dev->dev,
+ "IBM FlashSystem PCI: recovering from slot reset.\n");
+
+ st = pci_enable_device(dev);
+ if (st)
+ goto failed_hw_setup;
+
+ pci_set_master(dev);
+
+ st = rsxx_eeh_fifo_flush_poll(card);
+ if (st)
+ goto failed_hw_setup;
+
+ rsxx_dma_queue_reset(card);
+
+ for (i = 0; i < card->n_targets; i++) {
+ st = rsxx_hw_buffers_init(dev, &card->ctrl[i]);
+ if (st)
+ goto failed_hw_buffers_init;
+ }
+
+ if (card->config_valid)
+ rsxx_dma_configure(card);
+
+ /* Clears the ISR register from spurious interrupts */
+ st = ioread32(card->regmap + ISR);
+
+ card->eeh_state = 0;
+
+ st = rsxx_eeh_remap_dmas(card);
+ if (st)
+ goto failed_remap_dmas;
+
+ spin_lock_irqsave(&card->irq_lock, flags);
+ if (card->n_targets & RSXX_MAX_TARGETS)
+ rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G);
+ else
+ rsxx_enable_ier_and_isr(card, CR_INTR_ALL_C);
+ spin_unlock_irqrestore(&card->irq_lock, flags);
+
+ rsxx_kick_creg_queue(card);
+
+ for (i = 0; i < card->n_targets; i++) {
+ spin_lock(&card->ctrl[i].queue_lock);
+ if (list_empty(&card->ctrl[i].queue)) {
+ spin_unlock(&card->ctrl[i].queue_lock);
+ continue;
+ }
+ spin_unlock(&card->ctrl[i].queue_lock);
+
+ queue_work(card->ctrl[i].issue_wq,
+ &card->ctrl[i].issue_dma_work);
+ }
+
+ dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n");
+
+ return PCI_ERS_RESULT_RECOVERED;
+
+failed_hw_buffers_init:
+failed_remap_dmas:
+ for (i = 0; i < card->n_targets; i++) {
+ if (card->ctrl[i].status.buf)
+ pci_free_consistent(card->dev,
+ STATUS_BUFFER_SIZE8,
+ card->ctrl[i].status.buf,
+ card->ctrl[i].status.dma_addr);
+ if (card->ctrl[i].cmd.buf)
+ pci_free_consistent(card->dev,
+ COMMAND_BUFFER_SIZE8,
+ card->ctrl[i].cmd.buf,
+ card->ctrl[i].cmd.dma_addr);
+ }
+failed_hw_setup:
+ rsxx_eeh_failure(dev);
+ return PCI_ERS_RESULT_DISCONNECT;
+
+}
+
/*----------------- Driver Initialization & Setup -------------------*/
/* Returns: 0 if the driver is compatible with the device
-1 if the driver is NOT compatible with the device */
@@ -383,6 +588,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
spin_lock_init(&card->irq_lock);
card->halt = 0;
+ card->eeh_state = 0;
spin_lock_irq(&card->irq_lock);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
@@ -538,9 +744,6 @@ static void rsxx_pci_remove(struct pci_dev *dev)
rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
spin_unlock_irqrestore(&card->irq_lock, flags);
- /* Prevent work_structs from re-queuing themselves. */
- card->halt = 1;
-
cancel_work_sync(&card->event_work);
rsxx_destroy_dev(card);
@@ -549,6 +752,10 @@ static void rsxx_pci_remove(struct pci_dev *dev)
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
spin_unlock_irqrestore(&card->irq_lock, flags);
+
+ /* Prevent work_structs from re-queuing themselves. */
+ card->halt = 1;
+
free_irq(dev->irq, card);
if (!force_legacy)
@@ -592,11 +799,14 @@ static void rsxx_pci_shutdown(struct pci_dev *dev)
card_shutdown(card);
}
+static const struct pci_error_handlers rsxx_err_handler = {
+ .error_detected = rsxx_error_detected,
+ .slot_reset = rsxx_slot_reset,
+};
+
static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = {
- {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)},
- {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)},
- {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)},
- {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)},
+ {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)},
+ {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)},
{0,},
};
@@ -609,6 +819,7 @@ static struct pci_driver rsxx_pci_driver = {
.remove = rsxx_pci_remove,
.suspend = rsxx_pci_suspend,
.shutdown = rsxx_pci_shutdown,
+ .err_handler = &rsxx_err_handler,
};
static int __init rsxx_core_init(void)
diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c
index 80bbe639fccd..4b5c020a0a65 100644
--- a/drivers/block/rsxx/cregs.c
+++ b/drivers/block/rsxx/cregs.c
@@ -58,7 +58,7 @@ static struct kmem_cache *creg_cmd_pool;
#error Unknown endianess!!! Aborting...
#endif
-static void copy_to_creg_data(struct rsxx_cardinfo *card,
+static int copy_to_creg_data(struct rsxx_cardinfo *card,
int cnt8,
void *buf,
unsigned int stream)
@@ -66,6 +66,9 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card,
int i = 0;
u32 *data = buf;
+ if (unlikely(card->eeh_state))
+ return -EIO;
+
for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
/*
* Firmware implementation makes it necessary to byte swap on
@@ -76,10 +79,12 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card,
else
iowrite32(data[i], card->regmap + CREG_DATA(i));
}
+
+ return 0;
}
-static void copy_from_creg_data(struct rsxx_cardinfo *card,
+static int copy_from_creg_data(struct rsxx_cardinfo *card,
int cnt8,
void *buf,
unsigned int stream)
@@ -87,6 +92,9 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card,
int i = 0;
u32 *data = buf;
+ if (unlikely(card->eeh_state))
+ return -EIO;
+
for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
/*
* Firmware implementation makes it necessary to byte swap on
@@ -97,41 +105,31 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card,
else
data[i] = ioread32(card->regmap + CREG_DATA(i));
}
-}
-
-static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card)
-{
- struct creg_cmd *cmd;
- /*
- * Spin lock is needed because this can be called in atomic/interrupt
- * context.
- */
- spin_lock_bh(&card->creg_ctrl.lock);
- cmd = card->creg_ctrl.active_cmd;
- card->creg_ctrl.active_cmd = NULL;
- spin_unlock_bh(&card->creg_ctrl.lock);
-
- return cmd;
+ return 0;
}
static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd)
{
+ int st;
+
+ if (unlikely(card->eeh_state))
+ return;
+
iowrite32(cmd->addr, card->regmap + CREG_ADD);
iowrite32(cmd->cnt8, card->regmap + CREG_CNT);
if (cmd->op == CREG_OP_WRITE) {
- if (cmd->buf)
- copy_to_creg_data(card, cmd->cnt8,
- cmd->buf, cmd->stream);
+ if (cmd->buf) {
+ st = copy_to_creg_data(card, cmd->cnt8,
+ cmd->buf, cmd->stream);
+ if (st)
+ return;
+ }
}
- /*
- * Data copy must complete before initiating the command. This is
- * needed for weakly ordered processors (i.e. PowerPC), so that all
- * neccessary registers are written before we kick the hardware.
- */
- wmb();
+ if (unlikely(card->eeh_state))
+ return;
/* Setting the valid bit will kick off the command. */
iowrite32(cmd->op, card->regmap + CREG_CMD);
@@ -196,11 +194,11 @@ static int creg_queue_cmd(struct rsxx_cardinfo *card,
cmd->cb_private = cb_private;
cmd->status = 0;
- spin_lock(&card->creg_ctrl.lock);
+ spin_lock_bh(&card->creg_ctrl.lock);
list_add_tail(&cmd->list, &card->creg_ctrl.queue);
card->creg_ctrl.q_depth++;
creg_kick_queue(card);
- spin_unlock(&card->creg_ctrl.lock);
+ spin_unlock_bh(&card->creg_ctrl.lock);
return 0;
}
@@ -210,7 +208,11 @@ static void creg_cmd_timed_out(unsigned long data)
struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data;
struct creg_cmd *cmd;
- cmd = pop_active_cmd(card);
+ spin_lock(&card->creg_ctrl.lock);
+ cmd = card->creg_ctrl.active_cmd;
+ card->creg_ctrl.active_cmd = NULL;
+ spin_unlock(&card->creg_ctrl.lock);
+
if (cmd == NULL) {
card->creg_ctrl.creg_stats.creg_timeout++;
dev_warn(CARD_TO_DEV(card),
@@ -247,7 +249,11 @@ static void creg_cmd_done(struct work_struct *work)
if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0)
card->creg_ctrl.creg_stats.failed_cancel_timer++;
- cmd = pop_active_cmd(card);
+ spin_lock_bh(&card->creg_ctrl.lock);
+ cmd = card->creg_ctrl.active_cmd;
+ card->creg_ctrl.active_cmd = NULL;
+ spin_unlock_bh(&card->creg_ctrl.lock);
+
if (cmd == NULL) {
dev_err(CARD_TO_DEV(card),
"Spurious creg interrupt!\n");
@@ -287,7 +293,7 @@ static void creg_cmd_done(struct work_struct *work)
goto creg_done;
}
- copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream);
+ st = copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream);
}
creg_done:
@@ -296,10 +302,10 @@ creg_done:
kmem_cache_free(creg_cmd_pool, cmd);
- spin_lock(&card->creg_ctrl.lock);
+ spin_lock_bh(&card->creg_ctrl.lock);
card->creg_ctrl.active = 0;
creg_kick_queue(card);
- spin_unlock(&card->creg_ctrl.lock);
+ spin_unlock_bh(&card->creg_ctrl.lock);
}
static void creg_reset(struct rsxx_cardinfo *card)
@@ -324,7 +330,7 @@ static void creg_reset(struct rsxx_cardinfo *card)
"Resetting creg interface for recovery\n");
/* Cancel outstanding commands */
- spin_lock(&card->creg_ctrl.lock);
+ spin_lock_bh(&card->creg_ctrl.lock);
list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
list_del(&cmd->list);
card->creg_ctrl.q_depth--;
@@ -345,7 +351,7 @@ static void creg_reset(struct rsxx_cardinfo *card)
card->creg_ctrl.active = 0;
}
- spin_unlock(&card->creg_ctrl.lock);
+ spin_unlock_bh(&card->creg_ctrl.lock);
card->creg_ctrl.reset = 0;
spin_lock_irqsave(&card->irq_lock, flags);
@@ -399,12 +405,12 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card,
return st;
/*
- * This timeout is neccessary for unresponsive hardware. The additional
+ * This timeout is necessary for unresponsive hardware. The additional
* 20 seconds to used to guarantee that each cregs requests has time to
* complete.
*/
- timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC *
- card->creg_ctrl.q_depth) + 20000);
+ timeout = msecs_to_jiffies(CREG_TIMEOUT_MSEC *
+ card->creg_ctrl.q_depth + 20000);
/*
* The creg interface is guaranteed to complete. It has a timeout
@@ -690,6 +696,32 @@ int rsxx_reg_access(struct rsxx_cardinfo *card,
return 0;
}
+void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card)
+{
+ struct creg_cmd *cmd = NULL;
+
+ cmd = card->creg_ctrl.active_cmd;
+ card->creg_ctrl.active_cmd = NULL;
+
+ if (cmd) {
+ del_timer_sync(&card->creg_ctrl.cmd_timer);
+
+ spin_lock_bh(&card->creg_ctrl.lock);
+ list_add(&cmd->list, &card->creg_ctrl.queue);
+ card->creg_ctrl.q_depth++;
+ card->creg_ctrl.active = 0;
+ spin_unlock_bh(&card->creg_ctrl.lock);
+ }
+}
+
+void rsxx_kick_creg_queue(struct rsxx_cardinfo *card)
+{
+ spin_lock_bh(&card->creg_ctrl.lock);
+ if (!list_empty(&card->creg_ctrl.queue))
+ creg_kick_queue(card);
+ spin_unlock_bh(&card->creg_ctrl.lock);
+}
+
/*------------ Initialization & Setup --------------*/
int rsxx_creg_setup(struct rsxx_cardinfo *card)
{
@@ -712,7 +744,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card)
int cnt = 0;
/* Cancel outstanding commands */
- spin_lock(&card->creg_ctrl.lock);
+ spin_lock_bh(&card->creg_ctrl.lock);
list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
list_del(&cmd->list);
if (cmd->cb)
@@ -737,7 +769,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card)
"Canceled active creg command\n");
kmem_cache_free(creg_cmd_pool, cmd);
}
- spin_unlock(&card->creg_ctrl.lock);
+ spin_unlock_bh(&card->creg_ctrl.lock);
cancel_work_sync(&card->creg_ctrl.done_work);
}
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 63176e67662f..0607513cfb41 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -28,7 +28,7 @@
struct rsxx_dma {
struct list_head list;
u8 cmd;
- unsigned int laddr; /* Logical address on the ramsan */
+ unsigned int laddr; /* Logical address */
struct {
u32 off;
u32 cnt;
@@ -81,9 +81,6 @@ enum rsxx_hw_status {
HW_STATUS_FAULT = 0x08,
};
-#define STATUS_BUFFER_SIZE8 4096
-#define COMMAND_BUFFER_SIZE8 4096
-
static struct kmem_cache *rsxx_dma_pool;
struct dma_tracker {
@@ -122,7 +119,7 @@ static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8)
return tgt;
}
-static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card)
+void rsxx_dma_queue_reset(struct rsxx_cardinfo *card)
{
/* Reset all DMA Command/Status Queues */
iowrite32(DMA_QUEUE_RESET, card->regmap + RESET);
@@ -210,7 +207,8 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)
u32 q_depth = 0;
u32 intr_coal;
- if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE)
+ if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE ||
+ unlikely(card->eeh_state))
return;
for (i = 0; i < card->n_targets; i++)
@@ -223,31 +221,26 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)
}
/*----------------- RSXX DMA Handling -------------------*/
-static void rsxx_complete_dma(struct rsxx_cardinfo *card,
+static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
struct rsxx_dma *dma,
unsigned int status)
{
if (status & DMA_SW_ERR)
- printk_ratelimited(KERN_ERR
- "SW Error in DMA(cmd x%02x, laddr x%08x)\n",
- dma->cmd, dma->laddr);
+ ctrl->stats.dma_sw_err++;
if (status & DMA_HW_FAULT)
- printk_ratelimited(KERN_ERR
- "HW Fault in DMA(cmd x%02x, laddr x%08x)\n",
- dma->cmd, dma->laddr);
+ ctrl->stats.dma_hw_fault++;
if (status & DMA_CANCELLED)
- printk_ratelimited(KERN_ERR
- "DMA Cancelled(cmd x%02x, laddr x%08x)\n",
- dma->cmd, dma->laddr);
+ ctrl->stats.dma_cancelled++;
if (dma->dma_addr)
- pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma),
+ pci_unmap_page(ctrl->card->dev, dma->dma_addr,
+ get_dma_size(dma),
dma->cmd == HW_CMD_BLK_WRITE ?
PCI_DMA_TODEVICE :
PCI_DMA_FROMDEVICE);
if (dma->cb)
- dma->cb(card, dma->cb_data, status ? 1 : 0);
+ dma->cb(ctrl->card, dma->cb_data, status ? 1 : 0);
kmem_cache_free(rsxx_dma_pool, dma);
}
@@ -330,14 +323,15 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
if (requeue_cmd)
rsxx_requeue_dma(ctrl, dma);
else
- rsxx_complete_dma(ctrl->card, dma, status);
+ rsxx_complete_dma(ctrl, dma, status);
}
static void dma_engine_stalled(unsigned long data)
{
struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data;
- if (atomic_read(&ctrl->stats.hw_q_depth) == 0)
+ if (atomic_read(&ctrl->stats.hw_q_depth) == 0 ||
+ unlikely(ctrl->card->eeh_state))
return;
if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) {
@@ -369,7 +363,8 @@ static void rsxx_issue_dmas(struct work_struct *work)
ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
hw_cmd_buf = ctrl->cmd.buf;
- if (unlikely(ctrl->card->halt))
+ if (unlikely(ctrl->card->halt) ||
+ unlikely(ctrl->card->eeh_state))
return;
while (1) {
@@ -397,7 +392,7 @@ static void rsxx_issue_dmas(struct work_struct *work)
*/
if (unlikely(ctrl->card->dma_fault)) {
push_tracker(ctrl->trackers, tag);
- rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED);
+ rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
continue;
}
@@ -432,19 +427,15 @@ static void rsxx_issue_dmas(struct work_struct *work)
/* Let HW know we've queued commands. */
if (cmds_pending) {
- /*
- * We must guarantee that the CPU writes to 'ctrl->cmd.buf'
- * (which is in PCI-consistent system-memory) from the loop
- * above make it into the coherency domain before the
- * following PIO "trigger" updating the cmd.idx. A WMB is
- * sufficient. We need not explicitly CPU cache-flush since
- * the memory is a PCI-consistent (ie; coherent) mapping.
- */
- wmb();
-
atomic_add(cmds_pending, &ctrl->stats.hw_q_depth);
mod_timer(&ctrl->activity_timer,
jiffies + DMA_ACTIVITY_TIMEOUT);
+
+ if (unlikely(ctrl->card->eeh_state)) {
+ del_timer_sync(&ctrl->activity_timer);
+ return;
+ }
+
iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
}
}
@@ -463,7 +454,8 @@ static void rsxx_dma_done(struct work_struct *work)
hw_st_buf = ctrl->status.buf;
if (unlikely(ctrl->card->halt) ||
- unlikely(ctrl->card->dma_fault))
+ unlikely(ctrl->card->dma_fault) ||
+ unlikely(ctrl->card->eeh_state))
return;
count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count);
@@ -508,7 +500,7 @@ static void rsxx_dma_done(struct work_struct *work)
if (status)
rsxx_handle_dma_error(ctrl, dma, status);
else
- rsxx_complete_dma(ctrl->card, dma, 0);
+ rsxx_complete_dma(ctrl, dma, 0);
push_tracker(ctrl->trackers, tag);
@@ -727,20 +719,54 @@ bvec_err:
/*----------------- DMA Engine Initialization & Setup -------------------*/
+int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl)
+{
+ ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8,
+ &ctrl->status.dma_addr);
+ ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8,
+ &ctrl->cmd.dma_addr);
+ if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL)
+ return -ENOMEM;
+
+ memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8);
+ iowrite32(lower_32_bits(ctrl->status.dma_addr),
+ ctrl->regmap + SB_ADD_LO);
+ iowrite32(upper_32_bits(ctrl->status.dma_addr),
+ ctrl->regmap + SB_ADD_HI);
+
+ memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8);
+ iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO);
+ iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI);
+
+ ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT);
+ if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) {
+ dev_crit(&dev->dev, "Failed reading status cnt x%x\n",
+ ctrl->status.idx);
+ return -EINVAL;
+ }
+ iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT);
+ iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT);
+
+ ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX);
+ if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) {
+ dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n",
+ ctrl->status.idx);
+ return -EINVAL;
+ }
+ iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX);
+ iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
+
+ return 0;
+}
+
static int rsxx_dma_ctrl_init(struct pci_dev *dev,
struct rsxx_dma_ctrl *ctrl)
{
int i;
+ int st;
memset(&ctrl->stats, 0, sizeof(ctrl->stats));
- ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8,
- &ctrl->status.dma_addr);
- ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8,
- &ctrl->cmd.dma_addr);
- if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL)
- return -ENOMEM;
-
ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8);
if (!ctrl->trackers)
return -ENOMEM;
@@ -770,35 +796,9 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);
INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done);
- memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8);
- iowrite32(lower_32_bits(ctrl->status.dma_addr),
- ctrl->regmap + SB_ADD_LO);
- iowrite32(upper_32_bits(ctrl->status.dma_addr),
- ctrl->regmap + SB_ADD_HI);
-
- memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8);
- iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO);
- iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI);
-
- ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT);
- if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) {
- dev_crit(&dev->dev, "Failed reading status cnt x%x\n",
- ctrl->status.idx);
- return -EINVAL;
- }
- iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT);
- iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT);
-
- ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX);
- if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) {
- dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n",
- ctrl->status.idx);
- return -EINVAL;
- }
- iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX);
- iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
-
- wmb();
+ st = rsxx_hw_buffers_init(dev, ctrl);
+ if (st)
+ return st;
return 0;
}
@@ -834,7 +834,7 @@ static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card,
return 0;
}
-static int rsxx_dma_configure(struct rsxx_cardinfo *card)
+int rsxx_dma_configure(struct rsxx_cardinfo *card)
{
u32 intr_coal;
@@ -980,6 +980,103 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
}
}
+int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
+{
+ int i;
+ int j;
+ int cnt;
+ struct rsxx_dma *dma;
+ struct list_head *issued_dmas;
+
+ issued_dmas = kzalloc(sizeof(*issued_dmas) * card->n_targets,
+ GFP_KERNEL);
+ if (!issued_dmas)
+ return -ENOMEM;
+
+ for (i = 0; i < card->n_targets; i++) {
+ INIT_LIST_HEAD(&issued_dmas[i]);
+ cnt = 0;
+ for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
+ dma = get_tracker_dma(card->ctrl[i].trackers, j);
+ if (dma == NULL)
+ continue;
+
+ if (dma->cmd == HW_CMD_BLK_WRITE)
+ card->ctrl[i].stats.writes_issued--;
+ else if (dma->cmd == HW_CMD_BLK_DISCARD)
+ card->ctrl[i].stats.discards_issued--;
+ else
+ card->ctrl[i].stats.reads_issued--;
+
+ list_add_tail(&dma->list, &issued_dmas[i]);
+ push_tracker(card->ctrl[i].trackers, j);
+ cnt++;
+ }
+
+ spin_lock(&card->ctrl[i].queue_lock);
+ list_splice(&issued_dmas[i], &card->ctrl[i].queue);
+
+ atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
+ card->ctrl[i].stats.sw_q_depth += cnt;
+ card->ctrl[i].e_cnt = 0;
+
+ list_for_each_entry(dma, &card->ctrl[i].queue, list) {
+ if (dma->dma_addr)
+ pci_unmap_page(card->dev, dma->dma_addr,
+ get_dma_size(dma),
+ dma->cmd == HW_CMD_BLK_WRITE ?
+ PCI_DMA_TODEVICE :
+ PCI_DMA_FROMDEVICE);
+ }
+ spin_unlock(&card->ctrl[i].queue_lock);
+ }
+
+ kfree(issued_dmas);
+
+ return 0;
+}
+
+void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card)
+{
+ struct rsxx_dma *dma;
+ struct rsxx_dma *tmp;
+ int i;
+
+ for (i = 0; i < card->n_targets; i++) {
+ spin_lock(&card->ctrl[i].queue_lock);
+ list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) {
+ list_del(&dma->list);
+
+ rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED);
+ }
+ spin_unlock(&card->ctrl[i].queue_lock);
+ }
+}
+
+int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
+{
+ struct rsxx_dma *dma;
+ int i;
+
+ for (i = 0; i < card->n_targets; i++) {
+ spin_lock(&card->ctrl[i].queue_lock);
+ list_for_each_entry(dma, &card->ctrl[i].queue, list) {
+ dma->dma_addr = pci_map_page(card->dev, dma->page,
+ dma->pg_off, get_dma_size(dma),
+ dma->cmd == HW_CMD_BLK_WRITE ?
+ PCI_DMA_TODEVICE :
+ PCI_DMA_FROMDEVICE);
+ if (!dma->dma_addr) {
+ spin_unlock(&card->ctrl[i].queue_lock);
+ kmem_cache_free(rsxx_dma_pool, dma);
+ return -ENOMEM;
+ }
+ }
+ spin_unlock(&card->ctrl[i].queue_lock);
+ }
+
+ return 0;
+}
int rsxx_dma_init(void)
{
diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h
index 2e50b65902b7..24ba3642bd89 100644
--- a/drivers/block/rsxx/rsxx.h
+++ b/drivers/block/rsxx/rsxx.h
@@ -27,15 +27,17 @@
/*----------------- IOCTL Definitions -------------------*/
+#define RSXX_MAX_DATA 8
+
struct rsxx_reg_access {
__u32 addr;
__u32 cnt;
__u32 stat;
__u32 stream;
- __u32 data[8];
+ __u32 data[RSXX_MAX_DATA];
};
-#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32)))
+#define RSXX_MAX_REG_CNT (RSXX_MAX_DATA * (sizeof(__u32)))
#define RSXX_IOC_MAGIC 'r'
diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h
index c025fe5fdb70..f384c943846d 100644
--- a/drivers/block/rsxx/rsxx_cfg.h
+++ b/drivers/block/rsxx/rsxx_cfg.h
@@ -58,7 +58,7 @@ struct rsxx_card_cfg {
};
/* Vendor ID Values */
-#define RSXX_VENDOR_ID_TMS_IBM 0
+#define RSXX_VENDOR_ID_IBM 0
#define RSXX_VENDOR_ID_DSI 1
#define RSXX_VENDOR_COUNT 2
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index a1ac907d8f4c..382e8bf5c03b 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -45,16 +45,13 @@
struct proc_cmd;
-#define PCI_VENDOR_ID_TMS_IBM 0x15B6
-#define PCI_DEVICE_ID_RS70_FLASH 0x0019
-#define PCI_DEVICE_ID_RS70D_FLASH 0x001A
-#define PCI_DEVICE_ID_RS80_FLASH 0x001C
-#define PCI_DEVICE_ID_RS81_FLASH 0x001E
+#define PCI_DEVICE_ID_FS70_FLASH 0x04A9
+#define PCI_DEVICE_ID_FS80_FLASH 0x04AA
#define RS70_PCI_REV_SUPPORTED 4
#define DRIVER_NAME "rsxx"
-#define DRIVER_VERSION "3.7"
+#define DRIVER_VERSION "4.0"
/* Block size is 4096 */
#define RSXX_HW_BLK_SHIFT 12
@@ -67,6 +64,9 @@ struct proc_cmd;
#define RSXX_MAX_OUTSTANDING_CMDS 255
#define RSXX_CS_IDX_MASK 0xff
+#define STATUS_BUFFER_SIZE8 4096
+#define COMMAND_BUFFER_SIZE8 4096
+
#define RSXX_MAX_TARGETS 8
struct dma_tracker_list;
@@ -91,6 +91,9 @@ struct rsxx_dma_stats {
u32 discards_failed;
u32 done_rescheduled;
u32 issue_rescheduled;
+ u32 dma_sw_err;
+ u32 dma_hw_fault;
+ u32 dma_cancelled;
u32 sw_q_depth; /* Number of DMAs on the SW queue. */
atomic_t hw_q_depth; /* Number of DMAs queued to HW. */
};
@@ -116,6 +119,7 @@ struct rsxx_dma_ctrl {
struct rsxx_cardinfo {
struct pci_dev *dev;
unsigned int halt;
+ unsigned int eeh_state;
void __iomem *regmap;
spinlock_t irq_lock;
@@ -224,6 +228,7 @@ enum rsxx_pci_regmap {
PERF_RD512_HI = 0xac,
PERF_WR512_LO = 0xb0,
PERF_WR512_HI = 0xb4,
+ PCI_RECONFIG = 0xb8,
};
enum rsxx_intr {
@@ -237,6 +242,8 @@ enum rsxx_intr {
CR_INTR_DMA5 = 0x00000080,
CR_INTR_DMA6 = 0x00000100,
CR_INTR_DMA7 = 0x00000200,
+ CR_INTR_ALL_C = 0x0000003f,
+ CR_INTR_ALL_G = 0x000003ff,
CR_INTR_DMA_ALL = 0x000003f5,
CR_INTR_ALL = 0xffffffff,
};
@@ -253,8 +260,14 @@ enum rsxx_pci_reset {
DMA_QUEUE_RESET = 0x00000001,
};
+enum rsxx_hw_fifo_flush {
+ RSXX_FLUSH_BUSY = 0x00000002,
+ RSXX_FLUSH_TIMEOUT = 0x00000004,
+};
+
enum rsxx_pci_revision {
RSXX_DISCARD_SUPPORT = 2,
+ RSXX_EEH_SUPPORT = 3,
};
enum rsxx_creg_cmd {
@@ -360,11 +373,17 @@ int rsxx_dma_setup(struct rsxx_cardinfo *card);
void rsxx_dma_destroy(struct rsxx_cardinfo *card);
int rsxx_dma_init(void);
void rsxx_dma_cleanup(void);
+void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
+int rsxx_dma_configure(struct rsxx_cardinfo *card);
int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
struct bio *bio,
atomic_t *n_dmas,
rsxx_dma_cb cb,
void *cb_data);
+int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl);
+int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card);
+void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card);
+int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);
/***** cregs.c *****/
int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr,
@@ -389,10 +408,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card);
void rsxx_creg_destroy(struct rsxx_cardinfo *card);
int rsxx_creg_init(void);
void rsxx_creg_cleanup(void);
-
int rsxx_reg_access(struct rsxx_cardinfo *card,
struct rsxx_reg_access __user *ucmd,
int read);
+void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card);
+void rsxx_kick_creg_queue(struct rsxx_cardinfo *card);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index de1f319f7bd7..dd5b2fed97e9 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -164,7 +164,7 @@ static void make_response(struct xen_blkif *blkif, u64 id,
#define foreach_grant_safe(pos, n, rbtree, node) \
for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \
- (n) = rb_next(&(pos)->node); \
+ (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL; \
&(pos)->node != NULL; \
(pos) = container_of(n, typeof(*(pos)), node), \
(n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
@@ -381,8 +381,8 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
static void print_stats(struct xen_blkif *blkif)
{
- pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d"
- " | ds %4d\n",
+ pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
+ " | ds %4llu\n",
current->comm, blkif->st_oo_req,
blkif->st_rd_req, blkif->st_wr_req,
blkif->st_f_req, blkif->st_ds_req);
@@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg)
}
struct seg_buf {
- unsigned long buf;
+ unsigned int offset;
unsigned int nsec;
};
/*
@@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req,
* If this is a new persistent grant
* save the handler
*/
- persistent_gnts[i]->handle = map[j].handle;
- persistent_gnts[i]->dev_bus_addr =
- map[j++].dev_bus_addr;
+ persistent_gnts[i]->handle = map[j++].handle;
}
pending_handle(pending_req, i) =
persistent_gnts[i]->handle;
if (ret)
continue;
-
- seg[i].buf = persistent_gnts[i]->dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
} else {
- pending_handle(pending_req, i) = map[j].handle;
+ pending_handle(pending_req, i) = map[j++].handle;
bitmap_set(pending_req->unmap_seg, i, 1);
- if (ret) {
- j++;
+ if (ret)
continue;
- }
-
- seg[i].buf = map[j++].dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
}
+ seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
}
return ret;
}
@@ -679,6 +670,16 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
return err;
}
+static int dispatch_other_io(struct xen_blkif *blkif,
+ struct blkif_request *req,
+ struct pending_req *pending_req)
+{
+ free_req(pending_req);
+ make_response(blkif, req->u.other.id, req->operation,
+ BLKIF_RSP_EOPNOTSUPP);
+ return -EIO;
+}
+
static void xen_blk_drain_io(struct xen_blkif *blkif)
{
atomic_set(&blkif->drain, 1);
@@ -800,17 +801,30 @@ __do_block_io_op(struct xen_blkif *blkif)
/* Apply all sanity checks to /private copy/ of request. */
barrier();
- if (unlikely(req.operation == BLKIF_OP_DISCARD)) {
+
+ switch (req.operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ if (dispatch_rw_block_io(blkif, &req, pending_req))
+ goto done;
+ break;
+ case BLKIF_OP_DISCARD:
free_req(pending_req);
if (dispatch_discard_io(blkif, &req))
- break;
- } else if (dispatch_rw_block_io(blkif, &req, pending_req))
+ goto done;
break;
+ default:
+ if (dispatch_other_io(blkif, &req, pending_req))
+ goto done;
+ break;
+ }
/* Yield point for this unbounded loop. */
cond_resched();
}
-
+done:
return more_to_do;
}
@@ -904,7 +918,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",
operation == READ ? "read" : "write",
preq.sector_number,
- preq.sector_number + preq.nr_sects, preq.dev);
+ preq.sector_number + preq.nr_sects,
+ blkif->vbd.pdevice);
goto fail_response;
}
@@ -947,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
(bio_add_page(bio,
pages[i],
seg[i].nsec << 9,
- seg[i].buf & ~PAGE_MASK) == 0)) {
+ seg[i].offset) == 0)) {
bio = bio_alloc(GFP_KERNEL, nseg-i);
if (unlikely(bio == NULL))
@@ -977,13 +992,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
bio->bi_end_io = end_block_io_op;
}
- /*
- * We set it one so that the last submit_bio does not have to call
- * atomic_inc.
- */
atomic_set(&pending_req->pendcnt, nbio);
-
- /* Get a reference count for the disk queue and start sending I/O */
blk_start_plug(&plug);
for (i = 0; i < nbio; i++)
@@ -1011,6 +1020,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
fail_put_bio:
for (i = 0; i < nbio; i++)
bio_put(biolist[i]);
+ atomic_set(&pending_req->pendcnt, 1);
__end_block_io_op(pending_req, -EINVAL);
msleep(1); /* back off a bit */
return -EIO;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 6072390c7f57..60103e2517ba 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -77,11 +77,18 @@ struct blkif_x86_32_request_discard {
uint64_t nr_sectors;
} __attribute__((__packed__));
+struct blkif_x86_32_request_other {
+ uint8_t _pad1;
+ blkif_vdev_t _pad2;
+ uint64_t id; /* private guest value, echoed in resp */
+} __attribute__((__packed__));
+
struct blkif_x86_32_request {
uint8_t operation; /* BLKIF_OP_??? */
union {
struct blkif_x86_32_request_rw rw;
struct blkif_x86_32_request_discard discard;
+ struct blkif_x86_32_request_other other;
} u;
} __attribute__((__packed__));
@@ -113,11 +120,19 @@ struct blkif_x86_64_request_discard {
uint64_t nr_sectors;
} __attribute__((__packed__));
+struct blkif_x86_64_request_other {
+ uint8_t _pad1;
+ blkif_vdev_t _pad2;
+ uint32_t _pad3; /* offsetof(blkif_..,u.discard.id)==8 */
+ uint64_t id; /* private guest value, echoed in resp */
+} __attribute__((__packed__));
+
struct blkif_x86_64_request {
uint8_t operation; /* BLKIF_OP_??? */
union {
struct blkif_x86_64_request_rw rw;
struct blkif_x86_64_request_discard discard;
+ struct blkif_x86_64_request_other other;
} u;
} __attribute__((__packed__));
@@ -172,7 +187,6 @@ struct persistent_gnt {
struct page *page;
grant_ref_t gnt;
grant_handle_t handle;
- uint64_t dev_bus_addr;
struct rb_node node;
};
@@ -208,13 +222,13 @@ struct xen_blkif {
/* statistics */
unsigned long st_print;
- int st_rd_req;
- int st_wr_req;
- int st_oo_req;
- int st_f_req;
- int st_ds_req;
- int st_rd_sect;
- int st_wr_sect;
+ unsigned long long st_rd_req;
+ unsigned long long st_wr_req;
+ unsigned long long st_oo_req;
+ unsigned long long st_f_req;
+ unsigned long long st_ds_req;
+ unsigned long long st_rd_sect;
+ unsigned long long st_wr_sect;
wait_queue_head_t waiting_to_free;
};
@@ -278,6 +292,11 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
default:
+ /*
+ * Don't know how to translate this op. Only get the
+ * ID so failure can be reported to the frontend.
+ */
+ dst->u.other.id = src->u.other.id;
break;
}
}
@@ -309,6 +328,11 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
default:
+ /*
+ * Don't know how to translate this op. Only get the
+ * ID so failure can be reported to the frontend.
+ */
+ dst->u.other.id = src->u.other.id;
break;
}
}
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 5e237f630c47..8bfd1bcf95ec 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -230,13 +230,13 @@ int __init xen_blkif_interface_init(void)
} \
static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
-VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
-VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
-VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
-VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
-VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req);
-VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
-VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
+VBD_SHOW(oo_req, "%llu\n", be->blkif->st_oo_req);
+VBD_SHOW(rd_req, "%llu\n", be->blkif->st_rd_req);
+VBD_SHOW(wr_req, "%llu\n", be->blkif->st_wr_req);
+VBD_SHOW(f_req, "%llu\n", be->blkif->st_f_req);
+VBD_SHOW(ds_req, "%llu\n", be->blkif->st_ds_req);
+VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
+VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);
static struct attribute *xen_vbdstat_attrs[] = {
&dev_attr_oo_req.attr,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index c3dae2e0f290..a894f88762d8 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -44,7 +44,7 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
-#include <linux/llist.h>
+#include <linux/list.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -68,13 +68,12 @@ enum blkif_state {
struct grant {
grant_ref_t gref;
unsigned long pfn;
- struct llist_node node;
+ struct list_head node;
};
struct blk_shadow {
struct blkif_request req;
struct request *request;
- unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
@@ -105,7 +104,7 @@ struct blkfront_info
struct work_struct work;
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
- struct llist_head persistent_gnts;
+ struct list_head persistent_gnts;
unsigned int persistent_gnts_c;
unsigned long shadow_free;
unsigned int feature_flush;
@@ -165,6 +164,69 @@ static int add_id_to_freelist(struct blkfront_info *info,
return 0;
}
+static int fill_grant_buffer(struct blkfront_info *info, int num)
+{
+ struct page *granted_page;
+ struct grant *gnt_list_entry, *n;
+ int i = 0;
+
+ while(i < num) {
+ gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
+ if (!gnt_list_entry)
+ goto out_of_memory;
+
+ granted_page = alloc_page(GFP_NOIO);
+ if (!granted_page) {
+ kfree(gnt_list_entry);
+ goto out_of_memory;
+ }
+
+ gnt_list_entry->pfn = page_to_pfn(granted_page);
+ gnt_list_entry->gref = GRANT_INVALID_REF;
+ list_add(&gnt_list_entry->node, &info->persistent_gnts);
+ i++;
+ }
+
+ return 0;
+
+out_of_memory:
+ list_for_each_entry_safe(gnt_list_entry, n,
+ &info->persistent_gnts, node) {
+ list_del(&gnt_list_entry->node);
+ __free_page(pfn_to_page(gnt_list_entry->pfn));
+ kfree(gnt_list_entry);
+ i--;
+ }
+ BUG_ON(i != 0);
+ return -ENOMEM;
+}
+
+static struct grant *get_grant(grant_ref_t *gref_head,
+ struct blkfront_info *info)
+{
+ struct grant *gnt_list_entry;
+ unsigned long buffer_mfn;
+
+ BUG_ON(list_empty(&info->persistent_gnts));
+ gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant,
+ node);
+ list_del(&gnt_list_entry->node);
+
+ if (gnt_list_entry->gref != GRANT_INVALID_REF) {
+ info->persistent_gnts_c--;
+ return gnt_list_entry;
+ }
+
+ /* Assign a gref to this page */
+ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
+ BUG_ON(gnt_list_entry->gref == -ENOSPC);
+ buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+ gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
+ info->xbdev->otherend_id,
+ buffer_mfn, 0);
+ return gnt_list_entry;
+}
+
static const char *op_name(int op)
{
static const char *const names[] = {
@@ -293,7 +355,6 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
static int blkif_queue_request(struct request *req)
{
struct blkfront_info *info = req->rq_disk->private_data;
- unsigned long buffer_mfn;
struct blkif_request *ring_req;
unsigned long id;
unsigned int fsect, lsect;
@@ -306,7 +367,6 @@ static int blkif_queue_request(struct request *req)
*/
bool new_persistent_gnts;
grant_ref_t gref_head;
- struct page *granted_page;
struct grant *gnt_list_entry = NULL;
struct scatterlist *sg;
@@ -370,41 +430,8 @@ static int blkif_queue_request(struct request *req)
fsect = sg->offset >> 9;
lsect = fsect + (sg->length >> 9) - 1;
- if (info->persistent_gnts_c) {
- BUG_ON(llist_empty(&info->persistent_gnts));
- gnt_list_entry = llist_entry(
- llist_del_first(&info->persistent_gnts),
- struct grant, node);
-
- ref = gnt_list_entry->gref;
- buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
- info->persistent_gnts_c--;
- } else {
- ref = gnttab_claim_grant_reference(&gref_head);
- BUG_ON(ref == -ENOSPC);
-
- gnt_list_entry =
- kmalloc(sizeof(struct grant),
- GFP_ATOMIC);
- if (!gnt_list_entry)
- return -ENOMEM;
-
- granted_page = alloc_page(GFP_ATOMIC);
- if (!granted_page) {
- kfree(gnt_list_entry);
- return -ENOMEM;
- }
-
- gnt_list_entry->pfn =
- page_to_pfn(granted_page);
- gnt_list_entry->gref = ref;
-
- buffer_mfn = pfn_to_mfn(page_to_pfn(
- granted_page));
- gnttab_grant_foreign_access_ref(ref,
- info->xbdev->otherend_id,
- buffer_mfn, 0);
- }
+ gnt_list_entry = get_grant(&gref_head, info);
+ ref = gnt_list_entry->gref;
info->shadow[id].grants_used[i] = gnt_list_entry;
@@ -435,7 +462,6 @@ static int blkif_queue_request(struct request *req)
kunmap_atomic(shared_data);
}
- info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
ring_req->u.rw.seg[i] =
(struct blkif_request_segment) {
.gref = ref,
@@ -790,9 +816,8 @@ static void blkif_restart_queue(struct work_struct *work)
static void blkif_free(struct blkfront_info *info, int suspend)
{
- struct llist_node *all_gnts;
- struct grant *persistent_gnt, *tmp;
- struct llist_node *n;
+ struct grant *persistent_gnt;
+ struct grant *n;
/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&info->io_lock);
@@ -803,22 +828,20 @@ static void blkif_free(struct blkfront_info *info, int suspend)
blk_stop_queue(info->rq);
/* Remove all persistent grants */
- if (info->persistent_gnts_c) {
- all_gnts = llist_del_all(&info->persistent_gnts);
- persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node);
- while (persistent_gnt) {
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+ if (!list_empty(&info->persistent_gnts)) {
+ list_for_each_entry_safe(persistent_gnt, n,
+ &info->persistent_gnts, node) {
+ list_del(&persistent_gnt->node);
+ if (persistent_gnt->gref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(persistent_gnt->gref,
+ 0, 0UL);
+ info->persistent_gnts_c--;
+ }
__free_page(pfn_to_page(persistent_gnt->pfn));
- tmp = persistent_gnt;
- n = persistent_gnt->node.next;
- if (n)
- persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node);
- else
- persistent_gnt = NULL;
- kfree(tmp);
+ kfree(persistent_gnt);
}
- info->persistent_gnts_c = 0;
}
+ BUG_ON(info->persistent_gnts_c != 0);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
@@ -875,7 +898,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
}
/* Add the persistent grant into the list of free grants */
for (i = 0; i < s->req.u.rw.nr_segments; i++) {
- llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+ list_add(&s->grants_used[i]->node, &info->persistent_gnts);
info->persistent_gnts_c++;
}
}
@@ -1013,6 +1036,12 @@ static int setup_blkring(struct xenbus_device *dev,
sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ /* Allocate memory for grants */
+ err = fill_grant_buffer(info, BLK_RING_SIZE *
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ if (err)
+ goto fail;
+
err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
if (err < 0) {
free_page((unsigned long)sring);
@@ -1171,7 +1200,7 @@ static int blkfront_probe(struct xenbus_device *dev,
spin_lock_init(&info->io_lock);
info->xbdev = dev;
info->vdevice = vdevice;
- init_llist_head(&info->persistent_gnts);
+ INIT_LIST_HEAD(&info->persistent_gnts);
info->persistent_gnts_c = 0;
info->connected = BLKIF_STATE_DISCONNECTED;
INIT_WORK(&info->work, blkif_restart_queue);
@@ -1203,11 +1232,10 @@ static int blkif_recover(struct blkfront_info *info)
int j;
/* Stage 1: Make a safe copy of the shadow state. */
- copy = kmalloc(sizeof(info->shadow),
+ copy = kmemdup(info->shadow, sizeof(info->shadow),
GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
if (!copy)
return -ENOMEM;
- memcpy(copy, info->shadow, sizeof(info->shadow));
/* Stage 2: Set up free list. */
memset(&info->shadow, 0, sizeof(info->shadow));
@@ -1236,7 +1264,7 @@ static int blkif_recover(struct blkfront_info *info)
gnttab_grant_foreign_access_ref(
req->u.rw.seg[j].gref,
info->xbdev->otherend_id,
- pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
+ pfn_to_mfn(copy[i].grants_used[j]->pfn),
0);
}
info->shadow[req->u.rw.id].req = *req;