summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/nvme-core.c513
-rw-r--r--drivers/block/nvme-scsi.c96
-rw-r--r--drivers/block/rbd.c193
-rw-r--r--drivers/block/virtio_blk.c12
-rw-r--r--drivers/block/zram/zram_drv.c2
5 files changed, 494 insertions, 322 deletions
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index cbdfbbf98392..ceb32dd52a6c 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -37,17 +37,18 @@
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/t10-pi.h>
#include <linux/types.h>
#include <scsi/sg.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>
+#define NVME_MINORS (1U << MINORBITS)
#define NVME_Q_DEPTH 1024
#define NVME_AQ_DEPTH 64
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
#define ADMIN_TIMEOUT (admin_timeout * HZ)
#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
-#define IOD_TIMEOUT (retry_time * HZ)
static unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
@@ -57,10 +58,6 @@ unsigned char nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
-static unsigned char retry_time = 30;
-module_param(retry_time, byte, 0644);
-MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O");
-
static unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
@@ -68,6 +65,9 @@ MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown")
static int nvme_major;
module_param(nvme_major, int, 0);
+static int nvme_char_major;
+module_param(nvme_char_major, int, 0);
+
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
@@ -76,7 +76,8 @@ static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread;
static struct workqueue_struct *nvme_workq;
static wait_queue_head_t nvme_kthread_wait;
-static struct notifier_block nvme_nb;
+
+static struct class *nvme_class;
static void nvme_reset_failed_dev(struct work_struct *ws);
static int nvme_process_cq(struct nvme_queue *nvmeq);
@@ -95,7 +96,6 @@ struct async_cmd_info {
* commands and one for I/O commands).
*/
struct nvme_queue {
- struct llist_node node;
struct device *q_dmadev;
struct nvme_dev *dev;
char irqname[24]; /* nvme4294967295-65535\0 */
@@ -482,6 +482,115 @@ static int nvme_error_status(u16 status)
}
}
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
+{
+ if (be32_to_cpu(pi->ref_tag) == v)
+ pi->ref_tag = cpu_to_be32(p);
+}
+
+static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
+{
+ if (be32_to_cpu(pi->ref_tag) == p)
+ pi->ref_tag = cpu_to_be32(v);
+}
+
+/**
+ * nvme_dif_remap - remaps ref tags to bip seed and physical lba
+ *
+ * The virtual start sector is the one that was originally submitted by the
+ * block layer. Due to partitioning, MD/DM cloning, etc. the actual physical
+ * start sector may be different. Remap protection information to match the
+ * physical LBA on writes, and back to the original seed on reads.
+ *
+ * Type 0 and 3 do not have a ref tag, so no remapping required.
+ */
+static void nvme_dif_remap(struct request *req,
+ void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
+{
+ struct nvme_ns *ns = req->rq_disk->private_data;
+ struct bio_integrity_payload *bip;
+ struct t10_pi_tuple *pi;
+ void *p, *pmap;
+ u32 i, nlb, ts, phys, virt;
+
+ if (!ns->pi_type || ns->pi_type == NVME_NS_DPS_PI_TYPE3)
+ return;
+
+ bip = bio_integrity(req->bio);
+ if (!bip)
+ return;
+
+ pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
+ if (!pmap)
+ return;
+
+ p = pmap;
+ virt = bip_get_seed(bip);
+ phys = nvme_block_nr(ns, blk_rq_pos(req));
+ nlb = (blk_rq_bytes(req) >> ns->lba_shift);
+ ts = ns->disk->integrity->tuple_size;
+
+ for (i = 0; i < nlb; i++, virt++, phys++) {
+ pi = (struct t10_pi_tuple *)p;
+ dif_swap(phys, virt, pi);
+ p += ts;
+ }
+ kunmap_atomic(pmap);
+}
+
+static int nvme_noop_verify(struct blk_integrity_iter *iter)
+{
+ return 0;
+}
+
+static int nvme_noop_generate(struct blk_integrity_iter *iter)
+{
+ return 0;
+}
+
+struct blk_integrity nvme_meta_noop = {
+ .name = "NVME_META_NOOP",
+ .generate_fn = nvme_noop_generate,
+ .verify_fn = nvme_noop_verify,
+};
+
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+ struct blk_integrity integrity;
+
+ switch (ns->pi_type) {
+ case NVME_NS_DPS_PI_TYPE3:
+ integrity = t10_pi_type3_crc;
+ break;
+ case NVME_NS_DPS_PI_TYPE1:
+ case NVME_NS_DPS_PI_TYPE2:
+ integrity = t10_pi_type1_crc;
+ break;
+ default:
+ integrity = nvme_meta_noop;
+ break;
+ }
+ integrity.tuple_size = ns->ms;
+ blk_integrity_register(ns->disk, &integrity);
+ blk_queue_max_integrity_segments(ns->queue, 1);
+}
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+static void nvme_dif_remap(struct request *req,
+ void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
+{
+}
+static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
+{
+}
+static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
+{
+}
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+}
+#endif
+
static void req_completion(struct nvme_queue *nvmeq, void *ctx,
struct nvme_completion *cqe)
{
@@ -512,9 +621,16 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
"completing aborted command with status:%04x\n",
status);
- if (iod->nents)
+ if (iod->nents) {
dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents,
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ if (blk_integrity_rq(req)) {
+ if (!rq_data_dir(req))
+ nvme_dif_remap(req, nvme_dif_complete);
+ dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1,
+ rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+ }
+ }
nvme_free_iod(nvmeq->dev, iod);
blk_mq_complete_request(req);
@@ -670,6 +786,24 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+
+ if (blk_integrity_rq(req)) {
+ cmnd->rw.metadata = cpu_to_le64(sg_dma_address(iod->meta_sg));
+ switch (ns->pi_type) {
+ case NVME_NS_DPS_PI_TYPE3:
+ control |= NVME_RW_PRINFO_PRCHK_GUARD;
+ break;
+ case NVME_NS_DPS_PI_TYPE1:
+ case NVME_NS_DPS_PI_TYPE2:
+ control |= NVME_RW_PRINFO_PRCHK_GUARD |
+ NVME_RW_PRINFO_PRCHK_REF;
+ cmnd->rw.reftag = cpu_to_le32(
+ nvme_block_nr(ns, blk_rq_pos(req)));
+ break;
+ }
+ } else if (ns->ms)
+ control |= NVME_RW_PRINFO_PRACT;
+
cmnd->rw.control = cpu_to_le16(control);
cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
@@ -690,6 +824,19 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_iod *iod;
enum dma_data_direction dma_dir;
+ /*
+ * If formated with metadata, require the block layer provide a buffer
+ * unless this namespace is formated such that the metadata can be
+ * stripped/generated by the controller with PRACT=1.
+ */
+ if (ns->ms && !blk_integrity_rq(req)) {
+ if (!(ns->pi_type && ns->ms == 8)) {
+ req->errors = -EFAULT;
+ blk_mq_complete_request(req);
+ return BLK_MQ_RQ_QUEUE_OK;
+ }
+ }
+
iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC);
if (!iod)
return BLK_MQ_RQ_QUEUE_BUSY;
@@ -725,6 +872,21 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
iod->nents, dma_dir);
goto retry_cmd;
}
+ if (blk_integrity_rq(req)) {
+ if (blk_rq_count_integrity_sg(req->q, req->bio) != 1)
+ goto error_cmd;
+
+ sg_init_table(iod->meta_sg, 1);
+ if (blk_rq_map_integrity_sg(
+ req->q, req->bio, iod->meta_sg) != 1)
+ goto error_cmd;
+
+ if (rq_data_dir(req))
+ nvme_dif_remap(req, nvme_dif_prep);
+
+ if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir))
+ goto error_cmd;
+ }
}
nvme_set_info(cmd, iod, req_completion);
@@ -817,14 +979,6 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
return IRQ_WAKE_THREAD;
}
-static void nvme_abort_cmd_info(struct nvme_queue *nvmeq, struct nvme_cmd_info *
- cmd_info)
-{
- spin_lock_irq(&nvmeq->q_lock);
- cancel_cmd_info(cmd_info, NULL);
- spin_unlock_irq(&nvmeq->q_lock);
-}
-
struct sync_cmd_info {
struct task_struct *task;
u32 result;
@@ -847,7 +1001,6 @@ static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd,
u32 *result, unsigned timeout)
{
- int ret;
struct sync_cmd_info cmdinfo;
struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = cmd_rq->nvmeq;
@@ -859,29 +1012,12 @@ static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd,
nvme_set_info(cmd_rq, &cmdinfo, sync_completion);
- set_current_state(TASK_KILLABLE);
- ret = nvme_submit_cmd(nvmeq, cmd);
- if (ret) {
- nvme_finish_cmd(nvmeq, req->tag, NULL);
- set_current_state(TASK_RUNNING);
- }
- ret = schedule_timeout(timeout);
-
- /*
- * Ensure that sync_completion has either run, or that it will
- * never run.
- */
- nvme_abort_cmd_info(nvmeq, blk_mq_rq_to_pdu(req));
-
- /*
- * We never got the completion
- */
- if (cmdinfo.status == -EINTR)
- return -EINTR;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ nvme_submit_cmd(nvmeq, cmd);
+ schedule();
if (result)
*result = cmdinfo.result;
-
return cmdinfo.status;
}
@@ -1158,29 +1294,18 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = cmd->nvmeq;
- /*
- * The aborted req will be completed on receiving the abort req.
- * We enable the timer again. If hit twice, it'll cause a device reset,
- * as the device then is in a faulty state.
- */
- int ret = BLK_EH_RESET_TIMER;
-
dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
nvmeq->qid);
-
spin_lock_irq(&nvmeq->q_lock);
- if (!nvmeq->dev->initialized) {
- /*
- * Force cancelled command frees the request, which requires we
- * return BLK_EH_NOT_HANDLED.
- */
- nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
- ret = BLK_EH_NOT_HANDLED;
- } else
- nvme_abort_req(req);
+ nvme_abort_req(req);
spin_unlock_irq(&nvmeq->q_lock);
- return ret;
+ /*
+ * The aborted req will be completed on receiving the abort req.
+ * We enable the timer again. If hit twice, it'll cause a device reset,
+ * as the device then is in a faulty state.
+ */
+ return BLK_EH_RESET_TIMER;
}
static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -1233,7 +1358,6 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq)
struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
spin_lock_irq(&nvmeq->q_lock);
- nvme_process_cq(nvmeq);
if (hctx && hctx->tags)
blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
@@ -1256,7 +1380,10 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
}
if (!qid && dev->admin_q)
blk_mq_freeze_queue_start(dev->admin_q);
- nvme_clear_queue(nvmeq);
+
+ spin_lock_irq(&nvmeq->q_lock);
+ nvme_process_cq(nvmeq);
+ spin_unlock_irq(&nvmeq->q_lock);
}
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
@@ -1875,13 +2002,24 @@ static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
return 0;
}
+static void nvme_config_discard(struct nvme_ns *ns)
+{
+ u32 logical_block_size = queue_logical_block_size(ns->queue);
+ ns->queue->limits.discard_zeroes_data = 0;
+ ns->queue->limits.discard_alignment = logical_block_size;
+ ns->queue->limits.discard_granularity = logical_block_size;
+ ns->queue->limits.max_discard_sectors = 0xffffffff;
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+}
+
static int nvme_revalidate_disk(struct gendisk *disk)
{
struct nvme_ns *ns = disk->private_data;
struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id;
dma_addr_t dma_addr;
- int lbaf;
+ int lbaf, pi_type, old_ms;
+ unsigned short bs;
id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
GFP_KERNEL);
@@ -1890,16 +2028,51 @@ static int nvme_revalidate_disk(struct gendisk *disk)
__func__);
return 0;
}
+ if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
+ dev_warn(&dev->pci_dev->dev,
+ "identify failed ns:%d, setting capacity to 0\n",
+ ns->ns_id);
+ memset(id, 0, sizeof(*id));
+ }
- if (nvme_identify(dev, ns->ns_id, 0, dma_addr))
- goto free;
-
- lbaf = id->flbas & 0xf;
+ old_ms = ns->ms;
+ lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds;
+ ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+
+ /*
+ * If identify namespace failed, use default 512 byte block size so
+ * block layer can use before failing read/write for 0 capacity.
+ */
+ if (ns->lba_shift == 0)
+ ns->lba_shift = 9;
+ bs = 1 << ns->lba_shift;
+
+ /* XXX: PI implementation requires metadata equal t10 pi tuple size */
+ pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
+ id->dps & NVME_NS_DPS_PI_MASK : 0;
+
+ if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
+ ns->ms != old_ms ||
+ bs != queue_logical_block_size(disk->queue) ||
+ (ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT)))
+ blk_integrity_unregister(disk);
+
+ ns->pi_type = pi_type;
+ blk_queue_logical_block_size(ns->queue, bs);
+
+ if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) &&
+ !(id->flbas & NVME_NS_FLBAS_META_EXT))
+ nvme_init_integrity(ns);
+
+ if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
+ set_capacity(disk, 0);
+ else
+ set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+ if (dev->oncs & NVME_CTRL_ONCS_DSM)
+ nvme_config_discard(ns);
- blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
- set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
- free:
dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
return 0;
}
@@ -1923,8 +2096,7 @@ static int nvme_kthread(void *data)
spin_lock(&dev_list_lock);
list_for_each_entry_safe(dev, next, &dev_list, node) {
int i;
- if (readl(&dev->bar->csts) & NVME_CSTS_CFS &&
- dev->initialized) {
+ if (readl(&dev->bar->csts) & NVME_CSTS_CFS) {
if (work_busy(&dev->reset_work))
continue;
list_del_init(&dev->node);
@@ -1956,30 +2128,16 @@ static int nvme_kthread(void *data)
return 0;
}
-static void nvme_config_discard(struct nvme_ns *ns)
-{
- u32 logical_block_size = queue_logical_block_size(ns->queue);
- ns->queue->limits.discard_zeroes_data = 0;
- ns->queue->limits.discard_alignment = logical_block_size;
- ns->queue->limits.discard_granularity = logical_block_size;
- ns->queue->limits.max_discard_sectors = 0xffffffff;
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
-}
-
-static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
- struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
+static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
{
struct nvme_ns *ns;
struct gendisk *disk;
int node = dev_to_node(&dev->pci_dev->dev);
- int lbaf;
-
- if (rt->attributes & NVME_LBART_ATTRIB_HIDE)
- return NULL;
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
- return NULL;
+ return;
+
ns->queue = blk_mq_init_queue(&dev->tagset);
if (IS_ERR(ns->queue))
goto out_free_ns;
@@ -1995,9 +2153,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
ns->ns_id = nsid;
ns->disk = disk;
- lbaf = id->flbas & 0xf;
- ns->lba_shift = id->lbaf[lbaf].ds;
- ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+ ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
+ list_add_tail(&ns->list, &dev->namespaces);
+
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
if (dev->max_hw_sectors)
blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
@@ -2011,21 +2169,26 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
disk->fops = &nvme_fops;
disk->private_data = ns;
disk->queue = ns->queue;
- disk->driverfs_dev = &dev->pci_dev->dev;
+ disk->driverfs_dev = dev->device;
disk->flags = GENHD_FL_EXT_DEVT;
sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
- set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
-
- if (dev->oncs & NVME_CTRL_ONCS_DSM)
- nvme_config_discard(ns);
-
- return ns;
+ /*
+ * Initialize capacity to 0 until we establish the namespace format and
+ * setup integrity extentions if necessary. The revalidate_disk after
+ * add_disk allows the driver to register with integrity if the format
+ * requires it.
+ */
+ set_capacity(disk, 0);
+ nvme_revalidate_disk(ns->disk);
+ add_disk(ns->disk);
+ if (ns->ms)
+ revalidate_disk(ns->disk);
+ return;
out_free_queue:
blk_cleanup_queue(ns->queue);
out_free_ns:
kfree(ns);
- return NULL;
}
static void nvme_create_io_queues(struct nvme_dev *dev)
@@ -2150,22 +2313,20 @@ static int nvme_dev_add(struct nvme_dev *dev)
struct pci_dev *pdev = dev->pci_dev;
int res;
unsigned nn, i;
- struct nvme_ns *ns;
struct nvme_id_ctrl *ctrl;
- struct nvme_id_ns *id_ns;
void *mem;
dma_addr_t dma_addr;
int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
- mem = dma_alloc_coherent(&pdev->dev, 8192, &dma_addr, GFP_KERNEL);
+ mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL);
if (!mem)
return -ENOMEM;
res = nvme_identify(dev, 0, 1, dma_addr);
if (res) {
dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
- res = -EIO;
- goto out;
+ dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+ return -EIO;
}
ctrl = mem;
@@ -2191,6 +2352,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
} else
dev->max_hw_sectors = max_hw_sectors;
}
+ dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
@@ -2203,33 +2365,12 @@ static int nvme_dev_add(struct nvme_dev *dev)
dev->tagset.driver_data = dev;
if (blk_mq_alloc_tag_set(&dev->tagset))
- goto out;
-
- id_ns = mem;
- for (i = 1; i <= nn; i++) {
- res = nvme_identify(dev, i, 0, dma_addr);
- if (res)
- continue;
-
- if (id_ns->ncap == 0)
- continue;
-
- res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i,
- dma_addr + 4096, NULL);
- if (res)
- memset(mem + 4096, 0, 4096);
+ return 0;
- ns = nvme_alloc_ns(dev, i, mem, mem + 4096);
- if (ns)
- list_add_tail(&ns->list, &dev->namespaces);
- }
- list_for_each_entry(ns, &dev->namespaces, list)
- add_disk(ns->disk);
- res = 0;
+ for (i = 1; i <= nn; i++)
+ nvme_alloc_ns(dev, i);
- out:
- dma_free_coherent(&dev->pci_dev->dev, 8192, mem, dma_addr);
- return res;
+ return 0;
}
static int nvme_dev_map(struct nvme_dev *dev)
@@ -2358,8 +2499,6 @@ static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
static void nvme_del_queue_end(struct nvme_queue *nvmeq)
{
struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
-
- nvme_clear_queue(nvmeq);
nvme_put_dq(dq);
}
@@ -2502,7 +2641,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
int i;
u32 csts = -1;
- dev->initialized = 0;
nvme_dev_list_remove(dev);
if (dev->bar) {
@@ -2513,7 +2651,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
for (i = dev->queue_count - 1; i >= 0; i--) {
struct nvme_queue *nvmeq = dev->queues[i];
nvme_suspend_queue(nvmeq);
- nvme_clear_queue(nvmeq);
}
} else {
nvme_disable_io_queues(dev);
@@ -2521,6 +2658,9 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
nvme_disable_queue(dev, 0);
}
nvme_dev_unmap(dev);
+
+ for (i = dev->queue_count - 1; i >= 0; i--)
+ nvme_clear_queue(dev->queues[i]);
}
static void nvme_dev_remove(struct nvme_dev *dev)
@@ -2528,8 +2668,11 @@ static void nvme_dev_remove(struct nvme_dev *dev)
struct nvme_ns *ns;
list_for_each_entry(ns, &dev->namespaces, list) {
- if (ns->disk->flags & GENHD_FL_UP)
+ if (ns->disk->flags & GENHD_FL_UP) {
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
del_gendisk(ns->disk);
+ }
if (!blk_queue_dying(ns->queue)) {
blk_mq_abort_requeue_list(ns->queue);
blk_cleanup_queue(ns->queue);
@@ -2611,6 +2754,7 @@ static void nvme_free_dev(struct kref *kref)
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
pci_dev_put(dev->pci_dev);
+ put_device(dev->device);
nvme_free_namespaces(dev);
nvme_release_instance(dev);
blk_mq_free_tag_set(&dev->tagset);
@@ -2622,11 +2766,27 @@ static void nvme_free_dev(struct kref *kref)
static int nvme_dev_open(struct inode *inode, struct file *f)
{
- struct nvme_dev *dev = container_of(f->private_data, struct nvme_dev,
- miscdev);
- kref_get(&dev->kref);
- f->private_data = dev;
- return 0;
+ struct nvme_dev *dev;
+ int instance = iminor(inode);
+ int ret = -ENODEV;
+
+ spin_lock(&dev_list_lock);
+ list_for_each_entry(dev, &dev_list, node) {
+ if (dev->instance == instance) {
+ if (!dev->admin_q) {
+ ret = -EWOULDBLOCK;
+ break;
+ }
+ if (!kref_get_unless_zero(&dev->kref))
+ break;
+ f->private_data = dev;
+ ret = 0;
+ break;
+ }
+ }
+ spin_unlock(&dev_list_lock);
+
+ return ret;
}
static int nvme_dev_release(struct inode *inode, struct file *f)
@@ -2768,7 +2928,6 @@ static int nvme_dev_resume(struct nvme_dev *dev)
nvme_unfreeze_queues(dev);
nvme_set_irq_hints(dev);
}
- dev->initialized = 1;
return 0;
}
@@ -2799,6 +2958,7 @@ static void nvme_reset_workfn(struct work_struct *work)
dev->reset_workfn(work);
}
+static void nvme_async_probe(struct work_struct *work);
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int node, result = -ENOMEM;
@@ -2834,37 +2994,20 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto release;
kref_init(&dev->kref);
- result = nvme_dev_start(dev);
- if (result)
+ dev->device = device_create(nvme_class, &pdev->dev,
+ MKDEV(nvme_char_major, dev->instance),
+ dev, "nvme%d", dev->instance);
+ if (IS_ERR(dev->device)) {
+ result = PTR_ERR(dev->device);
goto release_pools;
+ }
+ get_device(dev->device);
- if (dev->online_queues > 1)
- result = nvme_dev_add(dev);
- if (result)
- goto shutdown;
-
- scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
- dev->miscdev.minor = MISC_DYNAMIC_MINOR;
- dev->miscdev.parent = &pdev->dev;
- dev->miscdev.name = dev->name;
- dev->miscdev.fops = &nvme_dev_fops;
- result = misc_register(&dev->miscdev);
- if (result)
- goto remove;
-
- nvme_set_irq_hints(dev);
-
- dev->initialized = 1;
+ INIT_WORK(&dev->probe_work, nvme_async_probe);
+ schedule_work(&dev->probe_work);
return 0;
- remove:
- nvme_dev_remove(dev);
- nvme_dev_remove_admin(dev);
- nvme_free_namespaces(dev);
- shutdown:
- nvme_dev_shutdown(dev);
release_pools:
- nvme_free_queues(dev, 0);
nvme_release_prp_pools(dev);
release:
nvme_release_instance(dev);
@@ -2877,6 +3020,29 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return result;
}
+static void nvme_async_probe(struct work_struct *work)
+{
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
+ int result;
+
+ result = nvme_dev_start(dev);
+ if (result)
+ goto reset;
+
+ if (dev->online_queues > 1)
+ result = nvme_dev_add(dev);
+ if (result)
+ goto reset;
+
+ nvme_set_irq_hints(dev);
+ return;
+ reset:
+ if (!work_busy(&dev->reset_work)) {
+ dev->reset_workfn = nvme_reset_failed_dev;
+ queue_work(nvme_workq, &dev->reset_work);
+ }
+}
+
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
@@ -2902,11 +3068,12 @@ static void nvme_remove(struct pci_dev *pdev)
spin_unlock(&dev_list_lock);
pci_set_drvdata(pdev, NULL);
+ flush_work(&dev->probe_work);
flush_work(&dev->reset_work);
- misc_deregister(&dev->miscdev);
nvme_dev_shutdown(dev);
nvme_dev_remove(dev);
nvme_dev_remove_admin(dev);
+ device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
nvme_free_queues(dev, 0);
nvme_release_prp_pools(dev);
kref_put(&dev->kref, nvme_free_dev);
@@ -2990,11 +3157,26 @@ static int __init nvme_init(void)
else if (result > 0)
nvme_major = result;
+ result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
+ &nvme_dev_fops);
+ if (result < 0)
+ goto unregister_blkdev;
+ else if (result > 0)
+ nvme_char_major = result;
+
+ nvme_class = class_create(THIS_MODULE, "nvme");
+ if (!nvme_class)
+ goto unregister_chrdev;
+
result = pci_register_driver(&nvme_driver);
if (result)
- goto unregister_blkdev;
+ goto destroy_class;
return 0;
+ destroy_class:
+ class_destroy(nvme_class);
+ unregister_chrdev:
+ __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
unregister_blkdev:
unregister_blkdev(nvme_major, "nvme");
kill_workq:
@@ -3005,9 +3187,10 @@ static int __init nvme_init(void)
static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
- unregister_hotcpu_notifier(&nvme_nb);
unregister_blkdev(nvme_major, "nvme");
destroy_workqueue(nvme_workq);
+ class_destroy(nvme_class);
+ __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
_nvme_check_size();
}
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 5e78568026c3..e10196e0182d 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -779,10 +779,8 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
struct nvme_dev *dev = ns->dev;
dma_addr_t dma_addr;
void *mem;
- struct nvme_id_ctrl *id_ctrl;
int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc;
- u8 ieee[4];
int xfer_len;
__be32 tmp_id = cpu_to_be32(ns->ns_id);
@@ -793,46 +791,60 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
goto out_dma;
}
- /* nvme controller identify */
- nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_free;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_free;
- }
- id_ctrl = mem;
-
- /* Since SCSI tried to save 4 bits... [SPC-4(r34) Table 591] */
- ieee[0] = id_ctrl->ieee[0] << 4;
- ieee[1] = id_ctrl->ieee[0] >> 4 | id_ctrl->ieee[1] << 4;
- ieee[2] = id_ctrl->ieee[1] >> 4 | id_ctrl->ieee[2] << 4;
- ieee[3] = id_ctrl->ieee[2] >> 4;
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
+ memset(inq_response, 0, alloc_len);
inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */
- inq_response[3] = 20; /* Page Length */
- /* Designation Descriptor start */
- inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
- inq_response[5] = 0x03; /* PIV=0b | Asso=00b | Designator Type=3h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = 16; /* Designator Length */
- /* Designator start */
- inq_response[8] = 0x60 | ieee[3]; /* NAA=6h | IEEE ID MSB, High nibble*/
- inq_response[9] = ieee[2]; /* IEEE ID */
- inq_response[10] = ieee[1]; /* IEEE ID */
- inq_response[11] = ieee[0]; /* IEEE ID| Vendor Specific ID... */
- inq_response[12] = (dev->pci_dev->vendor & 0xFF00) >> 8;
- inq_response[13] = (dev->pci_dev->vendor & 0x00FF);
- inq_response[14] = dev->serial[0];
- inq_response[15] = dev->serial[1];
- inq_response[16] = dev->model[0];
- inq_response[17] = dev->model[1];
- memcpy(&inq_response[18], &tmp_id, sizeof(u32));
- /* Last 2 bytes are zero */
+ if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
+ struct nvme_id_ns *id_ns = mem;
+ void *eui = id_ns->eui64;
+ int len = sizeof(id_ns->eui64);
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
+ nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ res = nvme_trans_status_code(hdr, nvme_sc);
+ if (res)
+ goto out_free;
+ if (nvme_sc) {
+ res = nvme_sc;
+ goto out_free;
+ }
+
+ if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
+ if (bitmap_empty(eui, len * 8)) {
+ eui = id_ns->nguid;
+ len = sizeof(id_ns->nguid);
+ }
+ }
+ if (bitmap_empty(eui, len * 8))
+ goto scsi_string;
+
+ inq_response[3] = 4 + len; /* Page Length */
+ /* Designation Descriptor start */
+ inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
+ inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
+ inq_response[6] = 0x00; /* Rsvd */
+ inq_response[7] = len; /* Designator Length */
+ memcpy(&inq_response[8], eui, len);
+ } else {
+ scsi_string:
+ if (alloc_len < 72) {
+ res = nvme_trans_completion(hdr,
+ SAM_STAT_CHECK_CONDITION,
+ ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+ SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ goto out_free;
+ }
+ inq_response[3] = 0x48; /* Page Length */
+ /* Designation Descriptor start */
+ inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
+ inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
+ inq_response[6] = 0x00; /* Rsvd */
+ inq_response[7] = 0x44; /* Designator Length */
+
+ sprintf(&inq_response[8], "%04x", dev->pci_dev->vendor);
+ memcpy(&inq_response[12], dev->model, sizeof(dev->model));
+ sprintf(&inq_response[52], "%04x", tmp_id);
+ memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
+ }
+ xfer_len = alloc_len;
res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
out_free:
@@ -1600,7 +1612,7 @@ static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
/* 10 Byte CDB */
*bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
parm_list[MODE_SELECT_10_BD_OFFSET + 1];
- *llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &&
+ *llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
MODE_SELECT_10_LLBAA_MASK;
} else {
/* 6 Byte CDB */
@@ -2222,7 +2234,7 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
page_code = GET_INQ_PAGE_CODE(cmd);
alloc_len = GET_INQ_ALLOC_LENGTH(cmd);
- inq_response = kmalloc(STANDARD_INQUIRY_LENGTH, GFP_KERNEL);
+ inq_response = kmalloc(alloc_len, GFP_KERNEL);
if (inq_response == NULL) {
res = -ENOMEM;
goto out_mem;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8a86b62466f7..b40af3203089 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -38,6 +38,7 @@
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/module.h>
+#include <linux/blk-mq.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
@@ -340,9 +341,7 @@ struct rbd_device {
char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
- struct list_head rq_queue; /* incoming rq queue */
spinlock_t lock; /* queue, flags, open_count */
- struct work_struct rq_work;
struct rbd_image_header header;
unsigned long flags; /* possibly lock protected */
@@ -360,6 +359,9 @@ struct rbd_device {
atomic_t parent_ref;
struct rbd_device *parent;
+ /* Block layer tags. */
+ struct blk_mq_tag_set tag_set;
+
/* protects updating the header */
struct rw_semaphore header_rwsem;
@@ -1817,7 +1819,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
/*
* We support a 64-bit length, but ultimately it has to be
- * passed to blk_end_request(), which takes an unsigned int.
+ * passed to the block layer, which just supports a 32-bit
+ * length field.
*/
obj_request->xferred = osd_req->r_reply_op_len[0];
rbd_assert(obj_request->xferred < (u64)UINT_MAX);
@@ -2275,7 +2278,10 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
more = obj_request->which < img_request->obj_request_count - 1;
} else {
rbd_assert(img_request->rq != NULL);
- more = blk_end_request(img_request->rq, result, xferred);
+
+ more = blk_update_request(img_request->rq, result, xferred);
+ if (!more)
+ __blk_mq_end_request(img_request->rq, result);
}
return more;
@@ -3304,8 +3310,10 @@ out:
return ret;
}
-static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
+static void rbd_queue_workfn(struct work_struct *work)
{
+ struct request *rq = blk_mq_rq_from_pdu(work);
+ struct rbd_device *rbd_dev = rq->q->queuedata;
struct rbd_img_request *img_request;
struct ceph_snap_context *snapc = NULL;
u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
@@ -3314,6 +3322,13 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
u64 mapping_size;
int result;
+ if (rq->cmd_type != REQ_TYPE_FS) {
+ dout("%s: non-fs request type %d\n", __func__,
+ (int) rq->cmd_type);
+ result = -EIO;
+ goto err;
+ }
+
if (rq->cmd_flags & REQ_DISCARD)
op_type = OBJ_OP_DISCARD;
else if (rq->cmd_flags & REQ_WRITE)
@@ -3359,6 +3374,8 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
goto err_rq; /* Shouldn't happen */
}
+ blk_mq_start_request(rq);
+
down_read(&rbd_dev->header_rwsem);
mapping_size = rbd_dev->mapping.size;
if (op_type != OBJ_OP_READ) {
@@ -3404,53 +3421,18 @@ err_rq:
rbd_warn(rbd_dev, "%s %llx at %llx result %d",
obj_op_name(op_type), length, offset, result);
ceph_put_snap_context(snapc);
- blk_end_request_all(rq, result);
+err:
+ blk_mq_end_request(rq, result);
}
-static void rbd_request_workfn(struct work_struct *work)
+static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
{
- struct rbd_device *rbd_dev =
- container_of(work, struct rbd_device, rq_work);
- struct request *rq, *next;
- LIST_HEAD(requests);
-
- spin_lock_irq(&rbd_dev->lock); /* rq->q->queue_lock */
- list_splice_init(&rbd_dev->rq_queue, &requests);
- spin_unlock_irq(&rbd_dev->lock);
+ struct request *rq = bd->rq;
+ struct work_struct *work = blk_mq_rq_to_pdu(rq);
- list_for_each_entry_safe(rq, next, &requests, queuelist) {
- list_del_init(&rq->queuelist);
- rbd_handle_request(rbd_dev, rq);
- }
-}
-
-/*
- * Called with q->queue_lock held and interrupts disabled, possibly on
- * the way to schedule(). Do not sleep here!
- */
-static void rbd_request_fn(struct request_queue *q)
-{
- struct rbd_device *rbd_dev = q->queuedata;
- struct request *rq;
- int queued = 0;
-
- rbd_assert(rbd_dev);
-
- while ((rq = blk_fetch_request(q))) {
- /* Ignore any non-FS requests that filter through. */
- if (rq->cmd_type != REQ_TYPE_FS) {
- dout("%s: non-fs request type %d\n", __func__,
- (int) rq->cmd_type);
- __blk_end_request_all(rq, 0);
- continue;
- }
-
- list_add_tail(&rq->queuelist, &rbd_dev->rq_queue);
- queued++;
- }
-
- if (queued)
- queue_work(rbd_wq, &rbd_dev->rq_work);
+ queue_work(rbd_wq, work);
+ return BLK_MQ_RQ_QUEUE_OK;
}
/*
@@ -3511,6 +3493,7 @@ static void rbd_free_disk(struct rbd_device *rbd_dev)
del_gendisk(disk);
if (disk->queue)
blk_cleanup_queue(disk->queue);
+ blk_mq_free_tag_set(&rbd_dev->tag_set);
}
put_disk(disk);
}
@@ -3694,7 +3677,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
ret = rbd_dev_header_info(rbd_dev);
if (ret)
- return ret;
+ goto out;
/*
* If there is a parent, see if it has disappeared due to the
@@ -3703,30 +3686,46 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
if (rbd_dev->parent) {
ret = rbd_dev_v2_parent_info(rbd_dev);
if (ret)
- return ret;
+ goto out;
}
if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
- if (rbd_dev->mapping.size != rbd_dev->header.image_size)
- rbd_dev->mapping.size = rbd_dev->header.image_size;
+ rbd_dev->mapping.size = rbd_dev->header.image_size;
} else {
/* validate mapped snapshot's EXISTS flag */
rbd_exists_validate(rbd_dev);
}
+out:
up_write(&rbd_dev->header_rwsem);
-
- if (mapping_size != rbd_dev->mapping.size)
+ if (!ret && mapping_size != rbd_dev->mapping.size)
rbd_dev_update_size(rbd_dev);
+ return ret;
+}
+
+static int rbd_init_request(void *data, struct request *rq,
+ unsigned int hctx_idx, unsigned int request_idx,
+ unsigned int numa_node)
+{
+ struct work_struct *work = blk_mq_rq_to_pdu(rq);
+
+ INIT_WORK(work, rbd_queue_workfn);
return 0;
}
+static struct blk_mq_ops rbd_mq_ops = {
+ .queue_rq = rbd_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .init_request = rbd_init_request,
+};
+
static int rbd_init_disk(struct rbd_device *rbd_dev)
{
struct gendisk *disk;
struct request_queue *q;
u64 segment_size;
+ int err;
/* create gendisk info */
disk = alloc_disk(single_major ?
@@ -3744,10 +3743,25 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
disk->fops = &rbd_bd_ops;
disk->private_data = rbd_dev;
- q = blk_init_queue(rbd_request_fn, &rbd_dev->lock);
- if (!q)
+ memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
+ rbd_dev->tag_set.ops = &rbd_mq_ops;
+ rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ;
+ rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
+ rbd_dev->tag_set.flags =
+ BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ rbd_dev->tag_set.nr_hw_queues = 1;
+ rbd_dev->tag_set.cmd_size = sizeof(struct work_struct);
+
+ err = blk_mq_alloc_tag_set(&rbd_dev->tag_set);
+ if (err)
goto out_disk;
+ q = blk_mq_init_queue(&rbd_dev->tag_set);
+ if (IS_ERR(q)) {
+ err = PTR_ERR(q);
+ goto out_tag_set;
+ }
+
/* We use the default size, but let's be explicit about it. */
blk_queue_physical_block_size(q, SECTOR_SIZE);
@@ -3773,10 +3787,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
rbd_dev->disk = disk;
return 0;
+out_tag_set:
+ blk_mq_free_tag_set(&rbd_dev->tag_set);
out_disk:
put_disk(disk);
-
- return -ENOMEM;
+ return err;
}
/*
@@ -4033,8 +4048,6 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
return NULL;
spin_lock_init(&rbd_dev->lock);
- INIT_LIST_HEAD(&rbd_dev->rq_queue);
- INIT_WORK(&rbd_dev->rq_work, rbd_request_workfn);
rbd_dev->flags = 0;
atomic_set(&rbd_dev->parent_ref, 0);
INIT_LIST_HEAD(&rbd_dev->node);
@@ -4274,32 +4287,22 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
}
/*
- * We always update the parent overlap. If it's zero we
- * treat it specially.
+ * We always update the parent overlap. If it's zero we issue
+ * a warning, as we will proceed as if there was no parent.
*/
- rbd_dev->parent_overlap = overlap;
if (!overlap) {
-
- /* A null parent_spec indicates it's the initial probe */
-
if (parent_spec) {
- /*
- * The overlap has become zero, so the clone
- * must have been resized down to 0 at some
- * point. Treat this the same as a flatten.
- */
- rbd_dev_parent_put(rbd_dev);
- pr_info("%s: clone image now standalone\n",
- rbd_dev->disk->disk_name);
+ /* refresh, careful to warn just once */
+ if (rbd_dev->parent_overlap)
+ rbd_warn(rbd_dev,
+ "clone now standalone (overlap became 0)");
} else {
- /*
- * For the initial probe, if we find the
- * overlap is zero we just pretend there was
- * no parent image.
- */
- rbd_warn(rbd_dev, "ignoring parent with overlap 0");
+ /* initial probe */
+ rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
}
}
+ rbd_dev->parent_overlap = overlap;
+
out:
ret = 0;
out_err:
@@ -4771,36 +4774,6 @@ static inline size_t next_token(const char **buf)
}
/*
- * Finds the next token in *buf, and if the provided token buffer is
- * big enough, copies the found token into it. The result, if
- * copied, is guaranteed to be terminated with '\0'. Note that *buf
- * must be terminated with '\0' on entry.
- *
- * Returns the length of the token found (not including the '\0').
- * Return value will be 0 if no token is found, and it will be >=
- * token_size if the token would not fit.
- *
- * The *buf pointer will be updated to point beyond the end of the
- * found token. Note that this occurs even if the token buffer is
- * too small to hold it.
- */
-static inline size_t copy_token(const char **buf,
- char *token,
- size_t token_size)
-{
- size_t len;
-
- len = next_token(buf);
- if (len < token_size) {
- memcpy(token, *buf, len);
- *(token + len) = '\0';
- }
- *buf += len;
-
- return len;
-}
-
-/*
* Finds the next token in *buf, dynamically allocates a buffer big
* enough to hold a copy of it, and copies the token into the new
* buffer. The copy is guaranteed to be terminated with '\0'. Note
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index cdfbd21e3597..655e570b9b31 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -28,8 +28,7 @@ struct virtio_blk_vq {
char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;
-struct virtio_blk
-{
+struct virtio_blk {
struct virtio_device *vdev;
/* The disk structure for the kernel. */
@@ -52,8 +51,7 @@ struct virtio_blk
struct virtio_blk_vq *vqs;
};
-struct virtblk_req
-{
+struct virtblk_req {
struct request *req;
struct virtio_blk_outhdr out_hdr;
struct virtio_scsi_inhdr in_hdr;
@@ -575,6 +573,12 @@ static int virtblk_probe(struct virtio_device *vdev)
u16 min_io_size;
u8 physical_block_exp, alignment_offset;
+ if (!vdev->config->get) {
+ dev_err(&vdev->dev, "%s failure: config access disabled\n",
+ __func__);
+ return -EINVAL;
+ }
+
err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
GFP_KERNEL);
if (err < 0)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 8e233edd7a09..871bd3550cb0 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -528,7 +528,7 @@ out_cleanup:
static inline void update_used_max(struct zram *zram,
const unsigned long pages)
{
- int old_max, cur_max;
+ unsigned long old_max, cur_max;
old_max = atomic_long_read(&zram->stats.max_used_pages);