From 8ae4e4477d8f5cc7736816a0492f82934ca32ab7 Mon Sep 17 00:00:00 2001 From: Marc Olson Date: Wed, 6 Sep 2017 17:23:56 -0700 Subject: nvme: update timeout module parameter type The underlying blk_mq_tag_set, and request timeout parameters support an unsigned int. Extend the size of the nvme module parameters for io and admin commands to match. Signed-off-by: Marc Olson Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bb2aad078637..26c8913435b2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -34,13 +34,13 @@ #define NVME_MINORS (1U << MINORBITS) -unsigned char admin_timeout = 60; -module_param(admin_timeout, byte, 0644); +unsigned int admin_timeout = 60; +module_param(admin_timeout, uint, 0644); MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); EXPORT_SYMBOL_GPL(admin_timeout); -unsigned char nvme_io_timeout = 30; -module_param_named(io_timeout, nvme_io_timeout, byte, 0644); +unsigned int nvme_io_timeout = 30; +module_param_named(io_timeout, nvme_io_timeout, uint, 0644); MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); EXPORT_SYMBOL_GPL(nvme_io_timeout); -- cgit v1.2.3 From 761f2e1ed8822cf12378c857de337290d0c82a81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 5 Oct 2017 18:46:46 +0200 Subject: nvme: simplify compat_ioctl handling We can just use our normal ioctl handler for the compat case and remove the boilerplate code for it. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy --- drivers/nvme/host/core.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 26c8913435b2..573cc3b59bfa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1052,16 +1052,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, } } -#ifdef CONFIG_COMPAT -static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - return nvme_ioctl(bdev, mode, cmd, arg); -} -#else -#define nvme_compat_ioctl NULL -#endif - static int nvme_open(struct block_device *bdev, fmode_t mode) { return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO; @@ -1380,7 +1370,7 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit); static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, - .compat_ioctl = nvme_compat_ioctl, + .compat_ioctl = nvme_ioctl, .open = nvme_open, .release = nvme_release, .getgeo = nvme_getgeo, -- cgit v1.2.3 From 31b8446079757575e576b0516f0e4c0fcdfbd3dd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 12:53:07 +0300 Subject: nvme: introduce nvme_reinit_tagset Move blk_mq_reinit_tagset from blk-mq to nvme core as the only user of it. Current transports that use it (rdma, fc) simply implement .reinit_request op. This patch does not change any functionality. Reviewed-by: Jens Axboe Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 10 ++++++++++ drivers/nvme/host/fc.c | 3 ++- drivers/nvme/host/nvme.h | 2 ++ drivers/nvme/host/rdma.c | 7 +++---- 4 files changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 573cc3b59bfa..c95155696741 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2934,6 +2934,16 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_start_queues); +int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set) +{ + if (!ctrl->ops->reinit_request) + return 0; + + return blk_mq_tagset_iter(set, set->driver_data, + ctrl->ops->reinit_request); +} +EXPORT_SYMBOL_GPL(nvme_reinit_tagset); + int __init nvme_core_init(void) { int result; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b8e0822127c1..372a0685b12a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2393,7 +2393,7 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) nvme_fc_init_io_queues(ctrl); - ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request); + ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); if (ret) goto out_free_io_queues; @@ -2753,6 +2753,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .submit_async_event = nvme_fc_submit_async_event, .delete_ctrl = nvme_fc_del_nvme_ctrl, .get_address = nvmf_get_address, + .reinit_request = nvme_fc_reinit_request, }; static void diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index df787f39f4c1..cb9d93048f3d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -237,6 +237,7 @@ struct nvme_ctrl_ops { void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); int (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); + int (*reinit_request)(void *data, struct request *rq); }; static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) @@ -311,6 +312,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); void nvme_start_freeze(struct nvme_ctrl *ctrl); +int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 92a03ff5fb4d..039f38cb6987 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -774,8 +774,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, goto out_free_tagset; } } else { - error = blk_mq_reinit_tagset(&ctrl->admin_tag_set, - nvme_rdma_reinit_request); + error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); if (error) goto out_free_queue; } @@ -855,8 +854,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) goto out_free_tag_set; } } else { - ret = blk_mq_reinit_tagset(&ctrl->tag_set, - nvme_rdma_reinit_request); + ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); if (ret) goto out_free_io_queues; @@ -1845,6 +1843,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_del_ctrl, .get_address = nvmf_get_address, + .reinit_request = nvme_rdma_reinit_request, }; static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, -- cgit v1.2.3 From 9843f685ae365d0c628e6a0d929772bca7274311 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 13:10:01 +0200 Subject: nvme: use ida_simple_{get,remove} for the controller instance Switch to the ida_simple_* helpers instead of opencoding them. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c95155696741..7fae42d595d5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -74,6 +74,8 @@ EXPORT_SYMBOL_GPL(nvme_wq); static LIST_HEAD(nvme_ctrl_list); static DEFINE_SPINLOCK(dev_list_lock); +static DEFINE_IDA(nvme_instance_ida); + static struct class *nvme_class; static __le32 nvme_get_log_dw10(u8 lid, size_t size) @@ -2696,35 +2698,6 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_queue_async_events); -static DEFINE_IDA(nvme_instance_ida); - -static int nvme_set_instance(struct nvme_ctrl *ctrl) -{ - int instance, error; - - do { - if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) - return -ENODEV; - - spin_lock(&dev_list_lock); - error = ida_get_new(&nvme_instance_ida, &instance); - spin_unlock(&dev_list_lock); - } while (error == -EAGAIN); - - if (error) - return -ENODEV; - - ctrl->instance = instance; - return 0; -} - -static void nvme_release_instance(struct nvme_ctrl *ctrl) -{ - spin_lock(&dev_list_lock); - ida_remove(&nvme_instance_ida, ctrl->instance); - spin_unlock(&dev_list_lock); -} - void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_stop_keep_alive(ctrl); @@ -2762,7 +2735,7 @@ static void nvme_free_ctrl(struct kref *kref) struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); put_device(ctrl->device); - nvme_release_instance(ctrl); + ida_simple_remove(&nvme_instance_ida, ctrl->instance); ida_destroy(&ctrl->ns_ida); ctrl->ops->free_ctrl(ctrl); @@ -2796,9 +2769,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); - ret = nvme_set_instance(ctrl); - if (ret) + ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); + if (ret < 0) goto out; + ctrl->instance = ret; ctrl->device = device_create_with_groups(nvme_class, ctrl->dev, MKDEV(nvme_char_major, ctrl->instance), @@ -2825,7 +2799,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, return 0; out_release_instance: - nvme_release_instance(ctrl); + ida_simple_remove(&nvme_instance_ida, ctrl->instance); out: return ret; } -- cgit v1.2.3 From 2dd4122854f697afc777582d18548dded03ce5dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 13:20:01 +0200 Subject: nvme: use kref_get_unless_zero in nvme_find_get_ns For kref_get_unless_zero to protect against lookup vs free races we need to use it in all places where we aren't guaranteed to already hold a reference. There is no such guarantee in nvme_find_get_ns, so switch to kref_get_unless_zero in this function. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7fae42d595d5..1d931deac83b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2290,7 +2290,8 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->ns_id == nsid) { - kref_get(&ns->kref); + if (!kref_get_unless_zero(&ns->kref)) + continue; ret = ns; break; } -- cgit v1.2.3 From c6424a90da446cff6c67be06767fc0d0be787110 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 13:22:00 +0200 Subject: nvme: simplify nvme_open Now that we are protected against lookup vs free races for the namespace by using kref_get_unless_zero we don't need the hack of NULLing out the disk private data during removal. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1d931deac83b..9f8ae15c9fe8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -253,12 +253,6 @@ static void nvme_free_ns(struct kref *kref) if (ns->ndev) nvme_nvm_unregister(ns); - if (ns->disk) { - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - } - put_disk(ns->disk); ida_simple_remove(&ns->ctrl->ns_ida, ns->instance); nvme_put_ctrl(ns->ctrl); @@ -270,29 +264,6 @@ static void nvme_put_ns(struct nvme_ns *ns) kref_put(&ns->kref, nvme_free_ns); } -static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk) -{ - struct nvme_ns *ns; - - spin_lock(&dev_list_lock); - ns = disk->private_data; - if (ns) { - if (!kref_get_unless_zero(&ns->kref)) - goto fail; - if (!try_module_get(ns->ctrl->ops->module)) - goto fail_put_ns; - } - spin_unlock(&dev_list_lock); - - return ns; - -fail_put_ns: - kref_put(&ns->kref, nvme_free_ns); -fail: - spin_unlock(&dev_list_lock); - return NULL; -} - struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid) { @@ -1056,7 +1027,16 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, static int nvme_open(struct block_device *bdev, fmode_t mode) { - return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO; + struct nvme_ns *ns = bdev->bd_disk->private_data; + + if (!kref_get_unless_zero(&ns->kref)) + return -ENXIO; + if (!try_module_get(ns->ctrl->ops->module)) { + kref_put(&ns->kref, nvme_free_ns); + return -ENXIO; + } + + return 0; } static void nvme_release(struct gendisk *disk, fmode_t mode) -- cgit v1.2.3 From d22524a4782a943bb02a9cf6885ac470210aabfc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 13:25:42 +0200 Subject: nvme: switch controller refcounting to use struct device Instead of allocating a separate struct device for the character device handle embedd it into struct nvme_ctrl and use it for the main controller refcounting. This removes double refcounting and gets us an automatic reference for the character device operations. We keep ctrl->device as a pointer for now to avoid chaning printks all over, but in the future we could look into message printing helpers that take a controller structure similar to what other subsystems do. Note the delete_ctrl operation always already has a reference (either through sysfs due this change, or because every open file on the /dev/nvme-fabrics node has a refernece) when it is entered now, so we don't need to do the unless_zero variant there. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke --- drivers/nvme/host/core.c | 43 ++++++++++++++++++++++--------------------- drivers/nvme/host/fc.c | 8 ++------ drivers/nvme/host/nvme.h | 12 +++++++++++- drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 5 ++--- drivers/nvme/target/loop.c | 2 +- 6 files changed, 39 insertions(+), 33 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9f8ae15c9fe8..3a97daa163f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1915,7 +1915,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file) ret = -EWOULDBLOCK; break; } - if (!kref_get_unless_zero(&ctrl->kref)) + if (!kobject_get_unless_zero(&ctrl->device->kobj)) break; file->private_data = ctrl; ret = 0; @@ -2374,7 +2374,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) list_add_tail(&ns->list, &ctrl->namespaces); mutex_unlock(&ctrl->namespaces_mutex); - kref_get(&ctrl->kref); + nvme_get_ctrl(ctrl); kfree(id); @@ -2703,7 +2703,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { - device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); + device_del(ctrl->device); spin_lock(&dev_list_lock); list_del(&ctrl->node); @@ -2711,23 +2711,17 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); -static void nvme_free_ctrl(struct kref *kref) +static void nvme_free_ctrl(struct device *dev) { - struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); + struct nvme_ctrl *ctrl = + container_of(dev, struct nvme_ctrl, ctrl_device); - put_device(ctrl->device); ida_simple_remove(&nvme_instance_ida, ctrl->instance); ida_destroy(&ctrl->ns_ida); ctrl->ops->free_ctrl(ctrl); } -void nvme_put_ctrl(struct nvme_ctrl *ctrl) -{ - kref_put(&ctrl->kref, nvme_free_ctrl); -} -EXPORT_SYMBOL_GPL(nvme_put_ctrl); - /* * Initialize a NVMe controller structures. This needs to be called during * earliest initialization so that we have the initialized structured around @@ -2742,7 +2736,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, spin_lock_init(&ctrl->lock); INIT_LIST_HEAD(&ctrl->namespaces); mutex_init(&ctrl->namespaces_mutex); - kref_init(&ctrl->kref); ctrl->dev = dev; ctrl->ops = ops; ctrl->quirks = quirks; @@ -2755,15 +2748,21 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, goto out; ctrl->instance = ret; - ctrl->device = device_create_with_groups(nvme_class, ctrl->dev, - MKDEV(nvme_char_major, ctrl->instance), - ctrl, nvme_dev_attr_groups, - "nvme%d", ctrl->instance); - if (IS_ERR(ctrl->device)) { - ret = PTR_ERR(ctrl->device); + device_initialize(&ctrl->ctrl_device); + ctrl->device = &ctrl->ctrl_device; + ctrl->device->devt = MKDEV(nvme_char_major, ctrl->instance); + ctrl->device->class = nvme_class; + ctrl->device->parent = ctrl->dev; + ctrl->device->groups = nvme_dev_attr_groups; + ctrl->device->release = nvme_free_ctrl; + dev_set_drvdata(ctrl->device, ctrl); + ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance); + if (ret) goto out_release_instance; - } - get_device(ctrl->device); + ret = device_add(ctrl->device); + if (ret) + goto out_free_name; + ida_init(&ctrl->ns_ida); spin_lock(&dev_list_lock); @@ -2779,6 +2778,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, min(default_ps_max_latency_us, (unsigned long)S32_MAX)); return 0; +out_free_name: + kfree_const(dev->kobj.name); out_release_instance: ida_simple_remove(&nvme_instance_ida, ctrl->instance); out: diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index c6c903f1b172..aa9aec6923bb 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2692,14 +2692,10 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); int ret; - if (!kref_get_unless_zero(&ctrl->ctrl.kref)) - return -EBUSY; - + nvme_get_ctrl(&ctrl->ctrl); ret = __nvme_fc_del_ctrl(ctrl); - if (!ret) flush_workqueue(nvme_wq); - nvme_put_ctrl(&ctrl->ctrl); return ret; @@ -2918,7 +2914,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, return ERR_PTR(ret); } - kref_get(&ctrl->ctrl.kref); + nvme_get_ctrl(&ctrl->ctrl); dev_info(ctrl->ctrl.device, "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index cb9d93048f3d..ae60d8342e60 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -127,12 +127,12 @@ struct nvme_ctrl { struct request_queue *admin_q; struct request_queue *connect_q; struct device *dev; - struct kref kref; int instance; struct blk_mq_tag_set *tagset; struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; struct mutex namespaces_mutex; + struct device ctrl_device; struct device *device; /* char device */ struct list_head node; struct ida ns_ida; @@ -279,6 +279,16 @@ static inline void nvme_end_request(struct request *req, __le16 status, blk_mq_complete_request(req); } +static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl) +{ + get_device(ctrl->device); +} + +static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl) +{ + put_device(ctrl->device); +} + void nvme_complete_rq(struct request *req); void nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 11e6fd9d0ba4..7735571ffc9a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2292,7 +2292,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) { dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status); - kref_get(&dev->ctrl.kref); + nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); if (!schedule_work(&dev->remove_work)) nvme_put_ctrl(&dev->ctrl); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4552744eff45..62b58f9b7d00 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1793,8 +1793,7 @@ static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) * Keep a reference until all work is flushed since * __nvme_rdma_del_ctrl can free the ctrl mem */ - if (!kref_get_unless_zero(&ctrl->ctrl.kref)) - return -EBUSY; + nvme_get_ctrl(&ctrl->ctrl); ret = __nvme_rdma_del_ctrl(ctrl); if (!ret) flush_work(&ctrl->delete_work); @@ -1955,7 +1954,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); - kref_get(&ctrl->ctrl.kref); + nvme_get_ctrl(&ctrl->ctrl); mutex_lock(&nvme_rdma_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index c56354e1e4c6..f83e925fe64a 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -642,7 +642,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn); - kref_get(&ctrl->ctrl.kref); + nvme_get_ctrl(&ctrl->ctrl); changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); -- cgit v1.2.3 From a6a5149b10ec8ab8b4a9479a8230265c1b573be0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 16:59:25 +0200 Subject: nvme: get rid of nvme_ctrl_list Use the core chrdev code to set up the link between the character device and the nvme controller. This allows us to get rid of the global list of all controllers, and also ensures that we have both a reference to the controller and the transport module before the open method of the character device is called. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke --- drivers/nvme/host/core.c | 76 ++++++++++-------------------------------------- drivers/nvme/host/nvme.h | 3 +- 2 files changed, 18 insertions(+), 61 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3a97daa163f6..a56a1e0432e7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -52,9 +52,6 @@ static u8 nvme_max_retries = 5; module_param_named(max_retries, nvme_max_retries, byte, 0644); MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); -static int nvme_char_major; -module_param(nvme_char_major, int, 0); - static unsigned long default_ps_max_latency_us = 100000; module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, @@ -71,11 +68,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); struct workqueue_struct *nvme_wq; EXPORT_SYMBOL_GPL(nvme_wq); -static LIST_HEAD(nvme_ctrl_list); -static DEFINE_SPINLOCK(dev_list_lock); - static DEFINE_IDA(nvme_instance_ida); - +static dev_t nvme_chr_devt; static struct class *nvme_class; static __le32 nvme_get_log_dw10(u8 lid, size_t size) @@ -1031,20 +1025,12 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) if (!kref_get_unless_zero(&ns->kref)) return -ENXIO; - if (!try_module_get(ns->ctrl->ops->module)) { - kref_put(&ns->kref, nvme_free_ns); - return -ENXIO; - } - return 0; } static void nvme_release(struct gendisk *disk, fmode_t mode) { - struct nvme_ns *ns = disk->private_data; - - module_put(ns->ctrl->ops->module); - nvme_put_ns(ns); + nvme_put_ns(disk->private_data); } static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) @@ -1902,33 +1888,12 @@ EXPORT_SYMBOL_GPL(nvme_init_identify); static int nvme_dev_open(struct inode *inode, struct file *file) { - struct nvme_ctrl *ctrl; - int instance = iminor(inode); - int ret = -ENODEV; - - spin_lock(&dev_list_lock); - list_for_each_entry(ctrl, &nvme_ctrl_list, node) { - if (ctrl->instance != instance) - continue; - - if (!ctrl->admin_q) { - ret = -EWOULDBLOCK; - break; - } - if (!kobject_get_unless_zero(&ctrl->device->kobj)) - break; - file->private_data = ctrl; - ret = 0; - break; - } - spin_unlock(&dev_list_lock); - - return ret; -} + struct nvme_ctrl *ctrl = + container_of(inode->i_cdev, struct nvme_ctrl, cdev); -static int nvme_dev_release(struct inode *inode, struct file *file) -{ - nvme_put_ctrl(file->private_data); + if (!ctrl->admin_q) + return -EWOULDBLOCK; + file->private_data = ctrl; return 0; } @@ -1992,7 +1957,6 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, static const struct file_operations nvme_dev_fops = { .owner = THIS_MODULE, .open = nvme_dev_open, - .release = nvme_dev_release, .unlocked_ioctl = nvme_dev_ioctl, .compat_ioctl = nvme_dev_ioctl, }; @@ -2703,11 +2667,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { - device_del(ctrl->device); - - spin_lock(&dev_list_lock); - list_del(&ctrl->node); - spin_unlock(&dev_list_lock); + cdev_device_del(&ctrl->cdev, ctrl->device); } EXPORT_SYMBOL_GPL(nvme_uninit_ctrl); @@ -2750,7 +2710,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, device_initialize(&ctrl->ctrl_device); ctrl->device = &ctrl->ctrl_device; - ctrl->device->devt = MKDEV(nvme_char_major, ctrl->instance); + ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance); ctrl->device->class = nvme_class; ctrl->device->parent = ctrl->dev; ctrl->device->groups = nvme_dev_attr_groups; @@ -2759,16 +2719,15 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance); if (ret) goto out_release_instance; - ret = device_add(ctrl->device); + + cdev_init(&ctrl->cdev, &nvme_dev_fops); + ctrl->cdev.owner = ops->module; + ret = cdev_device_add(&ctrl->cdev, ctrl->device); if (ret) goto out_free_name; ida_init(&ctrl->ns_ida); - spin_lock(&dev_list_lock); - list_add_tail(&ctrl->node, &nvme_ctrl_list); - spin_unlock(&dev_list_lock); - /* * Initialize latency tolerance controls. The sysfs files won't * be visible to userspace unless the device actually supports APST. @@ -2909,12 +2868,9 @@ int __init nvme_core_init(void) if (!nvme_wq) return -ENOMEM; - result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", - &nvme_dev_fops); + result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme"); if (result < 0) goto destroy_wq; - else if (result > 0) - nvme_char_major = result; nvme_class = class_create(THIS_MODULE, "nvme"); if (IS_ERR(nvme_class)) { @@ -2925,7 +2881,7 @@ int __init nvme_core_init(void) return 0; unregister_chrdev: - __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); + unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); destroy_wq: destroy_workqueue(nvme_wq); return result; @@ -2934,7 +2890,7 @@ destroy_wq: void nvme_core_exit(void) { class_destroy(nvme_class); - __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); + unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); destroy_workqueue(nvme_wq); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ae60d8342e60..1bb2bc165e54 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -15,6 +15,7 @@ #define _NVME_H #include +#include #include #include #include @@ -134,7 +135,7 @@ struct nvme_ctrl { struct mutex namespaces_mutex; struct device ctrl_device; struct device *device; /* char device */ - struct list_head node; + struct cdev cdev; struct ida ns_ida; struct work_struct reset_work; -- cgit v1.2.3 From 999ada28713d7bcf4a2c70cbab47b08cd95f2cf8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Oct 2017 17:09:31 +0200 Subject: nvme: check for a live controller in nvme_dev_open This is a much more sensible check than just the admin queue. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a56a1e0432e7..df525ab42fcd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1891,7 +1891,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file) struct nvme_ctrl *ctrl = container_of(inode->i_cdev, struct nvme_ctrl, cdev); - if (!ctrl->admin_q) + if (ctrl->state != NVME_CTRL_LIVE) return -EWOULDBLOCK; file->private_data = ctrl; return 0; -- cgit v1.2.3 From c5017e85705bfea721732e153305d1988ff965c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 29 Oct 2017 10:44:29 +0200 Subject: nvme: move controller deletion to common code Move the ->delete_work and the associated helpers to common code instead of duplicating them in every driver. This also adds the missing reference get/put for the loop driver. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 38 ++++++++++++++++++++++++++++++++++- drivers/nvme/host/fabrics.c | 2 +- drivers/nvme/host/fc.c | 42 +++++---------------------------------- drivers/nvme/host/nvme.h | 5 ++++- drivers/nvme/host/rdma.c | 45 ++++++------------------------------------ drivers/nvme/target/loop.c | 48 +++++++++------------------------------------ 6 files changed, 62 insertions(+), 118 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index df525ab42fcd..d835ac05bbf7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -97,6 +97,41 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) return ret; } +static void nvme_delete_ctrl_work(struct work_struct *work) +{ + struct nvme_ctrl *ctrl = + container_of(work, struct nvme_ctrl, delete_work); + + ctrl->ops->delete_ctrl(ctrl); +} + +int nvme_delete_ctrl(struct nvme_ctrl *ctrl) +{ + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) + return -EBUSY; + if (!queue_work(nvme_wq, &ctrl->delete_work)) + return -EBUSY; + return 0; +} +EXPORT_SYMBOL_GPL(nvme_delete_ctrl); + +int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) +{ + int ret = 0; + + /* + * Keep a reference until the work is flushed since ->delete_ctrl + * can free the controller. + */ + nvme_get_ctrl(ctrl); + ret = nvme_delete_ctrl(ctrl); + if (!ret) + flush_work(&ctrl->delete_work); + nvme_put_ctrl(ctrl); + return ret; +} +EXPORT_SYMBOL_GPL(nvme_delete_ctrl_sync); + static blk_status_t nvme_error_status(struct request *req) { switch (nvme_req(req)->status & 0x7ff) { @@ -2122,7 +2157,7 @@ static ssize_t nvme_sysfs_delete(struct device *dev, struct nvme_ctrl *ctrl = dev_get_drvdata(dev); if (device_remove_file_self(dev, attr)) - ctrl->ops->delete_ctrl(ctrl); + nvme_delete_ctrl_sync(ctrl); return count; } static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete); @@ -2702,6 +2737,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, INIT_WORK(&ctrl->scan_work, nvme_scan_work); INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); + INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work); ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL); if (ret < 0) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 4a83137b0268..a4967de5bb25 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -887,7 +887,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) "controller returned incorrect NQN: \"%s\".\n", ctrl->subnqn); up_read(&nvmf_transports_rwsem); - ctrl->ops->delete_ctrl(ctrl); + nvme_delete_ctrl_sync(ctrl); return ERR_PTR(-EINVAL); } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 827e9efbe556..a7bdb17de29d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -157,7 +157,6 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; - struct work_struct delete_work; struct delayed_work connect_work; struct kref ref; @@ -223,7 +222,6 @@ static struct device *fc_udev_device; /* *********************** FC-NVME Port Management ************************ */ -static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); @@ -662,7 +660,7 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) /* tear down all associations to the remote port */ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) - __nvme_fc_del_ctrl(ctrl); + nvme_delete_ctrl(&ctrl->ctrl); spin_unlock_irqrestore(&rport->lock, flags); @@ -2636,10 +2634,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) } static void -nvme_fc_delete_ctrl_work(struct work_struct *work) +nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, delete_work); + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); @@ -2663,34 +2660,6 @@ nvme_fc_delete_ctrl_work(struct work_struct *work) nvme_put_ctrl(&ctrl->ctrl); } -static int -__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) -{ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; - if (!queue_work(nvme_wq, &ctrl->delete_work)) - return -EBUSY; - return 0; -} - -/* - * Request from nvme core layer to delete the controller - */ -static int -nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) -{ - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - int ret; - - nvme_get_ctrl(&ctrl->ctrl); - ret = __nvme_fc_del_ctrl(ctrl); - if (!ret) - flush_work(&ctrl->delete_work); - nvme_put_ctrl(&ctrl->ctrl); - - return ret; -} - static void nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) { @@ -2716,7 +2685,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) "NVME-FC{%d}: Max reconnect attempts (%d) " "reached. Removing controller\n", ctrl->cnum, ctrl->ctrl.nr_reconnects); - WARN_ON(__nvme_fc_del_ctrl(ctrl)); + WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); } } @@ -2748,7 +2717,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .reg_write32 = nvmf_reg_write32, .free_ctrl = nvme_fc_nvme_ctrl_freed, .submit_async_event = nvme_fc_submit_async_event, - .delete_ctrl = nvme_fc_del_nvme_ctrl, + .delete_ctrl = nvme_fc_delete_ctrl, .get_address = nvmf_get_address, .reinit_request = nvme_fc_reinit_request, }; @@ -2851,7 +2820,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, get_device(ctrl->dev); kref_init(&ctrl->ref); - INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); spin_lock_init(&ctrl->lock); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1bb2bc165e54..35d9cee515f1 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -138,6 +138,7 @@ struct nvme_ctrl { struct cdev cdev; struct ida ns_ida; struct work_struct reset_work; + struct work_struct delete_work; struct opal_dev *opal_dev; @@ -236,7 +237,7 @@ struct nvme_ctrl_ops { int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); void (*free_ctrl)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); - int (*delete_ctrl)(struct nvme_ctrl *ctrl); + void (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); int (*reinit_request)(void *data, struct request *rq); }; @@ -339,6 +340,8 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); +int nvme_delete_ctrl(struct nvme_ctrl *ctrl); +int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); #ifdef CONFIG_NVM int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 32b0a9ef26e6..5175b465997d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -105,7 +105,6 @@ struct nvme_rdma_ctrl { /* other member variables */ struct blk_mq_tag_set tag_set; - struct work_struct delete_work; struct work_struct err_work; struct nvme_rdma_qe async_event_sqe; @@ -913,7 +912,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) ctrl->ctrl.opts->reconnect_delay * HZ); } else { dev_info(ctrl->ctrl.device, "Removing controller...\n"); - queue_work(nvme_wq, &ctrl->delete_work); + queue_work(nvme_wq, &ctrl->ctrl.delete_work); } } @@ -1764,41 +1763,10 @@ static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_put_ctrl(&ctrl->ctrl); } -static void nvme_rdma_del_ctrl_work(struct work_struct *work) +static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl) { - struct nvme_rdma_ctrl *ctrl = container_of(work, - struct nvme_rdma_ctrl, delete_work); - - nvme_stop_ctrl(&ctrl->ctrl); - nvme_rdma_remove_ctrl(ctrl); -} - -static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) -{ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; - - if (!queue_work(nvme_wq, &ctrl->delete_work)) - return -EBUSY; - - return 0; -} - -static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) -{ - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - int ret = 0; - - /* - * Keep a reference until all work is flushed since - * __nvme_rdma_del_ctrl can free the ctrl mem - */ - nvme_get_ctrl(&ctrl->ctrl); - ret = __nvme_rdma_del_ctrl(ctrl); - if (!ret) - flush_work(&ctrl->delete_work); - nvme_put_ctrl(&ctrl->ctrl); - return ret; + nvme_stop_ctrl(ctrl); + nvme_rdma_remove_ctrl(to_rdma_ctrl(ctrl)); } static void nvme_rdma_reset_ctrl_work(struct work_struct *work) @@ -1846,7 +1814,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .reg_write32 = nvmf_reg_write32, .free_ctrl = nvme_rdma_free_ctrl, .submit_async_event = nvme_rdma_submit_async_event, - .delete_ctrl = nvme_rdma_del_ctrl, + .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, .reinit_request = nvme_rdma_reinit_request, }; @@ -1977,7 +1945,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, INIT_DELAYED_WORK(&ctrl->reconnect_work, nvme_rdma_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); - INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work); ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ @@ -2081,7 +2048,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) dev_info(ctrl->ctrl.device, "Removing ctrl: NQN \"%s\", addr %pISp\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); - __nvme_rdma_del_ctrl(ctrl); + nvme_delete_ctrl(&ctrl->ctrl); } mutex_unlock(&nvme_rdma_ctrl_mutex); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index f83e925fe64a..7f9f3fc3fb2a 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -53,7 +53,6 @@ struct nvme_loop_ctrl { struct nvme_ctrl ctrl; struct nvmet_ctrl *target_ctrl; - struct work_struct delete_work; }; static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) @@ -439,41 +438,13 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) nvme_loop_destroy_admin_queue(ctrl); } -static void nvme_loop_del_ctrl_work(struct work_struct *work) +static void nvme_loop_delete_ctrl_host(struct nvme_ctrl *ctrl) { - struct nvme_loop_ctrl *ctrl = container_of(work, - struct nvme_loop_ctrl, delete_work); - - nvme_stop_ctrl(&ctrl->ctrl); - nvme_remove_namespaces(&ctrl->ctrl); - nvme_loop_shutdown_ctrl(ctrl); - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); -} - -static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl) -{ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; - - if (!queue_work(nvme_wq, &ctrl->delete_work)) - return -EBUSY; - - return 0; -} - -static int nvme_loop_del_ctrl(struct nvme_ctrl *nctrl) -{ - struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl); - int ret; - - ret = __nvme_loop_del_ctrl(ctrl); - if (ret) - return ret; - - flush_work(&ctrl->delete_work); - - return 0; + nvme_stop_ctrl(ctrl); + nvme_remove_namespaces(ctrl); + nvme_loop_shutdown_ctrl(to_loop_ctrl(ctrl)); + nvme_uninit_ctrl(ctrl); + nvme_put_ctrl(ctrl); } static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl) @@ -483,7 +454,7 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl) mutex_lock(&nvme_loop_ctrl_mutex); list_for_each_entry(ctrl, &nvme_loop_ctrl_list, list) { if (ctrl->ctrl.cntlid == nctrl->cntlid) - __nvme_loop_del_ctrl(ctrl); + nvme_delete_ctrl(&ctrl->ctrl); } mutex_unlock(&nvme_loop_ctrl_mutex); } @@ -539,7 +510,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { .reg_write32 = nvmf_reg_write32, .free_ctrl = nvme_loop_free_ctrl, .submit_async_event = nvme_loop_submit_async_event, - .delete_ctrl = nvme_loop_del_ctrl, + .delete_ctrl = nvme_loop_delete_ctrl_host, }; static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) @@ -601,7 +572,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work); ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops, @@ -731,7 +701,7 @@ static void __exit nvme_loop_cleanup_module(void) mutex_lock(&nvme_loop_ctrl_mutex); list_for_each_entry_safe(ctrl, next, &nvme_loop_ctrl_list, list) - __nvme_loop_del_ctrl(ctrl); + nvme_delete_ctrl(&ctrl->ctrl); mutex_unlock(&nvme_loop_ctrl_mutex); flush_workqueue(nvme_wq); -- cgit v1.2.3 From 6cd53d14aaa006b5543f06fbf5e1680ce61c6c6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 29 Oct 2017 10:44:31 +0200 Subject: nvme: consolidate common code from ->reset_work No change in behavior except that the FC code cancels two work items a little later now. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++++ drivers/nvme/host/fc.c | 13 ------------- drivers/nvme/host/rdma.c | 4 ---- drivers/nvme/target/loop.c | 4 ---- 4 files changed, 4 insertions(+), 21 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d835ac05bbf7..4fa748c9a3f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -102,7 +102,11 @@ static void nvme_delete_ctrl_work(struct work_struct *work) struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, delete_work); + nvme_stop_ctrl(ctrl); + nvme_remove_namespaces(ctrl); ctrl->ops->delete_ctrl(ctrl); + nvme_uninit_ctrl(ctrl); + nvme_put_ctrl(ctrl); } int nvme_delete_ctrl(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a7bdb17de29d..e447b532b9ee 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2640,24 +2640,11 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); - nvme_stop_ctrl(&ctrl->ctrl); - nvme_remove_namespaces(&ctrl->ctrl); /* * kill the association on the link side. this will block * waiting for io to terminate */ nvme_fc_delete_association(ctrl); - - /* - * tear down the controller - * After the last reference on the nvme ctrl is removed, - * the transport nvme_fc_nvme_ctrl_freed() callback will be - * invoked. From there, the transport will tear down it's - * logical queues and association. - */ - nvme_uninit_ctrl(&ctrl->ctrl); - - nvme_put_ctrl(&ctrl->ctrl); } static void diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a3521b852ea8..ed6e05018a92 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1757,11 +1757,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl) { - nvme_stop_ctrl(ctrl); - nvme_remove_namespaces(ctrl); nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true); - nvme_uninit_ctrl(ctrl); - nvme_put_ctrl(ctrl); } static void nvme_rdma_reset_ctrl_work(struct work_struct *work) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 7f9f3fc3fb2a..bc95c6ed531a 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -440,11 +440,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) static void nvme_loop_delete_ctrl_host(struct nvme_ctrl *ctrl) { - nvme_stop_ctrl(ctrl); - nvme_remove_namespaces(ctrl); nvme_loop_shutdown_ctrl(to_loop_ctrl(ctrl)); - nvme_uninit_ctrl(ctrl); - nvme_put_ctrl(ctrl); } static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl) -- cgit v1.2.3 From 4054637c9b4fbe9feef0cf6f2516ef00d8053560 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 29 Oct 2017 14:21:02 +0200 Subject: nvme: flush reset_work before safely continuing with delete operation Prevent racing controller reset and delete flows. reset_work must not ever self-requeue so flushing it suffices. Reported-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/fc.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4fa748c9a3f6..003314eb6341 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -102,6 +102,7 @@ static void nvme_delete_ctrl_work(struct work_struct *work) struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, delete_work); + flush_work(&ctrl->reset_work); nvme_stop_ctrl(ctrl); nvme_remove_namespaces(ctrl); ctrl->ops->delete_ctrl(ctrl); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e447b532b9ee..6a025a8d8c32 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2638,7 +2638,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. this will block -- cgit v1.2.3 From 3cec7f9de448131778a1428100621fd371db75f4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 25 Oct 2017 16:43:13 -0700 Subject: nvme: allow controller RESETTING to RECONNECTING transition Transport will typically transition from LIVE to RESETTING when initially performing a reset or recovering from an error. Adding this transition allows a transport to transition to RECONNECTING when it checks/waits for connectivity then creates new transport connections and reinits the controller. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 003314eb6341..07b9f4e1c283 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -241,6 +241,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_RECONNECTING: switch (old_state) { case NVME_CTRL_LIVE: + case NVME_CTRL_RESETTING: changed = true; /* FALLTHRU */ default: -- cgit v1.2.3 From a806c6c81e6c0d07c8a8b2643bad4a37a196d681 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 2 Nov 2017 18:07:44 +0900 Subject: nvme: comment typo fixed in clearing AER a tiny typo fixed in a comment. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 07b9f4e1c283..64744355aa88 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2637,7 +2637,7 @@ static void nvme_fw_act_work(struct work_struct *work) return; nvme_start_queues(ctrl); - /* read FW slot informationi to clear the AER*/ + /* read FW slot information to clear the AER */ nvme_get_fw_slot_info(ctrl); } -- cgit v1.2.3 From e54b064cb24c8268252336ccdd6523e08c0dcbde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:28:51 +0300 Subject: nvme: move the dying queue check from cancel to completion With multipath we don't want a hard DNR bit on a request that is cancelled by a controller reset, but instead want to be able to retry it on another patch. To archive this don't always set the DNR bit when the queue is dying in nvme_cancel_request, but defer that decision to nvme_req_needs_retry. Note that it applies to any command there and not just cancelled commands, but one the queue is dying that is the right thing to do anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 64744355aa88..a9c7ad304ec5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -172,6 +172,8 @@ static inline bool nvme_req_needs_retry(struct request *req) return false; if (nvme_req(req)->retries >= nvme_max_retries) return false; + if (blk_queue_dying(req->q)) + return false; return true; } @@ -189,18 +191,13 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq); void nvme_cancel_request(struct request *req, void *data, bool reserved) { - int status; - if (!blk_mq_request_started(req)) return; dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); - status = NVME_SC_ABORT_REQ; - if (blk_queue_dying(req->q)) - status |= NVME_SC_DNR; - nvme_req(req)->status = status; + nvme_req(req)->status = NVME_SC_ABORT_REQ; blk_mq_complete_request(req); } -- cgit v1.2.3 From b5be3b392998a5f3763dfbdb1866f81d8ee631c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:28:52 +0300 Subject: nvme: always unregister the integrity profile in __nvme_revalidate_disk This is safe because the queue is always frozen when we revalidate, and it simplifies both the existing code as well as the multipath implementation. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a9c7ad304ec5..da5357c66a0f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1081,29 +1081,6 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id, - u16 bs) -{ - struct nvme_ns *ns = disk->private_data; - u16 old_ms = ns->ms; - u8 pi_type = 0; - - ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); - - /* PI implementation requires metadata equal t10 pi tuple size */ - if (ns->ms == sizeof(struct t10_pi_tuple)) - pi_type = id->dps & NVME_NS_DPS_PI_MASK; - - if (blk_get_integrity(disk) && - (ns->pi_type != pi_type || ns->ms != old_ms || - bs != queue_logical_block_size(disk->queue) || - (ns->ms && ns->ext))) - blk_integrity_unregister(disk); - - ns->pi_type = pi_type; -} - static void nvme_init_integrity(struct nvme_ns *ns) { struct blk_integrity integrity; @@ -1130,10 +1107,6 @@ static void nvme_init_integrity(struct nvme_ns *ns) blk_queue_max_integrity_segments(ns->queue, 1); } #else -static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id, - u16 bs) -{ -} static void nvme_init_integrity(struct nvme_ns *ns) { } @@ -1202,15 +1175,22 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->lba_shift = 9; bs = 1 << ns->lba_shift; ns->noiob = le16_to_cpu(id->noiob); + ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); + ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); + /* the PI implementation requires metadata equal t10 pi tuple size */ + if (ns->ms == sizeof(struct t10_pi_tuple)) + ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; + else + ns->pi_type = 0; blk_mq_freeze_queue(disk->queue); + blk_integrity_unregister(disk); - if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) - nvme_prep_integrity(disk, id, bs); blk_queue_logical_block_size(ns->queue, bs); if (ns->noiob) nvme_set_chunk_size(ns); - if (ns->ms && !blk_get_integrity(disk) && !ns->ext) + if (ns->ms && !ns->ext && + (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) nvme_init_integrity(ns); if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) set_capacity(disk, 0); -- cgit v1.2.3 From 39b7baa410fd00a0fe9da864400d95b3d5008de7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:28:53 +0300 Subject: nvme: don't pass struct nvme_ns to nvme_init_integrity To allow reusing this function for the multipath node. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index da5357c66a0f..88d886b390f4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1081,12 +1081,12 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct nvme_ns *ns) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) { struct blk_integrity integrity; memset(&integrity, 0, sizeof(integrity)); - switch (ns->pi_type) { + switch (pi_type) { case NVME_NS_DPS_PI_TYPE3: integrity.profile = &t10_pi_type3_crc; integrity.tag_size = sizeof(u16) + sizeof(u32); @@ -1102,12 +1102,12 @@ static void nvme_init_integrity(struct nvme_ns *ns) integrity.profile = NULL; break; } - integrity.tuple_size = ns->ms; - blk_integrity_register(ns->disk, &integrity); - blk_queue_max_integrity_segments(ns->queue, 1); + integrity.tuple_size = ms; + blk_integrity_register(disk, &integrity); + blk_queue_max_integrity_segments(disk->queue, 1); } #else -static void nvme_init_integrity(struct nvme_ns *ns) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -1191,7 +1191,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) nvme_set_chunk_size(ns); if (ns->ms && !ns->ext && (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - nvme_init_integrity(ns); + nvme_init_integrity(disk, ns->ms, ns->pi_type); if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) set_capacity(disk, 0); else -- cgit v1.2.3 From 30e5e929c7bfaf87a65f926715773ef14339b79f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:28:54 +0300 Subject: nvme: don't pass struct nvme_ns to nvme_config_discard To allow reusing this function for the multipath node. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 88d886b390f4..551ec5df6a97 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1118,29 +1118,26 @@ static void nvme_set_chunk_size(struct nvme_ns *ns) blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); } -static void nvme_config_discard(struct nvme_ns *ns) +static void nvme_config_discard(struct nvme_ctrl *ctrl, + unsigned stream_alignment, struct request_queue *queue) { - struct nvme_ctrl *ctrl = ns->ctrl; - u32 logical_block_size = queue_logical_block_size(ns->queue); + u32 size = queue_logical_block_size(queue); + + if (stream_alignment) + size *= stream_alignment; BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - if (ctrl->nr_streams && ns->sws && ns->sgs) { - unsigned int sz = logical_block_size * ns->sws * ns->sgs; + queue->limits.discard_alignment = size; + queue->limits.discard_granularity = size; - ns->queue->limits.discard_alignment = sz; - ns->queue->limits.discard_granularity = sz; - } else { - ns->queue->limits.discard_alignment = logical_block_size; - ns->queue->limits.discard_granularity = logical_block_size; - } - blk_queue_max_discard_sectors(ns->queue, UINT_MAX); - blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); + blk_queue_max_discard_sectors(queue, UINT_MAX); + blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) - blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX); + blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, @@ -1164,6 +1161,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; struct nvme_ctrl *ctrl = ns->ctrl; + unsigned stream_alignment = 0; u16 bs; /* @@ -1183,6 +1181,9 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) else ns->pi_type = 0; + if (ctrl->nr_streams && ns->sws && ns->sgs) + stream_alignment = ns->sws * ns->sgs; + blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); @@ -1198,7 +1199,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); if (ctrl->oncs & NVME_CTRL_ONCS_DSM) - nvme_config_discard(ns); + nvme_config_discard(ctrl, stream_alignment, disk->queue); blk_mq_unfreeze_queue(disk->queue); } -- cgit v1.2.3 From 6e78f21ae4488cdab4318f781de05c8ffc4de951 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:28:55 +0300 Subject: nvme: set the chunk size before freezing the queue We don't need a frozen queue to update the chunk_size, which just is a hint, and moving it a little earlier will allow for some better code reuse with the multipath code. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 551ec5df6a97..a4be843334ba 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1184,12 +1184,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ctrl->nr_streams && ns->sws && ns->sgs) stream_alignment = ns->sws * ns->sgs; + if (ns->noiob) + nvme_set_chunk_size(ns); + blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); blk_queue_logical_block_size(ns->queue, bs); - if (ns->noiob) - nvme_set_chunk_size(ns); if (ns->ms && !ns->ext && (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) nvme_init_integrity(disk, ns->ms, ns->pi_type); -- cgit v1.2.3 From 24b0b58c5b4a9f6f4bbd1c1efd5afb9a565da3ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:28:56 +0300 Subject: nvme: split __nvme_revalidate_disk Split out the code that applies the calculate value to a given disk/queue into new helper that can be reused by the multipath code. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 49 +++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a4be843334ba..28d58d353a42 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1157,12 +1157,34 @@ static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, } } +static void nvme_update_disk_info(struct gendisk *disk, + struct nvme_ns *ns, struct nvme_id_ns *id) +{ + sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9); + unsigned stream_alignment = 0; + + if (ns->ctrl->nr_streams && ns->sws && ns->sgs) + stream_alignment = ns->sws * ns->sgs; + + blk_mq_freeze_queue(disk->queue); + blk_integrity_unregister(disk); + + blk_queue_logical_block_size(disk->queue, 1 << ns->lba_shift); + if (ns->ms && !ns->ext && + (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) + nvme_init_integrity(disk, ns->ms, ns->pi_type); + if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) + capacity = 0; + set_capacity(disk, capacity); + + if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) + nvme_config_discard(ns->ctrl, stream_alignment, disk->queue); + blk_mq_unfreeze_queue(disk->queue); +} + static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; - struct nvme_ctrl *ctrl = ns->ctrl; - unsigned stream_alignment = 0; - u16 bs; /* * If identify namespace failed, use default 512 byte block size so @@ -1171,7 +1193,6 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; - bs = 1 << ns->lba_shift; ns->noiob = le16_to_cpu(id->noiob); ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); @@ -1181,27 +1202,9 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) else ns->pi_type = 0; - if (ctrl->nr_streams && ns->sws && ns->sgs) - stream_alignment = ns->sws * ns->sgs; - if (ns->noiob) nvme_set_chunk_size(ns); - - blk_mq_freeze_queue(disk->queue); - blk_integrity_unregister(disk); - - blk_queue_logical_block_size(ns->queue, bs); - if (ns->ms && !ns->ext && - (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns->ms, ns->pi_type); - if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) - set_capacity(disk, 0); - else - set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); - - if (ctrl->oncs & NVME_CTRL_ONCS_DSM) - nvme_config_discard(ctrl, stream_alignment, disk->queue); - blk_mq_unfreeze_queue(disk->queue); + nvme_update_disk_info(disk, ns, id); } static int nvme_revalidate_disk(struct gendisk *disk) -- cgit v1.2.3 From 715ea9e09dc81f18cad5ed64800f49b1c4c444df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Nov 2017 17:27:34 +0100 Subject: nvme: fix and clarify the check for missing metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the check in nvme_setup_rw for missing metadata so that it is together with the other metadata handling, does not contain impossible to reach conditions and warns if we get an impossible requests for a (non-PI) metadata-enabled namespace when CONFIG_BLK_DEV_INTEGRITY is not set. Also add a little helper that checks if a given metadata configuration contains protection information Signed-off-by: Christoph Hellwig Reported-by: Javier González Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 28d58d353a42..3aabaa103e10 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -137,6 +137,11 @@ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_delete_ctrl_sync); +static inline bool nvme_ns_has_pi(struct nvme_ns *ns) +{ + return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); +} + static blk_status_t nvme_error_status(struct request *req) { switch (nvme_req(req)->status & 0x7ff) { @@ -472,16 +477,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, u16 control = 0; u32 dsmgmt = 0; - /* - * If formated with metadata, require the block layer provide a buffer - * unless this namespace is formated such that the metadata can be - * stripped/generated by the controller with PRACT=1. - */ - if (ns && ns->ms && - (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) && - !blk_integrity_rq(req) && !blk_rq_is_passthrough(req)) - return BLK_STS_NOTSUPP; - if (req->cmd_flags & REQ_FUA) control |= NVME_RW_FUA; if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) @@ -500,6 +495,18 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, nvme_assign_write_stream(ctrl, req, &control, &dsmgmt); if (ns->ms) { + /* + * If formated with metadata, the block layer always provides a + * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled. Else + * we enable the PRACT bit for protection information or set the + * namespace capacity to zero to prevent any I/O. + */ + if (!blk_integrity_rq(req)) { + if (WARN_ON_ONCE(!nvme_ns_has_pi(ns))) + return BLK_STS_NOTSUPP; + control |= NVME_RW_PRINFO_PRACT; + } + switch (ns->pi_type) { case NVME_NS_DPS_PI_TYPE3: control |= NVME_RW_PRINFO_PRCHK_GUARD; @@ -512,8 +519,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, nvme_block_nr(ns, blk_rq_pos(req))); break; } - if (!blk_integrity_rq(req)) - control |= NVME_RW_PRINFO_PRACT; } cmnd->rw.control = cpu_to_le16(control); @@ -1173,7 +1178,7 @@ static void nvme_update_disk_info(struct gendisk *disk, if (ns->ms && !ns->ext && (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) nvme_init_integrity(disk, ns->ms, ns->pi_type); - if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) + if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) capacity = 0; set_capacity(disk, capacity); -- cgit v1.2.3 From c627c487ec727350c6aaa1cf217b666099e83ee2 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Nov 2017 10:28:31 -0700 Subject: nvme: factor get log into a helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And fix the warning on a successful firmware log. Reviewed-by: Javier González Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3aabaa103e10..90b6375a9da5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1747,6 +1747,18 @@ static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off); } +static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log, + size_t size) +{ + struct nvme_command c = { }; + + c.common.opcode = nvme_admin_get_log_page; + c.common.nsid = cpu_to_le32(NVME_NSID_ALL); + c.common.cdw10[0] = nvme_get_log_dw10(log_page, size); + + return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); +} + /* * Initialize the cached copies of the Identify data and various controller * register in our nvme_ctrl structure. This should be called as soon as @@ -2579,18 +2591,13 @@ static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) { - struct nvme_command c = { }; struct nvme_fw_slot_info_log *log; log = kmalloc(sizeof(*log), GFP_KERNEL); if (!log) return; - c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(NVME_NSID_ALL); - c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log)); - - if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log))) + if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log))) dev_warn(ctrl->device, "Get FW SLOT INFO log error\n"); kfree(log); -- cgit v1.2.3 From 84fef62d135b6e47b52f4e9280b5dbc5bb0050ba Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Nov 2017 10:28:32 -0700 Subject: nvme: check admin passthru command effects The NVMe standard provides a command effects log page so the host may be aware of special requirements it may need to do for a particular command. For example, the command may need to run with IO quiesced to prevent timeouts or undefined behavior, or it may change the logical block formats that determine how the host needs to construct future commands. This patch saves the nvme command effects log page if the controller supports it, and performs appropriate actions before and after an admin passthrough command is completed. If the controller does not support the command effects log page, the driver will define the effects for known opcodes. The nvme format and santize are the only commands in this patch with known effects. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 1 + include/linux/nvme.h | 18 ++++++++ 3 files changed, 126 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 90b6375a9da5..65fd2fc1ae3c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -72,6 +72,9 @@ static DEFINE_IDA(nvme_instance_ida); static dev_t nvme_chr_devt; static struct class *nvme_class; +static void nvme_ns_remove(struct nvme_ns *ns); +static int nvme_revalidate_disk(struct gendisk *disk); + static __le32 nvme_get_log_dw10(u8 lid, size_t size) { return cpu_to_le32((((size / 4) - 1) << 16) | lid); @@ -992,12 +995,87 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) metadata, meta_len, io.slba, NULL, 0); } +static u32 nvme_known_admin_effects(u8 opcode) +{ + switch (opcode) { + case nvme_admin_format_nvm: + return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | + NVME_CMD_EFFECTS_CSE_MASK; + case nvme_admin_sanitize_nvm: + return NVME_CMD_EFFECTS_CSE_MASK; + default: + break; + } + return 0; +} + +static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u8 opcode) +{ + u32 effects = 0; + + if (ns) { + if (ctrl->effects) + effects = le32_to_cpu(ctrl->effects->iocs[opcode]); + if (effects & ~NVME_CMD_EFFECTS_CSUPP) + dev_warn(ctrl->device, + "IO command:%02x has unhandled effects:%08x\n", + opcode, effects); + return 0; + } + + if (ctrl->effects) + effects = le32_to_cpu(ctrl->effects->iocs[opcode]); + else + effects = nvme_known_admin_effects(opcode); + + /* + * For simplicity, IO to all namespaces is quiesced even if the command + * effects say only one namespace is affected. + */ + if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + nvme_start_freeze(ctrl); + nvme_wait_freeze(ctrl); + } + return effects; +} + +static void nvme_update_formats(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) { + if (ns->disk && nvme_revalidate_disk(ns->disk)) + nvme_ns_remove(ns); + } + mutex_unlock(&ctrl->namespaces_mutex); +} + +static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) +{ + /* + * Revalidate LBA changes prior to unfreezing. This is necessary to + * prevent memory corruption if a logical block size was changed by + * this command. + */ + if (effects & NVME_CMD_EFFECTS_LBCC) + nvme_update_formats(ctrl); + if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) + nvme_unfreeze(ctrl); + if (effects & NVME_CMD_EFFECTS_CCC) + nvme_init_identify(ctrl); + if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) + nvme_queue_scan(ctrl); +} + static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_passthru_cmd __user *ucmd) { struct nvme_passthru_cmd cmd; struct nvme_command c; unsigned timeout = 0; + u32 effects; int status; if (!capable(CAP_SYS_ADMIN)) @@ -1023,10 +1101,13 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); + effects = nvme_passthru_start(ctrl, ns, cmd.opcode); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, (void __user *)(uintptr_t)cmd.metadata, cmd.metadata, 0, &cmd.result, timeout); + nvme_passthru_end(ctrl, effects); + if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) return -EFAULT; @@ -1759,6 +1840,25 @@ static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log, return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); } +static int nvme_get_effects_log(struct nvme_ctrl *ctrl) +{ + int ret; + + if (!ctrl->effects) + ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL); + + if (!ctrl->effects) + return 0; + + ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects, + sizeof(*ctrl->effects)); + if (ret) { + kfree(ctrl->effects); + ctrl->effects = NULL; + } + return ret; +} + /* * Initialize the cached copies of the Identify data and various controller * register in our nvme_ctrl structure. This should be called as soon as @@ -1794,6 +1894,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) return -EIO; } + if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) { + ret = nvme_get_effects_log(ctrl); + if (ret < 0) + return ret; + } + nvme_init_subnqn(ctrl, id); if (!ctrl->identified) { @@ -2713,6 +2819,7 @@ static void nvme_free_ctrl(struct device *dev) ida_simple_remove(&nvme_instance_ida, ctrl->instance); ida_destroy(&ctrl->ns_ida); + kfree(ctrl->effects); ctrl->ops->free_ctrl(ctrl); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 35d9cee515f1..f7c21cea9485 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -174,6 +174,7 @@ struct nvme_ctrl { bool subsystem; unsigned long quirks; struct nvme_id_power_state psd[32]; + struct nvme_effects_log *effects; struct work_struct scan_work; struct work_struct async_event_work; struct delayed_work ka_work; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9310ce77d8e1..fd1d4508a612 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -267,6 +267,7 @@ enum { NVME_CTRL_OACS_SEC_SUPP = 1 << 0, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, + NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, }; struct nvme_lbaf { @@ -395,6 +396,21 @@ struct nvme_fw_slot_info_log { __u8 rsvd64[448]; }; +enum { + NVME_CMD_EFFECTS_CSUPP = 1 << 0, + NVME_CMD_EFFECTS_LBCC = 1 << 1, + NVME_CMD_EFFECTS_NCC = 1 << 2, + NVME_CMD_EFFECTS_NIC = 1 << 3, + NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSE_MASK = 3 << 16, +}; + +struct nvme_effects_log { + __le32 acs[256]; + __le32 iocs[256]; + __u8 resv[2048]; +}; + enum { NVME_SMART_CRIT_SPARE = 1 << 0, NVME_SMART_CRIT_TEMPERATURE = 1 << 1, @@ -681,6 +697,7 @@ enum nvme_admin_opcode { nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, nvme_admin_security_recv = 0x82, + nvme_admin_sanitize_nvm = 0x84, }; enum { @@ -712,6 +729,7 @@ enum { NVME_LOG_ERROR = 0x01, NVME_LOG_SMART = 0x02, NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_CMD_EFFECTS = 0x05, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), -- cgit v1.2.3 From a47619b5c6ead6c2e10b8114a5e9291a1679a7c3 Mon Sep 17 00:00:00 2001 From: Javier González Date: Wed, 8 Nov 2017 10:59:03 +0100 Subject: nvme: compare NQN string with right size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copy subnqns using NVMF_NQN_SIZE as it is < 256 Signed-off-by: Javier González Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 65fd2fc1ae3c..8a60cc786220 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1810,7 +1810,7 @@ static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strcpy(ctrl->subnqn, id->subnqn); + strncpy(ctrl->subnqn, id->subnqn, NVMF_NQN_SIZE); return; } -- cgit v1.2.3 From ab083b11f6f43af071f9b507e48d212bfbb2beb3 Mon Sep 17 00:00:00 2001 From: Javier González Date: Wed, 8 Nov 2017 10:59:04 +0100 Subject: nvme: fix eui_show() print format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix print formatting, but keep the original output to prevent user breakage as suggested by Joe Perches. Signed-off-by: Javier González Reviewed-by: Sagi Grimberg Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8a60cc786220..6c79b73fe2c3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2190,7 +2190,7 @@ static ssize_t eui_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%8phd\n", ns->eui); + return sprintf(buf, "%8ph\n", ns->eui); } static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL); -- cgit v1.2.3 From 38dabe210fbab4e7e8a03670ab3ba42f247ea08f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Nov 2017 15:13:10 -0700 Subject: nvme: centralize AEN defines All the transports were unnecessarilly duplicating the AEN request accounting. This patch defines everything in one place. Signed-off-by: Keith Busch Reviewed-by: Guan Junxiong Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/fc.c | 33 ++++++++++++--------------------- drivers/nvme/host/nvme.h | 1 - drivers/nvme/host/pci.c | 16 +++------------- drivers/nvme/host/rdma.c | 14 +++----------- drivers/nvme/target/loop.c | 14 +++----------- include/linux/nvme.h | 8 ++++++++ 7 files changed, 30 insertions(+), 58 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6c79b73fe2c3..315087dfcd8d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2779,7 +2779,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event); void nvme_queue_async_events(struct nvme_ctrl *ctrl) { - ctrl->event_limit = NVME_NR_AERS; + ctrl->event_limit = NVME_NR_AEN_COMMANDS; queue_work(nvme_wq, &ctrl->async_event_work); } EXPORT_SYMBOL_GPL(nvme_queue_async_events); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index da9296a1eb26..f3bfad47a95f 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -30,15 +30,6 @@ /* *************************** Data Structures/Defines ****************** */ -/* - * We handle AEN commands ourselves and don't even let the - * block layer know about them. - */ -#define NVME_FC_NR_AEN_COMMANDS 1 -#define NVME_FC_AQ_BLKMQ_DEPTH \ - (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS) -#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1) - enum nvme_fc_queue_flags { NVME_FC_Q_CONNECTED = (1 << 0), }; @@ -170,7 +161,7 @@ struct nvme_fc_ctrl { u32 iocnt; wait_queue_head_t ioabort_wait; - struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; + struct nvme_fc_fcp_op aen_ops[NVME_NR_AEN_COMMANDS]; struct nvme_ctrl ctrl; }; @@ -1546,7 +1537,7 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) unsigned long flags; int i, ret; - for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) continue; @@ -1816,7 +1807,7 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) int i, ret; aen_op = ctrl->aen_ops; - for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, GFP_KERNEL); if (!private) @@ -1826,7 +1817,7 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) sqe = &cmdiu->sqe; ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], aen_op, (struct request *)NULL, - (AEN_CMDID_BASE + i)); + (NVME_AQ_BLK_MQ_DEPTH + i)); if (ret) { kfree(private); return ret; @@ -1839,7 +1830,7 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) memset(sqe, 0, sizeof(*sqe)); sqe->common.opcode = nvme_admin_async_event; /* Note: core layer may overwrite the sqe.command_id value */ - sqe->common.command_id = AEN_CMDID_BASE + i; + sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i; } return 0; } @@ -1851,7 +1842,7 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) int i; aen_op = ctrl->aen_ops; - for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { if (!aen_op->fcp_req.private) continue; @@ -2402,7 +2393,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) bool terminating = false; blk_status_t ret; - if (aer_idx > NVME_FC_NR_AEN_COMMANDS) + if (aer_idx > NVME_NR_AEN_COMMANDS) return; spin_lock_irqsave(&ctrl->lock, flags); @@ -2722,16 +2713,16 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) * Create the admin queue */ - nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + nvme_fc_init_queue(ctrl, 0, NVME_AQ_BLK_MQ_DEPTH); ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, - NVME_FC_AQ_BLKMQ_DEPTH); + NVME_AQ_BLK_MQ_DEPTH); if (ret) goto out_free_queue; ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], - NVME_FC_AQ_BLKMQ_DEPTH, - (NVME_FC_AQ_BLKMQ_DEPTH / 4)); + NVME_AQ_BLK_MQ_DEPTH, + (NVME_AQ_BLK_MQ_DEPTH / 4)); if (ret) goto out_delete_hw_queue; @@ -3145,7 +3136,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; + ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f7c21cea9485..a6d750cfa6b2 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -313,7 +313,6 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl); int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send); -#define NVME_NR_AERS 1 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, union nvme_result *res); void nvme_queue_async_events(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 32c413ec818c..c3dfd84feef7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -35,12 +35,6 @@ #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) -/* - * We handle AEN commands ourselves and don't even let the - * block layer know about them. - */ -#define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AERS) - #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) static int use_threaded_interrupts; @@ -956,7 +950,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, * for them but rather special case them here. */ if (unlikely(nvmeq->qid == 0 && - cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) { + cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) { nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &cqe->result); return; @@ -1057,7 +1051,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; - c.common.command_id = NVME_AQ_BLKMQ_DEPTH + aer_idx; + c.common.command_id = NVME_AQ_BLK_MQ_DEPTH + aer_idx; spin_lock_irq(&nvmeq->q_lock); __nvme_submit_cmd(nvmeq, &c); @@ -1524,11 +1518,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.ops = &nvme_mq_admin_ops; dev->admin_tagset.nr_hw_queues = 1; - /* - * Subtract one to leave an empty queue entry for 'Full Queue' - * condition. See NVM-Express 1.2 specification, section 4.1.2. - */ - dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1; + dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH; dev->admin_tagset.timeout = ADMIN_TIMEOUT; dev->admin_tagset.numa_node = dev_to_node(dev->dev); dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 32e21ab1ae52..008791bbdfb3 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -41,14 +41,6 @@ #define NVME_RDMA_MAX_INLINE_SEGMENTS 1 -/* - * We handle AEN commands ourselves and don't even let the - * block layer know about them. - */ -#define NVME_RDMA_NR_AEN_COMMANDS 1 -#define NVME_RDMA_AQ_BLKMQ_DEPTH \ - (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS) - struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; @@ -690,7 +682,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set = &ctrl->admin_tag_set; memset(set, 0, sizeof(*set)); set->ops = &nvme_rdma_admin_mq_ops; - set->queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH; + set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; set->reserved_tags = 2; /* connect + keep-alive */ set->numa_node = NUMA_NO_NODE; set->cmd_size = sizeof(struct nvme_rdma_request) + @@ -1318,7 +1310,7 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx) memset(cmd, 0, sizeof(*cmd)); cmd->common.opcode = nvme_admin_async_event; - cmd->common.command_id = NVME_RDMA_AQ_BLKMQ_DEPTH; + cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH; cmd->common.flags |= NVME_CMD_SGL_METABUF; nvme_rdma_set_sg_null(cmd); @@ -1380,7 +1372,7 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag) * for them but rather special case them here. */ if (unlikely(nvme_rdma_queue_idx(queue) == 0 && - cqe->command_id >= NVME_RDMA_AQ_BLKMQ_DEPTH)) + cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); else diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index bc95c6ed531a..7258b796f209 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -23,14 +23,6 @@ #define NVME_LOOP_MAX_SEGMENTS 256 -/* - * We handle AEN commands ourselves and don't even let the - * block layer know about them. - */ -#define NVME_LOOP_NR_AEN_COMMANDS 1 -#define NVME_LOOP_AQ_BLKMQ_DEPTH \ - (NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS) - struct nvme_loop_iod { struct nvme_request nvme_req; struct nvme_command cmd; @@ -112,7 +104,7 @@ static void nvme_loop_queue_response(struct nvmet_req *req) * for them but rather special case them here. */ if (unlikely(nvme_loop_queue_idx(queue) == 0 && - cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) { + cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) { nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); } else { @@ -200,7 +192,7 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) memset(&iod->cmd, 0, sizeof(iod->cmd)); iod->cmd.common.opcode = nvme_admin_async_event; - iod->cmd.common.command_id = NVME_LOOP_AQ_BLKMQ_DEPTH; + iod->cmd.common.command_id = NVME_AQ_BLK_MQ_DEPTH; iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; if (!nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq, @@ -356,7 +348,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_LOOP_AQ_BLKMQ_DEPTH; + ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + diff --git a/include/linux/nvme.h b/include/linux/nvme.h index fd1d4508a612..89ffa7eed2fd 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -90,6 +90,14 @@ enum { }; #define NVME_AQ_DEPTH 32 +#define NVME_NR_AEN_COMMANDS 1 +#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) + +/* + * Subtract one to leave an empty queue entry for 'Full Queue' condition. See + * NVM-Express 1.2 specification, section 4.1.2. + */ +#define NVME_AQ_MQ_TAG_DEPTH (NVME_AQ_BLK_MQ_DEPTH - 1) enum { NVME_REG_CAP = 0x0000, /* Controller Capabilities */ -- cgit v1.2.3 From ad22c355b707a8d8d48e282aadc01c0b0604b2e9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Nov 2017 15:13:12 -0700 Subject: nvme: remove handling of multiple AEN requests The driver can handle tracking only one AEN request, so this patch removes handling for multiple ones. Reviewed-by: Christoph Hellwig Reviewed-by: James Smart Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 28 +++------------------------- drivers/nvme/host/fc.c | 9 +++------ drivers/nvme/host/nvme.h | 3 +-- drivers/nvme/host/pci.c | 4 ++-- drivers/nvme/host/rdma.c | 5 +---- drivers/nvme/target/loop.c | 2 +- 6 files changed, 11 insertions(+), 40 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 315087dfcd8d..dedbf12847b6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2670,15 +2670,7 @@ static void nvme_async_event_work(struct work_struct *work) struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, async_event_work); - spin_lock_irq(&ctrl->lock); - while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) { - int aer_idx = --ctrl->event_limit; - - spin_unlock_irq(&ctrl->lock); - ctrl->ops->submit_async_event(ctrl, aer_idx); - spin_lock_irq(&ctrl->lock); - } - spin_unlock_irq(&ctrl->lock); + ctrl->ops->submit_async_event(ctrl); } static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) @@ -2745,22 +2737,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, union nvme_result *res) { u32 result = le32_to_cpu(res->u32); - bool done = true; - switch (le16_to_cpu(status) >> 1) { - case NVME_SC_SUCCESS: - done = false; - /*FALLTHRU*/ - case NVME_SC_ABORT_REQ: - ++ctrl->event_limit; - if (ctrl->state == NVME_CTRL_LIVE) - queue_work(nvme_wq, &ctrl->async_event_work); - break; - default: - break; - } - - if (done) + if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS) return; switch (result & 0xff07) { @@ -2774,12 +2752,12 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, default: dev_warn(ctrl->device, "async event result %08x\n", result); } + queue_work(nvme_wq, &ctrl->async_event_work); } EXPORT_SYMBOL_GPL(nvme_complete_async_event); void nvme_queue_async_events(struct nvme_ctrl *ctrl) { - ctrl->event_limit = NVME_NR_AEN_COMMANDS; queue_work(nvme_wq, &ctrl->async_event_work); } EXPORT_SYMBOL_GPL(nvme_queue_async_events); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 3aa595029192..6eb460b117d6 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2382,7 +2382,7 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) } static void -nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) +nvme_fc_submit_async_event(struct nvme_ctrl *arg) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; @@ -2390,9 +2390,6 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) bool terminating = false; blk_status_t ret; - if (aer_idx > NVME_NR_AEN_COMMANDS) - return; - spin_lock_irqsave(&ctrl->lock, flags); if (ctrl->flags & FCCTRL_TERMIO) terminating = true; @@ -2401,13 +2398,13 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) if (terminating) return; - aen_op = &ctrl->aen_ops[aer_idx]; + aen_op = &ctrl->aen_ops[0]; ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, NVMEFC_FCP_NODATA); if (ret) dev_err(ctrl->ctrl.device, - "failed async event work [%d]\n", aer_idx); + "failed async event work\n"); } static void diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a6d750cfa6b2..b55c97ecea31 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -162,7 +162,6 @@ struct nvme_ctrl { u16 nssa; u16 nr_streams; atomic_t abort_limit; - u8 event_limit; u8 vwc; u32 vs; u32 sgls; @@ -237,7 +236,7 @@ struct nvme_ctrl_ops { int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); void (*free_ctrl)(struct nvme_ctrl *ctrl); - void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); + void (*submit_async_event)(struct nvme_ctrl *ctrl); void (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); int (*reinit_request)(void *data, struct request *rq); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c3dfd84feef7..429d56f1a19e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1043,7 +1043,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) return __nvme_poll(nvmeq, tag); } -static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) +static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); struct nvme_queue *nvmeq = dev->queues[0]; @@ -1051,7 +1051,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; - c.common.command_id = NVME_AQ_BLK_MQ_DEPTH + aer_idx; + c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; spin_lock_irq(&nvmeq->q_lock); __nvme_submit_cmd(nvmeq, &c); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 008791bbdfb3..c8d854474a5b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1293,7 +1293,7 @@ static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue) return queue->ctrl->tag_set.tags[queue_idx - 1]; } -static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx) +static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg); struct nvme_rdma_queue *queue = &ctrl->queues[0]; @@ -1303,9 +1303,6 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx) struct ib_sge sge; int ret; - if (WARN_ON_ONCE(aer_idx != 0)) - return; - ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE); memset(cmd, 0, sizeof(*cmd)); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 7258b796f209..f40e70eb4a38 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -184,7 +184,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } -static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) +static void nvme_loop_submit_async_event(struct nvme_ctrl *arg) { struct nvme_loop_ctrl *ctrl = to_loop_ctrl(arg); struct nvme_loop_queue *queue = &ctrl->queues[0]; -- cgit v1.2.3 From d99ca609a1b55f87a5e62a11ed70e4d091d815f0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Nov 2017 15:13:13 -0700 Subject: nvme: unexport starting async event work Async event work is for core use only and should not be called directly from drivers. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Reviewed-by: Guan Junxiong Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 8 +------- drivers/nvme/host/nvme.h | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dedbf12847b6..c135d0aeebd7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2756,12 +2756,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, } EXPORT_SYMBOL_GPL(nvme_complete_async_event); -void nvme_queue_async_events(struct nvme_ctrl *ctrl) -{ - queue_work(nvme_wq, &ctrl->async_event_work); -} -EXPORT_SYMBOL_GPL(nvme_queue_async_events); - void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_stop_keep_alive(ctrl); @@ -2778,7 +2772,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); - nvme_queue_async_events(ctrl); + queue_work(nvme_wq, &ctrl->async_event_work); nvme_start_queues(ctrl); } } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b55c97ecea31..151062573ece 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -314,7 +314,6 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, union nvme_result *res); -void nvme_queue_async_events(struct nvme_ctrl *ctrl); void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); -- cgit v1.2.3 From e3d7874dcf175cca2dca7795d6453f637ad8ba9b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Nov 2017 15:13:14 -0700 Subject: nvme: send uevent for some asynchronous events This will give udev a chance to observe and handle asynchronous event notifications and clear the log to unmask future events of the same type. The driver will create a change uevent of the asyncronuos event result before submitting the next AEN request to the device if a completed AEN event is of type error, smart, command set or vendor specific, Signed-off-by: Keith Busch Reviewed-by: Guan Junxiong Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 28 ++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 1 + include/linux/nvme.h | 4 ++++ 3 files changed, 33 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c135d0aeebd7..683d890d73fa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2665,11 +2665,28 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_remove_namespaces); +static void nvme_aen_uevent(struct nvme_ctrl *ctrl) +{ + char *envp[2] = { NULL, NULL }; + u32 aen_result = ctrl->aen_result; + + ctrl->aen_result = 0; + if (!aen_result) + return; + + envp[0] = kasprintf(GFP_KERNEL, "NVME_AEN=%#08x", aen_result); + if (!envp[0]) + return; + kobject_uevent_env(&ctrl->device->kobj, KOBJ_CHANGE, envp); + kfree(envp[0]); +} + static void nvme_async_event_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, async_event_work); + nvme_aen_uevent(ctrl); ctrl->ops->submit_async_event(ctrl); } @@ -2741,6 +2758,17 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS) return; + switch (result & 0x7) { + case NVME_AER_ERROR: + case NVME_AER_SMART: + case NVME_AER_CSS: + case NVME_AER_VS: + ctrl->aen_result = result; + break; + default: + break; + } + switch (result & 0xff07) { case NVME_AER_NOTICE_NS_CHANGED: dev_info(ctrl->device, "rescanning\n"); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 151062573ece..7b9cc7d616b7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -168,6 +168,7 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + u32 aen_result; unsigned int shutdown_timeout; unsigned int kato; bool subsystem; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 89ffa7eed2fd..aea87f0d917b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -428,6 +428,10 @@ enum { }; enum { + NVME_AER_ERROR = 0, + NVME_AER_SMART = 1, + NVME_AER_CSS = 6, + NVME_AER_VS = 7, NVME_AER_NOTICE_NS_CHANGED = 0x0002, NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102, }; -- cgit v1.2.3 From 9a95e4ef709533efac4aafcb8bddf73f96db50ed Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Nov 2017 10:49:59 -0800 Subject: block, nvme: Introduce blk_mq_req_flags_t Several block layer and NVMe core functions accept a combination of BLK_MQ_REQ_* flags through the 'flags' argument but there is no verification at compile time whether the right type of block layer flags is passed. Make it possible for sparse to verify this. This patch does not change any functionality. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Tested-by: Oleksandr Natalenko Cc: linux-nvme@lists.infradead.org Cc: Christoph Hellwig Cc: Johannes Thumshirn Cc: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 12 ++++++------ block/blk-mq.c | 4 ++-- block/blk-mq.h | 2 +- drivers/nvme/host/core.c | 5 +++-- drivers/nvme/host/nvme.h | 5 +++-- include/linux/blk-mq.h | 17 +++++++++++------ include/linux/blk_types.h | 2 ++ include/linux/blkdev.h | 4 ++-- 8 files changed, 30 insertions(+), 21 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 29b08428ae45..7c54c195e79e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -801,7 +801,7 @@ EXPORT_SYMBOL(blk_alloc_queue); * @q: request queue pointer * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT */ -int blk_queue_enter(struct request_queue *q, unsigned int flags) +int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { const bool preempt = flags & BLK_MQ_REQ_PREEMPT; @@ -1225,7 +1225,7 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr) * Returns request pointer on success, with @q->queue_lock *not held*. */ static struct request *__get_request(struct request_list *rl, unsigned int op, - struct bio *bio, unsigned int flags) + struct bio *bio, blk_mq_req_flags_t flags) { struct request_queue *q = rl->q; struct request *rq; @@ -1408,7 +1408,7 @@ rq_starved: * Returns request pointer on success, with @q->queue_lock *not held*. */ static struct request *get_request(struct request_queue *q, unsigned int op, - struct bio *bio, unsigned int flags) + struct bio *bio, blk_mq_req_flags_t flags) { const bool is_sync = op_is_sync(op); DEFINE_WAIT(wait); @@ -1458,7 +1458,7 @@ retry: /* flags: BLK_MQ_REQ_PREEMPT and/or BLK_MQ_REQ_NOWAIT. */ static struct request *blk_old_get_request(struct request_queue *q, - unsigned int op, unsigned int flags) + unsigned int op, blk_mq_req_flags_t flags) { struct request *rq; gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : @@ -1495,7 +1495,7 @@ static struct request *blk_old_get_request(struct request_queue *q, * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT. */ struct request *blk_get_request_flags(struct request_queue *q, unsigned int op, - unsigned int flags) + blk_mq_req_flags_t flags) { struct request *req; @@ -2291,7 +2291,7 @@ blk_qc_t generic_make_request(struct bio *bio) current->bio_list = bio_list_on_stack; do { struct request_queue *q = bio->bi_disk->queue; - unsigned int flags = bio->bi_opf & REQ_NOWAIT ? + blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ? BLK_MQ_REQ_NOWAIT : 0; if (likely(blk_queue_enter(q, flags) == 0)) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 211bc8a3e2cc..bfe24a5b62a3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -383,7 +383,7 @@ static struct request *blk_mq_get_request(struct request_queue *q, } struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, - unsigned int flags) + blk_mq_req_flags_t flags) { struct blk_mq_alloc_data alloc_data = { .flags = flags }; struct request *rq; @@ -409,7 +409,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, EXPORT_SYMBOL(blk_mq_alloc_request); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, - unsigned int op, unsigned int flags, unsigned int hctx_idx) + unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx) { struct blk_mq_alloc_data alloc_data = { .flags = flags }; struct request *rq; diff --git a/block/blk-mq.h b/block/blk-mq.h index 2502f40ccdc0..99a19c5523e2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -111,7 +111,7 @@ static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx) struct blk_mq_alloc_data { /* input parameter */ struct request_queue *q; - unsigned int flags; + blk_mq_req_flags_t flags; unsigned int shallow_depth; /* input & output parameter */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 683d890d73fa..878d5c09d15b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -305,7 +305,7 @@ static void nvme_put_ns(struct nvme_ns *ns) } struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, unsigned int flags, int qid) + struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) { unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; struct request *req; @@ -573,7 +573,8 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd); */ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, - unsigned timeout, int qid, int at_head, int flags) + unsigned timeout, int qid, int at_head, + blk_mq_req_flags_t flags) { struct request *req; int ret; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7b9cc7d616b7..23b8504ace7f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -327,14 +327,15 @@ int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, unsigned int flags, int qid); + struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, - unsigned timeout, int qid, int at_head, int flags); + unsigned timeout, int qid, int at_head, + blk_mq_req_flags_t flags); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 82b56609736a..b326208277ee 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -209,16 +209,21 @@ void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); enum { - BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ - BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ - BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ - BLK_MQ_REQ_PREEMPT = (1 << 3), /* set RQF_PREEMPT */ + /* return when out of requests */ + BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), + /* allocate from reserved pool */ + BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), + /* allocate internal/sched tag */ + BLK_MQ_REQ_INTERNAL = (__force blk_mq_req_flags_t)(1 << 2), + /* set RQF_PREEMPT */ + BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), }; struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, - unsigned int flags); + blk_mq_req_flags_t flags); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, - unsigned int op, unsigned int flags, unsigned int hctx_idx); + unsigned int op, blk_mq_req_flags_t flags, + unsigned int hctx_idx); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); enum { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1b04085255fb..13ccfc9b210a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -162,6 +162,8 @@ struct bio { */ #define BIO_RESET_BITS BVEC_POOL_OFFSET +typedef __u32 __bitwise blk_mq_req_flags_t; + /* * Operations and flags common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 402c9d536ae1..e80ea1d31343 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -935,7 +935,7 @@ extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request_flags(struct request_queue *, unsigned int op, - unsigned int flags); + blk_mq_req_flags_t flags); extern struct request *blk_get_request(struct request_queue *, unsigned int op, gfp_t gfp_mask); extern void blk_requeue_request(struct request_queue *, struct request *); @@ -959,7 +959,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int blk_queue_enter(struct request_queue *q, unsigned int flags); +extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_start_queue_async(struct request_queue *q); -- cgit v1.2.3 From ab9e00cc72fa4c6eb6370757b9e94c4645e91d5d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Nov 2017 13:48:55 +0100 Subject: nvme: track subsystems This adds a new nvme_subsystem structure so that we can track multiple controllers that belong to a single subsystem. For now we only use it to store the NQN, and to check that we don't have duplicate NQNs unless the involved subsystems support multiple controllers. Includes code originally from Hannes Reinecke to expose the subsystems in sysfs. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 210 ++++++++++++++++++++++++++++++++++++++------ drivers/nvme/host/fabrics.c | 4 +- drivers/nvme/host/nvme.h | 27 ++++-- 3 files changed, 205 insertions(+), 36 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 878d5c09d15b..78dc6f624d52 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -68,9 +68,14 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); struct workqueue_struct *nvme_wq; EXPORT_SYMBOL_GPL(nvme_wq); +static DEFINE_IDA(nvme_subsystems_ida); +static LIST_HEAD(nvme_subsystems); +static DEFINE_MUTEX(nvme_subsystems_lock); + static DEFINE_IDA(nvme_instance_ida); static dev_t nvme_chr_devt; static struct class *nvme_class; +static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); @@ -1804,14 +1809,15 @@ static bool quirk_matches(const struct nvme_id_ctrl *id, string_matches(id->fr, q->fr, sizeof(id->fr)); } -static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl, + struct nvme_id_ctrl *id) { size_t nqnlen; int off; nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strncpy(ctrl->subnqn, id->subnqn, NVMF_NQN_SIZE); + strncpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); return; } @@ -1819,14 +1825,140 @@ static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n"); /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */ - off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE, + off = snprintf(subsys->subnqn, NVMF_NQN_SIZE, "nqn.2014.08.org.nvmexpress:%4x%4x", le16_to_cpu(id->vid), le16_to_cpu(id->ssvid)); - memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn)); + memcpy(subsys->subnqn + off, id->sn, sizeof(id->sn)); off += sizeof(id->sn); - memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn)); + memcpy(subsys->subnqn + off, id->mn, sizeof(id->mn)); off += sizeof(id->mn); - memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off); + memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off); +} + +static void __nvme_release_subsystem(struct nvme_subsystem *subsys) +{ + ida_simple_remove(&nvme_subsystems_ida, subsys->instance); + kfree(subsys); +} + +static void nvme_release_subsystem(struct device *dev) +{ + __nvme_release_subsystem(container_of(dev, struct nvme_subsystem, dev)); +} + +static void nvme_destroy_subsystem(struct kref *ref) +{ + struct nvme_subsystem *subsys = + container_of(ref, struct nvme_subsystem, ref); + + mutex_lock(&nvme_subsystems_lock); + list_del(&subsys->entry); + mutex_unlock(&nvme_subsystems_lock); + + device_del(&subsys->dev); + put_device(&subsys->dev); +} + +static void nvme_put_subsystem(struct nvme_subsystem *subsys) +{ + kref_put(&subsys->ref, nvme_destroy_subsystem); +} + +static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn) +{ + struct nvme_subsystem *subsys; + + lockdep_assert_held(&nvme_subsystems_lock); + + list_for_each_entry(subsys, &nvme_subsystems, entry) { + if (strcmp(subsys->subnqn, subsysnqn)) + continue; + if (!kref_get_unless_zero(&subsys->ref)) + continue; + return subsys; + } + + return NULL; +} + +static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +{ + struct nvme_subsystem *subsys, *found; + int ret; + + subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); + if (!subsys) + return -ENOMEM; + ret = ida_simple_get(&nvme_subsystems_ida, 0, 0, GFP_KERNEL); + if (ret < 0) { + kfree(subsys); + return ret; + } + subsys->instance = ret; + mutex_init(&subsys->lock); + kref_init(&subsys->ref); + INIT_LIST_HEAD(&subsys->ctrls); + nvme_init_subnqn(subsys, ctrl, id); + memcpy(subsys->serial, id->sn, sizeof(subsys->serial)); + memcpy(subsys->model, id->mn, sizeof(subsys->model)); + memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); + subsys->vendor_id = le16_to_cpu(id->vid); + subsys->cmic = id->cmic; + + subsys->dev.class = nvme_subsys_class; + subsys->dev.release = nvme_release_subsystem; + dev_set_name(&subsys->dev, "nvme-subsys%d", subsys->instance); + device_initialize(&subsys->dev); + + mutex_lock(&nvme_subsystems_lock); + found = __nvme_find_get_subsystem(subsys->subnqn); + if (found) { + /* + * Verify that the subsystem actually supports multiple + * controllers, else bail out. + */ + if (!(id->cmic & (1 << 1))) { + dev_err(ctrl->device, + "ignoring ctrl due to duplicate subnqn (%s).\n", + found->subnqn); + nvme_put_subsystem(found); + ret = -EINVAL; + goto out_unlock; + } + + __nvme_release_subsystem(subsys); + subsys = found; + } else { + ret = device_add(&subsys->dev); + if (ret) { + dev_err(ctrl->device, + "failed to register subsystem device.\n"); + goto out_unlock; + } + list_add_tail(&subsys->entry, &nvme_subsystems); + } + + ctrl->subsys = subsys; + mutex_unlock(&nvme_subsystems_lock); + + if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, + dev_name(ctrl->device))) { + dev_err(ctrl->device, + "failed to create sysfs link from subsystem.\n"); + /* the transport driver will eventually put the subsystem */ + return -EINVAL; + } + + mutex_lock(&subsys->lock); + list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); + mutex_unlock(&subsys->lock); + + return 0; + +out_unlock: + mutex_unlock(&nvme_subsystems_lock); + put_device(&subsys->dev); + return ret; } static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log, @@ -1901,9 +2033,13 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) return ret; } - nvme_init_subnqn(ctrl, id); - if (!ctrl->identified) { + int i; + + ret = nvme_init_subsystem(ctrl, id); + if (ret) + goto out_free; + /* * Check for quirks. Quirk can depend on firmware version, * so, in principle, the set of quirks present can change @@ -1912,9 +2048,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) * the device, but we'd have to make sure that the driver * behaves intelligently if the quirks change. */ - - int i; - for (i = 0; i < ARRAY_SIZE(core_quirks); i++) { if (quirk_matches(id, &core_quirks[i])) ctrl->quirks |= core_quirks[i].quirks; @@ -1927,14 +2060,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } ctrl->oacs = le16_to_cpu(id->oacs); - ctrl->vid = le16_to_cpu(id->vid); ctrl->oncs = le16_to_cpup(&id->oncs); atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; ctrl->cntlid = le16_to_cpup(&id->cntlid); - memcpy(ctrl->serial, id->sn, sizeof(id->sn)); - memcpy(ctrl->model, id->mn, sizeof(id->mn)); - memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr)); if (id->mdts) max_hw_sectors = 1 << (id->mdts + page_shift - 9); else @@ -2137,9 +2266,9 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvme_ctrl *ctrl = ns->ctrl; - int serial_len = sizeof(ctrl->serial); - int model_len = sizeof(ctrl->model); + struct nvme_subsystem *subsys = ns->ctrl->subsys; + int serial_len = sizeof(subsys->serial); + int model_len = sizeof(subsys->model); if (!uuid_is_null(&ns->uuid)) return sprintf(buf, "uuid.%pU\n", &ns->uuid); @@ -2150,15 +2279,16 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, if (memchr_inv(ns->eui, 0, sizeof(ns->eui))) return sprintf(buf, "eui.%8phN\n", ns->eui); - while (serial_len > 0 && (ctrl->serial[serial_len - 1] == ' ' || - ctrl->serial[serial_len - 1] == '\0')) + while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' || + subsys->serial[serial_len - 1] == '\0')) serial_len--; - while (model_len > 0 && (ctrl->model[model_len - 1] == ' ' || - ctrl->model[model_len - 1] == '\0')) + while (model_len > 0 && (subsys->model[model_len - 1] == ' ' || + subsys->model[model_len - 1] == '\0')) model_len--; - return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid, - serial_len, ctrl->serial, model_len, ctrl->model, ns->ns_id); + return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, + serial_len, subsys->serial, model_len, subsys->model, + ns->ns_id); } static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL); @@ -2244,10 +2374,15 @@ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ - return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field); \ + return sprintf(buf, "%.*s\n", \ + (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \ } \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); +nvme_show_str_function(model); +nvme_show_str_function(serial); +nvme_show_str_function(firmware_rev); + #define nvme_show_int_function(field) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -2257,9 +2392,6 @@ static ssize_t field##_show(struct device *dev, \ } \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); -nvme_show_str_function(model); -nvme_show_str_function(serial); -nvme_show_str_function(firmware_rev); nvme_show_int_function(cntlid); static ssize_t nvme_sysfs_delete(struct device *dev, @@ -2313,7 +2445,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn); + return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subsys->subnqn); } static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL); @@ -2817,12 +2949,23 @@ static void nvme_free_ctrl(struct device *dev) { struct nvme_ctrl *ctrl = container_of(dev, struct nvme_ctrl, ctrl_device); + struct nvme_subsystem *subsys = ctrl->subsys; ida_simple_remove(&nvme_instance_ida, ctrl->instance); ida_destroy(&ctrl->ns_ida); kfree(ctrl->effects); + if (subsys) { + mutex_lock(&subsys->lock); + list_del(&ctrl->subsys_entry); + mutex_unlock(&subsys->lock); + sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device)); + } + ctrl->ops->free_ctrl(ctrl); + + if (subsys) + nvme_put_subsystem(subsys); } /* @@ -3022,8 +3165,15 @@ int __init nvme_core_init(void) goto unregister_chrdev; } + nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem"); + if (IS_ERR(nvme_subsys_class)) { + result = PTR_ERR(nvme_subsys_class); + goto destroy_class; + } return 0; +destroy_class: + class_destroy(nvme_class); unregister_chrdev: unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); destroy_wq: @@ -3033,6 +3183,8 @@ destroy_wq: void nvme_core_exit(void) { + ida_destroy(&nvme_subsystems_ida); + class_destroy(nvme_subsys_class); class_destroy(nvme_class); unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); destroy_workqueue(nvme_wq); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index a4967de5bb25..76b4fe6816a0 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -882,10 +882,10 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_unlock; } - if (strcmp(ctrl->subnqn, opts->subsysnqn)) { + if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) { dev_warn(ctrl->device, "controller returned incorrect NQN: \"%s\".\n", - ctrl->subnqn); + ctrl->subsys->subnqn); up_read(&nvmf_transports_rwsem); nvme_delete_ctrl_sync(ctrl); return ERR_PTR(-EINVAL); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 23b8504ace7f..6ae50979c3aa 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -140,13 +140,12 @@ struct nvme_ctrl { struct work_struct reset_work; struct work_struct delete_work; + struct nvme_subsystem *subsys; + struct list_head subsys_entry; + struct opal_dev *opal_dev; char name[12]; - char serial[20]; - char model[40]; - char firmware_rev[8]; - char subnqn[NVMF_NQN_SIZE]; u16 cntlid; u32 ctrl_config; @@ -157,7 +156,6 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; u16 oncs; - u16 vid; u16 oacs; u16 nssa; u16 nr_streams; @@ -200,6 +198,25 @@ struct nvme_ctrl { struct nvmf_ctrl_options *opts; }; +struct nvme_subsystem { + int instance; + struct device dev; + /* + * Because we unregister the device on the last put we need + * a separate refcount. + */ + struct kref ref; + struct list_head entry; + struct mutex lock; + struct list_head ctrls; + char subnqn[NVMF_NQN_SIZE]; + char serial[20]; + char model[40]; + char firmware_rev[8]; + u8 cmic; + u16 vendor_id; +}; + struct nvme_ns { struct list_head list; -- cgit v1.2.3 From 002fab040468f5b279f8d8d49d487e73b0c3f98a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Nov 2017 13:50:16 +0100 Subject: nvme: introduce a nvme_ns_ids structure This allows us to manage the various uniqueue namespace identifiers together instead needing various variables and arguments. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 69 +++++++++++++++++++++++++++--------------------- drivers/nvme/host/nvme.h | 14 +++++++--- 2 files changed, 49 insertions(+), 34 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 78dc6f624d52..04c0949c754a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -797,7 +797,7 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) } static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, - u8 *eui64, u8 *nguid, uuid_t *uuid) + struct nvme_ns_ids *ids) { struct nvme_command c = { }; int status; @@ -833,7 +833,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, goto free_data; } len = NVME_NIDT_EUI64_LEN; - memcpy(eui64, data + pos + sizeof(*cur), len); + memcpy(ids->eui64, data + pos + sizeof(*cur), len); break; case NVME_NIDT_NGUID: if (cur->nidl != NVME_NIDT_NGUID_LEN) { @@ -843,7 +843,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, goto free_data; } len = NVME_NIDT_NGUID_LEN; - memcpy(nguid, data + pos + sizeof(*cur), len); + memcpy(ids->nguid, data + pos + sizeof(*cur), len); break; case NVME_NIDT_UUID: if (cur->nidl != NVME_NIDT_UUID_LEN) { @@ -853,7 +853,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, goto free_data; } len = NVME_NIDT_UUID_LEN; - uuid_copy(uuid, data + pos + sizeof(*cur)); + uuid_copy(&ids->uuid, data + pos + sizeof(*cur)); break; default: /* Skip unnkown types */ @@ -1233,22 +1233,31 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, } static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, - struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid) + struct nvme_id_ns *id, struct nvme_ns_ids *ids) { + memset(ids, 0, sizeof(*ids)); + if (ctrl->vs >= NVME_VS(1, 1, 0)) - memcpy(eui64, id->eui64, sizeof(id->eui64)); + memcpy(ids->eui64, id->eui64, sizeof(id->eui64)); if (ctrl->vs >= NVME_VS(1, 2, 0)) - memcpy(nguid, id->nguid, sizeof(id->nguid)); + memcpy(ids->nguid, id->nguid, sizeof(id->nguid)); if (ctrl->vs >= NVME_VS(1, 3, 0)) { /* Don't treat error as fatal we potentially * already have a NGUID or EUI-64 */ - if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid)) + if (nvme_identify_ns_descs(ctrl, nsid, ids)) dev_warn(ctrl->device, "%s: Identify Descriptors failed\n", __func__); } } +static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) +{ + return uuid_equal(&a->uuid, &b->uuid) && + memcmp(&a->nguid, &b->nguid, sizeof(a->nguid)) == 0 && + memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0; +} + static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { @@ -1304,8 +1313,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) struct nvme_ns *ns = disk->private_data; struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_id_ns *id; - u8 eui64[8] = { 0 }, nguid[16] = { 0 }; - uuid_t uuid = uuid_null; + struct nvme_ns_ids ids; int ret = 0; if (test_bit(NVME_NS_DEAD, &ns->flags)) { @@ -1322,10 +1330,8 @@ static int nvme_revalidate_disk(struct gendisk *disk) goto out; } - nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid); - if (!uuid_equal(&ns->uuid, &uuid) || - memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) || - memcmp(&ns->eui, &eui64, sizeof(ns->eui))) { + nvme_report_ns_ids(ctrl, ns->ns_id, id, &ids); + if (!nvme_ns_ids_equal(&ns->ids, &ids)) { dev_err(ctrl->device, "identifiers changed for nsid %d\n", ns->ns_id); ret = -ENODEV; @@ -2266,18 +2272,19 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvme_ns_ids *ids = &ns->ids; struct nvme_subsystem *subsys = ns->ctrl->subsys; int serial_len = sizeof(subsys->serial); int model_len = sizeof(subsys->model); - if (!uuid_is_null(&ns->uuid)) - return sprintf(buf, "uuid.%pU\n", &ns->uuid); + if (!uuid_is_null(&ids->uuid)) + return sprintf(buf, "uuid.%pU\n", &ids->uuid); - if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) - return sprintf(buf, "eui.%16phN\n", ns->nguid); + if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) + return sprintf(buf, "eui.%16phN\n", ids->nguid); - if (memchr_inv(ns->eui, 0, sizeof(ns->eui))) - return sprintf(buf, "eui.%8phN\n", ns->eui); + if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) + return sprintf(buf, "eui.%8phN\n", ids->eui64); while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' || subsys->serial[serial_len - 1] == '\0')) @@ -2296,7 +2303,7 @@ static ssize_t nguid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%pU\n", ns->nguid); + return sprintf(buf, "%pU\n", ns->ids.nguid); } static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL); @@ -2304,16 +2311,17 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvme_ns_ids *ids = &ns->ids; /* For backward compatibility expose the NGUID to userspace if * we have no UUID set */ - if (uuid_is_null(&ns->uuid)) { + if (uuid_is_null(&ids->uuid)) { printk_ratelimited(KERN_WARNING "No UUID available providing old NGUID\n"); - return sprintf(buf, "%pU\n", ns->nguid); + return sprintf(buf, "%pU\n", ids->nguid); } - return sprintf(buf, "%pU\n", &ns->uuid); + return sprintf(buf, "%pU\n", &ids->uuid); } static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL); @@ -2321,7 +2329,7 @@ static ssize_t eui_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%8ph\n", ns->eui); + return sprintf(buf, "%8ph\n", ns->ids.eui64); } static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL); @@ -2347,18 +2355,19 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, { struct device *dev = container_of(kobj, struct device, kobj); struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvme_ns_ids *ids = &ns->ids; if (a == &dev_attr_uuid.attr) { - if (uuid_is_null(&ns->uuid) || - !memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) + if (uuid_is_null(&ids->uuid) || + !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) return 0; } if (a == &dev_attr_nguid.attr) { - if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) + if (!memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) return 0; } if (a == &dev_attr_eui.attr) { - if (!memchr_inv(ns->eui, 0, sizeof(ns->eui))) + if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) return 0; } return a->mode; @@ -2591,7 +2600,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (id->ncap == 0) goto out_free_id; - nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid); + nvme_report_ns_ids(ctrl, ns->ns_id, id, &ns->ids); if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { if (nvme_nvm_register(ns, disk_name, node)) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6ae50979c3aa..2f1af915f93a 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -217,6 +217,15 @@ struct nvme_subsystem { u16 vendor_id; }; +/* + * Container structure for uniqueue namespace identifiers. + */ +struct nvme_ns_ids { + u8 eui64[8]; + u8 nguid[16]; + uuid_t uuid; +}; + struct nvme_ns { struct list_head list; @@ -227,11 +236,8 @@ struct nvme_ns { struct kref kref; int instance; - u8 eui[8]; - u8 nguid[16]; - uuid_t uuid; - unsigned ns_id; + struct nvme_ns_ids ids; int lba_shift; u16 ms; u16 sgs; -- cgit v1.2.3 From ed754e5deeb17f4e675c84e4b6c640cc7344e498 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Nov 2017 13:50:43 +0100 Subject: nvme: track shared namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new struct nvme_ns_head that holds information about an actual namespace, unlike struct nvme_ns, which only holds the per-controller namespace information. For private namespaces there is a 1:1 relation of the two, but for shared namespaces this lets us discover all the paths to it. For now only the identifiers are moved to the new structure, but most of the information in struct nvme_ns should eventually move over. To allow lockless path lookup the list of nvme_ns structures per nvme_ns_head is protected by SRCU, which requires freeing the nvme_ns structure through call_srcu. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Javier González Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 220 +++++++++++++++++++++++++++++++++++-------- drivers/nvme/host/lightnvm.c | 14 +-- drivers/nvme/host/nvme.h | 26 ++++- 3 files changed, 210 insertions(+), 50 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 04c0949c754a..13676f6cd4f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -291,6 +291,22 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, } EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); +static void nvme_free_ns_head(struct kref *ref) +{ + struct nvme_ns_head *head = + container_of(ref, struct nvme_ns_head, ref); + + ida_simple_remove(&head->subsys->ns_ida, head->instance); + list_del_init(&head->entry); + cleanup_srcu_struct(&head->srcu); + kfree(head); +} + +static void nvme_put_ns_head(struct nvme_ns_head *head) +{ + kref_put(&head->ref, nvme_free_ns_head); +} + static void nvme_free_ns(struct kref *kref) { struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); @@ -299,7 +315,7 @@ static void nvme_free_ns(struct kref *kref) nvme_nvm_unregister(ns); put_disk(ns->disk); - ida_simple_remove(&ns->ctrl->ns_ida, ns->instance); + nvme_put_ns_head(ns->head); nvme_put_ctrl(ns->ctrl); kfree(ns); } @@ -435,7 +451,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns, { memset(cmnd, 0, sizeof(*cmnd)); cmnd->common.opcode = nvme_cmd_flush; - cmnd->common.nsid = cpu_to_le32(ns->ns_id); + cmnd->common.nsid = cpu_to_le32(ns->head->ns_id); } static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, @@ -466,7 +482,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; - cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); + cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id); cmnd->dsm.nr = cpu_to_le32(segments - 1); cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); @@ -495,7 +511,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, memset(cmnd, 0, sizeof(*cmnd)); cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); - cmnd->rw.nsid = cpu_to_le32(ns->ns_id); + cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); @@ -987,7 +1003,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) memset(&c, 0, sizeof(c)); c.rw.opcode = io.opcode; c.rw.flags = io.flags; - c.rw.nsid = cpu_to_le32(ns->ns_id); + c.rw.nsid = cpu_to_le32(ns->head->ns_id); c.rw.slba = cpu_to_le64(io.slba); c.rw.length = cpu_to_le16(io.nblocks); c.rw.control = cpu_to_le16(io.control); @@ -1130,7 +1146,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); - return ns->ns_id; + return ns->head->ns_id; case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg); case NVME_IOCTL_IO_CMD: @@ -1251,6 +1267,13 @@ static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, } } +static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) +{ + return !uuid_is_null(&ids->uuid) || + memchr_inv(ids->nguid, 0, sizeof(ids->nguid)) || + memchr_inv(ids->eui64, 0, sizeof(ids->eui64)); +} + static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) { return uuid_equal(&a->uuid, &b->uuid) && @@ -1321,7 +1344,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) return -ENODEV; } - id = nvme_identify_ns(ctrl, ns->ns_id); + id = nvme_identify_ns(ctrl, ns->head->ns_id); if (!id) return -ENODEV; @@ -1330,10 +1353,10 @@ static int nvme_revalidate_disk(struct gendisk *disk) goto out; } - nvme_report_ns_ids(ctrl, ns->ns_id, id, &ids); - if (!nvme_ns_ids_equal(&ns->ids, &ids)) { + nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); + if (!nvme_ns_ids_equal(&ns->head->ids, &ids)) { dev_err(ctrl->device, - "identifiers changed for nsid %d\n", ns->ns_id); + "identifiers changed for nsid %d\n", ns->head->ns_id); ret = -ENODEV; } @@ -1374,7 +1397,7 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10, memset(&c, 0, sizeof(c)); c.common.opcode = op; - c.common.nsid = cpu_to_le32(ns->ns_id); + c.common.nsid = cpu_to_le32(ns->head->ns_id); c.common.cdw10[0] = cpu_to_le32(cdw10); return nvme_submit_sync_cmd(ns->queue, &c, data, 16); @@ -1861,6 +1884,7 @@ static void nvme_destroy_subsystem(struct kref *ref) list_del(&subsys->entry); mutex_unlock(&nvme_subsystems_lock); + ida_destroy(&subsys->ns_ida); device_del(&subsys->dev); put_device(&subsys->dev); } @@ -1904,6 +1928,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) mutex_init(&subsys->lock); kref_init(&subsys->ref); INIT_LIST_HEAD(&subsys->ctrls); + INIT_LIST_HEAD(&subsys->nsheads); nvme_init_subnqn(subsys, ctrl, id); memcpy(subsys->serial, id->sn, sizeof(subsys->serial)); memcpy(subsys->model, id->mn, sizeof(subsys->model)); @@ -1941,6 +1966,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) "failed to register subsystem device.\n"); goto out_unlock; } + ida_init(&subsys->ns_ida); list_add_tail(&subsys->entry, &nvme_subsystems); } @@ -2272,7 +2298,7 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvme_ns_ids *ids = &ns->ids; + struct nvme_ns_ids *ids = &ns->head->ids; struct nvme_subsystem *subsys = ns->ctrl->subsys; int serial_len = sizeof(subsys->serial); int model_len = sizeof(subsys->model); @@ -2295,7 +2321,7 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, serial_len, subsys->serial, model_len, subsys->model, - ns->ns_id); + ns->head->ns_id); } static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL); @@ -2303,7 +2329,7 @@ static ssize_t nguid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%pU\n", ns->ids.nguid); + return sprintf(buf, "%pU\n", ns->head->ids.nguid); } static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL); @@ -2311,7 +2337,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvme_ns_ids *ids = &ns->ids; + struct nvme_ns_ids *ids = &ns->head->ids; /* For backward compatibility expose the NGUID to userspace if * we have no UUID set @@ -2329,7 +2355,7 @@ static ssize_t eui_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%8ph\n", ns->ids.eui64); + return sprintf(buf, "%8ph\n", ns->head->ids.eui64); } static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL); @@ -2337,7 +2363,7 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%d\n", ns->ns_id); + return sprintf(buf, "%d\n", ns->head->ns_id); } static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL); @@ -2355,7 +2381,7 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, { struct device *dev = container_of(kobj, struct device, kobj); struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvme_ns_ids *ids = &ns->ids; + struct nvme_ns_ids *ids = &ns->head->ids; if (a == &dev_attr_uuid.attr) { if (uuid_is_null(&ids->uuid) || @@ -2507,12 +2533,124 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { NULL, }; +static struct nvme_ns_head *__nvme_find_ns_head(struct nvme_subsystem *subsys, + unsigned nsid) +{ + struct nvme_ns_head *h; + + lockdep_assert_held(&subsys->lock); + + list_for_each_entry(h, &subsys->nsheads, entry) { + if (h->ns_id == nsid && kref_get_unless_zero(&h->ref)) + return h; + } + + return NULL; +} + +static int __nvme_check_ids(struct nvme_subsystem *subsys, + struct nvme_ns_head *new) +{ + struct nvme_ns_head *h; + + lockdep_assert_held(&subsys->lock); + + list_for_each_entry(h, &subsys->nsheads, entry) { + if (nvme_ns_ids_valid(&new->ids) && + nvme_ns_ids_equal(&new->ids, &h->ids)) + return -EINVAL; + } + + return 0; +} + +static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, + unsigned nsid, struct nvme_id_ns *id) +{ + struct nvme_ns_head *head; + int ret = -ENOMEM; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + goto out; + ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL); + if (ret < 0) + goto out_free_head; + head->instance = ret; + INIT_LIST_HEAD(&head->list); + init_srcu_struct(&head->srcu); + head->subsys = ctrl->subsys; + head->ns_id = nsid; + kref_init(&head->ref); + + nvme_report_ns_ids(ctrl, nsid, id, &head->ids); + + ret = __nvme_check_ids(ctrl->subsys, head); + if (ret) { + dev_err(ctrl->device, + "duplicate IDs for nsid %d\n", nsid); + goto out_cleanup_srcu; + } + + list_add_tail(&head->entry, &ctrl->subsys->nsheads); + return head; +out_cleanup_srcu: + cleanup_srcu_struct(&head->srcu); + ida_simple_remove(&ctrl->subsys->ns_ida, head->instance); +out_free_head: + kfree(head); +out: + return ERR_PTR(ret); +} + +static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, + struct nvme_id_ns *id, bool *new) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + bool is_shared = id->nmic & (1 << 0); + struct nvme_ns_head *head = NULL; + int ret = 0; + + mutex_lock(&ctrl->subsys->lock); + if (is_shared) + head = __nvme_find_ns_head(ctrl->subsys, nsid); + if (!head) { + head = nvme_alloc_ns_head(ctrl, nsid, id); + if (IS_ERR(head)) { + ret = PTR_ERR(head); + goto out_unlock; + } + + *new = true; + } else { + struct nvme_ns_ids ids; + + nvme_report_ns_ids(ctrl, nsid, id, &ids); + if (!nvme_ns_ids_equal(&head->ids, &ids)) { + dev_err(ctrl->device, + "IDs don't match for shared namespace %d\n", + nsid); + ret = -EINVAL; + goto out_unlock; + } + + *new = false; + } + + list_add_tail(&ns->siblings, &head->list); + ns->head = head; + +out_unlock: + mutex_unlock(&ctrl->subsys->lock); + return ret; +} + static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) { struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); - return nsa->ns_id - nsb->ns_id; + return nsa->head->ns_id - nsb->head->ns_id; } static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) @@ -2521,13 +2659,13 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) { - if (ns->ns_id == nsid) { + if (ns->head->ns_id == nsid) { if (!kref_get_unless_zero(&ns->kref)) continue; ret = ns; break; } - if (ns->ns_id > nsid) + if (ns->head->ns_id > nsid) break; } mutex_unlock(&ctrl->namespaces_mutex); @@ -2542,7 +2680,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) if (!ctrl->nr_streams) return 0; - ret = nvme_get_stream_params(ctrl, &s, ns->ns_id); + ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); if (ret) return ret; @@ -2567,32 +2705,26 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) struct nvme_id_ns *id; char disk_name[DISK_NAME_LEN]; int node = dev_to_node(ctrl->dev); + bool new = true; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) return; - ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL); - if (ns->instance < 0) - goto out_free_ns; - ns->queue = blk_mq_init_queue(ctrl->tagset); if (IS_ERR(ns->queue)) - goto out_release_instance; + goto out_free_ns; queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); ns->queue->queuedata = ns; ns->ctrl = ctrl; kref_init(&ns->kref); - ns->ns_id = nsid; ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); nvme_set_queue_limits(ctrl, ns->queue); nvme_setup_streams_ns(ctrl, ns); - sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance); - id = nvme_identify_ns(ctrl, nsid); if (!id) goto out_free_queue; @@ -2600,18 +2732,21 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (id->ncap == 0) goto out_free_id; - nvme_report_ns_ids(ctrl, ns->ns_id, id, &ns->ids); + if (nvme_init_ns_head(ns, nsid, id, &new)) + goto out_free_id; + + sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { if (nvme_nvm_register(ns, disk_name, node)) { dev_warn(ctrl->device, "LightNVM init failure\n"); - goto out_free_id; + goto out_unlink_ns; } } disk = alloc_disk_node(0, node); if (!disk) - goto out_free_id; + goto out_unlink_ns; disk->fops = &nvme_fops; disk->private_data = ns; @@ -2639,18 +2774,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) pr_warn("%s: failed to register lightnvm sysfs group for identification\n", ns->disk->disk_name); return; + out_unlink_ns: + mutex_lock(&ctrl->subsys->lock); + list_del_rcu(&ns->siblings); + mutex_unlock(&ctrl->subsys->lock); out_free_id: kfree(id); out_free_queue: blk_cleanup_queue(ns->queue); - out_release_instance: - ida_simple_remove(&ctrl->ns_ida, ns->instance); out_free_ns: kfree(ns); } static void nvme_ns_remove(struct nvme_ns *ns) { + struct nvme_ns_head *head = ns->head; + if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; @@ -2665,10 +2804,16 @@ static void nvme_ns_remove(struct nvme_ns *ns) blk_cleanup_queue(ns->queue); } + mutex_lock(&ns->ctrl->subsys->lock); + if (head) + list_del_rcu(&ns->siblings); + mutex_unlock(&ns->ctrl->subsys->lock); + mutex_lock(&ns->ctrl->namespaces_mutex); list_del_init(&ns->list); mutex_unlock(&ns->ctrl->namespaces_mutex); + synchronize_srcu(&head->srcu); nvme_put_ns(ns); } @@ -2691,7 +2836,7 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, struct nvme_ns *ns, *next; list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { - if (ns->ns_id > nsid) + if (ns->head->ns_id > nsid) nvme_ns_remove(ns); } } @@ -2961,7 +3106,6 @@ static void nvme_free_ctrl(struct device *dev) struct nvme_subsystem *subsys = ctrl->subsys; ida_simple_remove(&nvme_instance_ida, ctrl->instance); - ida_destroy(&ctrl->ns_ida); kfree(ctrl->effects); if (subsys) { @@ -3022,8 +3166,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, if (ret) goto out_free_name; - ida_init(&ctrl->ns_ida); - /* * Initialize latency tolerance controls. The sysfs files won't * be visible to userspace unless the device actually supports APST. diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 8fc949c5b49b..ba3d7f3349e5 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -305,7 +305,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) int ret; c.identity.opcode = nvme_nvm_admin_identity; - c.identity.nsid = cpu_to_le32(ns->ns_id); + c.identity.nsid = cpu_to_le32(ns->head->ns_id); c.identity.chnl_off = 0; nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL); @@ -344,7 +344,7 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, int ret = 0; c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl; - c.l2p.nsid = cpu_to_le32(ns->ns_id); + c.l2p.nsid = cpu_to_le32(ns->head->ns_id); entries = kmalloc(len, GFP_KERNEL); if (!entries) return -ENOMEM; @@ -402,7 +402,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, int ret = 0; c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl; - c.get_bb.nsid = cpu_to_le32(ns->ns_id); + c.get_bb.nsid = cpu_to_le32(ns->head->ns_id); c.get_bb.spba = cpu_to_le64(ppa.ppa); bb_tbl = kzalloc(tblsz, GFP_KERNEL); @@ -452,7 +452,7 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, int ret = 0; c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl; - c.set_bb.nsid = cpu_to_le32(ns->ns_id); + c.set_bb.nsid = cpu_to_le32(ns->head->ns_id); c.set_bb.spba = cpu_to_le64(ppas->ppa); c.set_bb.nlb = cpu_to_le16(nr_ppas - 1); c.set_bb.value = type; @@ -469,7 +469,7 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, struct nvme_nvm_command *c) { c->ph_rw.opcode = rqd->opcode; - c->ph_rw.nsid = cpu_to_le32(ns->ns_id); + c->ph_rw.nsid = cpu_to_le32(ns->head->ns_id); c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa); c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list); c->ph_rw.control = cpu_to_le16(rqd->flags); @@ -731,7 +731,7 @@ static int nvme_nvm_submit_vio(struct nvme_ns *ns, memset(&c, 0, sizeof(c)); c.ph_rw.opcode = vio.opcode; - c.ph_rw.nsid = cpu_to_le32(ns->ns_id); + c.ph_rw.nsid = cpu_to_le32(ns->head->ns_id); c.ph_rw.control = cpu_to_le16(vio.control); c.ph_rw.length = cpu_to_le16(vio.nppas); @@ -768,7 +768,7 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, memset(&c, 0, sizeof(c)); c.common.opcode = vcmd.opcode; - c.common.nsid = cpu_to_le32(ns->ns_id); + c.common.nsid = cpu_to_le32(ns->head->ns_id); c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2); c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); /* cdw11-12 */ diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2f1af915f93a..6e5004b00975 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -136,7 +136,6 @@ struct nvme_ctrl { struct device ctrl_device; struct device *device; /* char device */ struct cdev cdev; - struct ida ns_ida; struct work_struct reset_work; struct work_struct delete_work; @@ -209,12 +208,14 @@ struct nvme_subsystem { struct list_head entry; struct mutex lock; struct list_head ctrls; + struct list_head nsheads; char subnqn[NVMF_NQN_SIZE]; char serial[20]; char model[40]; char firmware_rev[8]; u8 cmic; u16 vendor_id; + struct ida ns_ida; }; /* @@ -226,18 +227,35 @@ struct nvme_ns_ids { uuid_t uuid; }; +/* + * Anchor structure for namespaces. There is one for each namespace in a + * NVMe subsystem that any of our controllers can see, and the namespace + * structure for each controller is chained of it. For private namespaces + * there is a 1:1 relation to our namespace structures, that is ->list + * only ever has a single entry for private namespaces. + */ +struct nvme_ns_head { + struct list_head list; + struct srcu_struct srcu; + struct nvme_subsystem *subsys; + unsigned ns_id; + struct nvme_ns_ids ids; + struct list_head entry; + struct kref ref; + int instance; +}; + struct nvme_ns { struct list_head list; struct nvme_ctrl *ctrl; struct request_queue *queue; struct gendisk *disk; + struct list_head siblings; struct nvm_dev *ndev; struct kref kref; - int instance; + struct nvme_ns_head *head; - unsigned ns_id; - struct nvme_ns_ids ids; int lba_shift; u16 ms; u16 sgs; -- cgit v1.2.3 From 32acab3181c7053c775ca128c3a5c6ce50197d7f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 12:59:30 +0100 Subject: nvme: implement multipath access to nvme subsystems This patch adds native multipath support to the nvme driver. For each namespace we create only single block device node, which can be used to access that namespace through any of the controllers that refer to it. The gendisk for each controllers path to the name space still exists inside the kernel, but is hidden from userspace. The character device nodes are still available on a per-controller basis. A new link from the sysfs directory for the subsystem allows to find all controllers for a given subsystem. Currently we will always send I/O to the first available path, this will be changed once the NVMe Asynchronous Namespace Access (ANA) TP is ratified and implemented, at which point we will look at the ANA state for each namespace. Another possibility that was prototyped is to use the path that is closes to the submitting NUMA code, which will be mostly interesting for PCI, but might also be useful for RDMA or FC transports in the future. There is not plan to implement round robin or I/O service time path selectors, as those are not scalable with the performance rates provided by NVMe. The multipath device will go away once all paths to it disappear, any delay to keep it alive needs to be implemented at the controller level. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- drivers/nvme/host/Kconfig | 9 ++ drivers/nvme/host/Makefile | 1 + drivers/nvme/host/core.c | 147 +++++++++++++++++++++--- drivers/nvme/host/multipath.c | 255 ++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 57 ++++++++++ 5 files changed, 455 insertions(+), 14 deletions(-) create mode 100644 drivers/nvme/host/multipath.c (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 46d6cb1e03bd..b979cf3bce65 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -13,6 +13,15 @@ config BLK_DEV_NVME To compile this driver as a module, choose M here: the module will be called nvme. +config NVME_MULTIPATH + bool "NVMe multipath support" + depends on NVME_CORE + ---help--- + This option enables support for multipath access to NVMe + subsystems. If this option is enabled only a single + /dev/nvmeXnY device will show up for each NVMe namespaces, + even if it is accessible through multiple controllers. + config NVME_FABRICS tristate diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index cc0aacb4c8b4..b856f2f549cd 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o obj-$(CONFIG_NVME_FC) += nvme-fc.o nvme-core-y := core.o +nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_NVM) += lightnvm.o nvme-y += pci.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 13676f6cd4f6..59f80a613fd8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -185,17 +185,22 @@ static inline bool nvme_req_needs_retry(struct request *req) return false; if (nvme_req(req)->retries >= nvme_max_retries) return false; - if (blk_queue_dying(req->q)) - return false; return true; } void nvme_complete_rq(struct request *req) { if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { - nvme_req(req)->retries++; - blk_mq_requeue_request(req, true); - return; + if (nvme_req_needs_failover(req)) { + nvme_failover_req(req); + return; + } + + if (!blk_queue_dying(req->q)) { + nvme_req(req)->retries++; + blk_mq_requeue_request(req, true); + return; + } } blk_mq_end_request(req, nvme_error_status(req)); @@ -286,7 +291,8 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, ctrl->state = new_state; spin_unlock_irqrestore(&ctrl->lock, flags); - + if (changed && ctrl->state == NVME_CTRL_LIVE) + nvme_kick_requeue_lists(ctrl); return changed; } EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); @@ -296,6 +302,7 @@ static void nvme_free_ns_head(struct kref *ref) struct nvme_ns_head *head = container_of(ref, struct nvme_ns_head, ref); + nvme_mpath_remove_disk(head); ida_simple_remove(&head->subsys->ns_ida, head->instance); list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); @@ -1138,11 +1145,33 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return status; } -static int nvme_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) +/* + * Issue ioctl requests on the first available path. Note that unlike normal + * block layer requests we will not retry failed request on another controller. + */ +static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk, + struct nvme_ns_head **head, int *srcu_idx) { - struct nvme_ns *ns = bdev->bd_disk->private_data; +#ifdef CONFIG_NVME_MULTIPATH + if (disk->fops == &nvme_ns_head_ops) { + *head = disk->private_data; + *srcu_idx = srcu_read_lock(&(*head)->srcu); + return nvme_find_path(*head); + } +#endif + *head = NULL; + *srcu_idx = -1; + return disk->private_data; +} + +static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) +{ + if (head) + srcu_read_unlock(&head->srcu, idx); +} +static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned cmd, unsigned long arg) +{ switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); @@ -1165,10 +1194,31 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, } } +static int nvme_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct nvme_ns_head *head = NULL; + struct nvme_ns *ns; + int srcu_idx, ret; + + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + if (unlikely(!ns)) + ret = -EWOULDBLOCK; + else + ret = nvme_ns_ioctl(ns, cmd, arg); + nvme_put_ns_from_disk(head, srcu_idx); + return ret; +} + static int nvme_open(struct block_device *bdev, fmode_t mode) { struct nvme_ns *ns = bdev->bd_disk->private_data; +#ifdef CONFIG_NVME_MULTIPATH + /* should never be called due to GENHD_FL_HIDDEN */ + if (WARN_ON_ONCE(ns->head->disk)) + return -ENXIO; +#endif if (!kref_get_unless_zero(&ns->kref)) return -ENXIO; return 0; @@ -1329,6 +1379,10 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->noiob) nvme_set_chunk_size(ns); nvme_update_disk_info(disk, ns, id); +#ifdef CONFIG_NVME_MULTIPATH + if (ns->head->disk) + nvme_update_disk_info(ns->head->disk, ns, id); +#endif } static int nvme_revalidate_disk(struct gendisk *disk) @@ -1388,8 +1442,10 @@ static char nvme_pr_type(enum pr_type type) static int nvme_pr_command(struct block_device *bdev, u32 cdw10, u64 key, u64 sa_key, u8 op) { - struct nvme_ns *ns = bdev->bd_disk->private_data; + struct nvme_ns_head *head = NULL; + struct nvme_ns *ns; struct nvme_command c; + int srcu_idx, ret; u8 data[16] = { 0, }; put_unaligned_le64(key, &data[0]); @@ -1397,10 +1453,16 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10, memset(&c, 0, sizeof(c)); c.common.opcode = op; - c.common.nsid = cpu_to_le32(ns->head->ns_id); + c.common.nsid = cpu_to_le32(head->ns_id); c.common.cdw10[0] = cpu_to_le32(cdw10); - return nvme_submit_sync_cmd(ns->queue, &c, data, 16); + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + if (unlikely(!ns)) + ret = -EWOULDBLOCK; + else + ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); + nvme_put_ns_from_disk(head, srcu_idx); + return ret; } static int nvme_pr_register(struct block_device *bdev, u64 old, @@ -1490,6 +1552,32 @@ static const struct block_device_operations nvme_fops = { .pr_ops = &nvme_pr_ops, }; +#ifdef CONFIG_NVME_MULTIPATH +static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) +{ + struct nvme_ns_head *head = bdev->bd_disk->private_data; + + if (!kref_get_unless_zero(&head->ref)) + return -ENXIO; + return 0; +} + +static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) +{ + nvme_put_ns_head(disk->private_data); +} + +const struct block_device_operations nvme_ns_head_ops = { + .owner = THIS_MODULE, + .open = nvme_ns_head_open, + .release = nvme_ns_head_release, + .ioctl = nvme_ioctl, + .compat_ioctl = nvme_ioctl, + .getgeo = nvme_getgeo, + .pr_ops = &nvme_pr_ops, +}; +#endif /* CONFIG_NVME_MULTIPATH */ + static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) { unsigned long timeout = @@ -2592,6 +2680,10 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_cleanup_srcu; } + ret = nvme_mpath_alloc_disk(ctrl, head); + if (ret) + goto out_cleanup_srcu; + list_add_tail(&head->entry, &ctrl->subsys->nsheads); return head; out_cleanup_srcu: @@ -2704,7 +2796,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) struct gendisk *disk; struct nvme_id_ns *id; char disk_name[DISK_NAME_LEN]; - int node = dev_to_node(ctrl->dev); + int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT; bool new = true; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); @@ -2735,7 +2827,30 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_init_ns_head(ns, nsid, id, &new)) goto out_free_id; +#ifdef CONFIG_NVME_MULTIPATH + /* + * If multipathing is enabled we need to always use the subsystem + * instance number for numbering our devices to avoid conflicts + * between subsystems that have multiple controllers and thus use + * the multipath-aware subsystem node and those that have a single + * controller and use the controller node directly. + */ + if (ns->head->disk) { + sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, + ctrl->cntlid, ns->head->instance); + flags = GENHD_FL_HIDDEN; + } else { + sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, + ns->head->instance); + } +#else + /* + * But without the multipath code enabled, multiple controller per + * subsystems are visible as devices and thus we cannot use the + * subsystem instance. + */ sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); +#endif if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { if (nvme_nvm_register(ns, disk_name, node)) { @@ -2751,7 +2866,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) disk->fops = &nvme_fops; disk->private_data = ns; disk->queue = ns->queue; - disk->flags = GENHD_FL_EXT_DEVT; + disk->flags = flags; memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); ns->disk = disk; @@ -2773,6 +2888,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (ns->ndev && nvme_nvm_register_sysfs(ns)) pr_warn("%s: failed to register lightnvm sysfs group for identification\n", ns->disk->disk_name); + + if (new) + nvme_mpath_add_disk(ns->head); return; out_unlink_ns: mutex_lock(&ctrl->subsys->lock); @@ -2805,6 +2923,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) } mutex_lock(&ns->ctrl->subsys->lock); + nvme_mpath_clear_current_path(ns); if (head) list_del_rcu(&ns->siblings); mutex_unlock(&ns->ctrl->subsys->lock); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c new file mode 100644 index 000000000000..062754ebebfd --- /dev/null +++ b/drivers/nvme/host/multipath.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2017 Christoph Hellwig. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include "nvme.h" + +static bool multipath = true; +module_param(multipath, bool, 0644); +MODULE_PARM_DESC(multipath, + "turn on native support for multiple controllers per subsystem"); + +void nvme_failover_req(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + unsigned long flags; + + spin_lock_irqsave(&ns->head->requeue_lock, flags); + blk_steal_bios(&ns->head->requeue_list, req); + spin_unlock_irqrestore(&ns->head->requeue_lock, flags); + blk_mq_end_request(req, 0); + + nvme_reset_ctrl(ns->ctrl); + kblockd_schedule_work(&ns->head->requeue_work); +} + +bool nvme_req_needs_failover(struct request *req) +{ + if (!(req->cmd_flags & REQ_NVME_MPATH)) + return false; + + switch (nvme_req(req)->status & 0x7ff) { + /* + * Generic command status: + */ + case NVME_SC_INVALID_OPCODE: + case NVME_SC_INVALID_FIELD: + case NVME_SC_INVALID_NS: + case NVME_SC_LBA_RANGE: + case NVME_SC_CAP_EXCEEDED: + case NVME_SC_RESERVATION_CONFLICT: + return false; + + /* + * I/O command set specific error. Unfortunately these values are + * reused for fabrics commands, but those should never get here. + */ + case NVME_SC_BAD_ATTRIBUTES: + case NVME_SC_INVALID_PI: + case NVME_SC_READ_ONLY: + case NVME_SC_ONCS_NOT_SUPPORTED: + WARN_ON_ONCE(nvme_req(req)->cmd->common.opcode == + nvme_fabrics_command); + return false; + + /* + * Media and Data Integrity Errors: + */ + case NVME_SC_WRITE_FAULT: + case NVME_SC_READ_ERROR: + case NVME_SC_GUARD_CHECK: + case NVME_SC_APPTAG_CHECK: + case NVME_SC_REFTAG_CHECK: + case NVME_SC_COMPARE_FAILED: + case NVME_SC_ACCESS_DENIED: + case NVME_SC_UNWRITTEN_BLOCK: + return false; + } + + /* Everything else could be a path failure, so should be retried */ + return true; +} + +void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) { + if (ns->head->disk) + kblockd_schedule_work(&ns->head->requeue_work); + } + mutex_unlock(&ctrl->namespaces_mutex); +} + +static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) +{ + struct nvme_ns *ns; + + list_for_each_entry_rcu(ns, &head->list, siblings) { + if (ns->ctrl->state == NVME_CTRL_LIVE) { + rcu_assign_pointer(head->current_path, ns); + return ns; + } + } + + return NULL; +} + +inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) +{ + struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu); + + if (unlikely(!ns || ns->ctrl->state != NVME_CTRL_LIVE)) + ns = __nvme_find_path(head); + return ns; +} + +static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, + struct bio *bio) +{ + struct nvme_ns_head *head = q->queuedata; + struct device *dev = disk_to_dev(head->disk); + struct nvme_ns *ns; + blk_qc_t ret = BLK_QC_T_NONE; + int srcu_idx; + + srcu_idx = srcu_read_lock(&head->srcu); + ns = nvme_find_path(head); + if (likely(ns)) { + bio->bi_disk = ns->disk; + bio->bi_opf |= REQ_NVME_MPATH; + ret = direct_make_request(bio); + } else if (!list_empty_careful(&head->list)) { + dev_warn_ratelimited(dev, "no path available - requeing I/O\n"); + + spin_lock_irq(&head->requeue_lock); + bio_list_add(&head->requeue_list, bio); + spin_unlock_irq(&head->requeue_lock); + } else { + dev_warn_ratelimited(dev, "no path - failing I/O\n"); + + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + } + + srcu_read_unlock(&head->srcu, srcu_idx); + return ret; +} + +static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) +{ + struct nvme_ns_head *head = q->queuedata; + struct nvme_ns *ns; + bool found = false; + int srcu_idx; + + srcu_idx = srcu_read_lock(&head->srcu); + ns = srcu_dereference(head->current_path, &head->srcu); + if (likely(ns && ns->ctrl->state == NVME_CTRL_LIVE)) + found = ns->queue->poll_fn(q, qc); + srcu_read_unlock(&head->srcu, srcu_idx); + return found; +} + +static void nvme_requeue_work(struct work_struct *work) +{ + struct nvme_ns_head *head = + container_of(work, struct nvme_ns_head, requeue_work); + struct bio *bio, *next; + + spin_lock_irq(&head->requeue_lock); + next = bio_list_get(&head->requeue_list); + spin_unlock_irq(&head->requeue_lock); + + while ((bio = next) != NULL) { + next = bio->bi_next; + bio->bi_next = NULL; + + /* + * Reset disk to the mpath node and resubmit to select a new + * path. + */ + bio->bi_disk = head->disk; + generic_make_request(bio); + } +} + +int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) +{ + struct request_queue *q; + bool vwc = false; + + bio_list_init(&head->requeue_list); + spin_lock_init(&head->requeue_lock); + INIT_WORK(&head->requeue_work, nvme_requeue_work); + + /* + * Add a multipath node if the subsystems supports multiple controllers. + * We also do this for private namespaces as the namespace sharing data could + * change after a rescan. + */ + if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) + return 0; + + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + if (!q) + goto out; + q->queuedata = head; + blk_queue_make_request(q, nvme_ns_head_make_request); + q->poll_fn = nvme_ns_head_poll; + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + /* set to a default value for 512 until disk is validated */ + blk_queue_logical_block_size(q, 512); + + /* we need to propagate up the VMC settings */ + if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) + vwc = true; + blk_queue_write_cache(q, vwc, vwc); + + head->disk = alloc_disk(0); + if (!head->disk) + goto out_cleanup_queue; + head->disk->fops = &nvme_ns_head_ops; + head->disk->private_data = head; + head->disk->queue = q; + head->disk->flags = GENHD_FL_EXT_DEVT; + sprintf(head->disk->disk_name, "nvme%dn%d", + ctrl->subsys->instance, head->instance); + return 0; + +out_cleanup_queue: + blk_cleanup_queue(q); +out: + return -ENOMEM; +} + +void nvme_mpath_add_disk(struct nvme_ns_head *head) +{ + if (!head->disk) + return; + device_add_disk(&head->subsys->dev, head->disk); +} + +void nvme_mpath_remove_disk(struct nvme_ns_head *head) +{ + if (!head->disk) + return; + del_gendisk(head->disk); + blk_set_queue_dying(head->disk->queue); + /* make sure all pending bios are cleaned up */ + kblockd_schedule_work(&head->requeue_work); + flush_work(&head->requeue_work); + blk_cleanup_queue(head->disk->queue); + put_disk(head->disk); +} diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6e5004b00975..dd6ced664b45 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -95,6 +95,11 @@ struct nvme_request { u16 status; }; +/* + * Mark a bio as coming in through the mpath node. + */ +#define REQ_NVME_MPATH REQ_DRV + enum { NVME_REQ_CANCELLED = (1 << 0), }; @@ -235,6 +240,13 @@ struct nvme_ns_ids { * only ever has a single entry for private namespaces. */ struct nvme_ns_head { +#ifdef CONFIG_NVME_MULTIPATH + struct gendisk *disk; + struct nvme_ns __rcu *current_path; + struct bio_list requeue_list; + spinlock_t requeue_lock; + struct work_struct requeue_work; +#endif struct list_head list; struct srcu_struct srcu; struct nvme_subsystem *subsys; @@ -384,6 +396,51 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); +extern const struct block_device_operations nvme_ns_head_ops; + +#ifdef CONFIG_NVME_MULTIPATH +void nvme_failover_req(struct request *req); +bool nvme_req_needs_failover(struct request *req); +void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); +int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); +void nvme_mpath_add_disk(struct nvme_ns_head *head); +void nvme_mpath_remove_disk(struct nvme_ns_head *head); + +static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) +{ + struct nvme_ns_head *head = ns->head; + + if (head && ns == srcu_dereference(head->current_path, &head->srcu)) + rcu_assign_pointer(head->current_path, NULL); +} +struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); +#else +static inline void nvme_failover_req(struct request *req) +{ +} +static inline bool nvme_req_needs_failover(struct request *req) +{ + return false; +} +static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) +{ +} +static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head) +{ + return 0; +} +static inline void nvme_mpath_add_disk(struct nvme_ns_head *head) +{ +} +static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) +{ +} +static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) +{ +} +#endif /* CONFIG_NVME_MULTIPATH */ + #ifdef CONFIG_NVM int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); -- cgit v1.2.3 From 5b85b826b8925b3f1910b6613eb82c4df240a5b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Nov 2017 13:51:03 +0100 Subject: nvme: also expose the namespace identification sysfs files for mpath nodes We do this by adding a helper that returns the ns_head for a device that can belong to either the per-controller or per-subsystem block device nodes, and otherwise reuse all the existing code. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 57 +++++++++++++++++++++++-------------------- drivers/nvme/host/multipath.c | 6 +++++ drivers/nvme/host/nvme.h | 1 + 3 files changed, 38 insertions(+), 26 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 59f80a613fd8..438d3fa12608 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2382,12 +2382,22 @@ static ssize_t nvme_sysfs_rescan(struct device *dev, } static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan); +static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) +{ + struct gendisk *disk = dev_to_disk(dev); + + if (disk->fops == &nvme_fops) + return nvme_get_ns_from_dev(dev)->head; + else + return disk->private_data; +} + static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvme_ns_ids *ids = &ns->head->ids; - struct nvme_subsystem *subsys = ns->ctrl->subsys; + struct nvme_ns_head *head = dev_to_ns_head(dev); + struct nvme_ns_ids *ids = &head->ids; + struct nvme_subsystem *subsys = head->subsys; int serial_len = sizeof(subsys->serial); int model_len = sizeof(subsys->model); @@ -2409,23 +2419,21 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, serial_len, subsys->serial, model_len, subsys->model, - ns->head->ns_id); + head->ns_id); } static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL); static ssize_t nguid_show(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%pU\n", ns->head->ids.nguid); + return sprintf(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid); } static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL); static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvme_ns_ids *ids = &ns->head->ids; + struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids; /* For backward compatibility expose the NGUID to userspace if * we have no UUID set @@ -2440,22 +2448,20 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL); static ssize_t eui_show(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%8ph\n", ns->head->ids.eui64); + return sprintf(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64); } static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL); static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, - char *buf) + char *buf) { - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%d\n", ns->head->ns_id); + return sprintf(buf, "%d\n", dev_to_ns_head(dev)->ns_id); } static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL); -static struct attribute *nvme_ns_attrs[] = { +static struct attribute *nvme_ns_id_attrs[] = { &dev_attr_wwid.attr, &dev_attr_uuid.attr, &dev_attr_nguid.attr, @@ -2464,12 +2470,11 @@ static struct attribute *nvme_ns_attrs[] = { NULL, }; -static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, +static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, struct device, kobj); - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvme_ns_ids *ids = &ns->head->ids; + struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids; if (a == &dev_attr_uuid.attr) { if (uuid_is_null(&ids->uuid) || @@ -2487,9 +2492,9 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, return a->mode; } -static const struct attribute_group nvme_ns_attr_group = { - .attrs = nvme_ns_attrs, - .is_visible = nvme_ns_attrs_are_visible, +const struct attribute_group nvme_ns_id_attr_group = { + .attrs = nvme_ns_id_attrs, + .is_visible = nvme_ns_id_attrs_are_visible, }; #define nvme_show_str_function(field) \ @@ -2882,7 +2887,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) device_add_disk(ctrl->device, ns->disk); if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_attr_group)) + &nvme_ns_id_attr_group)) pr_warn("%s: failed to create sysfs group for identification\n", ns->disk->disk_name); if (ns->ndev && nvme_nvm_register_sysfs(ns)) @@ -2915,7 +2920,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (blk_get_integrity(ns->disk)) blk_integrity_unregister(ns->disk); sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_attr_group); + &nvme_ns_id_attr_group); if (ns->ndev) nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 062754ebebfd..850275896e49 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -239,12 +239,18 @@ void nvme_mpath_add_disk(struct nvme_ns_head *head) if (!head->disk) return; device_add_disk(&head->subsys->dev, head->disk); + if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, + &nvme_ns_id_attr_group)) + pr_warn("%s: failed to create sysfs group for identification\n", + head->disk->disk_name); } void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; + sysfs_remove_group(&disk_to_dev(head->disk)->kobj, + &nvme_ns_id_attr_group); del_gendisk(head->disk); blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index dd6ced664b45..895a4330dda0 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -396,6 +396,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); +extern const struct attribute_group nvme_ns_id_attr_group; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH -- cgit v1.2.3 From e9a48034d7d1318ece7d4a235838a86c94db9d68 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 9 Nov 2017 17:57:06 +0100 Subject: nvme: create 'slaves' and 'holders' entries for hidden controllers When creating nvme multipath devices we should populate the 'slaves' and 'holders' directorys properly to aid userspace topology detection. Signed-off-by: Hannes Reinecke [hch: split from a larger patch, compile fix for CONFIG_NVME_MULTIPATH=n] Reviewed-by: Keith Busch Reviewed-by: Martin K. Petersen Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/multipath.c | 30 ++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 8 ++++++++ 3 files changed, 40 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 438d3fa12608..c3182aa654fd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2896,6 +2896,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (new) nvme_mpath_add_disk(ns->head); + nvme_mpath_add_disk_links(ns); return; out_unlink_ns: mutex_lock(&ctrl->subsys->lock); @@ -2919,6 +2920,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (ns->disk && ns->disk->flags & GENHD_FL_UP) { if (blk_get_integrity(ns->disk)) blk_integrity_unregister(ns->disk); + nvme_mpath_remove_disk_links(ns); sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_id_attr_group); if (ns->ndev) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 850275896e49..78d92151a904 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -245,6 +245,25 @@ void nvme_mpath_add_disk(struct nvme_ns_head *head) head->disk->disk_name); } +void nvme_mpath_add_disk_links(struct nvme_ns *ns) +{ + struct kobject *slave_disk_kobj, *holder_disk_kobj; + + if (!ns->head->disk) + return; + + slave_disk_kobj = &disk_to_dev(ns->disk)->kobj; + if (sysfs_create_link(ns->head->disk->slave_dir, slave_disk_kobj, + kobject_name(slave_disk_kobj))) + return; + + holder_disk_kobj = &disk_to_dev(ns->head->disk)->kobj; + if (sysfs_create_link(ns->disk->part0.holder_dir, holder_disk_kobj, + kobject_name(holder_disk_kobj))) + sysfs_remove_link(ns->head->disk->slave_dir, + kobject_name(slave_disk_kobj)); +} + void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) @@ -259,3 +278,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) blk_cleanup_queue(head->disk->queue); put_disk(head->disk); } + +void nvme_mpath_remove_disk_links(struct nvme_ns *ns) +{ + if (!ns->head->disk) + return; + + sysfs_remove_link(ns->disk->part0.holder_dir, + kobject_name(&disk_to_dev(ns->head->disk)->kobj)); + sysfs_remove_link(ns->head->disk->slave_dir, + kobject_name(&disk_to_dev(ns->disk)->kobj)); +} diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 895a4330dda0..c0873a68872f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -405,7 +405,9 @@ bool nvme_req_needs_failover(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns_head *head); +void nvme_mpath_add_disk_links(struct nvme_ns *ns); void nvme_mpath_remove_disk(struct nvme_ns_head *head); +void nvme_mpath_remove_disk_links(struct nvme_ns *ns); static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) { @@ -437,6 +439,12 @@ static inline void nvme_mpath_add_disk(struct nvme_ns_head *head) static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) { } +static inline void nvme_mpath_add_disk_links(struct nvme_ns *ns) +{ +} +static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns) +{ +} static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) { } -- cgit v1.2.3 From 1e496938b6aec67b7cfd06614164fee0fe9f13e3 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 10 Nov 2017 10:58:23 +0100 Subject: nvme: expose subsys attribute to sysfs We should be exposing the subsystem attributes like 'model' and 'subsysnqn' to sysfs to allow for easier identification of the subsystem. Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c3182aa654fd..7b3bbc1a9ac4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1999,6 +1999,53 @@ static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn) return NULL; } +#define SUBSYS_ATTR_RO(_name, _mode, _show) \ + struct device_attribute subsys_attr_##_name = \ + __ATTR(_name, _mode, _show, NULL) + +static ssize_t nvme_subsys_show_nqn(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_subsystem *subsys = + container_of(dev, struct nvme_subsystem, dev); + + return snprintf(buf, PAGE_SIZE, "%s\n", subsys->subnqn); +} +static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn); + +#define nvme_subsys_show_str_function(field) \ +static ssize_t subsys_##field##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct nvme_subsystem *subsys = \ + container_of(dev, struct nvme_subsystem, dev); \ + return sprintf(buf, "%.*s\n", \ + (int)sizeof(subsys->field), subsys->field); \ +} \ +static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show); + +nvme_subsys_show_str_function(model); +nvme_subsys_show_str_function(serial); +nvme_subsys_show_str_function(firmware_rev); + +static struct attribute *nvme_subsys_attrs[] = { + &subsys_attr_model.attr, + &subsys_attr_serial.attr, + &subsys_attr_firmware_rev.attr, + &subsys_attr_subsysnqn.attr, + NULL, +}; + +static struct attribute_group nvme_subsys_attrs_group = { + .attrs = nvme_subsys_attrs, +}; + +static const struct attribute_group *nvme_subsys_attrs_groups[] = { + &nvme_subsys_attrs_group, + NULL, +}; + static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { struct nvme_subsystem *subsys, *found; @@ -2026,6 +2073,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) subsys->dev.class = nvme_subsys_class; subsys->dev.release = nvme_release_subsystem; + subsys->dev.groups = nvme_subsys_attrs_groups; dev_set_name(&subsys->dev, "nvme-subsys%d", subsys->instance); device_initialize(&subsys->dev); -- cgit v1.2.3 From a04b5de5050ab8b891128eb2c47a0916fe8622e1 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 28 Sep 2017 21:33:23 +0200 Subject: nvme: fix visibility of "uuid" ns attribute "uuid" must be invisible if both ns->uuid and ns->nguid are unset, not if either one is. Fixes: d934f9848a77 "nvme: provide UUID value to userspace" Signed-off-by: Martin Wilck [hch: rebased to the nvme-4.15 tree to help resolving a conflict] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7b3bbc1a9ac4..993813ccdc0b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2525,7 +2525,7 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids; if (a == &dev_attr_uuid.attr) { - if (uuid_is_null(&ids->uuid) || + if (uuid_is_null(&ids->uuid) && !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) return 0; } -- cgit v1.2.3