diff options
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/core.c | 215 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 6 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 8 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 52 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 63 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 25 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 63 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 44 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 60 | ||||
-rw-r--r-- | drivers/nvme/host/zns.c | 2 |
10 files changed, 373 insertions, 165 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7efb31b87f37..838b5e2058be 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -6,6 +6,7 @@ #include <linux/blkdev.h> #include <linux/blk-mq.h> +#include <linux/blk-integrity.h> #include <linux/compat.h> #include <linux/delay.h> #include <linux/errno.h> @@ -13,7 +14,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/backing-dev.h> -#include <linux/list_sort.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/pr.h> @@ -119,25 +119,6 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, static void nvme_update_keep_alive(struct nvme_ctrl *ctrl, struct nvme_command *cmd); -/* - * Prepare a queue for teardown. - * - * This must forcibly unquiesce queues to avoid blocking dispatch, and only set - * the capacity to 0 after that to avoid blocking dispatchers that may be - * holding bd_butex. This will end buffered writers dirtying pages that can't - * be synced. - */ -static void nvme_set_queue_dying(struct nvme_ns *ns) -{ - if (test_and_set_bit(NVME_NS_DEAD, &ns->flags)) - return; - - blk_set_queue_dying(ns->queue); - blk_mq_unquiesce_queue(ns->queue); - - set_capacity_and_notify(ns->disk, 0); -} - void nvme_queue_scan(struct nvme_ctrl *ctrl) { /* @@ -222,7 +203,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl) { dev_info(ctrl->device, - "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn); + "Removing ctrl: NQN \"%s\"\n", nvmf_ctrl_subsysnqn(ctrl)); flush_work(&ctrl->reset_work); nvme_stop_ctrl(ctrl); @@ -346,15 +327,19 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) return RETRY; } -static inline void nvme_end_req(struct request *req) +static inline void nvme_end_req_zoned(struct request *req) { - blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && req_op(req) == REQ_OP_ZONE_APPEND) req->__sector = nvme_lba_to_sect(req->q->queuedata, le64_to_cpu(nvme_req(req)->result.u64)); +} +static inline void nvme_end_req(struct request *req) +{ + blk_status_t status = nvme_error_status(nvme_req(req)->status); + + nvme_end_req_zoned(req); nvme_trace_bio_complete(req); blk_mq_end_request(req, status); } @@ -381,6 +366,13 @@ void nvme_complete_rq(struct request *req) } EXPORT_SYMBOL_GPL(nvme_complete_rq); +void nvme_complete_batch_req(struct request *req) +{ + nvme_cleanup_cmd(req); + nvme_end_req_zoned(req); +} +EXPORT_SYMBOL_GPL(nvme_complete_batch_req); + /* * Called to unwind from ->queue_rq on a failed command submission so that the * multipathing code gets called to potentially failover to another path. @@ -632,7 +624,7 @@ static inline void nvme_init_request(struct request *req, req->cmd_flags |= REQ_FAILFAST_DRIVER; if (req->mq_hctx->type == HCTX_TYPE_POLL) - req->cmd_flags |= REQ_HIPRI; + req->cmd_flags |= REQ_POLLED; nvme_clear_nvme_request(req); memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); } @@ -823,6 +815,7 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, static inline void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd) { + memset(cmnd, 0, sizeof(*cmnd)); cmnd->common.opcode = nvme_cmd_flush; cmnd->common.nsid = cpu_to_le32(ns->head->ns_id); } @@ -874,6 +867,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, return BLK_STS_IOERR; } + memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id); cmnd->dsm.nr = cpu_to_le32(segments - 1); @@ -890,6 +884,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { + memset(cmnd, 0, sizeof(*cmnd)); + if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) return nvme_setup_discard(ns, req, cmnd); @@ -923,9 +919,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; cmnd->rw.opcode = op; + cmnd->rw.flags = 0; cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); + cmnd->rw.rsvd2 = 0; + cmnd->rw.metadata = 0; cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + cmnd->rw.reftag = 0; + cmnd->rw.apptag = 0; + cmnd->rw.appmask = 0; if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) nvme_assign_write_stream(ctrl, req, &control, &dsmgmt); @@ -979,12 +981,11 @@ EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) { struct nvme_command *cmd = nvme_req(req)->cmd; + struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; blk_status_t ret = BLK_STS_OK; - if (!(req->rq_flags & RQF_DONTPREP)) { + if (!(req->rq_flags & RQF_DONTPREP)) nvme_clear_nvme_request(req); - memset(cmd, 0, sizeof(*cmd)); - } switch (req_op(req)) { case REQ_OP_DRV_IN: @@ -1027,7 +1028,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) return BLK_STS_IOERR; } - nvme_req(req)->genctr++; + if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN)) + nvme_req(req)->genctr++; cmd->common.command_id = nvme_cid(req); trace_nvme_setup_cmd(req, cmd); return ret; @@ -2599,6 +2601,24 @@ static ssize_t nvme_subsys_show_nqn(struct device *dev, } static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn); +static ssize_t nvme_subsys_show_type(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_subsystem *subsys = + container_of(dev, struct nvme_subsystem, dev); + + switch (subsys->subtype) { + case NVME_NQN_DISC: + return sysfs_emit(buf, "discovery\n"); + case NVME_NQN_NVME: + return sysfs_emit(buf, "nvm\n"); + default: + return sysfs_emit(buf, "reserved\n"); + } +} +static SUBSYS_ATTR_RO(subsystype, S_IRUGO, nvme_subsys_show_type); + #define nvme_subsys_show_str_function(field) \ static ssize_t subsys_##field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -2619,6 +2639,7 @@ static struct attribute *nvme_subsys_attrs[] = { &subsys_attr_serial.attr, &subsys_attr_firmware_rev.attr, &subsys_attr_subsysnqn.attr, + &subsys_attr_subsystype.attr, #ifdef CONFIG_NVME_MULTIPATH &subsys_attr_iopolicy.attr, #endif @@ -2689,6 +2710,21 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; + + /* Versions prior to 1.4 don't necessarily report a valid type */ + if (id->cntrltype == NVME_CTRL_DISC || + !strcmp(subsys->subnqn, NVME_DISC_SUBSYS_NAME)) + subsys->subtype = NVME_NQN_DISC; + else + subsys->subtype = NVME_NQN_NVME; + + if (nvme_discovery_ctrl(ctrl) && subsys->subtype != NVME_NQN_DISC) { + dev_err(ctrl->device, + "Subsystem %s is not a discovery controller", + subsys->subnqn); + kfree(subsys); + return -EINVAL; + } subsys->awupf = le16_to_cpu(id->awupf); #ifdef CONFIG_NVME_MULTIPATH subsys->iopolicy = NVME_IOPOLICY_NUMA; @@ -3524,7 +3560,9 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id == nsid && nvme_tryget_ns_head(h)) + if (h->ns_id != nsid) + continue; + if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; } @@ -3547,10 +3585,15 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, return 0; } +static void nvme_cdev_rel(struct device *dev) +{ + ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(dev->devt)); +} + void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device) { cdev_device_del(cdev, cdev_device); - ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(cdev_device->devt)); + put_device(cdev_device); } int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, @@ -3563,14 +3606,14 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, return minor; cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor); cdev_device->class = nvme_ns_chr_class; + cdev_device->release = nvme_cdev_rel; device_initialize(cdev_device); cdev_init(cdev, fops); cdev->owner = owner; ret = cdev_device_add(cdev, cdev_device); - if (ret) { + if (ret) put_device(cdev_device); - ida_simple_remove(&nvme_ns_chr_minor_ida, minor); - } + return ret; } @@ -3602,11 +3645,9 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns) ns->ctrl->instance, ns->head->instance); if (ret) return ret; - ret = nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops, - ns->ctrl->ops->module); - if (ret) - kfree_const(ns->cdev_device.kobj.name); - return ret; + + return nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops, + ns->ctrl->ops->module); } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, @@ -3714,15 +3755,6 @@ out_unlock: return ret; } -static int ns_cmp(void *priv, const struct list_head *a, - const struct list_head *b) -{ - struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); - struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); - - return nsa->head->ns_id - nsb->head->ns_id; -} - struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns, *ret = NULL; @@ -3743,6 +3775,22 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) } EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU); +/* + * Add the namespace to the controller list while keeping the list ordered. + */ +static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns) +{ + struct nvme_ns *tmp; + + list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) { + if (tmp->head->ns_id < ns->head->ns_id) { + list_add(&ns->list, &tmp->list); + return; + } + } + list_add(&ns->list, &ns->ctrl->namespaces); +} + static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns_ids *ids) { @@ -3793,9 +3841,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_unlink_ns; down_write(&ctrl->namespaces_rwsem); - list_add_tail(&ns->list, &ctrl->namespaces); + nvme_ns_add_to_ctrl_list(ns); up_write(&ctrl->namespaces_rwsem); - nvme_get_ctrl(ctrl); if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) @@ -3843,6 +3890,10 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) { + list_del_init(&ns->head->entry); + last_path = true; + } mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ @@ -3856,20 +3907,11 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_cdev_del(&ns->cdev, &ns->cdev_device); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); down_write(&ns->ctrl->namespaces_rwsem); list_del_init(&ns->list); up_write(&ns->ctrl->namespaces_rwsem); - /* Synchronize with nvme_init_ns_head() */ - mutex_lock(&ns->head->subsys->lock); - if (list_empty(&ns->head->list)) { - list_del_init(&ns->head->entry); - last_path = true; - } - mutex_unlock(&ns->head->subsys->lock); if (last_path) nvme_mpath_shutdown_disk(ns->head); nvme_put_ns(ns); @@ -4083,10 +4125,6 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_scan_ns_list(ctrl) != 0) nvme_scan_ns_sequential(ctrl); mutex_unlock(&ctrl->scan_lock); - - down_write(&ctrl->namespaces_rwsem); - list_sort(NULL, &ctrl->namespaces, ns_cmp); - up_write(&ctrl->namespaces_rwsem); } /* @@ -4470,6 +4508,37 @@ out: } EXPORT_SYMBOL_GPL(nvme_init_ctrl); +static void nvme_start_ns_queue(struct nvme_ns *ns) +{ + if (test_and_clear_bit(NVME_NS_STOPPED, &ns->flags)) + blk_mq_unquiesce_queue(ns->queue); +} + +static void nvme_stop_ns_queue(struct nvme_ns *ns) +{ + if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags)) + blk_mq_quiesce_queue(ns->queue); +} + +/* + * Prepare a queue for teardown. + * + * This must forcibly unquiesce queues to avoid blocking dispatch, and only set + * the capacity to 0 after that to avoid blocking dispatchers that may be + * holding bd_butex. This will end buffered writers dirtying pages that can't + * be synced. + */ +static void nvme_set_queue_dying(struct nvme_ns *ns) +{ + if (test_and_set_bit(NVME_NS_DEAD, &ns->flags)) + return; + + blk_set_queue_dying(ns->queue); + nvme_start_ns_queue(ns); + + set_capacity_and_notify(ns->disk, 0); +} + /** * nvme_kill_queues(): Ends all namespace queues * @ctrl: the dead controller that needs to end @@ -4485,7 +4554,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) /* Forcibly unquiesce queues to avoid blocking dispatch */ if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q)) - blk_mq_unquiesce_queue(ctrl->admin_q); + nvme_start_admin_queue(ctrl); list_for_each_entry(ns, &ctrl->namespaces, list) nvme_set_queue_dying(ns); @@ -4548,7 +4617,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl) down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_quiesce_queue(ns->queue); + nvme_stop_ns_queue(ns); up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_stop_queues); @@ -4559,11 +4628,25 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_unquiesce_queue(ns->queue); + nvme_start_ns_queue(ns); up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_start_queues); +void nvme_stop_admin_queue(struct nvme_ctrl *ctrl) +{ + if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags)) + blk_mq_quiesce_queue(ctrl->admin_q); +} +EXPORT_SYMBOL_GPL(nvme_stop_admin_queue); + +void nvme_start_admin_queue(struct nvme_ctrl *ctrl) +{ + if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags)) + blk_mq_unquiesce_queue(ctrl->admin_q); +} +EXPORT_SYMBOL_GPL(nvme_start_admin_queue); + void nvme_sync_io_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 668c6bb7a567..c5a2b71c5268 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -548,6 +548,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" }, { NVMF_OPT_TOS, "tos=%d" }, { NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" }, + { NVMF_OPT_DISCOVERY, "discovery" }, { NVMF_OPT_ERR, NULL } }; @@ -823,6 +824,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->tos = token; break; + case NVMF_OPT_DISCOVERY: + opts->discovery_nqn = true; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -949,7 +953,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options); #define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \ NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\ - NVMF_OPT_DISABLE_SQFLOW |\ + NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\ NVMF_OPT_FAIL_FAST_TMO) static struct nvme_ctrl * diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index a146cb903869..c3203ff1c654 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -67,6 +67,7 @@ enum { NVMF_OPT_TOS = 1 << 19, NVMF_OPT_FAIL_FAST_TMO = 1 << 20, NVMF_OPT_HOST_IFACE = 1 << 21, + NVMF_OPT_DISCOVERY = 1 << 22, }; /** @@ -178,6 +179,13 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, return true; } +static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl) +{ + if (!ctrl->subsys) + return ctrl->opts->subsysnqn; + return ctrl->subsys->subnqn; +} + int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b08a61ca283f..71b3108c22f0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -16,6 +16,7 @@ #include <linux/nvme-fc.h> #include "fc.h" #include <scsi/scsi_transport_fc.h> +#include <linux/blk-mq-pci.h> /* *************************** Data Structures/Defines ****************** */ @@ -2382,7 +2383,7 @@ nvme_fc_ctrl_free(struct kref *ref) list_del(&ctrl->ctrl_list); spin_unlock_irqrestore(&ctrl->rport->lock, flags); - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_start_admin_queue(&ctrl->ctrl); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_cleanup_queue(ctrl->ctrl.fabrics_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); @@ -2487,6 +2488,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) */ if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->tag_set); @@ -2509,7 +2511,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) /* * clean up the admin queue. Same thing as above. */ - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_stop_admin_queue(&ctrl->ctrl); + blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); @@ -2839,6 +2842,28 @@ nvme_fc_complete_rq(struct request *rq) nvme_fc_ctrl_put(ctrl); } +static int nvme_fc_map_queues(struct blk_mq_tag_set *set) +{ + struct nvme_fc_ctrl *ctrl = set->driver_data; + int i; + + for (i = 0; i < set->nr_maps; i++) { + struct blk_mq_queue_map *map = &set->map[i]; + + if (!map->nr_queues) { + WARN_ON(i == HCTX_TYPE_DEFAULT); + continue; + } + + /* Call LLDD map queue functionality if defined */ + if (ctrl->lport->ops->map_queues) + ctrl->lport->ops->map_queues(&ctrl->lport->localport, + map); + else + blk_mq_map_queues(map); + } + return 0; +} static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, @@ -2847,6 +2872,7 @@ static const struct blk_mq_ops nvme_fc_mq_ops = { .exit_request = nvme_fc_exit_request, .init_hctx = nvme_fc_init_hctx, .timeout = nvme_fc_timeout, + .map_queues = nvme_fc_map_queues, }; static int @@ -2951,6 +2977,13 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) if (ctrl->ctrl.queue_count == 1) return 0; + if (prior_ioq_cnt != nr_io_queues) { + dev_info(ctrl->ctrl.device, + "reconnect: revising io queue count from %d to %d\n", + prior_ioq_cnt, nr_io_queues); + blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); + } + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) goto out_free_io_queues; @@ -2959,15 +2992,6 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) if (ret) goto out_delete_hw_queues; - if (prior_ioq_cnt != nr_io_queues) { - dev_info(ctrl->ctrl.device, - "reconnect: revising io queue count from %d to %d\n", - prior_ioq_cnt, nr_io_queues); - nvme_wait_freeze(&ctrl->ctrl); - blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); - nvme_unfreeze(&ctrl->ctrl); - } - return 0; out_delete_hw_queues: @@ -3095,7 +3119,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments << (ilog2(SZ_4K) - 9); - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_start_admin_queue(&ctrl->ctrl); ret = nvme_init_ctrl_finish(&ctrl->ctrl); if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) @@ -3249,7 +3273,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) nvme_fc_free_queue(&ctrl->queues[0]); /* re-enable the admin_q so anything new can fast fail */ - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_start_admin_queue(&ctrl->ctrl); /* resume the io queues so that things will fast fail */ nvme_start_queues(&ctrl->ctrl); @@ -3572,7 +3596,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, dev_info(ctrl->ctrl.device, "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn); + ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl)); return &ctrl->ctrl; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5d7bc58a27bd..7f2071f2460c 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -85,8 +85,13 @@ void nvme_failover_req(struct request *req) } spin_lock_irqsave(&ns->head->requeue_lock, flags); - for (bio = req->bio; bio; bio = bio->bi_next) + for (bio = req->bio; bio; bio = bio->bi_next) { bio_set_dev(bio, ns->head->disk->part0); + if (bio->bi_opf & REQ_POLLED) { + bio->bi_opf &= ~REQ_POLLED; + bio->bi_cookie = BLK_QC_T_NONE; + } + } blk_steal_bios(&ns->head->requeue_list, req); spin_unlock_irqrestore(&ns->head->requeue_lock, flags); @@ -100,8 +105,11 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { - if (ns->head->disk) - kblockd_schedule_work(&ns->head->requeue_work); + if (!ns->head->disk) + continue; + kblockd_schedule_work(&ns->head->requeue_work); + if (ctrl->state == NVME_CTRL_LIVE) + disk_uevent(ns->head->disk, KOBJ_CHANGE); } up_read(&ctrl->namespaces_rwsem); } @@ -138,13 +146,12 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->scan_lock); down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - if (nvme_mpath_clear_current_path(ns)) - kblockd_schedule_work(&ns->head->requeue_work); + list_for_each_entry(ns, &ctrl->namespaces, list) { + nvme_mpath_clear_current_path(ns); + kblockd_schedule_work(&ns->head->requeue_work); + } up_read(&ctrl->namespaces_rwsem); - mutex_unlock(&ctrl->scan_lock); } void nvme_mpath_revalidate_paths(struct nvme_ns *ns) @@ -312,12 +319,11 @@ static bool nvme_available_path(struct nvme_ns_head *head) return false; } -static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) +static void nvme_ns_head_submit_bio(struct bio *bio) { struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data; struct device *dev = disk_to_dev(head->disk); struct nvme_ns *ns; - blk_qc_t ret = BLK_QC_T_NONE; int srcu_idx; /* @@ -334,7 +340,7 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) bio->bi_opf |= REQ_NVME_MPATH; trace_block_bio_remap(bio, disk_devt(ns->head->disk), bio->bi_iter.bi_sector); - ret = submit_bio_noacct(bio); + submit_bio_noacct(bio); } else if (nvme_available_path(head)) { dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n"); @@ -349,7 +355,6 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) } srcu_read_unlock(&head->srcu, srcu_idx); - return ret; } static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) @@ -431,8 +436,6 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head) return ret; ret = nvme_cdev_add(&head->cdev, &head->cdev_device, &nvme_ns_head_chr_fops, THIS_MODULE); - if (ret) - kfree_const(head->cdev_device.kobj.name); return ret; } @@ -481,6 +484,15 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue); blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue); + /* + * This assumes all controllers that refer to a namespace either + * support poll queues or not. That is not a strict guarantee, + * but if the assumption is wrong the effect is only suboptimal + * performance but not correctness problem. + */ + if (ctrl->tagset->nr_maps > HCTX_TYPE_POLL && + ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues) + blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue); /* set to a default value of 512 until the disk is validated */ blk_queue_logical_block_size(head->disk->queue, 512); @@ -496,13 +508,23 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) static void nvme_mpath_set_live(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; + int rc; if (!head->disk) return; + /* + * test_and_set_bit() is used because it is protecting against two nvme + * paths simultaneously calling device_add_disk() on the same namespace + * head. + */ if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { - device_add_disk(&head->subsys->dev, head->disk, - nvme_ns_id_attr_groups); + rc = device_add_disk(&head->subsys->dev, head->disk, + nvme_ns_id_attr_groups); + if (rc) { + clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags); + return; + } nvme_add_ns_head_cdev(head); } @@ -540,7 +562,7 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, return -EINVAL; nr_nsids = le32_to_cpu(desc->nnsids); - nsid_buf_size = nr_nsids * sizeof(__le32); + nsid_buf_size = flex_array_size(desc, nsids, nr_nsids); if (WARN_ON_ONCE(desc->grpid == 0)) return -EINVAL; @@ -600,14 +622,17 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { - unsigned nsid = le32_to_cpu(desc->nsids[n]); - + unsigned nsid; +again: + nsid = le32_to_cpu(desc->nsids[n]); if (ns->head->ns_id < nsid) continue; if (ns->head->ns_id == nsid) nvme_update_ns_ana_state(desc, ns); if (++n == nr_nsids) break; + if (ns->head->ns_id > nsid) + goto again; } up_read(&ctrl->namespaces_rwsem); return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9871c0c9374c..b334af8aa264 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -138,6 +138,12 @@ enum nvme_quirks { * 48 bits. */ NVME_QUIRK_DMA_ADDRESS_BITS_48 = (1 << 16), + + /* + * The controller requires the command_id value be be limited, so skip + * encoding the generation sequence number. + */ + NVME_QUIRK_SKIP_CID_GEN = (1 << 17), }; /* @@ -336,6 +342,7 @@ struct nvme_ctrl { int nr_reconnects; unsigned long flags; #define NVME_CTRL_FAILFAST_EXPIRED 0 +#define NVME_CTRL_ADMIN_Q_STOPPED 1 struct nvmf_ctrl_options *opts; struct page *discard_page; @@ -366,6 +373,7 @@ struct nvme_subsystem { char model[40]; char firmware_rev[8]; u8 cmic; + enum nvme_subsys_type subtype; u16 vendor_id; u16 awupf; /* 0's based awupf value. */ struct ida ns_ida; @@ -457,6 +465,7 @@ struct nvme_ns { #define NVME_NS_ANA_PENDING 2 #define NVME_NS_FORCE_RO 3 #define NVME_NS_READY 4 +#define NVME_NS_STOPPED 5 struct cdev cdev; struct device cdev_device; @@ -632,6 +641,20 @@ static inline bool nvme_is_aen_req(u16 qid, __u16 command_id) } void nvme_complete_rq(struct request *req); +void nvme_complete_batch_req(struct request *req); + +static __always_inline void nvme_complete_batch(struct io_comp_batch *iob, + void (*fn)(struct request *rq)) +{ + struct request *req; + + rq_list_for_each(&iob->req_list, req) { + fn(req); + nvme_complete_batch_req(req); + } + blk_mq_end_request_batch(iob); +} + blk_status_t nvme_host_path_error(struct request *req); bool nvme_cancel_request(struct request *req, void *data, bool reserved); void nvme_cancel_tagset(struct nvme_ctrl *ctrl); @@ -659,6 +682,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); +void nvme_stop_admin_queue(struct nvme_ctrl *ctrl); +void nvme_start_admin_queue(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); void nvme_sync_queues(struct nvme_ctrl *ctrl); void nvme_sync_io_queues(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b82492cd7503..ca2ee806d74b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -10,6 +10,7 @@ #include <linux/blkdev.h> #include <linux/blk-mq.h> #include <linux/blk-mq-pci.h> +#include <linux/blk-integrity.h> #include <linux/dmi.h> #include <linux/init.h> #include <linux/interrupt.h> @@ -244,8 +245,15 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) { unsigned int mem_size = nvme_dbbuf_size(dev); - if (dev->dbbuf_dbs) + if (dev->dbbuf_dbs) { + /* + * Clear the dbbuf memory so the driver doesn't observe stale + * values from the previous instantiation. + */ + memset(dev->dbbuf_dbs, 0, mem_size); + memset(dev->dbbuf_eis, 0, mem_size); return 0; + } dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, &dev->dbbuf_dbs_dma_addr, @@ -958,7 +966,7 @@ out_free_cmd: return ret; } -static void nvme_pci_complete_rq(struct request *req) +static __always_inline void nvme_pci_unmap_rq(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_dev *dev = iod->nvmeq->dev; @@ -968,9 +976,19 @@ static void nvme_pci_complete_rq(struct request *req) rq_integrity_vec(req)->bv_len, rq_data_dir(req)); if (blk_rq_nr_phys_segments(req)) nvme_unmap_data(dev, req); +} + +static void nvme_pci_complete_rq(struct request *req) +{ + nvme_pci_unmap_rq(req); nvme_complete_rq(req); } +static void nvme_pci_complete_batch(struct io_comp_batch *iob) +{ + nvme_complete_batch(iob, nvme_pci_unmap_rq); +} + /* We read the CQE phase first to check if the rest of the entry is valid */ static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) { @@ -995,7 +1013,8 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) return nvmeq->dev->tagset.tags[nvmeq->qid - 1]; } -static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) +static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, + struct io_comp_batch *iob, u16 idx) { struct nvme_completion *cqe = &nvmeq->cqes[idx]; __u16 command_id = READ_ONCE(cqe->command_id); @@ -1022,7 +1041,9 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) } trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); - if (!nvme_try_complete_req(req, cqe->status, cqe->result)) + if (!nvme_try_complete_req(req, cqe->status, cqe->result) && + !blk_mq_add_to_batch(req, iob, nvme_req(req)->status, + nvme_pci_complete_batch)) nvme_pci_complete_rq(req); } @@ -1038,7 +1059,8 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) } } -static inline int nvme_process_cq(struct nvme_queue *nvmeq) +static inline int nvme_poll_cq(struct nvme_queue *nvmeq, + struct io_comp_batch *iob) { int found = 0; @@ -1049,7 +1071,7 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq) * the cqe requires a full read memory barrier */ dma_rmb(); - nvme_handle_cqe(nvmeq, nvmeq->cq_head); + nvme_handle_cqe(nvmeq, iob, nvmeq->cq_head); nvme_update_cq_head(nvmeq); } @@ -1061,9 +1083,13 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq) static irqreturn_t nvme_irq(int irq, void *data) { struct nvme_queue *nvmeq = data; + DEFINE_IO_COMP_BATCH(iob); - if (nvme_process_cq(nvmeq)) + if (nvme_poll_cq(nvmeq, &iob)) { + if (!rq_list_empty(iob.req_list)) + nvme_pci_complete_batch(&iob); return IRQ_HANDLED; + } return IRQ_NONE; } @@ -1087,11 +1113,11 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); - nvme_process_cq(nvmeq); + nvme_poll_cq(nvmeq, NULL); enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } -static int nvme_poll(struct blk_mq_hw_ctx *hctx) +static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_queue *nvmeq = hctx->driver_data; bool found; @@ -1100,7 +1126,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx) return 0; spin_lock(&nvmeq->cq_poll_lock); - found = nvme_process_cq(nvmeq); + found = nvme_poll_cq(nvmeq, iob); spin_unlock(&nvmeq->cq_poll_lock); return found; @@ -1273,7 +1299,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * Did we miss an interrupt? */ if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) - nvme_poll(req->mq_hctx); + nvme_poll(req->mq_hctx, NULL); else nvme_poll_irqdisable(nvmeq); @@ -1330,7 +1356,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) iod->aborted = 1; cmd.abort.opcode = nvme_admin_abort_cmd; - cmd.abort.cid = req->tag; + cmd.abort.cid = nvme_cid(req); cmd.abort.sqid = cpu_to_le16(nvmeq->qid); dev_warn(nvmeq->dev->ctrl.device, @@ -1395,7 +1421,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) nvmeq->dev->online_queues--; if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) - blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); + nvme_stop_admin_queue(&nvmeq->dev->ctrl); if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); return 0; @@ -1433,7 +1459,7 @@ static void nvme_reap_pending_cqes(struct nvme_dev *dev) for (i = dev->ctrl.queue_count - 1; i > 0; i--) { spin_lock(&dev->queues[i].cq_poll_lock); - nvme_process_cq(&dev->queues[i]); + nvme_poll_cq(&dev->queues[i], NULL); spin_unlock(&dev->queues[i].cq_poll_lock); } } @@ -1654,7 +1680,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) * user requests may be waiting on a stopped queue. Start the * queue to flush these to completion. */ - blk_mq_unquiesce_queue(dev->ctrl.admin_q); + nvme_start_admin_queue(&dev->ctrl); blk_cleanup_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } @@ -1688,7 +1714,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) return -ENODEV; } } else - blk_mq_unquiesce_queue(dev->ctrl.admin_q); + nvme_start_admin_queue(&dev->ctrl); return 0; } @@ -2623,7 +2649,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (shutdown) { nvme_start_queues(&dev->ctrl); if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) - blk_mq_unquiesce_queue(dev->ctrl.admin_q); + nvme_start_admin_queue(&dev->ctrl); } mutex_unlock(&dev->shutdown_lock); } @@ -3369,7 +3395,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_128_BYTES_SQES | - NVME_QUIRK_SHARED_TAGS }, + NVME_QUIRK_SHARED_TAGS | + NVME_QUIRK_SKIP_CID_GEN }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a68704e39084..850f84d204d0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -13,6 +13,7 @@ #include <linux/atomic.h> #include <linux/blk-mq.h> #include <linux/blk-mq-rdma.h> +#include <linux/blk-integrity.h> #include <linux/types.h> #include <linux/list.h> #include <linux/mutex.h> @@ -656,8 +657,8 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) return; - nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); + nvme_rdma_destroy_queue_ib(queue); mutex_destroy(&queue->queue_lock); } @@ -918,7 +919,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, else ctrl->ctrl.max_integrity_segments = 0; - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_start_admin_queue(&ctrl->ctrl); error = nvme_init_ctrl_finish(&ctrl->ctrl); if (error) @@ -927,7 +928,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, return 0; out_quiesce_queue: - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_stop_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); out_stop_queue: nvme_rdma_stop_queue(&ctrl->queues[0]); @@ -1025,12 +1026,12 @@ out_free_io_queues: static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_stop_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); if (remove) - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_start_admin_queue(&ctrl->ctrl); nvme_rdma_destroy_admin_queue(ctrl, remove); } @@ -1095,11 +1096,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) return ret; if (ctrl->ctrl.icdoff) { + ret = -EOPNOTSUPP; dev_err(ctrl->ctrl.device, "icdoff is not supported!\n"); goto destroy_admin; } if (!(ctrl->ctrl.sgls & (1 << 2))) { + ret = -EOPNOTSUPP; dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not supported!\n"); goto destroy_admin; @@ -1111,6 +1114,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1); } + if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) { + dev_warn(ctrl->ctrl.device, + "ctrl sqsize %u > max queue size %u, clamping down\n", + ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE); + ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1; + } + if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { dev_warn(ctrl->ctrl.device, "sqsize %u > ctrl maxcmd %u, clamping down\n", @@ -1153,7 +1163,7 @@ destroy_io: nvme_rdma_destroy_io_queues(ctrl, new); } destroy_admin: - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_stop_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); @@ -1193,7 +1203,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) nvme_rdma_teardown_io_queues(ctrl, false); nvme_start_queues(&ctrl->ctrl); nvme_rdma_teardown_admin_queue(ctrl, false); - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_start_admin_queue(&ctrl->ctrl); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { /* state change failure is ok if we started ctrl delete */ @@ -1815,14 +1825,10 @@ static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue) for (i = 0; i < queue->queue_size; i++) { ret = nvme_rdma_post_recv(queue, &queue->rsp_ring[i]); if (ret) - goto out_destroy_queue_ib; + return ret; } return 0; - -out_destroy_queue_ib: - nvme_rdma_destroy_queue_ib(queue); - return ret; } static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue, @@ -1916,14 +1922,10 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) if (ret) { dev_err(ctrl->ctrl.device, "rdma_connect_locked failed (%d).\n", ret); - goto out_destroy_queue_ib; + return ret; } return 0; - -out_destroy_queue_ib: - nvme_rdma_destroy_queue_ib(queue); - return ret; } static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, @@ -1954,8 +1956,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: - nvme_rdma_destroy_queue_ib(queue); - fallthrough; case RDMA_CM_EVENT_ADDR_ERROR: dev_dbg(queue->ctrl->ctrl.device, "CM error event %d\n", ev->event); @@ -2115,7 +2115,7 @@ unmap_qe: return ret; } -static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) +static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_rdma_queue *queue = hctx->driver_data; @@ -2241,7 +2241,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) cancel_delayed_work_sync(&ctrl->reconnect_work); nvme_rdma_teardown_io_queues(ctrl, shutdown); - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_stop_admin_queue(&ctrl->ctrl); if (shutdown) nvme_shutdown_ctrl(&ctrl->ctrl); else @@ -2395,7 +2395,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, goto out_uninit_ctrl; dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", - ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); mutex_lock(&nvme_rdma_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e2ab12f3f51c..33bc83d8d992 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -274,6 +274,12 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) } while (ret > 0); } +static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) +{ + return !list_empty(&queue->send_list) || + !llist_empty(&queue->req_list) || queue->more_requests; +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, bool sync, bool last) { @@ -294,9 +300,10 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, nvme_tcp_send_all(queue); queue->more_requests = false; mutex_unlock(&queue->send_mutex); - } else if (last) { - queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } + + if (last && nvme_tcp_queue_more(queue)) + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue) @@ -613,7 +620,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); data->ttag = pdu->ttag; data->command_id = nvme_cid(rq); - data->data_offset = cpu_to_le32(req->data_sent); + data->data_offset = pdu->r2t_offset; data->data_length = cpu_to_le32(req->pdu_len); return 0; } @@ -906,12 +913,6 @@ done: read_unlock_bh(&sk->sk_callback_lock); } -static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) -{ - return !list_empty(&queue->send_list) || - !llist_empty(&queue->req_list) || queue->more_requests; -} - static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) { queue->request = NULL; @@ -925,12 +926,14 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req) static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + int req_data_len = req->data_len; while (true) { struct page *page = nvme_tcp_req_cur_page(req); size_t offset = nvme_tcp_req_cur_offset(req); size_t len = nvme_tcp_req_cur_length(req); bool last = nvme_tcp_pdu_last_send(req, len); + int req_data_sent = req->data_sent; int ret, flags = MSG_DONTWAIT; if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) @@ -952,7 +955,15 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) nvme_tcp_ddgst_update(queue->snd_hash, page, offset, ret); - /* fully successful last write*/ + /* + * update the request iterator except for the last payload send + * in the request where we don't want to modify it as we may + * compete with the RX path completing the request. + */ + if (req_data_sent + ret < req_data_len) + nvme_tcp_advance_req(req, ret); + + /* fully successful last send in current PDU */ if (last && ret == len) { if (queue->data_digest) { nvme_tcp_ddgst_final(queue->snd_hash, @@ -964,7 +975,6 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) } return 1; } - nvme_tcp_advance_req(req, ret); } return -EAGAIN; } @@ -1040,10 +1050,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + size_t offset = req->offset; int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &req->ddgst + req->offset, + .iov_base = (u8 *)&req->ddgst + req->offset, .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset }; @@ -1056,7 +1067,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) if (unlikely(ret <= 0)) return ret; - if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) { + if (offset + ret == NVME_TCP_DIGEST_LENGTH) { nvme_tcp_done_send_req(queue); return 1; } @@ -1145,8 +1156,7 @@ static void nvme_tcp_io_work(struct work_struct *w) pending = true; else if (unlikely(result < 0)) break; - } else - pending = !llist_empty(&queue->req_list); + } result = nvme_tcp_try_recv(queue); if (result > 0) @@ -1908,7 +1918,7 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) if (error) goto out_stop_queue; - blk_mq_unquiesce_queue(ctrl->admin_q); + nvme_start_admin_queue(ctrl); error = nvme_init_ctrl_finish(ctrl); if (error) @@ -1917,7 +1927,7 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) return 0; out_quiesce_queue: - blk_mq_quiesce_queue(ctrl->admin_q); + nvme_stop_admin_queue(ctrl); blk_sync_queue(ctrl->admin_q); out_stop_queue: nvme_tcp_stop_queue(ctrl, 0); @@ -1939,12 +1949,12 @@ out_free_queue: static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove) { - blk_mq_quiesce_queue(ctrl->admin_q); + nvme_stop_admin_queue(ctrl); blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); nvme_cancel_admin_tagset(ctrl); if (remove) - blk_mq_unquiesce_queue(ctrl->admin_q); + nvme_start_admin_queue(ctrl); nvme_tcp_destroy_admin_queue(ctrl, remove); } @@ -1953,7 +1963,7 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, { if (ctrl->queue_count <= 1) return; - blk_mq_quiesce_queue(ctrl->admin_q); + nvme_stop_admin_queue(ctrl); nvme_start_freeze(ctrl); nvme_stop_queues(ctrl); nvme_sync_io_queues(ctrl); @@ -2048,7 +2058,7 @@ destroy_io: nvme_tcp_destroy_io_queues(ctrl, new); } destroy_admin: - blk_mq_quiesce_queue(ctrl->admin_q); + nvme_stop_admin_queue(ctrl); blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); nvme_cancel_admin_tagset(ctrl); @@ -2091,7 +2101,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) /* unquiesce to fail fast pending requests */ nvme_start_queues(ctrl); nvme_tcp_teardown_admin_queue(ctrl, false); - blk_mq_unquiesce_queue(ctrl->admin_q); + nvme_start_admin_queue(ctrl); if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { /* state change failure is ok if we started ctrl delete */ @@ -2109,7 +2119,7 @@ static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); nvme_tcp_teardown_io_queues(ctrl, shutdown); - blk_mq_quiesce_queue(ctrl->admin_q); + nvme_stop_admin_queue(ctrl); if (shutdown) nvme_shutdown_ctrl(ctrl); else @@ -2422,7 +2432,7 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) return 0; } -static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) +static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; @@ -2575,7 +2585,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, goto out_uninit_ctrl; dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", - ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); mutex_lock(&nvme_tcp_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index d95010481fce..bfc259e0d7b8 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -233,6 +233,8 @@ out_free: blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, struct nvme_command *c, enum nvme_zone_mgmt_action action) { + memset(c, 0, sizeof(*c)); + c->zms.opcode = nvme_cmd_zone_mgmt_send; c->zms.nsid = cpu_to_le32(ns->head->ns_id); c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); |