diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-22 19:46:08 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-22 19:46:08 +0300 |
commit | 6ab9e09238fdfd742fe23b81e2d385a1cab49d9b (patch) | |
tree | 38c27b6d70c68f1114df2f49f72f1f757b22cd07 /drivers/nvme | |
parent | 528985117126f11beea339cf39120ee99da04cd2 (diff) | |
parent | 52990a5fb0c991ecafebdab43138b5ed41376852 (diff) | |
download | linux-6ab9e09238fdfd742fe23b81e2d385a1cab49d9b.tar.xz |
Merge tag 'for-4.20/block-20181021' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
"This is the main pull request for block changes for 4.20. This
contains:
- Series enabling runtime PM for blk-mq (Bart).
- Two pull requests from Christoph for NVMe, with items such as;
- Better AEN tracking
- Multipath improvements
- RDMA fixes
- Rework of FC for target removal
- Fixes for issues identified by static checkers
- Fabric cleanups, as prep for TCP transport
- Various cleanups and bug fixes
- Block merging cleanups (Christoph)
- Conversion of drivers to generic DMA mapping API (Christoph)
- Series fixing ref count issues with blkcg (Dennis)
- Series improving BFQ heuristics (Paolo, et al)
- Series improving heuristics for the Kyber IO scheduler (Omar)
- Removal of dangerous bio_rewind_iter() API (Ming)
- Apply single queue IPI redirection logic to blk-mq (Ming)
- Set of fixes and improvements for bcache (Coly et al)
- Series closing a hotplug race with sysfs group attributes (Hannes)
- Set of patches for lightnvm:
- pblk trace support (Hans)
- SPDX license header update (Javier)
- Tons of refactoring patches to cleanly abstract the 1.2 and 2.0
specs behind a common core interface. (Javier, Matias)
- Enable pblk to use a common interface to retrieve chunk metadata
(Matias)
- Bug fixes (Various)
- Set of fixes and updates to the blk IO latency target (Josef)
- blk-mq queue number updates fixes (Jianchao)
- Convert a bunch of drivers from the old legacy IO interface to
blk-mq. This will conclude with the removal of the legacy IO
interface itself in 4.21, with the rest of the drivers (me, Omar)
- Removal of the DAC960 driver. The SCSI tree will introduce two
replacement drivers for this (Hannes)"
* tag 'for-4.20/block-20181021' of git://git.kernel.dk/linux-block: (204 commits)
block: setup bounce bio_sets properly
blkcg: reassociate bios when make_request() is called recursively
blkcg: fix edge case for blk_get_rl() under memory pressure
nvme-fabrics: move controller options matching to fabrics
nvme-rdma: always have a valid trsvcid
mtip32xx: fully switch to the generic DMA API
rsxx: switch to the generic DMA API
umem: switch to the generic DMA API
sx8: switch to the generic DMA API
sx8: remove dead IF_64BIT_DMA_IS_POSSIBLE code
skd: switch to the generic DMA API
ubd: remove use of blk_rq_map_sg
nvme-pci: remove duplicate check
drivers/block: Remove DAC960 driver
nvme-pci: fix hot removal during error handling
nvmet-fcloop: suppress a compiler warning
nvme-core: make implicit seed truncation explicit
nvmet-fc: fix kernel-doc headers
nvme-fc: rework the request initialization code
nvme-fc: introduce struct nvme_fcp_op_w_sgl
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 47 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 37 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 2 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 153 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 137 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 79 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 35 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 9 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 78 | ||||
-rw-r--r-- | drivers/nvme/host/trace.h | 28 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 4 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 3 | ||||
-rw-r--r-- | drivers/nvme/target/discovery.c | 6 | ||||
-rw-r--r-- | drivers/nvme/target/fc.c | 136 | ||||
-rw-r--r-- | drivers/nvme/target/fcloop.c | 1 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-bdev.c | 9 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-file.c | 3 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 1 | ||||
-rw-r--r-- | drivers/nvme/target/rdma.c | 19 |
19 files changed, 532 insertions, 255 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6bb9908bf46f..9e4a30b05bd2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -971,7 +971,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, uuid_copy(&ids->uuid, data + pos + sizeof(*cur)); break; default: - /* Skip unnkown types */ + /* Skip unknown types */ len = cur->nidl; break; } @@ -1132,7 +1132,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) return nvme_submit_user_cmd(ns->queue, &c, (void __user *)(uintptr_t)io.addr, length, - metadata, meta_len, io.slba, NULL, 0); + metadata, meta_len, lower_32_bits(io.slba), NULL, 0); } static u32 nvme_known_admin_effects(u8 opcode) @@ -2076,7 +2076,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strncpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); + strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); return; } @@ -2729,11 +2729,19 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, return a->mode; } -const struct attribute_group nvme_ns_id_attr_group = { +static const struct attribute_group nvme_ns_id_attr_group = { .attrs = nvme_ns_id_attrs, .is_visible = nvme_ns_id_attrs_are_visible, }; +const struct attribute_group *nvme_ns_id_attr_groups[] = { + &nvme_ns_id_attr_group, +#ifdef CONFIG_NVM + &nvme_nvm_attr_group, +#endif + NULL, +}; + #define nvme_show_str_function(field) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -2900,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_id_ns *id) { struct nvme_ns_head *head; + size_t size = sizeof(*head); int ret = -ENOMEM; - head = kzalloc(sizeof(*head), GFP_KERNEL); +#ifdef CONFIG_NVME_MULTIPATH + size += num_possible_nodes() * sizeof(struct nvme_ns *); +#endif + + head = kzalloc(size, GFP_KERNEL); if (!head) goto out; ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL); @@ -3099,14 +3112,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nvme_get_ctrl(ctrl); - device_add_disk(ctrl->device, ns->disk); - if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group)) - pr_warn("%s: failed to create sysfs group for identification\n", - ns->disk->disk_name); - if (ns->ndev && nvme_nvm_register_sysfs(ns)) - pr_warn("%s: failed to register lightnvm sysfs group for identification\n", - ns->disk->disk_name); + device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); nvme_mpath_add_disk(ns, id); nvme_fault_inject_init(ns); @@ -3132,10 +3138,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_fault_inject_fini(ns); if (ns->disk && ns->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group); - if (ns->ndev) - nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); if (blk_get_integrity(ns->disk)) @@ -3411,16 +3413,21 @@ static void nvme_fw_act_work(struct work_struct *work) static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) { - switch ((result & 0xff00) >> 8) { + u32 aer_notice_type = (result & 0xff00) >> 8; + + switch (aer_notice_type) { case NVME_AER_NOTICE_NS_CHANGED: + trace_nvme_async_event(ctrl, aer_notice_type); set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); nvme_queue_scan(ctrl); break; case NVME_AER_NOTICE_FW_ACT_STARTING: + trace_nvme_async_event(ctrl, aer_notice_type); queue_work(nvme_wq, &ctrl->fw_act_work); break; #ifdef CONFIG_NVME_MULTIPATH case NVME_AER_NOTICE_ANA: + trace_nvme_async_event(ctrl, aer_notice_type); if (!ctrl->ana_log_buf) break; queue_work(nvme_wq, &ctrl->ana_work); @@ -3435,11 +3442,12 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, volatile union nvme_result *res) { u32 result = le32_to_cpu(res->u32); + u32 aer_type = result & 0x07; if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS) return; - switch (result & 0x7) { + switch (aer_type) { case NVME_AER_NOTICE: nvme_handle_aen_notice(ctrl, result); break; @@ -3447,6 +3455,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, case NVME_AER_SMART: case NVME_AER_CSS: case NVME_AER_VS: + trace_nvme_async_event(ctrl, aer_type); ctrl->aen_result = result; break; default: diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 206d63cb1afc..bd0969db6225 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -552,8 +552,11 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, ctrl->state != NVME_CTRL_DEAD && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; + + nvme_req(rq)->status = NVME_SC_HOST_PATH_ERROR; + blk_mq_start_request(rq); + nvme_complete_rq(rq); + return BLK_STS_OK; } EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command); @@ -865,6 +868,36 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts, return 0; } +bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, + struct nvmf_ctrl_options *opts) +{ + if (!nvmf_ctlr_matches_baseopts(ctrl, opts) || + strcmp(opts->traddr, ctrl->opts->traddr) || + strcmp(opts->trsvcid, ctrl->opts->trsvcid)) + return false; + + /* + * Checking the local address is rough. In most cases, none is specified + * and the host port is selected by the stack. + * + * Assume no match if: + * - local address is specified and address is not the same + * - local address is not specified but remote is, or vice versa + * (admin using specific host_traddr when it matters). + */ + if ((opts->mask & NVMF_OPT_HOST_TRADDR) && + (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { + if (strcmp(opts->host_traddr, ctrl->opts->host_traddr)) + return false; + } else if ((opts->mask & NVMF_OPT_HOST_TRADDR) || + (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(nvmf_ip_options_match); + static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, unsigned int allowed_opts) { diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index aa2fdb2a2e8f..6ea6275f332a 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -166,6 +166,8 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *rq); bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live); +bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, + struct nvmf_ctrl_options *opts); static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 611e70cae754..e52b9d3c0bd6 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -20,6 +20,7 @@ #include <uapi/scsi/fc/fc_fs.h> #include <uapi/scsi/fc/fc_els.h> #include <linux/delay.h> +#include <linux/overflow.h> #include "nvme.h" #include "fabrics.h" @@ -104,6 +105,12 @@ struct nvme_fc_fcp_op { struct nvme_fc_ersp_iu rsp_iu; }; +struct nvme_fcp_op_w_sgl { + struct nvme_fc_fcp_op op; + struct scatterlist sgl[SG_CHUNK_SIZE]; + uint8_t priv[0]; +}; + struct nvme_fc_lport { struct nvme_fc_local_port localport; @@ -122,6 +129,7 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; struct list_head ls_req_list; + struct list_head disc_list; struct device *dev; /* physical device for dma */ struct nvme_fc_lport *lport; spinlock_t lock; @@ -210,7 +218,6 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt); * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. */ -static struct class *fc_class; static struct device *fc_udev_device; @@ -317,7 +324,7 @@ out_done: * @template: LLDD entrypoints and operational parameters for the port * @dev: physical hardware device node port corresponds to. Will be * used for DMA mappings - * @lport_p: pointer to a local port pointer. Upon success, the routine + * @portptr: pointer to a local port pointer. Upon success, the routine * will allocate a nvme_fc_local_port structure and place its * address in the local port pointer. Upon failure, local port * pointer will be set to 0. @@ -425,8 +432,7 @@ EXPORT_SYMBOL_GPL(nvme_fc_register_localport); * nvme_fc_unregister_localport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME host FC port. - * @localport: pointer to the (registered) local port that is to be - * deregistered. + * @portptr: pointer to the (registered) local port that is to be deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -507,6 +513,7 @@ nvme_fc_free_rport(struct kref *ref) list_del(&rport->endp_list); spin_unlock_irqrestore(&nvme_fc_lock, flags); + WARN_ON(!list_empty(&rport->disc_list)); ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); kfree(rport); @@ -631,7 +638,7 @@ __nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport, * @localport: pointer to the (registered) local port that the remote * subsystem port is connected to. * @pinfo: pointer to information about the port to be registered - * @rport_p: pointer to a remote port pointer. Upon success, the routine + * @portptr: pointer to a remote port pointer. Upon success, the routine * will allocate a nvme_fc_remote_port structure and place its * address in the remote port pointer. Upon failure, remote port * pointer will be set to 0. @@ -694,6 +701,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); INIT_LIST_HEAD(&newrec->ls_req_list); + INIT_LIST_HEAD(&newrec->disc_list); kref_init(&newrec->ref); atomic_set(&newrec->act_ctrl_cnt, 0); spin_lock_init(&newrec->lock); @@ -807,8 +815,8 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously * registered a NVME subsystem FC port. - * @remoteport: pointer to the (registered) remote port that is to be - * deregistered. + * @portptr: pointer to the (registered) remote port that is to be + * deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -1385,7 +1393,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) __nvme_fc_finish_ls_req(lsop); - /* fc-nvme iniator doesn't care about success or failure of cmd */ + /* fc-nvme initiator doesn't care about success or failure of cmd */ kfree(lsop); } @@ -1685,6 +1693,8 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, struct request *rq, u32 rqno) { + struct nvme_fcp_op_w_sgl *op_w_sgl = + container_of(op, typeof(*op_w_sgl), op); struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; int ret = 0; @@ -1694,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->fcp_req.rspaddr = &op->rsp_iu; op->fcp_req.rsplen = sizeof(op->rsp_iu); op->fcp_req.done = nvme_fc_fcpio_done; - op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; op->ctrl = ctrl; op->queue = queue; @@ -1733,12 +1742,17 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { struct nvme_fc_ctrl *ctrl = set->driver_data; - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; + int res; nvme_req(rq)->ctrl = &ctrl->ctrl; - return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); + res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); + if (res) + return res; + op->op.fcp_req.first_sgl = &op->sgl[0]; + return res; } static int @@ -1768,7 +1782,6 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) } aen_op->flags = FCOP_FLAGS_AEN; - aen_op->fcp_req.first_sgl = NULL; /* no sg list */ aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); @@ -2422,10 +2435,9 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; + ctrl->tag_set.cmd_size = + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz); ctrl->tag_set.driver_data = ctrl; ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; @@ -3027,10 +3039,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.cmd_size = + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz); ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; @@ -3159,7 +3170,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) substring_t wwn = { name, &name[sizeof(name)-1] }; int nnoffset, pnoffset; - /* validate it string one of the 2 allowed formats */ + /* validate if string is one of the 2 allowed formats */ if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], @@ -3254,6 +3265,90 @@ static struct nvmf_transport_ops nvme_fc_transport = { .create_ctrl = nvme_fc_create_ctrl, }; +/* Arbitrary successive failures max. With lots of subsystems could be high */ +#define DISCOVERY_MAX_FAIL 20 + +static ssize_t nvme_fc_nvme_discovery_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long flags; + LIST_HEAD(local_disc_list); + struct nvme_fc_lport *lport; + struct nvme_fc_rport *rport; + int failcnt = 0; + + spin_lock_irqsave(&nvme_fc_lock, flags); +restart: + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + list_for_each_entry(rport, &lport->endp_list, endp_list) { + if (!nvme_fc_lport_get(lport)) + continue; + if (!nvme_fc_rport_get(rport)) { + /* + * This is a temporary condition. Upon restart + * this rport will be gone from the list. + * + * Revert the lport put and retry. Anything + * added to the list already will be skipped (as + * they are no longer list_empty). Loops should + * resume at rports that were not yet seen. + */ + nvme_fc_lport_put(lport); + + if (failcnt++ < DISCOVERY_MAX_FAIL) + goto restart; + + pr_err("nvme_discovery: too many reference " + "failures\n"); + goto process_local_list; + } + if (list_empty(&rport->disc_list)) + list_add_tail(&rport->disc_list, + &local_disc_list); + } + } + +process_local_list: + while (!list_empty(&local_disc_list)) { + rport = list_first_entry(&local_disc_list, + struct nvme_fc_rport, disc_list); + list_del_init(&rport->disc_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + lport = rport->lport; + /* signal discovery. Won't hurt if it repeats */ + nvme_fc_signal_discovery_scan(lport, rport); + nvme_fc_rport_put(rport); + nvme_fc_lport_put(lport); + + spin_lock_irqsave(&nvme_fc_lock, flags); + } + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return count; +} +static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); + +static struct attribute *nvme_fc_attrs[] = { + &dev_attr_nvme_discovery.attr, + NULL +}; + +static struct attribute_group nvme_fc_attr_group = { + .attrs = nvme_fc_attrs, +}; + +static const struct attribute_group *nvme_fc_attr_groups[] = { + &nvme_fc_attr_group, + NULL +}; + +static struct class fc_class = { + .name = "fc", + .dev_groups = nvme_fc_attr_groups, + .owner = THIS_MODULE, +}; + static int __init nvme_fc_init_module(void) { int ret; @@ -3272,16 +3367,16 @@ static int __init nvme_fc_init_module(void) * put in place, this code will move to a more generic * location for the class. */ - fc_class = class_create(THIS_MODULE, "fc"); - if (IS_ERR(fc_class)) { + ret = class_register(&fc_class); + if (ret) { pr_err("couldn't register class fc\n"); - return PTR_ERR(fc_class); + return ret; } /* * Create a device for the FC-centric udev events */ - fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL, + fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL, "fc_udev_device"); if (IS_ERR(fc_udev_device)) { pr_err("couldn't create fc_udev device!\n"); @@ -3296,9 +3391,9 @@ static int __init nvme_fc_init_module(void) return 0; out_destroy_device: - device_destroy(fc_class, MKDEV(0, 0)); + device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: - class_destroy(fc_class); + class_unregister(&fc_class); return ret; } @@ -3313,8 +3408,8 @@ static void __exit nvme_fc_exit_module(void) ida_destroy(&nvme_fc_local_port_cnt); ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(fc_class, MKDEV(0, 0)); - class_destroy(fc_class); + device_destroy(&fc_class, MKDEV(0, 0)); + class_unregister(&fc_class); } module_init(nvme_fc_init_module); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 6fe5923c95d4..a4f3b263cd6c 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -567,13 +567,13 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, * Expect the lba in device format */ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, - struct nvm_chk_meta *meta, - sector_t slba, int nchks) + sector_t slba, int nchks, + struct nvm_chk_meta *meta) { struct nvm_geo *geo = &ndev->geo; struct nvme_ns *ns = ndev->q->queuedata; struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta; + struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off; struct ppa_addr ppa; size_t left = nchks * sizeof(struct nvme_nvm_chk_meta); size_t log_pos, offset, len; @@ -585,6 +585,10 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, */ max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024); + dev_meta = kmalloc(max_len, GFP_KERNEL); + if (!dev_meta) + return -ENOMEM; + /* Normalize lba address space to obtain log offset */ ppa.ppa = slba; ppa = dev_to_generic_addr(ndev, ppa); @@ -598,6 +602,9 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, while (left) { len = min_t(unsigned int, left, max_len); + memset(dev_meta, 0, max_len); + dev_meta_off = dev_meta; + ret = nvme_get_log(ctrl, ns->head->ns_id, NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len, offset); @@ -607,21 +614,23 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, } for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) { - meta->state = dev_meta->state; - meta->type = dev_meta->type; - meta->wi = dev_meta->wi; - meta->slba = le64_to_cpu(dev_meta->slba); - meta->cnlb = le64_to_cpu(dev_meta->cnlb); - meta->wp = le64_to_cpu(dev_meta->wp); + meta->state = dev_meta_off->state; + meta->type = dev_meta_off->type; + meta->wi = dev_meta_off->wi; + meta->slba = le64_to_cpu(dev_meta_off->slba); + meta->cnlb = le64_to_cpu(dev_meta_off->cnlb); + meta->wp = le64_to_cpu(dev_meta_off->wp); meta++; - dev_meta++; + dev_meta_off++; } offset += len; left -= len; } + kfree(dev_meta); + return ret; } @@ -968,6 +977,9 @@ void nvme_nvm_update_nvm_info(struct nvme_ns *ns) struct nvm_dev *ndev = ns->ndev; struct nvm_geo *geo = &ndev->geo; + if (geo->version == NVM_OCSSD_SPEC_12) + return; + geo->csecs = 1 << ns->lba_shift; geo->sos = ns->ms; } @@ -1190,10 +1202,29 @@ static NVM_DEV_ATTR_12_RO(multiplane_modes); static NVM_DEV_ATTR_12_RO(media_capabilities); static NVM_DEV_ATTR_12_RO(max_phys_secs); -static struct attribute *nvm_dev_attrs_12[] = { +/* 2.0 values */ +static NVM_DEV_ATTR_20_RO(groups); +static NVM_DEV_ATTR_20_RO(punits); +static NVM_DEV_ATTR_20_RO(chunks); +static NVM_DEV_ATTR_20_RO(clba); +static NVM_DEV_ATTR_20_RO(ws_min); +static NVM_DEV_ATTR_20_RO(ws_opt); +static NVM_DEV_ATTR_20_RO(maxoc); +static NVM_DEV_ATTR_20_RO(maxocpu); +static NVM_DEV_ATTR_20_RO(mw_cunits); +static NVM_DEV_ATTR_20_RO(write_typ); +static NVM_DEV_ATTR_20_RO(write_max); +static NVM_DEV_ATTR_20_RO(reset_typ); +static NVM_DEV_ATTR_20_RO(reset_max); + +static struct attribute *nvm_dev_attrs[] = { + /* version agnostic attrs */ &dev_attr_version.attr, &dev_attr_capabilities.attr, + &dev_attr_read_typ.attr, + &dev_attr_read_max.attr, + /* 1.2 attrs */ &dev_attr_vendor_opcode.attr, &dev_attr_device_mode.attr, &dev_attr_media_manager.attr, @@ -1208,8 +1239,6 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_page_size.attr, &dev_attr_hw_sector_size.attr, &dev_attr_oob_sector_size.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_prog_typ.attr, &dev_attr_prog_max.attr, &dev_attr_erase_typ.attr, @@ -1218,33 +1247,7 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_media_capabilities.attr, &dev_attr_max_phys_secs.attr, - NULL, -}; - -static const struct attribute_group nvm_dev_attr_group_12 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_12, -}; - -/* 2.0 values */ -static NVM_DEV_ATTR_20_RO(groups); -static NVM_DEV_ATTR_20_RO(punits); -static NVM_DEV_ATTR_20_RO(chunks); -static NVM_DEV_ATTR_20_RO(clba); -static NVM_DEV_ATTR_20_RO(ws_min); -static NVM_DEV_ATTR_20_RO(ws_opt); -static NVM_DEV_ATTR_20_RO(maxoc); -static NVM_DEV_ATTR_20_RO(maxocpu); -static NVM_DEV_ATTR_20_RO(mw_cunits); -static NVM_DEV_ATTR_20_RO(write_typ); -static NVM_DEV_ATTR_20_RO(write_max); -static NVM_DEV_ATTR_20_RO(reset_typ); -static NVM_DEV_ATTR_20_RO(reset_max); - -static struct attribute *nvm_dev_attrs_20[] = { - &dev_attr_version.attr, - &dev_attr_capabilities.attr, - + /* 2.0 attrs */ &dev_attr_groups.attr, &dev_attr_punits.attr, &dev_attr_chunks.attr, @@ -1255,8 +1258,6 @@ static struct attribute *nvm_dev_attrs_20[] = { &dev_attr_maxocpu.attr, &dev_attr_mw_cunits.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_write_typ.attr, &dev_attr_write_max.attr, &dev_attr_reset_typ.attr, @@ -1265,44 +1266,38 @@ static struct attribute *nvm_dev_attrs_20[] = { NULL, }; -static const struct attribute_group nvm_dev_attr_group_20 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_20, -}; - -int nvme_nvm_register_sysfs(struct nvme_ns *ns) +static umode_t nvm_dev_attrs_visible(struct kobject *kobj, + struct attribute *attr, int index) { + struct device *dev = container_of(kobj, struct device, kobj); + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns *ns = disk->private_data; struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; + struct device_attribute *dev_attr = + container_of(attr, typeof(*dev_attr), attr); if (!ndev) - return -EINVAL; - - switch (geo->major_ver_id) { - case 1: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); - case 2: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); - } + return 0; - return -EINVAL; -} + if (dev_attr->show == nvm_dev_attr_show) + return attr->mode; -void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) -{ - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - - switch (geo->major_ver_id) { + switch (ndev->geo.major_ver_id) { case 1: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); + if (dev_attr->show == nvm_dev_attr_show_12) + return attr->mode; break; case 2: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); + if (dev_attr->show == nvm_dev_attr_show_20) + return attr->mode; break; } + + return 0; } + +const struct attribute_group nvme_nvm_attr_group = { + .name = "lightnvm", + .attrs = nvm_dev_attrs, + .is_visible = nvm_dev_attrs_visible, +}; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 9fe3fff818b8..5e3cc8c59a39 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -77,6 +77,13 @@ void nvme_failover_req(struct request *req) queue_work(nvme_wq, &ns->ctrl->ana_work); } break; + case NVME_SC_HOST_PATH_ERROR: + /* + * Temporary transport disruption in talking to the controller. + * Try to send on a new path. + */ + nvme_mpath_clear_current_path(ns); + break; default: /* * Reset the controller for any non-ANA error as we don't know @@ -110,29 +117,55 @@ static const char *nvme_ana_state_names[] = { [NVME_ANA_CHANGE] = "change", }; -static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) +void nvme_mpath_clear_current_path(struct nvme_ns *ns) +{ + struct nvme_ns_head *head = ns->head; + int node; + + if (!head) + return; + + for_each_node(node) { + if (ns == rcu_access_pointer(head->current_path[node])) + rcu_assign_pointer(head->current_path[node], NULL); + } +} + +static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) { - struct nvme_ns *ns, *fallback = NULL; + int found_distance = INT_MAX, fallback_distance = INT_MAX, distance; + struct nvme_ns *found = NULL, *fallback = NULL, *ns; list_for_each_entry_rcu(ns, &head->list, siblings) { if (ns->ctrl->state != NVME_CTRL_LIVE || test_bit(NVME_NS_ANA_PENDING, &ns->flags)) continue; + + distance = node_distance(node, dev_to_node(ns->ctrl->dev)); + switch (ns->ana_state) { case NVME_ANA_OPTIMIZED: - rcu_assign_pointer(head->current_path, ns); - return ns; + if (distance < found_distance) { + found_distance = distance; + found = ns; + } + break; case NVME_ANA_NONOPTIMIZED: - fallback = ns; + if (distance < fallback_distance) { + fallback_distance = distance; + fallback = ns; + } break; default: break; } } - if (fallback) - rcu_assign_pointer(head->current_path, fallback); - return fallback; + if (!found) + found = fallback; + if (found) + rcu_assign_pointer(head->current_path[node], found); + return found; } static inline bool nvme_path_is_optimized(struct nvme_ns *ns) @@ -143,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns) inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) { - struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu); + int node = numa_node_id(); + struct nvme_ns *ns; + ns = srcu_dereference(head->current_path[node], &head->srcu); if (unlikely(!ns || !nvme_path_is_optimized(ns))) - ns = __nvme_find_path(head); + ns = __nvme_find_path(head, node); return ns; } @@ -193,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) int srcu_idx; srcu_idx = srcu_read_lock(&head->srcu); - ns = srcu_dereference(head->current_path, &head->srcu); + ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu); if (likely(ns && nvme_path_is_optimized(ns))) found = ns->queue->poll_fn(q, qc); srcu_read_unlock(&head->srcu, srcu_idx); @@ -282,12 +317,17 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) if (!head->disk) return; - if (!(head->disk->flags & GENHD_FL_UP)) { - device_add_disk(&head->subsys->dev, head->disk); - if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group)) - dev_warn(&head->subsys->dev, - "failed to create id group.\n"); + if (!(head->disk->flags & GENHD_FL_UP)) + device_add_disk(&head->subsys->dev, head->disk, + nvme_ns_id_attr_groups); + + if (nvme_path_is_optimized(ns)) { + int node, srcu_idx; + + srcu_idx = srcu_read_lock(&head->srcu); + for_each_node(node) + __nvme_find_path(head, node); + srcu_read_unlock(&head->srcu, srcu_idx); } kblockd_schedule_work(&ns->head->requeue_work); @@ -494,11 +534,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; - if (head->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group); + if (head->disk->flags & GENHD_FL_UP) del_gendisk(head->disk); - } blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index bb4a2003c097..9fefba039d1e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -277,14 +277,6 @@ struct nvme_ns_ids { * only ever has a single entry for private namespaces. */ struct nvme_ns_head { -#ifdef CONFIG_NVME_MULTIPATH - struct gendisk *disk; - struct nvme_ns __rcu *current_path; - struct bio_list requeue_list; - spinlock_t requeue_lock; - struct work_struct requeue_work; - struct mutex lock; -#endif struct list_head list; struct srcu_struct srcu; struct nvme_subsystem *subsys; @@ -293,6 +285,14 @@ struct nvme_ns_head { struct list_head entry; struct kref ref; int instance; +#ifdef CONFIG_NVME_MULTIPATH + struct gendisk *disk; + struct bio_list requeue_list; + spinlock_t requeue_lock; + struct work_struct requeue_work; + struct mutex lock; + struct nvme_ns __rcu *current_path[]; +#endif }; #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS @@ -459,7 +459,7 @@ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, void *log, size_t size, u64 offset); -extern const struct attribute_group nvme_ns_id_attr_group; +extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH @@ -474,14 +474,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); - -static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) -{ - struct nvme_ns_head *head = ns->head; - - if (head && ns == rcu_access_pointer(head->current_path)) - rcu_assign_pointer(head->current_path, NULL); -} +void nvme_mpath_clear_current_path(struct nvme_ns *ns); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) @@ -551,8 +544,7 @@ static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl) void nvme_nvm_update_nvm_info(struct nvme_ns *ns); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); -int nvme_nvm_register_sysfs(struct nvme_ns *ns); -void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); +extern const struct attribute_group nvme_nvm_attr_group; int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {}; @@ -563,11 +555,6 @@ static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, } static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; -static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns) -{ - return 0; -} -static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}; static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d668682f91df..4e023cd007e1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -772,10 +772,10 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) goto out_unmap; - } - if (blk_integrity_rq(req)) cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); + } + return BLK_STS_OK; out_unmap: @@ -1249,7 +1249,7 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) /** * nvme_suspend_queue - put queue into suspended state - * @nvmeq - queue to suspend + * @nvmeq: queue to suspend */ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { @@ -2564,13 +2564,12 @@ static void nvme_remove(struct pci_dev *pdev) struct nvme_dev *dev = pci_get_drvdata(pdev); nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); - - cancel_work_sync(&dev->ctrl.reset_work); pci_set_drvdata(pdev, NULL); if (!pci_device_is_present(pdev)) { nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); nvme_dev_disable(dev, true); + nvme_dev_remove_admin(dev); } flush_work(&dev->ctrl.reset_work); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dc042017c293..d181cafedc58 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -233,8 +233,15 @@ static void nvme_rdma_qp_event(struct ib_event *event, void *context) static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue) { - wait_for_completion_interruptible_timeout(&queue->cm_done, + int ret; + + ret = wait_for_completion_interruptible_timeout(&queue->cm_done, msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1); + if (ret < 0) + return ret; + if (ret == 0) + return -ETIMEDOUT; + WARN_ON_ONCE(queue->cm_error > 0); return queue->cm_error; } @@ -1849,54 +1856,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .stop_ctrl = nvme_rdma_stop_ctrl, }; -static inline bool -__nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl, - struct nvmf_ctrl_options *opts) -{ - char *stdport = __stringify(NVME_RDMA_IP_PORT); - - - if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) || - strcmp(opts->traddr, ctrl->ctrl.opts->traddr)) - return false; - - if (opts->mask & NVMF_OPT_TRSVCID && - ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid)) - return false; - } else if (opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(opts->trsvcid, stdport)) - return false; - } else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) { - if (strcmp(stdport, ctrl->ctrl.opts->trsvcid)) - return false; - } - /* else, it's a match as both have stdport. Fall to next checks */ - - /* - * checking the local address is rough. In most cases, one - * is not specified and the host port is selected by the stack. - * - * Assume no match if: - * local address is specified and address is not the same - * local address is not specified but remote is, or vice versa - * (admin using specific host_traddr when it matters). - */ - if (opts->mask & NVMF_OPT_HOST_TRADDR && - ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) { - if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr)) - return false; - } else if (opts->mask & NVMF_OPT_HOST_TRADDR || - ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) - return false; - /* - * if neither controller had an host port specified, assume it's - * a match as everything else matched. - */ - - return true; -} - /* * Fails a connection request if it matches an existing controller * (association) with the same tuple: @@ -1917,7 +1876,7 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts) mutex_lock(&nvme_rdma_ctrl_mutex); list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { - found = __nvme_rdma_options_match(ctrl, opts); + found = nvmf_ip_options_match(&ctrl->ctrl, opts); if (found) break; } @@ -1932,7 +1891,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvme_rdma_ctrl *ctrl; int ret; bool changed; - char *port; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ -1940,15 +1898,21 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - if (opts->mask & NVMF_OPT_TRSVCID) - port = opts->trsvcid; - else - port = __stringify(NVME_RDMA_IP_PORT); + if (!(opts->mask & NVMF_OPT_TRSVCID)) { + opts->trsvcid = + kstrdup(__stringify(NVME_RDMA_IP_PORT), GFP_KERNEL); + if (!opts->trsvcid) { + ret = -ENOMEM; + goto out_free_ctrl; + } + opts->mask |= NVMF_OPT_TRSVCID; + } ret = inet_pton_with_scope(&init_net, AF_UNSPEC, - opts->traddr, port, &ctrl->addr); + opts->traddr, opts->trsvcid, &ctrl->addr); if (ret) { - pr_err("malformed address passed: %s:%s\n", opts->traddr, port); + pr_err("malformed address passed: %s:%s\n", + opts->traddr, opts->trsvcid); goto out_free_ctrl; } diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index a490790d6691..196d5bd56718 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -156,6 +156,34 @@ TRACE_EVENT(nvme_complete_rq, ); +#define aer_name(aer) { aer, #aer } + +TRACE_EVENT(nvme_async_event, + TP_PROTO(struct nvme_ctrl *ctrl, u32 result), + TP_ARGS(ctrl, result), + TP_STRUCT__entry( + __field(int, ctrl_id) + __field(u32, result) + ), + TP_fast_assign( + __entry->ctrl_id = ctrl->instance; + __entry->result = result; + ), + TP_printk("nvme%d: NVME_AEN=%#08x [%s]", + __entry->ctrl_id, __entry->result, + __print_symbolic(__entry->result, + aer_name(NVME_AER_NOTICE_NS_CHANGED), + aer_name(NVME_AER_NOTICE_ANA), + aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), + aer_name(NVME_AER_ERROR), + aer_name(NVME_AER_SMART), + aer_name(NVME_AER_CSS), + aer_name(NVME_AER_VS)) + ) +); + +#undef aer_name + #endif /* _TRACE_NVME_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 2008fa62a373..1179f6314323 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -58,7 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); if (!ns) { - pr_err("nvmet : Could not find namespace id : %d\n", + pr_err("Could not find namespace id : %d\n", le32_to_cpu(req->cmd->get_log_page.nsid)); return NVME_SC_INVALID_NS; } @@ -353,7 +353,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strcpy(id->subnqn, ctrl->subsys->subsysnqn); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); /* Max command capsule size is sqe + single page of in-capsule data */ id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b5ec96abd048..0acdff9e6842 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1105,8 +1105,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, if (!port) return NULL; - if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn, - NVMF_NQN_SIZE)) { + if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) { if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) return NULL; return nvmet_disc_subsys; diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index eae29f493a07..bc0aa0bf1543 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -174,7 +174,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req) if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); - strcpy(id->subnqn, ctrl->subsys->subsysnqn); + strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); @@ -219,12 +219,10 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: - pr_err("unsupported cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } - pr_err("unhandled cmd %d\n", cmd->common.opcode); - return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } int __init nvmet_init_discovery(void) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 29b4b236afd8..409081a03b24 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -110,11 +110,19 @@ struct nvmet_fc_tgtport { struct list_head ls_busylist; struct list_head assoc_list; struct ida assoc_cnt; - struct nvmet_port *port; + struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; }; +struct nvmet_fc_port_entry { + struct nvmet_fc_tgtport *tgtport; + struct nvmet_port *port; + u64 node_name; + u64 port_name; + struct list_head pe_list; +}; + struct nvmet_fc_defer_fcp_req { struct list_head req_list; struct nvmefc_tgt_fcp_req *fcp_req; @@ -132,7 +140,6 @@ struct nvmet_fc_tgt_queue { atomic_t zrspcnt; atomic_t rsn; spinlock_t qlock; - struct nvmet_port *port; struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; struct nvmet_fc_tgt_assoc *assoc; @@ -221,6 +228,7 @@ static DEFINE_SPINLOCK(nvmet_fc_tgtlock); static LIST_HEAD(nvmet_fc_target_list); static DEFINE_IDA(nvmet_fc_tgtport_cnt); +static LIST_HEAD(nvmet_fc_portentry_list); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); @@ -645,7 +653,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->qid = qid; queue->sqsize = sqsize; queue->assoc = assoc; - queue->port = assoc->tgtport->port; queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); INIT_LIST_HEAD(&queue->avail_defer_list); @@ -957,6 +964,83 @@ nvmet_fc_find_target_assoc(struct nvmet_fc_tgtport *tgtport, return ret; } +static void +nvmet_fc_portentry_bind(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_port_entry *pe, + struct nvmet_port *port) +{ + lockdep_assert_held(&nvmet_fc_tgtlock); + + pe->tgtport = tgtport; + tgtport->pe = pe; + + pe->port = port; + port->priv = pe; + + pe->node_name = tgtport->fc_target_port.node_name; + pe->port_name = tgtport->fc_target_port.port_name; + INIT_LIST_HEAD(&pe->pe_list); + + list_add_tail(&pe->pe_list, &nvmet_fc_portentry_list); +} + +static void +nvmet_fc_portentry_unbind(struct nvmet_fc_port_entry *pe) +{ + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + if (pe->tgtport) + pe->tgtport->pe = NULL; + list_del(&pe->pe_list); + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +/* + * called when a targetport deregisters. Breaks the relationship + * with the nvmet port, but leaves the port_entry in place so that + * re-registration can resume operation. + */ +static void +nvmet_fc_portentry_unbind_tgt(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_port_entry *pe; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + pe = tgtport->pe; + if (pe) + pe->tgtport = NULL; + tgtport->pe = NULL; + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} + +/* + * called when a new targetport is registered. Looks in the + * existing nvmet port_entries to see if the nvmet layer is + * configured for the targetport's wwn's. (the targetport existed, + * nvmet configured, the lldd unregistered the tgtport, and is now + * reregistering the same targetport). If so, set the nvmet port + * port entry on the targetport. + */ +static void +nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport) +{ + struct nvmet_fc_port_entry *pe; + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_for_each_entry(pe, &nvmet_fc_portentry_list, pe_list) { + if (tgtport->fc_target_port.node_name == pe->node_name && + tgtport->fc_target_port.port_name == pe->port_name) { + WARN_ON(pe->tgtport); + tgtport->pe = pe; + pe->tgtport = tgtport; + break; + } + } + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); +} /** * nvme_fc_register_targetport - transport entry point called by an @@ -1034,6 +1118,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, goto out_free_newrec; } + nvmet_fc_portentry_rebind_tgt(newrec); + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); list_add_tail(&newrec->tgt_list, &nvmet_fc_target_list); spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); @@ -1159,8 +1245,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) * nvme_fc_unregister_targetport - transport entry point called by an * LLDD to deregister/remove a previously * registered a local NVME subsystem FC port. - * @tgtport: pointer to the (registered) target port that is to be - * deregistered. + * @target_port: pointer to the (registered) target port that is to be + * deregistered. * * Returns: * a completion status. Must be 0 upon success; a negative errno @@ -1171,6 +1257,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + nvmet_fc_portentry_unbind_tgt(tgtport); + /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); @@ -1661,7 +1749,7 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work) * * If this routine returns error, the LLDD should abort the exchange. * - * @tgtport: pointer to the (registered) target port the LS was + * @target_port: pointer to the (registered) target port the LS was * received on. * @lsreq: pointer to a lsreq request structure to be used to reference * the exchange corresponding to the LS. @@ -2147,7 +2235,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req) /* - * Actual processing routine for received FC-NVME LS Requests from the LLD + * Actual processing routine for received FC-NVME I/O Requests from the LLD */ static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, @@ -2158,6 +2246,13 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, int ret; /* + * if there is no nvmet mapping to the targetport there + * shouldn't be requests. just terminate them. + */ + if (!tgtport->pe) + goto transport_error; + + /* * Fused commands are currently not supported in the linux * implementation. * @@ -2184,7 +2279,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.rsp = &fod->rspiubuf.cqe; - fod->req.port = fod->queue->port; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); @@ -2468,7 +2563,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) substring_t wwn = { name, &name[sizeof(name)-1] }; int nnoffset, pnoffset; - /* validate it string one of the 2 allowed formats */ + /* validate if string is one of the 2 allowed formats */ if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], @@ -2508,6 +2603,7 @@ static int nvmet_fc_add_port(struct nvmet_port *port) { struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_port_entry *pe; struct nvmet_fc_traddr traddr = { 0L, 0L }; unsigned long flags; int ret; @@ -2524,24 +2620,40 @@ nvmet_fc_add_port(struct nvmet_port *port) if (ret) return ret; + pe = kzalloc(sizeof(*pe), GFP_KERNEL); + if (!pe) + return -ENOMEM; + ret = -ENXIO; spin_lock_irqsave(&nvmet_fc_tgtlock, flags); list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) { if ((tgtport->fc_target_port.node_name == traddr.nn) && (tgtport->fc_target_port.port_name == traddr.pn)) { - tgtport->port = port; - ret = 0; + /* a FC port can only be 1 nvmet port id */ + if (!tgtport->pe) { + nvmet_fc_portentry_bind(tgtport, pe, port); + ret = 0; + } else + ret = -EALREADY; break; } } spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); + + if (ret) + kfree(pe); + return ret; } static void nvmet_fc_remove_port(struct nvmet_port *port) { - /* nothing to do */ + struct nvmet_fc_port_entry *pe = port->priv; + + nvmet_fc_portentry_unbind(pe); + + kfree(pe); } static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 5251689a1d9a..291f4121f516 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -648,6 +648,7 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, break; /* Fall-Thru to RSP handling */ + /* FALLTHRU */ case NVMET_FCOP_RSP: if (fcpreq) { diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 7bc9f6240432..f93fb5711142 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -58,7 +58,7 @@ static void nvmet_bio_done(struct bio *bio) static void nvmet_bdev_execute_rw(struct nvmet_req *req) { int sg_cnt = req->sg_cnt; - struct bio *bio = &req->b.inline_bio; + struct bio *bio; struct scatterlist *sg; sector_t sector; blk_qc_t cookie; @@ -81,7 +81,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector = le64_to_cpu(req->cmd->rw.slba); sector <<= (req->ns->blksize_shift - 9); - bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) { + bio = &req->b.inline_bio; + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + } else { + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + } bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; bio->bi_private = req; diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 81a9dc5290a8..39d972e2595f 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -246,7 +246,8 @@ static void nvmet_file_execute_discard(struct nvmet_req *req) break; offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; - len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; + len = le32_to_cpu(range.nlb); + len <<= req->ns->blksize_shift; if (offset + len > req->ns->size) { ret = NVME_SC_LBA_RANGE | NVME_SC_DNR; break; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index ec9af4ee03b6..08f7b57a1203 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -264,6 +264,7 @@ struct nvmet_fabrics_ops { }; #define NVMET_MAX_INLINE_BIOVEC 8 +#define NVMET_MAX_INLINE_DATA_LEN NVMET_MAX_INLINE_BIOVEC * PAGE_SIZE struct nvmet_req { struct nvme_command *cmd; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index bfc4da660bb4..bd265aceb90c 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -122,6 +122,7 @@ struct nvmet_rdma_device { int inline_page_count; }; +static struct workqueue_struct *nvmet_rdma_delete_wq; static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); @@ -1267,12 +1268,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_rdma_delete_wq); } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) { - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); /* Destroying rdma_cm id is not needed here */ return 0; } @@ -1337,7 +1338,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); } } @@ -1367,7 +1368,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, mutex_unlock(&nvmet_rdma_queue_mutex); pr_err("failed to connect queue %d\n", queue->idx); - schedule_work(&queue->release_work); + queue_work(nvmet_rdma_delete_wq, &queue->release_work); } /** @@ -1649,8 +1650,17 @@ static int __init nvmet_rdma_init(void) if (ret) goto err_ib_client; + nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvmet_rdma_delete_wq) { + ret = -ENOMEM; + goto err_unreg_transport; + } + return 0; +err_unreg_transport: + nvmet_unregister_transport(&nvmet_rdma_ops); err_ib_client: ib_unregister_client(&nvmet_rdma_ib_client); return ret; @@ -1658,6 +1668,7 @@ err_ib_client: static void __exit nvmet_rdma_exit(void) { + destroy_workqueue(nvmet_rdma_delete_wq); nvmet_unregister_transport(&nvmet_rdma_ops); ib_unregister_client(&nvmet_rdma_ib_client); WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list)); |