diff options
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 157 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 28 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 10 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 1318 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 55 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 53 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 264 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 212 | ||||
-rw-r--r-- | drivers/nvme/host/scsi.c | 15 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 33 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 32 | ||||
-rw-r--r-- | drivers/nvme/target/discovery.c | 19 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd.c | 36 | ||||
-rw-r--r-- | drivers/nvme/target/fc.c | 282 | ||||
-rw-r--r-- | drivers/nvme/target/fcloop.c | 201 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd.c | 28 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 195 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 12 | ||||
-rw-r--r-- | drivers/nvme/target/rdma.c | 55 |
19 files changed, 2006 insertions, 999 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9b3b57fef446..d5e0906262ea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -49,10 +49,9 @@ unsigned char shutdown_timeout = 5; module_param(shutdown_timeout, byte, 0644); MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); -unsigned int nvme_max_retries = 5; -module_param_named(max_retries, nvme_max_retries, uint, 0644); +static u8 nvme_max_retries = 5; +module_param_named(max_retries, nvme_max_retries, byte, 0644); MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); -EXPORT_SYMBOL_GPL(nvme_max_retries); static int nvme_char_major; module_param(nvme_char_major, int, 0); @@ -62,11 +61,66 @@ module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, "max power saving latency for new devices; use PM QOS to change per device"); +static bool force_apst; +module_param(force_apst, bool, 0644); +MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off"); + static LIST_HEAD(nvme_ctrl_list); static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; +static int nvme_error_status(struct request *req) +{ + switch (nvme_req(req)->status & 0x7ff) { + case NVME_SC_SUCCESS: + return 0; + case NVME_SC_CAP_EXCEEDED: + return -ENOSPC; + default: + return -EIO; + + /* + * XXX: these errors are a nasty side-band protocol to + * drivers/md/dm-mpath.c:noretry_error() that aren't documented + * anywhere.. + */ + case NVME_SC_CMD_SEQ_ERROR: + return -EILSEQ; + case NVME_SC_ONCS_NOT_SUPPORTED: + return -EOPNOTSUPP; + case NVME_SC_WRITE_FAULT: + case NVME_SC_READ_ERROR: + case NVME_SC_UNWRITTEN_BLOCK: + return -ENODATA; + } +} + +static inline bool nvme_req_needs_retry(struct request *req) +{ + if (blk_noretry_request(req)) + return false; + if (nvme_req(req)->status & NVME_SC_DNR) + return false; + if (jiffies - req->start_time >= req->timeout) + return false; + if (nvme_req(req)->retries >= nvme_max_retries) + return false; + return true; +} + +void nvme_complete_rq(struct request *req) +{ + if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { + nvme_req(req)->retries++; + blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); + return; + } + + blk_mq_end_request(req, nvme_error_status(req)); +} +EXPORT_SYMBOL_GPL(nvme_complete_rq); + void nvme_cancel_request(struct request *req, void *data, bool reserved) { int status; @@ -80,7 +134,9 @@ void nvme_cancel_request(struct request *req, void *data, bool reserved) status = NVME_SC_ABORT_REQ; if (blk_queue_dying(req->q)) status |= NVME_SC_DNR; - blk_mq_complete_request(req, status); + nvme_req(req)->status = status; + blk_mq_complete_request(req); + } EXPORT_SYMBOL_GPL(nvme_cancel_request); @@ -205,12 +261,6 @@ fail: return NULL; } -void nvme_requeue_req(struct request *req) -{ - blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q)); -} -EXPORT_SYMBOL_GPL(nvme_requeue_req); - struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid) { @@ -270,7 +320,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); - cmnd->dsm.nr = segments - 1; + cmnd->dsm.nr = cpu_to_le32(segments - 1); cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); req->special_vec.bv_page = virt_to_page(range); @@ -327,6 +377,12 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { int ret = BLK_MQ_RQ_QUEUE_OK; + if (!(req->rq_flags & RQF_DONTPREP)) { + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; + } + switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: @@ -335,6 +391,8 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, case REQ_OP_FLUSH: nvme_setup_flush(ns, cmd); break; + case REQ_OP_WRITE_ZEROES: + /* currently only aliased to deallocate for a few ctrls: */ case REQ_OP_DISCARD: ret = nvme_setup_discard(ns, req, cmd); break; @@ -378,7 +436,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, blk_execute_rq(req->q, NULL, req, at_head); if (result) *result = nvme_req(req)->result; - ret = req->errors; + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; out: blk_mq_free_request(req); return ret; @@ -463,7 +524,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, } submit: blk_execute_rq(req->q, disk, req, 0); - ret = req->errors; + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; if (result) *result = le32_to_cpu(nvme_req(req)->result.u32); if (meta && !ret && !write) { @@ -900,16 +964,14 @@ static void nvme_config_discard(struct nvme_ns *ns) BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES) - ns->queue->limits.discard_zeroes_data = 1; - else - ns->queue->limits.discard_zeroes_data = 0; - ns->queue->limits.discard_alignment = logical_block_size; ns->queue->limits.discard_granularity = logical_block_size; blk_queue_max_discard_sectors(ns->queue, UINT_MAX); blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); + + if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) + blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX); } static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) @@ -1267,7 +1329,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) * heuristic: we are willing to spend at most 2% of the time * transitioning between power states. Therefore, when running * in any given state, we will enter the next lower-power - * non-operational state after waiting 100 * (enlat + exlat) + * non-operational state after waiting 50 * (enlat + exlat) * microseconds, as long as that state's total latency is under * the requested maximum latency. * @@ -1278,6 +1340,8 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) unsigned apste; struct nvme_feat_auto_pst *table; + u64 max_lat_us = 0; + int max_ps = -1; int ret; /* @@ -1299,6 +1363,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) if (ctrl->ps_max_latency_us == 0) { /* Turn off APST. */ apste = 0; + dev_dbg(ctrl->device, "APST disabled\n"); } else { __le64 target = cpu_to_le64(0); int state; @@ -1316,6 +1381,14 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) table->entries[state] = target; /* + * Don't allow transitions to the deepest state + * if it's quirked off. + */ + if (state == ctrl->npss && + (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) + continue; + + /* * Is this state a useful non-operational state for * higher-power states to autonomously transition to? */ @@ -1340,9 +1413,22 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) target = cpu_to_le64((state << 3) | (transition_ms << 8)); + + if (max_ps == -1) + max_ps = state; + + if (total_latency_us > max_lat_us) + max_lat_us = total_latency_us; } apste = 1; + + if (max_ps == -1) { + dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n"); + } else { + dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n", + max_ps, max_lat_us, (int)sizeof(*table), table); + } } ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste, @@ -1387,16 +1473,15 @@ struct nvme_core_quirk_entry { }; static const struct nvme_core_quirk_entry core_quirks[] = { - /* - * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes - * the controller to go out to lunch. It dies when the watchdog - * timer reads CSTS and gets 0xffffffff. - */ { - .vid = 0x144d, - .fr = "BXW75D0Q", + /* + * This Toshiba device seems to die using any APST states. See: + * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184/comments/11 + */ + .vid = 0x1179, + .mn = "THNSF5256GPUK TOSHIBA", .quirks = NVME_QUIRK_NO_APST, - }, + } }; /* match is null-terminated but idstr is space-padded. */ @@ -1481,6 +1566,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } } + if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { + dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); + ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; + } + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->vid = le16_to_cpu(id->vid); ctrl->oncs = le16_to_cpup(&id->oncs); @@ -1503,7 +1593,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->npss = id->npss; prev_apsta = ctrl->apsta; - ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta; + if (ctrl->quirks & NVME_QUIRK_NO_APST) { + if (force_apst && id->apsta) { + dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n"); + ctrl->apsta = 1; + } else { + ctrl->apsta = 0; + } + } else { + ctrl->apsta = id->apsta; + } memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); if (ctrl->ops->is_fabrics) { @@ -2386,7 +2485,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) mutex_lock(&ctrl->namespaces_mutex); list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_freeze_queue_start(ns->queue); + blk_freeze_queue_start(ns->queue); mutex_unlock(&ctrl->namespaces_mutex); } EXPORT_SYMBOL_GPL(nvme_start_freeze); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5b7386f69f4d..990e6fb32a63 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -471,6 +471,16 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) } EXPORT_SYMBOL_GPL(nvmf_connect_io_queue); +bool nvmf_should_reconnect(struct nvme_ctrl *ctrl) +{ + if (ctrl->opts->max_reconnects != -1 && + ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(nvmf_should_reconnect); + /** * nvmf_register_transport() - NVMe Fabrics Library registration function. * @ops: Transport ops instance to be registered to the @@ -533,6 +543,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_QUEUE_SIZE, "queue_size=%d" }, { NVMF_OPT_NR_IO_QUEUES, "nr_io_queues=%d" }, { NVMF_OPT_RECONNECT_DELAY, "reconnect_delay=%d" }, + { NVMF_OPT_CTRL_LOSS_TMO, "ctrl_loss_tmo=%d" }, { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, @@ -546,6 +557,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, char *options, *o, *p; int token, ret = 0; size_t nqnlen = 0; + int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO; /* Set defaults */ opts->queue_size = NVMF_DEF_QUEUE_SIZE; @@ -655,6 +667,16 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->kato = token; break; + case NVMF_OPT_CTRL_LOSS_TMO: + if (match_int(args, &token)) { + ret = -EINVAL; + goto out; + } + + if (token < 0) + pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n"); + ctrl_loss_tmo = token; + break; case NVMF_OPT_HOSTNQN: if (opts->host) { pr_err("hostnqn already user-assigned: %s\n", @@ -710,6 +732,12 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } } + if (ctrl_loss_tmo < 0) + opts->max_reconnects = -1; + else + opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, + opts->reconnect_delay); + if (!opts->host) { kref_get(&nvmf_default_host->ref); opts->host = nvmf_default_host; diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 156018182ce4..f5a9c1fb186f 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -21,6 +21,8 @@ #define NVMF_MAX_QUEUE_SIZE 1024 #define NVMF_DEF_QUEUE_SIZE 128 #define NVMF_DEF_RECONNECT_DELAY 10 +/* default to 600 seconds of reconnect attempts before giving up */ +#define NVMF_DEF_CTRL_LOSS_TMO 600 /* * Define a host as seen by the target. We allocate one at boot, but also @@ -53,6 +55,7 @@ enum { NVMF_OPT_HOSTNQN = 1 << 8, NVMF_OPT_RECONNECT_DELAY = 1 << 9, NVMF_OPT_HOST_TRADDR = 1 << 10, + NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, }; /** @@ -77,6 +80,10 @@ enum { * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN. * @kato: Keep-alive timeout. * @host: Virtual NVMe host, contains the NQN and Host ID. + * @nr_reconnects: number of reconnect attempted since the last ctrl failure + * @max_reconnects: maximum number of allowed reconnect attempts before removing + * the controller, (-1) means reconnect forever, zero means remove + * immediately; */ struct nvmf_ctrl_options { unsigned mask; @@ -91,6 +98,8 @@ struct nvmf_ctrl_options { bool discovery_nqn; unsigned int kato; struct nvmf_host *host; + int nr_reconnects; + int max_reconnects; }; /* @@ -133,5 +142,6 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); +bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9690beb15e69..70e689bf1cad 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -19,6 +19,7 @@ #include <linux/parser.h> #include <uapi/scsi/fc/fc_fs.h> #include <uapi/scsi/fc/fc_els.h> +#include <linux/delay.h> #include "nvme.h" #include "fabrics.h" @@ -44,6 +45,8 @@ enum nvme_fc_queue_flags { #define NVMEFC_QUEUE_DELAY 3 /* ms units */ +#define NVME_FC_MAX_CONNECT_ATTEMPTS 1 + struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; @@ -61,16 +64,24 @@ struct nvme_fc_queue { unsigned long flags; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ +enum nvme_fcop_flags { + FCOP_FLAGS_TERMIO = (1 << 0), + FCOP_FLAGS_RELEASED = (1 << 1), + FCOP_FLAGS_COMPLETE = (1 << 2), + FCOP_FLAGS_AEN = (1 << 3), +}; + struct nvmefc_ls_req_op { struct nvmefc_ls_req ls_req; - struct nvme_fc_ctrl *ctrl; + struct nvme_fc_rport *rport; struct nvme_fc_queue *queue; struct request *rq; + u32 flags; int ls_error; struct completion ls_done; - struct list_head lsreq_list; /* ctrl->ls_req_list */ + struct list_head lsreq_list; /* rport->ls_req_list */ bool req_queued; }; @@ -79,6 +90,7 @@ enum nvme_fcpop_state { FCPOP_STATE_IDLE = 1, FCPOP_STATE_ACTIVE = 2, FCPOP_STATE_ABORTED = 3, + FCPOP_STATE_COMPLETE = 4, }; struct nvme_fc_fcp_op { @@ -97,6 +109,7 @@ struct nvme_fc_fcp_op { struct request *rq; atomic_t state; + u32 flags; u32 rqno; u32 nents; @@ -120,23 +133,24 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; + struct list_head ls_req_list; + struct device *dev; /* physical device for dma */ + struct nvme_fc_lport *lport; spinlock_t lock; struct kref ref; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_state { - FCCTRL_INIT = 0, - FCCTRL_ACTIVE = 1, +enum nvme_fcctrl_flags { + FCCTRL_TERMIO = (1 << 0), }; struct nvme_fc_ctrl { spinlock_t lock; struct nvme_fc_queue *queues; - u32 queue_count; - struct device *dev; struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + u32 queue_count; u32 cnum; u64 association_id; @@ -144,14 +158,19 @@ struct nvme_fc_ctrl { u64 cap; struct list_head ctrl_list; /* rport->ctrl_list */ - struct list_head ls_req_list; struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; struct work_struct delete_work; + struct work_struct reset_work; + struct delayed_work connect_work; + int reconnect_delay; + int connect_attempts; + struct kref ref; - int state; + u32 flags; + u32 iocnt; struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; @@ -419,9 +438,12 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); + INIT_LIST_HEAD(&newrec->ls_req_list); kref_init(&newrec->ref); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + newrec->dev = lport->dev; + newrec->lport = lport; newrec->remoteport.private = &newrec[1]; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; @@ -444,7 +466,6 @@ out_kfree_rport: out_reghost_failed: *portptr = NULL; return ret; - } EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); @@ -487,6 +508,30 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport) return kref_get_unless_zero(&rport->ref); } +static int +nvme_fc_abort_lsops(struct nvme_fc_rport *rport) +{ + struct nvmefc_ls_req_op *lsop; + unsigned long flags; + +restart: + spin_lock_irqsave(&rport->lock, flags); + + list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { + if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { + lsop->flags |= FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&rport->lock, flags); + rport->lport->ops->ls_abort(&rport->lport->localport, + &rport->remoteport, + &lsop->ls_req); + goto restart; + } + } + spin_unlock_irqrestore(&rport->lock, flags); + + return 0; +} + /** * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously @@ -522,6 +567,8 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) spin_unlock_irqrestore(&rport->lock, flags); + nvme_fc_abort_lsops(rport); + nvme_fc_rport_put(rport); return 0; } @@ -624,16 +671,16 @@ static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); static void -__nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, - struct nvmefc_ls_req_op *lsop) +__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) { + struct nvme_fc_rport *rport = lsop->rport; struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); if (!lsop->req_queued) { - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); return; } @@ -641,56 +688,71 @@ __nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, lsop->req_queued = false; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - fc_dma_unmap_single(ctrl->dev, lsreq->rqstdma, + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); - nvme_fc_ctrl_put(ctrl); + nvme_fc_rport_put(rport); } static int -__nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, +__nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - int ret; + int ret = 0; - if (!nvme_fc_ctrl_get(ctrl)) + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return -ECONNREFUSED; + + if (!nvme_fc_rport_get(rport)) return -ESHUTDOWN; lsreq->done = done; - lsop->ctrl = ctrl; + lsop->rport = rport; lsop->req_queued = false; INIT_LIST_HEAD(&lsop->lsreq_list); init_completion(&lsop->ls_done); - lsreq->rqstdma = fc_dma_map_single(ctrl->dev, lsreq->rqstaddr, + lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, lsreq->rqstlen + lsreq->rsplen, DMA_BIDIRECTIONAL); - if (fc_dma_mapping_error(ctrl->dev, lsreq->rqstdma)) { - nvme_fc_ctrl_put(ctrl); - dev_err(ctrl->dev, - "els request command failed EFAULT.\n"); - return -EFAULT; + if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { + ret = -EFAULT; + goto out_putrport; } lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); - list_add_tail(&lsop->lsreq_list, &ctrl->ls_req_list); + list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); lsop->req_queued = true; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - ret = ctrl->lport->ops->ls_req(&ctrl->lport->localport, - &ctrl->rport->remoteport, lsreq); + ret = rport->lport->ops->ls_req(&rport->lport->localport, + &rport->remoteport, lsreq); if (ret) - lsop->ls_error = ret; + goto out_unlink; + + return 0; + +out_unlink: + lsop->ls_error = ret; + spin_lock_irqsave(&rport->lock, flags); + lsop->req_queued = false; + list_del(&lsop->lsreq_list); + spin_unlock_irqrestore(&rport->lock, flags); + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); +out_putrport: + nvme_fc_rport_put(rport); return ret; } @@ -705,15 +767,15 @@ nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) } static int -nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) +nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; int ret; - ret = __nvme_fc_send_ls_req(ctrl, lsop, nvme_fc_send_ls_req_done); + ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); - if (!ret) + if (!ret) { /* * No timeout/not interruptible as we need the struct * to exist until the lldd calls us back. Thus mandate @@ -722,14 +784,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) */ wait_for_completion(&lsop->ls_done); - __nvme_fc_finish_ls_req(ctrl, lsop); + __nvme_fc_finish_ls_req(lsop); - if (ret) { - dev_err(ctrl->dev, - "ls request command failed (%d).\n", ret); - return ret; + ret = lsop->ls_error; } + if (ret) + return ret; + /* ACC or RJT payload ? */ if (rjt->w0.ls_cmd == FCNVME_LS_RJT) return -ENXIO; @@ -737,19 +799,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) return 0; } -static void -nvme_fc_send_ls_req_async(struct nvme_fc_ctrl *ctrl, +static int +nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { - int ret; - - ret = __nvme_fc_send_ls_req(ctrl, lsop, done); - /* don't wait for completion */ - if (ret) - done(&lsop->ls_req, ret); + return __nvme_fc_send_ls_req(rport, lsop, done); } /* Validation Error indexes into the string table below */ @@ -839,7 +896,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, lsreq->rsplen = sizeof(*assoc_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -848,11 +905,12 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, /* validate the ACC response */ if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) fcret = VERR_LSACC; - if (assoc_acc->hdr.desc_list_len != + else if (assoc_acc->hdr.desc_list_len != fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_cr_assoc_acc))) fcret = VERR_CR_ASSOC_ACC_LEN; - if (assoc_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + else if (assoc_acc->hdr.rqst.desc_tag != + cpu_to_be32(FCNVME_LSDESC_RQST)) fcret = VERR_LSDESC_RQST; else if (assoc_acc->hdr.rqst.desc_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) @@ -946,7 +1004,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, lsreq->rsplen = sizeof(*conn_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -955,10 +1013,10 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, /* validate the ACC response */ if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) fcret = VERR_LSACC; - if (conn_acc->hdr.desc_list_len != + else if (conn_acc->hdr.desc_list_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) fcret = VERR_CR_CONN_ACC_LEN; - if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) + else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) fcret = VERR_LSDESC_RQST; else if (conn_acc->hdr.rqst.desc_len != fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) @@ -997,14 +1055,8 @@ static void nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) { struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); - struct nvme_fc_ctrl *ctrl = lsop->ctrl; - __nvme_fc_finish_ls_req(ctrl, lsop); - - if (status) - dev_err(ctrl->dev, - "disconnect assoc ls request command failed (%d).\n", - status); + __nvme_fc_finish_ls_req(lsop); /* fc-nvme iniator doesn't care about success or failure of cmd */ @@ -1035,6 +1087,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) struct fcnvme_ls_disconnect_acc *discon_acc; struct nvmefc_ls_req_op *lsop; struct nvmefc_ls_req *lsreq; + int ret; lsop = kzalloc((sizeof(*lsop) + ctrl->lport->ops->lsrqst_priv_sz + @@ -1077,7 +1130,10 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) lsreq->rsplen = sizeof(*discon_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - nvme_fc_send_ls_req_async(ctrl, lsop, nvme_fc_disconnect_assoc_done); + ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, + nvme_fc_disconnect_assoc_done); + if (ret) + kfree(lsop); /* only meaningful part to terminating the association */ ctrl->association_id = 0; @@ -1086,6 +1142,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) /* *********************** NVME Ctrl Routines **************************** */ +static void __nvme_fc_final_op_cleanup(struct request *rq); static int nvme_fc_reinit_request(void *data, struct request *rq) @@ -1115,29 +1172,92 @@ __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, } static void -nvme_fc_exit_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx) +nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - return __nvme_fc_exit_request(data, op); + return __nvme_fc_exit_request(set->driver_data, op); +} + +static int +__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) +{ + int state; + + state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); + if (state != FCPOP_STATE_ACTIVE) { + atomic_set(&op->state, state); + return -ECANCELED; + } + + ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, + &ctrl->rport->remoteport, + op->queue->lldd_handle, + &op->fcp_req); + + return 0; } static void -nvme_fc_exit_aen_ops(struct nvme_fc_ctrl *ctrl) +nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) { struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; - int i; + unsigned long flags; + int i, ret; for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { - if (atomic_read(&aen_op->state) == FCPOP_STATE_UNINIT) + if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) continue; - __nvme_fc_exit_request(ctrl, aen_op); - nvme_fc_ctrl_put(ctrl); + + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + aen_op->flags |= FCOP_FLAGS_TERMIO; + } + spin_unlock_irqrestore(&ctrl->lock, flags); + + ret = __nvme_fc_abort_op(ctrl, aen_op); + if (ret) { + /* + * if __nvme_fc_abort_op failed the io wasn't + * active. Thus this call path is running in + * parallel to the io complete. Treat as non-error. + */ + + /* back out the flags/counters */ + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + aen_op->flags &= ~FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + return; + } } } -void +static inline int +__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, + struct nvme_fc_fcp_op *op) +{ + unsigned long flags; + bool complete_rq = false; + + spin_lock_irqsave(&ctrl->lock, flags); + if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + } + if (op->flags & FCOP_FLAGS_RELEASED) + complete_rq = true; + else + op->flags |= FCOP_FLAGS_COMPLETE; + spin_unlock_irqrestore(&ctrl->lock, flags); + + return complete_rq; +} + +static void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) { struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); @@ -1146,7 +1266,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) struct nvme_fc_ctrl *ctrl = op->ctrl; struct nvme_fc_queue *queue = op->queue; struct nvme_completion *cqe = &op->rsp_iu.cqe; - u16 status; + struct nvme_command *sqe = &op->cmd_iu.sqe; + __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); + union nvme_result result; + bool complete_rq; /* * WARNING: @@ -1181,9 +1304,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) sizeof(op->rsp_iu), DMA_FROM_DEVICE); if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) - status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - else - status = freq->status; + status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); + else if (freq->status) + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); /* * For the linux implementation, if we have an unsuccesful @@ -1211,10 +1334,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) */ if (freq->transferred_length != be32_to_cpu(op->cmd_iu.data_len)) { - status = -EIO; + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } - op->nreq.result.u64 = 0; + result.u64 = 0; break; case sizeof(struct nvme_fc_ersp_iu): @@ -1226,28 +1349,40 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) (freq->rcv_rsplen / 4) || be32_to_cpu(op->rsp_iu.xfrd_len) != freq->transferred_length || - op->rqno != le16_to_cpu(cqe->command_id))) { - status = -EIO; + op->rsp_iu.status_code || + sqe->common.command_id != cqe->command_id)) { + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } - op->nreq.result = cqe->result; - status = le16_to_cpu(cqe->status) >> 1; + result = cqe->result; + status = cqe->status; break; default: - status = -EIO; + status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } done: - if (!queue->qnum && op->rqno >= AEN_CMDID_BASE) { - nvme_complete_async_event(&queue->ctrl->ctrl, status, - &op->nreq.result); + if (op->flags & FCOP_FLAGS_AEN) { + nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); + complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags = FCOP_FLAGS_AEN; /* clear other flags */ nvme_fc_ctrl_put(ctrl); return; } - blk_mq_complete_request(rq, status); + complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + if (!complete_rq) { + if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { + status = cpu_to_le16(NVME_SC_ABORT_REQ); + if (blk_queue_dying(rq->q)) + status |= cpu_to_le16(NVME_SC_DNR); + } + nvme_end_request(rq, status, result); + } else + __nvme_fc_final_op_cleanup(rq); } static int @@ -1299,11 +1434,10 @@ out_on_error: } static int -nvme_fc_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = data; + struct nvme_fc_ctrl *ctrl = set->driver_data; struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1]; @@ -1311,11 +1445,10 @@ nvme_fc_init_request(void *data, struct request *rq, } static int -nvme_fc_init_admin_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = data; + struct nvme_fc_ctrl *ctrl = set->driver_data; struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_queue *queue = &ctrl->queues[0]; @@ -1328,25 +1461,55 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; struct nvme_fc_cmd_iu *cmdiu; struct nvme_command *sqe; + void *private; int i, ret; aen_op = ctrl->aen_ops; for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, + GFP_KERNEL); + if (!private) + return -ENOMEM; + cmdiu = &aen_op->cmd_iu; sqe = &cmdiu->sqe; ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], aen_op, (struct request *)NULL, (AEN_CMDID_BASE + i)); - if (ret) + if (ret) { + kfree(private); return ret; + } + + aen_op->flags = FCOP_FLAGS_AEN; + aen_op->fcp_req.first_sgl = NULL; /* no sg list */ + aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); sqe->common.opcode = nvme_admin_async_event; + /* Note: core layer may overwrite the sqe.command_id value */ sqe->common.command_id = AEN_CMDID_BASE + i; } return 0; } +static void +nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_fcp_op *aen_op; + int i; + + aen_op = ctrl->aen_ops; + for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + if (!aen_op->fcp_req.private) + continue; + + __nvme_fc_exit_request(ctrl, aen_op); + + kfree(aen_op->fcp_req.private); + aen_op->fcp_req.private = NULL; + } +} static inline void __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, @@ -1446,15 +1609,6 @@ __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, } static void -nvme_fc_destroy_admin_queue(struct nvme_fc_ctrl *ctrl) -{ - __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvme_fc_free_queue(&ctrl->queues[0]); -} - -static void nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) { int i; @@ -1541,19 +1695,27 @@ nvme_fc_ctrl_free(struct kref *ref) container_of(ref, struct nvme_fc_ctrl, ref); unsigned long flags; - if (ctrl->state != FCCTRL_INIT) { - /* remove from rport list */ - spin_lock_irqsave(&ctrl->rport->lock, flags); - list_del(&ctrl->ctrl_list); - spin_unlock_irqrestore(&ctrl->rport->lock, flags); + if (ctrl->ctrl.tagset) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); } + /* remove from rport list */ + spin_lock_irqsave(&ctrl->rport->lock, flags); + list_del(&ctrl->ctrl_list); + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + + blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_mq_free_tag_set(&ctrl->admin_tag_set); + + kfree(ctrl->queues); + put_device(ctrl->dev); nvme_fc_rport_put(ctrl->rport); - kfree(ctrl->queues); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); - nvmf_free_options(ctrl->ctrl.opts); + if (ctrl->ctrl.opts) + nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); } @@ -1574,57 +1736,38 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) * controller. Called after last nvme_put_ctrl() call */ static void -nvme_fc_free_nvme_ctrl(struct nvme_ctrl *nctrl) +nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); WARN_ON(nctrl != &ctrl->ctrl); - /* - * Tear down the association, which will generate link - * traffic to terminate connections - */ - - if (ctrl->state != FCCTRL_INIT) { - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); - - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_fc_delete_hw_io_queues(ctrl); - nvme_fc_free_io_queues(ctrl); - } - - nvme_fc_exit_aen_ops(ctrl); - - nvme_fc_destroy_admin_queue(ctrl); - } - nvme_fc_ctrl_put(ctrl); } - -static int -__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) +static void +nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { - int state; + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport association error detected: %s\n", + ctrl->cnum, errmsg); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: resetting controller\n", ctrl->cnum); - state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); - if (state != FCPOP_STATE_ACTIVE) { - atomic_set(&op->state, state); - return -ECANCELED; /* fail */ + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Couldn't change state " + "to RECONNECTING\n", ctrl->cnum); + return; } - ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, - &ctrl->rport->remoteport, - op->queue->lldd_handle, - &op->fcp_req); - - return 0; + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Failed to schedule " + "reset work\n", ctrl->cnum); } -enum blk_eh_timer_return +static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq, bool reserved) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); @@ -1640,11 +1783,13 @@ nvme_fc_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; /* - * TODO: force a controller reset - * when that happens, queues will be torn down and outstanding - * ios will be terminated, and the above abort, on a single io - * will no longer be needed. + * we can't individually ABTS an io without affecting the queue, + * thus killing the queue, adn thus the association. + * So resolve by performing a controller reset, which will stop + * the host/io stack, terminate the association on the link, + * and recreate an association on the link. */ + nvme_fc_error_recovery(ctrl, "io timeout error"); return BLK_EH_HANDLED; } @@ -1738,6 +1883,13 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u32 csn; int ret; + /* + * before attempting to send the io, check to see if we believe + * the target device is present + */ + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return BLK_MQ_RQ_QUEUE_ERROR; + if (!nvme_fc_ctrl_get(ctrl)) return BLK_MQ_RQ_QUEUE_ERROR; @@ -1761,7 +1913,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, op->fcp_req.io_dir = io_dir; op->fcp_req.transferred_length = 0; op->fcp_req.rcv_rsplen = 0; - op->fcp_req.status = 0; + op->fcp_req.status = NVME_SC_SUCCESS; op->fcp_req.sqid = cpu_to_le16(queue->qnum); /* @@ -1782,14 +1934,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); sqe->rw.dptr.sgl.addr = 0; - /* odd that we set the command_id - should come from nvme-fabrics */ - WARN_ON_ONCE(sqe->common.command_id != cpu_to_le16(op->rqno)); - - if (op->rq) { /* skipped on aens */ + if (!(op->flags & FCOP_FLAGS_AEN)) { ret = nvme_fc_map_data(ctrl, op->rq, op); if (ret < 0) { - dev_err(queue->ctrl->ctrl.device, - "Failed to map data (%d)\n", ret); nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); return (ret == -ENOMEM || ret == -EAGAIN) ? @@ -1802,7 +1949,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, atomic_set(&op->state, FCPOP_STATE_ACTIVE); - if (op->rq) + if (!(op->flags & FCOP_FLAGS_AEN)) blk_mq_start_request(op->rq); ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, @@ -1810,9 +1957,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, queue->lldd_handle, &op->fcp_req); if (ret) { - dev_err(ctrl->dev, - "Send nvme command failed - lldd returned %d.\n", ret); - if (op->rq) { /* normal request */ nvme_fc_unmap_data(ctrl, op->rq, op); nvme_cleanup_cmd(op->rq); @@ -1882,12 +2026,8 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) struct nvme_fc_fcp_op *op; req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); - if (!req) { - dev_err(queue->ctrl->ctrl.device, - "tag 0x%x on QNum %#x not found\n", - tag, queue->qnum); + if (!req) return 0; - } op = blk_mq_rq_to_pdu(req); @@ -1904,11 +2044,21 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; + unsigned long flags; + bool terminating = false; int ret; if (aer_idx > NVME_FC_NR_AEN_COMMANDS) return; + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + terminating = true; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (terminating) + return; + aen_op = &ctrl->aen_ops[aer_idx]; ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, @@ -1919,36 +2069,101 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) } static void -nvme_fc_complete_rq(struct request *rq) +__nvme_fc_final_op_cleanup(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; - int error = 0, state; - state = atomic_xchg(&op->state, FCPOP_STATE_IDLE); + atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | + FCOP_FLAGS_COMPLETE); nvme_cleanup_cmd(rq); - nvme_fc_unmap_data(ctrl, rq, op); + nvme_complete_rq(rq); + nvme_fc_ctrl_put(ctrl); - if (unlikely(rq->errors)) { - if (nvme_req_needs_retry(rq, rq->errors)) { - nvme_requeue_req(rq); - return; - } +} - if (blk_rq_is_passthrough(rq)) - error = rq->errors; - else - error = nvme_error_status(rq->errors); +static void +nvme_fc_complete_rq(struct request *rq) +{ + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_ctrl *ctrl = op->ctrl; + unsigned long flags; + bool completed = false; + + /* + * the core layer, on controller resets after calling + * nvme_shutdown_ctrl(), calls complete_rq without our + * calling blk_mq_complete_request(), thus there may still + * be live i/o outstanding with the LLDD. Means transport has + * to track complete calls vs fcpio_done calls to know what + * path to take on completes and dones. + */ + spin_lock_irqsave(&ctrl->lock, flags); + if (op->flags & FCOP_FLAGS_COMPLETE) + completed = true; + else + op->flags |= FCOP_FLAGS_RELEASED; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (completed) + __nvme_fc_final_op_cleanup(rq); +} + +/* + * This routine is used by the transport when it needs to find active + * io on a queue that is to be terminated. The transport uses + * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke + * this routine to kill them on a 1 by 1 basis. + * + * As FC allocates FC exchange for each io, the transport must contact + * the LLDD to terminate the exchange, thus releasing the FC exchange. + * After terminating the exchange the LLDD will call the transport's + * normal io done path for the request, but it will have an aborted + * status. The done path will return the io request back to the block + * layer with an error status. + */ +static void +nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +{ + struct nvme_ctrl *nctrl = data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + unsigned long flags; + int status; + + if (!blk_mq_request_started(req)) + return; + + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + op->flags |= FCOP_FLAGS_TERMIO; } + spin_unlock_irqrestore(&ctrl->lock, flags); - nvme_fc_ctrl_put(ctrl); + status = __nvme_fc_abort_op(ctrl, op); + if (status) { + /* + * if __nvme_fc_abort_op failed the io wasn't + * active. Thus this call path is running in + * parallel to the io complete. Treat as non-error. + */ - blk_mq_end_request(rq, error); + /* back out the flags/counters */ + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + op->flags &= ~FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + return; + } } -static struct blk_mq_ops nvme_fc_mq_ops = { + +static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, .complete = nvme_fc_complete_rq, .init_request = nvme_fc_init_request, @@ -1959,145 +2174,275 @@ static struct blk_mq_ops nvme_fc_mq_ops = { .timeout = nvme_fc_timeout, }; -static struct blk_mq_ops nvme_fc_admin_mq_ops = { - .queue_rq = nvme_fc_queue_rq, - .complete = nvme_fc_complete_rq, - .init_request = nvme_fc_init_admin_request, - .exit_request = nvme_fc_exit_request, - .reinit_request = nvme_fc_reinit_request, - .init_hctx = nvme_fc_init_admin_hctx, - .timeout = nvme_fc_timeout, -}; - static int -nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl) +nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) { - u32 segs; - int error; + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; - nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + "set_queue_count failed: %d\n", ret); + return ret; + } - error = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], - NVME_FC_AQ_BLKMQ_DEPTH, - (NVME_FC_AQ_BLKMQ_DEPTH / 4)); - if (error) - return error; + ctrl->queue_count = opts->nr_io_queues + 1; + if (!opts->nr_io_queues) + return 0; - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", + opts->nr_io_queues); + + nvme_fc_init_io_queues(ctrl); + + memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); + ctrl->tag_set.ops = &nvme_fc_mq_ops; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; + ctrl->tag_set.reserved_tags = 1; /* fabric connect */ + ctrl->tag_set.numa_node = NUMA_NO_NODE; + ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + (SG_CHUNK_SIZE * sizeof(struct scatterlist)) + ctrl->lport->ops->fcprqst_priv_sz; - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (error) - goto out_free_queue; + ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + if (ret) + return ret; - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; + ctrl->ctrl.tagset = &ctrl->tag_set; + + ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); + if (IS_ERR(ctrl->ctrl.connect_q)) { + ret = PTR_ERR(ctrl->ctrl.connect_q); + goto out_free_tag_set; + } + + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_cleanup_blk_queue; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; + + return 0; + +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_cleanup_blk_queue: + nvme_stop_keep_alive(&ctrl->ctrl); + blk_cleanup_queue(ctrl->ctrl.connect_q); +out_free_tag_set: + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_fc_free_io_queues(ctrl); + + /* force put free routine to ignore io queues */ + ctrl->ctrl.tagset = NULL; + + return ret; +} + +static int +nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; + + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + "set_queue_count failed: %d\n", ret); + return ret; } - error = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, + /* check for io queues existing */ + if (ctrl->queue_count == 1) + return 0; + + dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n", + opts->nr_io_queues); + + nvme_fc_init_io_queues(ctrl); + + ret = blk_mq_reinit_tagset(&ctrl->tag_set); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; + + return 0; + +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_free_io_queues: + nvme_fc_free_io_queues(ctrl); + return ret; +} + +/* + * This routine restarts the controller on the host side, and + * on the link side, recreates the controller association. + */ +static int +nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + u32 segs; + int ret; + bool changed; + + ctrl->connect_attempts++; + + /* + * Create the admin queue + */ + + nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + + ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, NVME_FC_AQ_BLKMQ_DEPTH); - if (error) - goto out_cleanup_queue; + if (ret) + goto out_free_queue; - error = nvmf_connect_admin_queue(&ctrl->ctrl); - if (error) + ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], + NVME_FC_AQ_BLKMQ_DEPTH, + (NVME_FC_AQ_BLKMQ_DEPTH / 4)); + if (ret) goto out_delete_hw_queue; - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); - if (error) { + if (ctrl->ctrl.state != NVME_CTRL_NEW) + blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); + + ret = nvmf_connect_admin_queue(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* + * Check controller capabilities + * + * todo:- add code to check if ctrl attributes changed from + * prior connection values + */ + + ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + if (ret) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); - goto out_delete_hw_queue; + goto out_disconnect_admin_queue; } ctrl->ctrl.sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); - if (error) - goto out_delete_hw_queue; + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + if (ret) + goto out_disconnect_admin_queue; segs = min_t(u32, NVME_FC_MAX_SEGMENTS, ctrl->lport->ops->max_sgl_segments); ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); - error = nvme_init_identify(&ctrl->ctrl); - if (error) - goto out_delete_hw_queue; + ret = nvme_init_identify(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* sanity checks */ + + /* FC-NVME does not have other data in the capsule */ + if (ctrl->ctrl.icdoff) { + dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", + ctrl->ctrl.icdoff); + goto out_disconnect_admin_queue; + } nvme_start_keep_alive(&ctrl->ctrl); - return 0; + /* FC-NVME supports normal SGL Data Block Descriptors */ + if (opts->queue_size > ctrl->ctrl.maxcmd) { + /* warn if maxcmd is lower than queue_size */ + dev_warn(ctrl->ctrl.device, + "queue_size %zu > ctrl maxcmd %u, reducing " + "to queue_size\n", + opts->queue_size, ctrl->ctrl.maxcmd); + opts->queue_size = ctrl->ctrl.maxcmd; + } + + ret = nvme_fc_init_aen_ops(ctrl); + if (ret) + goto out_term_aen_ops; + + /* + * Create the io queues + */ + + if (ctrl->queue_count > 1) { + if (ctrl->ctrl.state == NVME_CTRL_NEW) + ret = nvme_fc_create_io_queues(ctrl); + else + ret = nvme_fc_reinit_io_queues(ctrl); + if (ret) + goto out_term_aen_ops; + } + + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + WARN_ON_ONCE(!changed); + + ctrl->connect_attempts = 0; + + kref_get(&ctrl->ctrl.kref); + + if (ctrl->queue_count > 1) { + nvme_start_queues(&ctrl->ctrl); + nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); + } + + return 0; /* Success */ + +out_term_aen_ops: + nvme_fc_term_aen_ops(ctrl); + nvme_stop_keep_alive(&ctrl->ctrl); +out_disconnect_admin_queue: + /* send a Disconnect(association) LS to fc-nvme target */ + nvme_fc_xmt_disconnect_assoc(ctrl); out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); -out_cleanup_queue: - blk_cleanup_queue(ctrl->ctrl.admin_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->admin_tag_set); out_free_queue: nvme_fc_free_queue(&ctrl->queues[0]); - return error; + + return ret; } /* - * This routine is used by the transport when it needs to find active - * io on a queue that is to be terminated. The transport uses - * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke - * this routine to kill them on a 1 by 1 basis. - * - * As FC allocates FC exchange for each io, the transport must contact - * the LLDD to terminate the exchange, thus releasing the FC exchange. - * After terminating the exchange the LLDD will call the transport's - * normal io done path for the request, but it will have an aborted - * status. The done path will return the io request back to the block - * layer with an error status. + * This routine stops operation of the controller on the host side. + * On the host os stack side: Admin and IO queues are stopped, + * outstanding ios on them terminated via FC ABTS. + * On the link side: the association is terminated. */ static void -nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { - struct nvme_ctrl *nctrl = data; - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); -int status; - - if (!blk_mq_request_started(req)) - return; + unsigned long flags; - /* this performs an ABTS-LS on the FC exchange for the io */ - status = __nvme_fc_abort_op(ctrl, op); - /* - * if __nvme_fc_abort_op failed: io wasn't active to abort - * consider it done. Assume completion path already completing - * in parallel - */ - if (status) - /* io wasn't active to abort consider it done */ - /* assume completion path already completing in parallel */ - return; -} + nvme_stop_keep_alive(&ctrl->ctrl); + spin_lock_irqsave(&ctrl->lock, flags); + ctrl->flags |= FCCTRL_TERMIO; + ctrl->iocnt = 0; + spin_unlock_irqrestore(&ctrl->lock, flags); -/* - * This routine stops operation of the controller. Admin and IO queues - * are stopped, outstanding ios on them terminated, and the nvme ctrl - * is shutdown. - */ -static void -nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) -{ /* * If io queues are present, stop them and terminate all outstanding * ios on them. As FC allocates FC exchange for each io, the @@ -2116,35 +2461,79 @@ nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) nvme_fc_terminate_exchange, &ctrl->ctrl); } - if (ctrl->ctrl.state == NVME_CTRL_LIVE) - nvme_shutdown_ctrl(&ctrl->ctrl); + /* + * Other transports, which don't have link-level contexts bound + * to sqe's, would try to gracefully shutdown the controller by + * writing the registers for shutdown and polling (call + * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially + * just aborted and we will wait on those contexts, and given + * there was no indication of how live the controlelr is on the + * link, don't send more io to create more contexts for the + * shutdown. Let the controller fail via keepalive failure if + * its still present. + */ /* - * now clean up the admin queue. Same thing as above. + * clean up the admin queue. Same thing as above. * use blk_mq_tagset_busy_itr() and the transport routine to * terminate the exchanges. */ blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); + + /* kill the aens as they are a separate path */ + nvme_fc_abort_aen_ops(ctrl); + + /* wait for all io that had to be aborted */ + spin_lock_irqsave(&ctrl->lock, flags); + while (ctrl->iocnt) { + spin_unlock_irqrestore(&ctrl->lock, flags); + msleep(1000); + spin_lock_irqsave(&ctrl->lock, flags); + } + ctrl->flags &= ~FCCTRL_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + + nvme_fc_term_aen_ops(ctrl); + + /* + * send a Disconnect(association) LS to fc-nvme target + * Note: could have been sent at top of process, but + * cleaner on link traffic if after the aborts complete. + * Note: if association doesn't exist, association_id will be 0 + */ + if (ctrl->association_id) + nvme_fc_xmt_disconnect_assoc(ctrl); + + if (ctrl->ctrl.tagset) { + nvme_fc_delete_hw_io_queues(ctrl); + nvme_fc_free_io_queues(ctrl); + } + + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); + nvme_fc_free_queue(&ctrl->queues[0]); } -/* - * Called to teardown an association. - * May be called with association fully in place or partially in place. - */ static void -__nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) +nvme_fc_delete_ctrl_work(struct work_struct *work) { - nvme_stop_keep_alive(&ctrl->ctrl); + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, delete_work); - /* stop and terminate ios on admin and io queues */ - nvme_fc_shutdown_ctrl(ctrl); + cancel_work_sync(&ctrl->reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + + /* + * kill the association on the link side. this will block + * waiting for io to terminate + */ + nvme_fc_delete_association(ctrl); /* * tear down the controller * This will result in the last reference on the nvme ctrl to - * expire, calling the transport nvme_fc_free_nvme_ctrl() callback. + * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback. * From there, the transport will tear down it's logical queues and * association. */ @@ -2153,15 +2542,6 @@ __nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) nvme_put_ctrl(&ctrl->ctrl); } -static void -nvme_fc_del_ctrl_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, delete_work); - - __nvme_fc_remove_ctrl(ctrl); -} - static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) { @@ -2181,25 +2561,85 @@ static int nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_rport *rport = ctrl->rport; - unsigned long flags; int ret; - spin_lock_irqsave(&rport->lock, flags); + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; + ret = __nvme_fc_del_ctrl(ctrl); - spin_unlock_irqrestore(&rport->lock, flags); - if (ret) - return ret; - flush_work(&ctrl->delete_work); + if (!ret) + flush_workqueue(nvme_fc_wq); - return 0; + nvme_put_ctrl(&ctrl->ctrl); + + return ret; +} + +static void +nvme_fc_reset_ctrl_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, reset_work); + int ret; + + /* will block will waiting for io to terminate */ + nvme_fc_delete_association(ctrl); + + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } + + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); } +/* + * called by the nvme core layer, for sysfs interface that requests + * a reset of the nvme controller + */ static int nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) { - return -EIO; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) + return -EBUSY; + + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + return -EBUSY; + + flush_work(&ctrl->reset_work); + + return 0; } static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { @@ -2210,95 +2650,75 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, .reset_ctrl = nvme_fc_reset_nvme_ctrl, - .free_ctrl = nvme_fc_free_nvme_ctrl, + .free_ctrl = nvme_fc_nvme_ctrl_freed, .submit_async_event = nvme_fc_submit_async_event, .delete_ctrl = nvme_fc_del_nvme_ctrl, .get_subsysnqn = nvmf_get_subsysnqn, .get_address = nvmf_get_address, }; -static int -nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) +static void +nvme_fc_connect_ctrl_work(struct work_struct *work) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret) { - dev_info(ctrl->ctrl.device, - "set_queue_count failed: %d\n", ret); - return ret; - } - - ctrl->queue_count = opts->nr_io_queues + 1; - if (!opts->nr_io_queues) - return 0; - - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - - nvme_fc_init_io_queues(ctrl); - - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_fc_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); - if (ret) - return ret; - - ctrl->ctrl.tagset = &ctrl->tag_set; - - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); - goto out_free_tag_set; - } - - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); - if (ret) - goto out_cleanup_blk_queue; + struct nvme_fc_ctrl *ctrl = + container_of(to_delayed_work(work), + struct nvme_fc_ctrl, connect_work); - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); - if (ret) - goto out_delete_hw_queues; + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } - return 0; + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } -out_delete_hw_queues: - nvme_fc_delete_hw_io_queues(ctrl); -out_cleanup_blk_queue: - nvme_stop_keep_alive(&ctrl->ctrl); - blk_cleanup_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_fc_free_io_queues(ctrl); + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reconnect complete\n", + ctrl->cnum); +} - /* force put free routine to ignore io queues */ - ctrl->ctrl.tagset = NULL; - return ret; -} +static const struct blk_mq_ops nvme_fc_admin_mq_ops = { + .queue_rq = nvme_fc_queue_rq, + .complete = nvme_fc_complete_rq, + .init_request = nvme_fc_init_admin_request, + .exit_request = nvme_fc_exit_request, + .reinit_request = nvme_fc_reinit_request, + .init_hctx = nvme_fc_init_admin_hctx, + .timeout = nvme_fc_timeout, +}; static struct nvme_ctrl * -__nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, +nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) { struct nvme_fc_ctrl *ctrl; unsigned long flags; int ret, idx; - bool changed; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { @@ -2314,21 +2734,18 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->ctrl_list); - INIT_LIST_HEAD(&ctrl->ls_req_list); ctrl->lport = lport; ctrl->rport = rport; ctrl->dev = lport->dev; - ctrl->state = FCCTRL_INIT; ctrl->cnum = idx; - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); - if (ret) - goto out_free_ida; - get_device(ctrl->dev); kref_init(&ctrl->ref); - INIT_WORK(&ctrl->delete_work, nvme_fc_del_ctrl_work); + INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); + INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); + INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + ctrl->reconnect_delay = opts->reconnect_delay; spin_lock_init(&ctrl->lock); /* io queue count */ @@ -2345,87 +2762,87 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), GFP_KERNEL); if (!ctrl->queues) - goto out_uninit_ctrl; - - ret = nvme_fc_configure_admin_queue(ctrl); - if (ret) - goto out_uninit_ctrl; - - /* sanity checks */ - - /* FC-NVME does not have other data in the capsule */ - if (ctrl->ctrl.icdoff) { - dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", - ctrl->ctrl.icdoff); - goto out_remove_admin_queue; - } - - /* FC-NVME supports normal SGL Data Block Descriptors */ + goto out_free_ida; - if (opts->queue_size > ctrl->ctrl.maxcmd) { - /* warn if maxcmd is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl maxcmd %u, reducing " - "to queue_size\n", - opts->queue_size, ctrl->ctrl.maxcmd); - opts->queue_size = ctrl->ctrl.maxcmd; - } + memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); + ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; + ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; + ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ + ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + (SG_CHUNK_SIZE * + sizeof(struct scatterlist)) + + ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.nr_hw_queues = 1; + ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - ret = nvme_fc_init_aen_ops(ctrl); + ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (ret) - goto out_exit_aen_ops; + goto out_free_queues; - if (ctrl->queue_count > 1) { - ret = nvme_fc_create_io_queues(ctrl); - if (ret) - goto out_exit_aen_ops; + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + ret = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_admin_tag_set; } - spin_lock_irqsave(&ctrl->lock, flags); - ctrl->state = FCCTRL_ACTIVE; - spin_unlock_irqrestore(&ctrl->lock, flags); - - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - WARN_ON_ONCE(!changed); + /* + * Would have been nice to init io queues tag set as well. + * However, we require interaction from the controller + * for max io queue count before we can do so. + * Defer this to the connect path. + */ - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn); + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); + if (ret) + goto out_cleanup_admin_q; - kref_get(&ctrl->ctrl.kref); + /* at this point, teardown path changes to ref counting on nvme ctrl */ spin_lock_irqsave(&rport->lock, flags); list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (opts->nr_io_queues) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); + ret = nvme_fc_create_association(ctrl); + if (ret) { + ctrl->ctrl.opts = NULL; + /* initiate nvme ctrl ref counting teardown */ + nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); + + /* as we're past the point where we transition to the ref + * counting teardown path, if we return a bad pointer here, + * the calling routine, thinking it's prior to the + * transition, will do an rport put. Since the teardown + * path also does a rport put, we do an extra get here to + * so proper order/teardown happens. + */ + nvme_fc_rport_get(rport); + + if (ret > 0) + ret = -EIO; + return ERR_PTR(ret); } - return &ctrl->ctrl; + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", + ctrl->cnum, ctrl->ctrl.opts->subsysnqn); -out_exit_aen_ops: - nvme_fc_exit_aen_ops(ctrl); -out_remove_admin_queue: - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); - nvme_stop_keep_alive(&ctrl->ctrl); - nvme_fc_destroy_admin_queue(ctrl); -out_uninit_ctrl: - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); - if (ret > 0) - ret = -EIO; - /* exit via here will follow ctlr ref point callbacks to free */ - return ERR_PTR(ret); + return &ctrl->ctrl; +out_cleanup_admin_q: + blk_cleanup_queue(ctrl->ctrl.admin_q); +out_free_admin_tag_set: + blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_free_queues: + kfree(ctrl->queues); out_free_ida: + put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); out_free_ctrl: kfree(ctrl); out_fail: - nvme_fc_rport_put(rport); /* exit via here doesn't follow ctlr ref points */ return ERR_PTR(ret); } @@ -2497,6 +2914,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + struct nvme_ctrl *ctrl; struct nvmet_fc_traddr laddr = { 0L, 0L }; struct nvmet_fc_traddr raddr = { 0L, 0L }; unsigned long flags; @@ -2528,7 +2946,10 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) spin_unlock_irqrestore(&nvme_fc_lock, flags); - return __nvme_fc_create_ctrl(dev, opts, lport, rport); + ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); + if (IS_ERR(ctrl)) + nvme_fc_rport_put(rport); + return ctrl; } } spin_unlock_irqrestore(&nvme_fc_lock, flags); @@ -2546,11 +2967,20 @@ static struct nvmf_transport_ops nvme_fc_transport = { static int __init nvme_fc_init_module(void) { + int ret; + nvme_fc_wq = create_workqueue("nvme_fc_wq"); if (!nvme_fc_wq) return -ENOMEM; - return nvmf_register_transport(&nvme_fc_transport); + ret = nvmf_register_transport(&nvme_fc_transport); + if (ret) + goto err; + + return 0; +err: + destroy_workqueue(nvme_fc_wq); + return ret; } static void __exit nvme_fc_exit_module(void) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 21cac8523bd8..8c4adac6fafc 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -241,9 +241,9 @@ static inline void _nvme_nvm_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); - BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128); + BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096); - BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512); + BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); } static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) @@ -324,7 +324,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, - sizeof(struct nvme_nvm_addr_format)); + sizeof(struct nvm_addr_format)); ret = init_grps(nvm_id, nvme_nvm_id); out: @@ -483,8 +483,8 @@ static void nvme_nvm_end_io(struct request *rq, int error) { struct nvm_rq *rqd = rq->end_io_data; - rqd->ppa_status = nvme_req(rq)->result.u64; - rqd->error = error; + rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64); + rqd->error = nvme_req(rq)->status; nvm_end_io(rqd); kfree(nvme_req(rq)->cmd); @@ -503,6 +503,8 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) if (!cmd) return -ENOMEM; + nvme_nvm_rqtocmd(rq, rqd, ns, cmd); + rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); if (IS_ERR(rq)) { kfree(cmd); @@ -510,14 +512,12 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) } rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - rq->ioprio = bio_prio(bio); - if (bio_has_data(bio)) - rq->nr_phys_segments = bio_phys_segments(q, bio); - - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; - - nvme_nvm_rqtocmd(rq, rqd, ns, cmd); + if (bio) { + blk_init_request_from_bio(rq, bio); + } else { + rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); + rq->__data_len = 0; + } rq->end_io_data = rqd; @@ -526,21 +526,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) return 0; } -static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd) -{ - struct request_queue *q = dev->q; - struct nvme_ns *ns = q->queuedata; - struct nvme_nvm_command c = {}; - - c.erase.opcode = NVM_OP_ERASE; - c.erase.nsid = cpu_to_le32(ns->ns_id); - c.erase.spba = cpu_to_le64(rqd->ppa_addr.ppa); - c.erase.length = cpu_to_le16(rqd->nr_ppas - 1); - c.erase.control = cpu_to_le16(rqd->flags); - - return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0); -} - static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name) { struct nvme_ns *ns = nvmdev->q->queuedata; @@ -576,7 +561,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .set_bb_tbl = nvme_nvm_set_bb_tbl, .submit_io = nvme_nvm_submit_io, - .erase_block = nvme_nvm_erase_block, .create_dma_pool = nvme_nvm_create_dma_pool, .destroy_dma_pool = nvme_nvm_destroy_dma_pool, @@ -611,7 +595,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q, __le64 *metadata = NULL; dma_addr_t metadata_dma; DECLARE_COMPLETION_ONSTACK(wait); - int ret; + int ret = 0; rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0, NVME_QID_ANY); @@ -681,9 +665,12 @@ submit: wait_for_completion_io(&wait); - ret = nvme_error_status(rq->errors); + if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else if (nvme_req(rq)->status & 0x7ff) + ret = -EIO; if (result) - *result = rq->errors & 0x7ff; + *result = nvme_req(rq)->status & 0x7ff; if (status) *status = le64_to_cpu(nvme_req(rq)->result.u64); @@ -766,7 +753,7 @@ static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); /* cdw11-12 */ c.ph_rw.length = cpu_to_le16(vcmd.nppas); - c.ph_rw.control = cpu_to_le32(vcmd.control); + c.ph_rw.control = cpu_to_le16(vcmd.control); c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13); c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14); c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15); @@ -809,6 +796,8 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) struct request_queue *q = ns->queue; struct nvm_dev *dev; + _nvme_nvm_check_size(); + dev = nvm_alloc_dev(node); if (!dev) return -ENOMEM; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2aa20e3e5675..29c708ca9621 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -21,16 +21,6 @@ #include <linux/lightnvm.h> #include <linux/sed-opal.h> -enum { - /* - * Driver internal status code for commands that were cancelled due - * to timeouts or controller shutdown. The value is negative so - * that it a) doesn't overlap with the unsigned hardware error codes, - * and b) can easily be tested for. - */ - NVME_SC_CANCELLED = -EINTR, -}; - extern unsigned char nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -43,8 +33,6 @@ extern unsigned char shutdown_timeout; #define NVME_DEFAULT_KATO 5 #define NVME_KATO_GRACE 10 -extern unsigned int nvme_max_retries; - enum { NVME_NS_LBA = 0, NVME_NS_LIGHTNVM = 1, @@ -68,10 +56,10 @@ enum nvme_quirks { NVME_QUIRK_IDENTIFY_CNS = (1 << 1), /* - * The controller deterministically returns O's on reads to discarded - * logical blocks. + * The controller deterministically returns O's on reads to + * logical blocks that deallocate was called on. */ - NVME_QUIRK_DISCARD_ZEROES = (1 << 2), + NVME_QUIRK_DEALLOCATE_ZEROES = (1 << 2), /* * The controller needs a delay before starts checking the device @@ -83,6 +71,11 @@ enum nvme_quirks { * APST should not be used. */ NVME_QUIRK_NO_APST = (1 << 4), + + /* + * The deepest sleep state should not be used. + */ + NVME_QUIRK_NO_DEEPEST_PS = (1 << 5), }; /* @@ -92,6 +85,13 @@ enum nvme_quirks { struct nvme_request { struct nvme_command *cmd; union nvme_result result; + u8 retries; + u8 flags; + u16 status; +}; + +enum { + NVME_REQ_CANCELLED = (1 << 0), }; static inline struct nvme_request *nvme_req(struct request *req) @@ -249,25 +249,17 @@ static inline void nvme_cleanup_cmd(struct request *req) } } -static inline int nvme_error_status(u16 status) +static inline void nvme_end_request(struct request *req, __le16 status, + union nvme_result result) { - switch (status & 0x7ff) { - case NVME_SC_SUCCESS: - return 0; - case NVME_SC_CAP_EXCEEDED: - return -ENOSPC; - default: - return -EIO; - } -} + struct nvme_request *rq = nvme_req(req); -static inline bool nvme_req_needs_retry(struct request *req, u16 status) -{ - return !(status & NVME_SC_DNR || blk_noretry_request(req)) && - (jiffies - req->start_time) < req->timeout && - req->retries < nvme_max_retries; + rq->status = le16_to_cpu(status) >> 1; + rq->result = result; + blk_mq_complete_request(req); } +void nvme_complete_rq(struct request *req); void nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); @@ -302,7 +294,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid); -void nvme_requeue_req(struct request *req); int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 925997127a6b..fed803232edc 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -19,6 +19,7 @@ #include <linux/blk-mq-pci.h> #include <linux/cpu.h> #include <linux/delay.h> +#include <linux/dmi.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/genhd.h> @@ -103,8 +104,22 @@ struct nvme_dev { u32 cmbloc; struct nvme_ctrl ctrl; struct completion ioq_wait; + u32 *dbbuf_dbs; + dma_addr_t dbbuf_dbs_dma_addr; + u32 *dbbuf_eis; + dma_addr_t dbbuf_eis_dma_addr; }; +static inline unsigned int sq_idx(unsigned int qid, u32 stride) +{ + return qid * 2 * stride; +} + +static inline unsigned int cq_idx(unsigned int qid, u32 stride) +{ + return (qid * 2 + 1) * stride; +} + static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) { return container_of(ctrl, struct nvme_dev, ctrl); @@ -132,6 +147,10 @@ struct nvme_queue { u16 qid; u8 cq_phase; u8 cqe_seen; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; }; /* @@ -170,6 +189,112 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); + BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); +} + +static inline unsigned int nvme_dbbuf_size(u32 stride) +{ + return ((num_possible_cpus() + 1) * 8 * stride); +} + +static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) + return 0; + + dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_dbs_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_dbs) + return -ENOMEM; + dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_eis_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_eis) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + return -ENOMEM; + } + + return 0; +} + +static void nvme_dbbuf_dma_free(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + } + if (dev->dbbuf_eis) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_eis, dev->dbbuf_eis_dma_addr); + dev->dbbuf_eis = NULL; + } +} + +static void nvme_dbbuf_init(struct nvme_dev *dev, + struct nvme_queue *nvmeq, int qid) +{ + if (!dev->dbbuf_dbs || !qid) + return; + + nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)]; +} + +static void nvme_dbbuf_set(struct nvme_dev *dev) +{ + struct nvme_command c; + + if (!dev->dbbuf_dbs) + return; + + memset(&c, 0, sizeof(c)); + c.dbbuf.opcode = nvme_admin_dbbuf; + c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr); + c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); + + if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) { + dev_warn(dev->dev, "unable to set dbbuf\n"); + /* Free memory and continue on */ + nvme_dbbuf_dma_free(dev); + } +} + +static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old) +{ + return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old); +} + +/* Update dbbuf and return true if an MMIO is required */ +static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, + volatile u32 *dbbuf_ei) +{ + if (dbbuf_db) { + u16 old_value; + + /* + * Ensure that the queue is written before updating + * the doorbell in memory + */ + wmb(); + + old_value = *dbbuf_db; + *dbbuf_db = value; + + if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) + return false; + } + + return true; } /* @@ -225,11 +350,11 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i nvmeq->tags = NULL; } -static int nvme_admin_init_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_admin_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = dev->queues[0]; @@ -252,11 +377,10 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static int nvme_init_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, + unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_dev *dev = data; + struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; @@ -291,7 +415,9 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, if (++tail == nvmeq->q_depth) tail = 0; - writel(tail, nvmeq->q_db); + if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db, + nvmeq->dbbuf_sq_ei)) + writel(tail, nvmeq->q_db); nvmeq->sq_tail = tail; } @@ -320,10 +446,6 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) iod->nents = 0; iod->length = size; - if (!(rq->rq_flags & RQF_DONTPREP)) { - rq->retries = 0; - rq->rq_flags |= RQF_DONTPREP; - } return BLK_MQ_RQ_QUEUE_OK; } @@ -622,34 +744,12 @@ out_free_cmd: return ret; } -static void nvme_complete_rq(struct request *req) +static void nvme_pci_complete_rq(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_dev *dev = iod->nvmeq->dev; - int error = 0; - - nvme_unmap_data(dev, req); - - if (unlikely(req->errors)) { - if (nvme_req_needs_retry(req, req->errors)) { - req->retries++; - nvme_requeue_req(req); - return; - } - - if (blk_rq_is_passthrough(req)) - error = req->errors; - else - error = nvme_error_status(req->errors); - } - - if (unlikely(iod->aborted)) { - dev_warn(dev->ctrl.device, - "completing aborted command with status: %04x\n", - req->errors); - } - blk_mq_end_request(req, error); + nvme_unmap_data(iod->nvmeq->dev, req); + nvme_complete_rq(req); } /* We read the CQE phase first to check if the rest of the entry is valid */ @@ -699,15 +799,16 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); - nvme_req(req)->result = cqe.result; - blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); + nvme_end_request(req, cqe.status, cqe.result); } if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) return; if (likely(nvmeq->cq_vector >= 0)) - writel(head, nvmeq->q_db + nvmeq->dev->db_stride); + if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db, + nvmeq->dbbuf_cq_ei)) + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; @@ -739,10 +840,8 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_NONE; } -static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) { - struct nvme_queue *nvmeq = hctx->driver_data; - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { spin_lock_irq(&nvmeq->q_lock); __nvme_process_cq(nvmeq, &tag); @@ -755,6 +854,13 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) return 0; } +static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + return __nvme_poll(nvmeq, tag); +} + static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) { struct nvme_dev *dev = to_nvme_dev(ctrl); @@ -806,7 +912,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq) { struct nvme_command c; - int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; + int flags = NVME_QUEUE_PHYS_CONTIG; /* * Note: we (ab)use the fact the the prp fields survive if no data @@ -837,9 +943,9 @@ static void abort_endio(struct request *req, int error) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = iod->nvmeq; - u16 status = req->errors; - dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status); + dev_warn(nvmeq->dev->ctrl.device, + "Abort status: 0x%x", nvme_req(req)->status); atomic_inc(&nvmeq->dev->ctrl.abort_limit); blk_mq_free_request(req); } @@ -853,6 +959,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_command cmd; /* + * Did we miss an interrupt? + */ + if (__nvme_poll(nvmeq, req->tag)) { + dev_warn(dev->ctrl.device, + "I/O %d QID %d timeout, completion polled\n", + req->tag, nvmeq->qid); + return BLK_EH_HANDLED; + } + + /* * Shutdown immediately if controller times out while starting. The * reset work will see the pci device disabled when it gets the forced * cancellation error. All outstanding requests are completed on @@ -863,7 +979,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); - req->errors = NVME_SC_CANCELLED; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; return BLK_EH_HANDLED; } @@ -883,7 +999,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * Mark the request as handled, since the inline shutdown * forces all outstanding requests to complete. */ - req->errors = NVME_SC_CANCELLED; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; return BLK_EH_HANDLED; } @@ -1093,6 +1209,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); + nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; spin_unlock_irq(&nvmeq->q_lock); } @@ -1125,18 +1242,18 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) return result; } -static struct blk_mq_ops nvme_mq_admin_ops = { +static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, - .complete = nvme_complete_rq, + .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; -static struct blk_mq_ops nvme_mq_ops = { +static const struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, - .complete = nvme_complete_rq, + .complete = nvme_pci_complete_rq, .init_hctx = nvme_init_hctx, .init_request = nvme_init_request, .map_queues = nvme_pci_map_queues, @@ -1565,6 +1682,8 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; dev->ctrl.tagset = &dev->tagset; + + nvme_dbbuf_set(dev); } else { blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); @@ -1751,6 +1870,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); + nvme_dbbuf_dma_free(dev); put_device(dev->dev); if (dev->tagset.tags) blk_mq_free_tag_set(&dev->tagset); @@ -1818,6 +1938,13 @@ static void nvme_reset_work(struct work_struct *work) dev->ctrl.opal_dev = NULL; } + if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) { + result = nvme_dbbuf_dma_alloc(dev); + if (result) + dev_warn(dev->dev, + "unable to allocate dma for dbbuf\n"); + } + result = nvme_setup_io_queues(dev); if (result) goto out; @@ -1939,10 +2066,31 @@ static int nvme_dev_map(struct nvme_dev *dev) return -ENODEV; } +static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) +{ + if (pdev->vendor == 0x144d && pdev->device == 0xa802) { + /* + * Several Samsung devices seem to drop off the PCIe bus + * randomly when APST is on and uses the deepest sleep state. + * This has been observed on a Samsung "SM951 NVMe SAMSUNG + * 256GB", a "PM951 NVMe SAMSUNG 512GB", and a "Samsung SSD + * 950 PRO 256GB", but it seems to be restricted to two Dell + * laptops. + */ + if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.") && + (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") || + dmi_match(DMI_PRODUCT_NAME, "Precision 5510"))) + return NVME_QUIRK_NO_DEEPEST_PS; + } + + return 0; +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int node, result = -ENOMEM; struct nvme_dev *dev; + unsigned long quirks = id->driver_data; node = dev_to_node(&pdev->dev); if (node == NUMA_NO_NODE) @@ -1974,8 +2122,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto put_pci; + quirks |= check_dell_samsung_bug(pdev); + result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, - id->driver_data); + quirks); if (result) goto release_pools; @@ -2131,13 +2281,13 @@ static const struct pci_error_handlers nvme_err_handler = { static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0953), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a53), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DISCARD_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 779f516e7a4e..dd1c6deef82f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -34,7 +34,7 @@ #include "fabrics.h" -#define NVME_RDMA_CONNECT_TIMEOUT_MS 1000 /* 1 second */ +#define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */ #define NVME_RDMA_MAX_SEGMENT_SIZE 0xffffff /* 24-bit SGL field */ @@ -118,7 +118,6 @@ struct nvme_rdma_ctrl { struct nvme_rdma_qe async_event_sqe; - int reconnect_delay; struct delayed_work reconnect_work; struct list_head list; @@ -129,14 +128,8 @@ struct nvme_rdma_ctrl { u64 cap; u32 max_fr_pages; - union { - struct sockaddr addr; - struct sockaddr_in addr_in; - }; - union { - struct sockaddr src_addr; - struct sockaddr_in src_addr_in; - }; + struct sockaddr_storage addr; + struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; }; @@ -322,16 +315,16 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl, DMA_TO_DEVICE); } -static void nvme_rdma_exit_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx) +static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx) { - return __nvme_rdma_exit_request(data, rq, hctx_idx + 1); + return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1); } -static void nvme_rdma_exit_admin_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx) +static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx) { - return __nvme_rdma_exit_request(data, rq, 0); + return __nvme_rdma_exit_request(set->driver_data, rq, 0); } static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl, @@ -343,8 +336,6 @@ static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl, struct ib_device *ibdev = dev->dev; int ret; - BUG_ON(queue_idx >= ctrl->queue_count); - ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (ret) @@ -367,18 +358,18 @@ out_free_qe: return -ENOMEM; } -static int nvme_rdma_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_rdma_init_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx, + unsigned int numa_node) { - return __nvme_rdma_init_request(data, rq, hctx_idx + 1); + return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1); } -static int nvme_rdma_init_admin_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set, + struct request *rq, unsigned int hctx_idx, + unsigned int numa_node) { - return __nvme_rdma_init_request(data, rq, 0); + return __nvme_rdma_init_request(set->driver_data, rq, 0); } static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, @@ -571,11 +562,12 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, return PTR_ERR(queue->cm_id); } - queue->cm_error = -ETIMEDOUT; if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) - src_addr = &ctrl->src_addr; + src_addr = (struct sockaddr *)&ctrl->src_addr; - ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr, + queue->cm_error = -ETIMEDOUT; + ret = rdma_resolve_addr(queue->cm_id, src_addr, + (struct sockaddr *)&ctrl->addr, NVME_RDMA_CONNECT_TIMEOUT_MS); if (ret) { dev_info(ctrl->ctrl.device, @@ -652,8 +644,22 @@ out_free_queues: static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) { + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; int i, ret; + nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); + if (ret) + return ret; + + ctrl->queue_count = nr_io_queues + 1; + if (ctrl->queue_count < 2) + return 0; + + dev_info(ctrl->ctrl.device, + "creating %d I/O queues.\n", nr_io_queues); + for (i = 1; i < ctrl->queue_count; i++) { ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.opts->queue_size); @@ -700,6 +706,26 @@ free_ctrl: kfree(ctrl); } +static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) +{ + /* If we are resetting/deleting then do nothing */ + if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { + WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || + ctrl->ctrl.state == NVME_CTRL_LIVE); + return; + } + + if (nvmf_should_reconnect(&ctrl->ctrl)) { + dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", + ctrl->ctrl.opts->reconnect_delay); + queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, + ctrl->ctrl.opts->reconnect_delay * HZ); + } else { + dev_info(ctrl->ctrl.device, "Removing controller...\n"); + queue_work(nvme_rdma_wq, &ctrl->delete_work); + } +} + static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), @@ -707,6 +733,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) bool changed; int ret; + ++ctrl->ctrl.opts->nr_reconnects; + if (ctrl->queue_count > 1) { nvme_rdma_free_io_queues(ctrl); @@ -751,6 +779,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); + ctrl->ctrl.opts->nr_reconnects = 0; if (ctrl->queue_count > 1) { nvme_start_queues(&ctrl->ctrl); @@ -765,13 +794,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) stop_admin_q: blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); requeue: - /* Make sure we are not resetting/deleting */ - if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) { - dev_info(ctrl->ctrl.device, - "Failed reconnect attempt, requeueing...\n"); - queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, - ctrl->reconnect_delay * HZ); - } + dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", + ctrl->ctrl.opts->nr_reconnects); + nvme_rdma_reconnect_or_remove(ctrl); } static void nvme_rdma_error_recovery_work(struct work_struct *work) @@ -798,11 +823,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); - dev_info(ctrl->ctrl.device, "reconnecting in %d seconds\n", - ctrl->reconnect_delay); - - queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, - ctrl->reconnect_delay * HZ); + nvme_rdma_reconnect_or_remove(ctrl); } static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) @@ -1157,8 +1178,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - req->req.result = cqe->result; - blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); + nvme_end_request(rq, cqe->status, cqe->result); return ret; } @@ -1395,7 +1415,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved) nvme_rdma_error_recovery(req->queue->ctrl); /* fail with DNR on cmd timeout */ - rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR; + nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; return BLK_EH_HANDLED; } @@ -1497,27 +1517,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) static void nvme_rdma_complete_rq(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_rdma_queue *queue = req->queue; - int error = 0; - - nvme_rdma_unmap_data(queue, rq); - - if (unlikely(rq->errors)) { - if (nvme_req_needs_retry(rq, rq->errors)) { - nvme_requeue_req(rq); - return; - } - if (blk_rq_is_passthrough(rq)) - error = rq->errors; - else - error = nvme_error_status(rq->errors); - } - - blk_mq_end_request(rq, error); + nvme_rdma_unmap_data(req->queue, rq); + nvme_complete_rq(rq); } -static struct blk_mq_ops nvme_rdma_mq_ops = { +static const struct blk_mq_ops nvme_rdma_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, .init_request = nvme_rdma_init_request, @@ -1528,7 +1533,7 @@ static struct blk_mq_ops nvme_rdma_mq_ops = { .timeout = nvme_rdma_timeout, }; -static struct blk_mq_ops nvme_rdma_admin_mq_ops = { +static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, .init_request = nvme_rdma_init_admin_request, @@ -1594,7 +1599,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (error) @@ -1791,20 +1796,8 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret) - return ret; - - ctrl->queue_count = opts->nr_io_queues + 1; - if (ctrl->queue_count < 2) - return 0; - - dev_info(ctrl->ctrl.device, - "creating %d I/O queues.\n", opts->nr_io_queues); - ret = nvme_rdma_init_io_queues(ctrl); if (ret) return ret; @@ -1857,27 +1850,13 @@ out_free_io_queues: return ret; } -static int nvme_rdma_parse_ipaddr(struct sockaddr_in *in_addr, char *p) -{ - u8 *addr = (u8 *)&in_addr->sin_addr.s_addr; - size_t buflen = strlen(p); - - /* XXX: handle IPv6 addresses */ - - if (buflen > INET_ADDRSTRLEN) - return -EINVAL; - if (in4_pton(p, buflen, addr, '\0', NULL) == 0) - return -EINVAL; - in_addr->sin_family = AF_INET; - return 0; -} - static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_rdma_ctrl *ctrl; int ret; bool changed; + char *port; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) @@ -1885,40 +1864,33 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - ret = nvme_rdma_parse_ipaddr(&ctrl->addr_in, opts->traddr); + if (opts->mask & NVMF_OPT_TRSVCID) + port = opts->trsvcid; + else + port = __stringify(NVME_RDMA_IP_PORT); + + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->traddr, port, &ctrl->addr); if (ret) { - pr_err("malformed IP address passed: %s\n", opts->traddr); + pr_err("malformed address passed: %s:%s\n", opts->traddr, port); goto out_free_ctrl; } if (opts->mask & NVMF_OPT_HOST_TRADDR) { - ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in, - opts->host_traddr); + ret = inet_pton_with_scope(&init_net, AF_UNSPEC, + opts->host_traddr, NULL, &ctrl->src_addr); if (ret) { - pr_err("malformed src IP address passed: %s\n", + pr_err("malformed src address passed: %s\n", opts->host_traddr); goto out_free_ctrl; } } - if (opts->mask & NVMF_OPT_TRSVCID) { - u16 port; - - ret = kstrtou16(opts->trsvcid, 0, &port); - if (ret) - goto out_free_ctrl; - - ctrl->addr_in.sin_port = cpu_to_be16(port); - } else { - ctrl->addr_in.sin_port = cpu_to_be16(NVME_RDMA_IP_PORT); - } - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, 0 /* no quirks, we're perfect! */); if (ret) goto out_free_ctrl; - ctrl->reconnect_delay = opts->reconnect_delay; INIT_DELAYED_WORK(&ctrl->reconnect_work, nvme_rdma_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); @@ -1977,7 +1949,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); kref_get(&ctrl->ctrl.kref); @@ -2013,7 +1985,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .name = "rdma", .required_opts = NVMF_OPT_TRADDR, .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | - NVMF_OPT_HOST_TRADDR, + NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_rdma_create_ctrl, }; @@ -2055,12 +2027,20 @@ static int __init nvme_rdma_init_module(void) return -ENOMEM; ret = ib_register_client(&nvme_rdma_ib_client); - if (ret) { - destroy_workqueue(nvme_rdma_wq); - return ret; - } + if (ret) + goto err_destroy_wq; + + ret = nvmf_register_transport(&nvme_rdma_transport); + if (ret) + goto err_unreg_client; + + return 0; - return nvmf_register_transport(&nvme_rdma_transport); +err_unreg_client: + ib_unregister_client(&nvme_rdma_ib_client); +err_destroy_wq: + destroy_workqueue(nvme_rdma_wq); + return ret; } static void __exit nvme_rdma_cleanup_module(void) diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index f49ae2758bb7..1f7671e631dd 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -1609,7 +1609,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read); u16 control; - u32 max_blocks = queue_max_hw_sectors(ns->queue); + u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9); num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks); @@ -2138,15 +2138,6 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, return res; } -static int nvme_trans_security_protocol(struct nvme_ns *ns, - struct sg_io_hdr *hdr, - u8 *cmd) -{ - return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); -} - static int nvme_trans_synchronize_cache(struct nvme_ns *ns, struct sg_io_hdr *hdr) { @@ -2414,10 +2405,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) case REQUEST_SENSE: retcode = nvme_trans_request_sense(ns, hdr, cmd); break; - case SECURITY_PROTOCOL_IN: - case SECURITY_PROTOCOL_OUT: - retcode = nvme_trans_security_protocol(ns, hdr, cmd); - break; case SYNCHRONIZE_CACHE: retcode = nvme_trans_synchronize_cache(ns, hdr); break; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index a7bcff45f437..ff1f97006322 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req, u16 status; WARN_ON(req == NULL || slog == NULL); - if (req->cmd->get_log_page.nsid == 0xFFFFFFFF) + if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF)) status = nvmet_get_smart_log_all(req, slog); else status = nvmet_get_smart_log_nsid(req, slog); @@ -121,7 +121,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) } switch (req->cmd->get_log_page.lid) { - case 0x01: + case NVME_LOG_ERROR: /* * We currently never set the More bit in the status field, * so all error log entries are invalid and can be zeroed out. @@ -129,7 +129,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) * mandatory log page. */ break; - case 0x02: + case NVME_LOG_SMART: /* * XXX: fill out actual smart log * @@ -149,7 +149,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) goto err; } break; - case 0x03: + case NVME_LOG_FW_SLOT: /* * We only support a single firmware slot which always is * active, so we can zero out the whole firmware slot log and @@ -480,31 +480,25 @@ static void nvmet_execute_keep_alive(struct nvmet_req *req) nvmet_req_complete(req, 0); } -int nvmet_parse_admin_cmd(struct nvmet_req *req) +u16 nvmet_parse_admin_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; + u16 ret; req->ns = NULL; - if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("nvmet: got admin cmd %d while CC.EN == 0\n", - cmd->common.opcode); - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got admin cmd %d while CSTS.RDY == 0\n", - cmd->common.opcode); - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) + return ret; switch (cmd->common.opcode) { case nvme_admin_get_log_page: req->data_len = nvmet_get_log_page_len(cmd); switch (cmd->get_log_page.lid) { - case 0x01: - case 0x02: - case 0x03: + case NVME_LOG_ERROR: + case NVME_LOG_SMART: + case NVME_LOG_FW_SLOT: req->execute = nvmet_execute_get_log_page; return 0; } @@ -545,6 +539,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req) return 0; } - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 11b0a0a5f661..cf90713043da 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -273,8 +273,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns) ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, NULL); if (IS_ERR(ns->bdev)) { - pr_err("nvmet: failed to open block device %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->bdev)); + pr_err("failed to open block device %s: (%ld)\n", + ns->device_path, PTR_ERR(ns->bdev)); ret = PTR_ERR(ns->bdev); ns->bdev = NULL; goto out_unlock; @@ -425,6 +425,13 @@ void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, ctrl->sqs[qid] = sq; } +static void nvmet_confirm_sq(struct percpu_ref *ref) +{ + struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); + + complete(&sq->confirm_done); +} + void nvmet_sq_destroy(struct nvmet_sq *sq) { /* @@ -433,7 +440,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) */ if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq) nvmet_async_events_free(sq->ctrl); - percpu_ref_kill(&sq->ref); + percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); + wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); percpu_ref_exit(&sq->ref); @@ -461,6 +469,7 @@ int nvmet_sq_init(struct nvmet_sq *sq) return ret; } init_completion(&sq->free_done); + init_completion(&sq->confirm_done); return 0; } @@ -652,6 +661,23 @@ out: return status; } +u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) +{ + if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { + pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n", + cmd->common.opcode, req->sq->qid); + return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + } + + if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { + pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n", + cmd->common.opcode, req->sq->qid); + req->ns = NULL; + return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + } + return 0; +} + static bool __nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) { diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index af8aabf05335..1aaf597e81fc 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -159,15 +159,15 @@ out: nvmet_req_complete(req, status); } -int nvmet_parse_discovery_cmd(struct nvmet_req *req) +u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; req->ns = NULL; if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got cmd %d while not ready\n", - cmd->common.opcode); + pr_err("got cmd %d while not ready\n", + cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } @@ -180,8 +180,8 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) req->execute = nvmet_execute_get_disc_log_page; return 0; default: - pr_err("nvmet: unsupported get_log_page lid %d\n", - cmd->get_log_page.lid); + pr_err("unsupported get_log_page lid %d\n", + cmd->get_log_page.lid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } case nvme_admin_identify: @@ -192,17 +192,16 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) nvmet_execute_identify_disc_ctrl; return 0; default: - pr_err("nvmet: unsupported identify cns %d\n", - cmd->identify.cns); + pr_err("unsupported identify cns %d\n", + cmd->identify.cns); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } default: - pr_err("nvmet: unsupported cmd %d\n", - cmd->common.opcode); + pr_err("unsupported cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d\n", cmd->common.opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 8bd022af3df6..3cc17269504b 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -73,7 +73,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) nvmet_req_complete(req, status); } -int nvmet_parse_fabrics_cmd(struct nvmet_req *req) +u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; @@ -122,7 +122,15 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -143,7 +151,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) } status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, - le32_to_cpu(c->kato), &ctrl); + le32_to_cpu(c->kato), &ctrl); if (status) goto out; @@ -158,7 +166,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); } @@ -170,7 +179,15 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -183,8 +200,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) } status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, - le16_to_cpu(d->cntlid), - req, &ctrl); + le16_to_cpu(d->cntlid), + req, &ctrl); if (status) goto out; @@ -205,7 +222,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); return; @@ -214,7 +232,7 @@ out_ctrl_put: goto out; } -int nvmet_parse_connect_cmd(struct nvmet_req *req) +u16 nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 8f483ee7868c..62eba29c85fb 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -82,10 +82,13 @@ struct nvmet_fc_fcp_iod { enum nvmet_fcp_datadir io_dir; bool active; bool abort; + bool aborted; + bool writedataactive; spinlock_t flock; struct nvmet_req req; struct work_struct work; + struct work_struct done_work; struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_queue *queue; @@ -116,7 +119,7 @@ struct nvmet_fc_tgt_queue { u16 qid; u16 sqsize; u16 ersp_ratio; - u16 sqhd; + __le16 sqhd; int cpu; atomic_t connected; atomic_t sqtail; @@ -213,6 +216,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); +static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); @@ -414,9 +418,13 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, for (i = 0; i < queue->sqsize; fod++, i++) { INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); + INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); fod->tgtport = tgtport; fod->queue = queue; fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->fcpreq = NULL; list_add_tail(&fod->fcp_list, &queue->fod_list); spin_lock_init(&fod->flock); @@ -463,7 +471,6 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) if (fod) { list_del(&fod->fcp_list); fod->active = true; - fod->abort = false; /* * no queue reference is taken, as it was taken by the * queue lookup just prior to the allocation. The iod @@ -479,17 +486,30 @@ static void nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_fcp_iod *fod) { + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; + fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); + + fcpreq->nvmet_fc_private = NULL; + spin_lock_irqsave(&queue->qlock, flags); list_add_tail(&fod->fcp_list, &fod->queue->fod_list); fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->writedataactive = false; + fod->fcpreq = NULL; spin_unlock_irqrestore(&queue->qlock, flags); /* * release the reference taken at queue lookup and fod allocation */ nvmet_fc_tgt_q_put(queue); + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); } static int @@ -616,32 +636,12 @@ nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue) static void -nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, - struct nvmefc_tgt_fcp_req *fcpreq) -{ - int ret; - - fcpreq->op = NVMET_FCOP_ABORT; - fcpreq->offset = 0; - fcpreq->timeout = 0; - fcpreq->transfer_length = 0; - fcpreq->transferred_length = 0; - fcpreq->fcp_error = 0; - fcpreq->sg_cnt = 0; - - ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fcpreq); - if (ret) - /* should never reach here !! */ - WARN_ON(1); -} - - -static void nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { + struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; unsigned long flags; - int i; + int i, writedataactive; bool disconnect; disconnect = atomic_xchg(&queue->connected, 0); @@ -652,7 +652,20 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) if (fod->active) { spin_lock(&fod->flock); fod->abort = true; + writedataactive = fod->writedataactive; spin_unlock(&fod->flock); + /* + * only call lldd abort routine if waiting for + * writedata. other outstanding ops should finish + * on their own. + */ + if (writedataactive) { + spin_lock(&fod->flock); + fod->aborted = true; + spin_unlock(&fod->flock); + tgtport->ops->fcp_abort( + &tgtport->fc_target_port, fod->fcpreq); + } } } spin_unlock_irqrestore(&queue->qlock, flags); @@ -846,7 +859,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, int ret, idx; if (!template->xmt_ls_rsp || !template->fcp_op || - !template->targetport_delete || + !template->fcp_abort || + !template->fcp_req_release || !template->targetport_delete || !template->max_hw_queues || !template->max_sgl_segments || !template->max_dif_sgl_segments || !template->dma_boundary) { ret = -EINVAL; @@ -1044,7 +1058,7 @@ EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); static void -nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, u32 desc_len, u8 rqst_ls_cmd) +nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) { struct fcnvme_ls_acc_hdr *acc = buf; @@ -1189,8 +1203,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, validation_errors[ret]); iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, - ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1281,8 +1295,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? - ELS_RJT_PROT : ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_INV_ASSOC : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1369,8 +1384,12 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, validation_errors[ret]); iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, - (ret == 8) ? ELS_RJT_PROT : ELS_RJT_LOGIC, - ELS_EXPL_NONE, 0); + (ret == VERR_NO_ASSOC) ? + FCNVME_RJT_RC_INV_ASSOC : + (ret == VERR_NO_CONN) ? + FCNVME_RJT_RC_INV_CONN : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); return; } @@ -1479,7 +1498,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, default: iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf, NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, - ELS_RJT_INVAL, ELS_EXPL_NONE, 0); + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } nvmet_fc_xmt_ls_rsp(tgtport, iod); @@ -1619,6 +1638,8 @@ nvmet_fc_free_tgt_pgs(struct nvmet_fc_fcp_iod *fod) for_each_sg(fod->data_sg, sg, fod->data_sg_cnt, count) __free_page(sg_page(sg)); kfree(fod->data_sg); + fod->data_sg = NULL; + fod->data_sg_cnt = 0; } @@ -1679,7 +1700,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, xfr_length != fod->total_length || (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] || (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) || - queue_90percent_full(fod->queue, cqe->sq_head)) + queue_90percent_full(fod->queue, le16_to_cpu(cqe->sq_head))) send_ersp = true; /* re-set the fields */ @@ -1704,6 +1725,26 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq); static void +nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + /* + * if an ABTS was received or we issued the fcp_abort early + * don't call abort routine again. + */ + /* no need to take lock - lock was taken earlier to get here */ + if (!fod->aborted) + tgtport->ops->fcp_abort(&tgtport->fc_target_port, fcpreq); + + nvmet_fc_free_fcp_iod(fod->queue, fod); +} + +static void nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod) { @@ -1716,7 +1757,7 @@ nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq); if (ret) - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } static void @@ -1725,6 +1766,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct scatterlist *sg, *datasg; + unsigned long flags; u32 tlen, sg_off; int ret; @@ -1789,10 +1831,13 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, */ fod->abort = true; - if (op == NVMET_FCOP_WRITEDATA) + if (op == NVMET_FCOP_WRITEDATA) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = false; + spin_unlock_irqrestore(&fod->flock, flags); nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); - else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { + } else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { fcpreq->fcp_error = ret; fcpreq->transferred_length = 0; nvmet_fc_xmt_fcp_op_done(fod->fcpreq); @@ -1800,32 +1845,54 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, } } +static inline bool +__nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; + + /* if in the middle of an io and we need to tear down */ + if (abort) { + if (fcpreq->op == NVMET_FCOP_WRITEDATA) { + nvmet_req_complete(&fod->req, + NVME_SC_FC_TRANSPORT_ERROR); + return true; + } + + nvmet_fc_abort_op(tgtport, fod); + return true; + } + + return false; +} + +/* + * actual done handler for FCP operations when completed by the lldd + */ static void -nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) { - struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; bool abort; spin_lock_irqsave(&fod->flock, flags); abort = fod->abort; + fod->writedataactive = false; spin_unlock_irqrestore(&fod->flock, flags); - /* if in the middle of an io and we need to tear down */ - if (abort && fcpreq->op != NVMET_FCOP_ABORT) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_req_complete(&fod->req, fcpreq->fcp_error); - return; - } - switch (fcpreq->op) { case NVMET_FCOP_WRITEDATA: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { + spin_lock(&fod->flock); + fod->abort = true; + spin_unlock(&fod->flock); + nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); return; @@ -1833,6 +1900,10 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) fod->offset += fcpreq->transferred_length; if (fod->offset != fod->total_length) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = true; + spin_unlock_irqrestore(&fod->flock, flags); + /* transfer the next chunk */ nvmet_fc_transfer_fcp_data(tgtport, fod, NVMET_FCOP_WRITEDATA); @@ -1847,12 +1918,11 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -1861,8 +1931,6 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) if (fcpreq->op == NVMET_FCOP_READDATA_RSP) { /* data no longer needed */ nvmet_fc_free_tgt_pgs(fod); - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); nvmet_fc_free_fcp_iod(fod->queue, fod); return; } @@ -1885,19 +1953,38 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) break; case NVMET_FCOP_RSP: - case NVMET_FCOP_ABORT: - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; nvmet_fc_free_fcp_iod(fod->queue, fod); break; default: - nvmet_fc_free_tgt_pgs(fod); - nvmet_fc_abort_op(tgtport, fod->fcpreq); break; } } +static void +nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work) +{ + struct nvmet_fc_fcp_iod *fod = + container_of(work, struct nvmet_fc_fcp_iod, done_work); + + nvmet_fc_fod_op_done(fod); +} + +static void +nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue = fod->queue; + + if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR) + /* context switch so completion is not in ISR context */ + queue_work_on(queue->cpu, queue->work_q, &fod->done_work); + else + nvmet_fc_fod_op_done(fod); +} + /* * actual completion handler after execution by the nvmet layer */ @@ -1919,10 +2006,7 @@ __nvmet_fc_fcp_nvme_cmd_done(struct nvmet_fc_tgtport *tgtport, fod->queue->sqhd = cqe->sq_head; if (abort) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -1971,7 +2055,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req) /* * Actual processing routine for received FC-NVME LS Requests from the LLD */ -void +static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod) { @@ -2018,8 +2102,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, &fod->queue->nvme_cq, &fod->queue->nvme_sq, &nvmet_fc_tgt_fcp_ops); - if (!ret) { /* bad SQE content */ - nvmet_fc_abort_op(tgtport, fod->fcpreq); + if (!ret) { /* bad SQE content or invalid ctrl state */ + nvmet_fc_abort_op(tgtport, fod); return; } @@ -2059,7 +2143,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, return; transport_error: - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } /* @@ -2089,7 +2173,7 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) * If this routine returns error, the lldd should abort the exchange. * * @target_port: pointer to the (registered) target port the FCP CMD IU - * was receive on. + * was received on. * @fcpreq: pointer to a fcpreq request structure to be used to reference * the exchange corresponding to the FCP Exchange. * @cmdiubuf: pointer to the buffer containing the FCP CMD IU @@ -2112,7 +2196,6 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, (be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4))) return -EIO; - queue = nvmet_fc_find_target_queue(tgtport, be64_to_cpu(cmdiu->connection_id)); if (!queue) @@ -2142,12 +2225,68 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); - queue_work_on(queue->cpu, queue->work_q, &fod->work); + if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) + queue_work_on(queue->cpu, queue->work_q, &fod->work); + else + nvmet_fc_handle_fcp_rqst(tgtport, fod); return 0; } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req); +/** + * nvmet_fc_rcv_fcp_abort - transport entry point called by an LLDD + * upon the reception of an ABTS for a FCP command + * + * Notify the transport that an ABTS has been received for a FCP command + * that had been given to the transport via nvmet_fc_rcv_fcp_req(). The + * LLDD believes the command is still being worked on + * (template_ops->fcp_req_release() has not been called). + * + * The transport will wait for any outstanding work (an op to the LLDD, + * which the lldd should complete with error due to the ABTS; or the + * completion from the nvmet layer of the nvme command), then will + * stop processing and call the nvmet_fc_rcv_fcp_req() callback to + * return the i/o context to the LLDD. The LLDD may send the BA_ACC + * to the ABTS either after return from this function (assuming any + * outstanding op work has been terminated) or upon the callback being + * called. + * + * @target_port: pointer to the (registered) target port the FCP CMD IU + * was received on. + * @fcpreq: pointer to the fcpreq request structure that corresponds + * to the exchange that received the ABTS. + */ +void +nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port, + struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue; + unsigned long flags; + + if (!fod || fod->fcpreq != fcpreq) + /* job appears to have already completed, ignore abort */ + return; + + queue = fod->queue; + + spin_lock_irqsave(&queue->qlock, flags); + if (fod->active) { + /* + * mark as abort. The abort handler, invoked upon completion + * of any work, will detect the aborted status and do the + * callback. + */ + spin_lock(&fod->flock); + fod->abort = true; + fod->aborted = true; + spin_unlock(&fod->flock); + } + spin_unlock_irqrestore(&queue->qlock, flags); +} +EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort); + enum { FCT_TRADDR_ERR = 0, FCT_TRADDR_WWNN = 1 << 0, @@ -2177,7 +2316,7 @@ nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf) if (!options) return -ENOMEM; - while ((p = strsep(&o, ",\n")) != NULL) { + while ((p = strsep(&o, ":\n")) != NULL) { if (!*p) continue; @@ -2238,6 +2377,7 @@ nvmet_fc_add_port(struct nvmet_port *port) if (!tgtport->port) { tgtport->port = port; port->priv = tgtport; + nvmet_fc_tgtport_get(tgtport); ret = 0; } else ret = -EALREADY; diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 4e8e6a22bce1..15551ef79c8c 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -246,11 +246,19 @@ struct fcloop_lsreq { struct fcloop_fcpreq { struct fcloop_tport *tport; struct nvmefc_fcp_req *fcpreq; + spinlock_t reqlock; u16 status; + bool active; + bool aborted; struct work_struct work; struct nvmefc_tgt_fcp_req tgt_fcp_req; }; +struct fcloop_ini_fcpreq { + struct nvmefc_fcp_req *fcpreq; + struct fcloop_fcpreq *tfcp_req; + struct work_struct iniwork; +}; static inline struct fcloop_lsreq * tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) @@ -341,7 +349,21 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, } /* - * FCP IO operation done. call back up initiator "done" flows. + * FCP IO operation done by initiator abort. + * call back up initiator "done" flows. + */ +static void +fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work) +{ + struct fcloop_ini_fcpreq *inireq = + container_of(work, struct fcloop_ini_fcpreq, iniwork); + + inireq->fcpreq->done(inireq->fcpreq); +} + +/* + * FCP IO operation done by target completion. + * call back up initiator "done" flows. */ static void fcloop_tgt_fcprqst_done_work(struct work_struct *work) @@ -349,12 +371,18 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, work); struct fcloop_tport *tport = tfcp_req->tport; - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; - if (tport->remoteport) { + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + spin_unlock(&tfcp_req->reqlock); + + if (tport->remoteport && fcpreq) { fcpreq->status = tfcp_req->status; fcpreq->done(fcpreq); } + + kfree(tfcp_req); } @@ -364,20 +392,25 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { - struct fcloop_fcpreq *tfcp_req = fcpreq->private; struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req; int ret = 0; - INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); + if (!rport->targetport) + return -ECONNREFUSED; - if (!rport->targetport) { - tfcp_req->status = NVME_SC_FC_TRANSPORT_ERROR; - schedule_work(&tfcp_req->work); - return ret; - } + tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL); + if (!tfcp_req) + return -ENOMEM; + inireq->fcpreq = fcpreq; + inireq->tfcp_req = tfcp_req; + INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work); tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; + spin_lock_init(&tfcp_req->reqlock); + INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req, fcpreq->cmdaddr, fcpreq->cmdlen); @@ -444,63 +477,129 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *tgt_fcpreq) { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; u32 rsplen = 0, xfrlen = 0; - int fcp_err = 0; + int fcp_err = 0, active, aborted; u8 op = tgt_fcpreq->op; + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + active = tfcp_req->active; + aborted = tfcp_req->aborted; + tfcp_req->active = true; + spin_unlock(&tfcp_req->reqlock); + + if (unlikely(active)) + /* illegal - call while i/o active */ + return -EALREADY; + + if (unlikely(aborted)) { + /* target transport has aborted i/o prior */ + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = 0; + tgt_fcpreq->fcp_error = -ECANCELED; + tgt_fcpreq->done(tgt_fcpreq); + return 0; + } + + /* + * if fcpreq is NULL, the I/O has been aborted (from + * initiator side). For the target side, act as if all is well + * but don't actually move data. + */ + switch (op) { case NVMET_FCOP_WRITEDATA: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } break; case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } if (op == NVMET_FCOP_READDATA) break; /* Fall-Thru to RSP handling */ case NVMET_FCOP_RSP: - rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? - fcpreq->rsplen : tgt_fcpreq->rsplen); - memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); - if (rsplen < tgt_fcpreq->rsplen) - fcp_err = -E2BIG; - fcpreq->rcv_rsplen = rsplen; - fcpreq->status = 0; + if (fcpreq) { + rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? + fcpreq->rsplen : tgt_fcpreq->rsplen); + memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); + if (rsplen < tgt_fcpreq->rsplen) + fcp_err = -E2BIG; + fcpreq->rcv_rsplen = rsplen; + fcpreq->status = 0; + } tfcp_req->status = 0; break; - case NVMET_FCOP_ABORT: - tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; - break; - default: fcp_err = -EINVAL; break; } + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = xfrlen; tgt_fcpreq->fcp_error = fcp_err; tgt_fcpreq->done(tgt_fcpreq); - if ((!fcp_err) && (op == NVMET_FCOP_RSP || - op == NVMET_FCOP_READDATA_RSP || - op == NVMET_FCOP_ABORT)) - schedule_work(&tfcp_req->work); - return 0; } static void +fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + int active; + + /* + * mark aborted only in case there were 2 threads in transport + * (one doing io, other doing abort) and only kills ops posted + * after the abort request + */ + spin_lock(&tfcp_req->reqlock); + active = tfcp_req->active; + tfcp_req->aborted = true; + spin_unlock(&tfcp_req->reqlock); + + tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; + + /* + * nothing more to do. If io wasn't active, the transport should + * immediately call the req_release. If it was active, the op + * will complete, and the lldd should call req_release. + */ +} + +static void +fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + + schedule_work(&tfcp_req->work); +} + +static void fcloop_ls_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) @@ -513,6 +612,27 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { + struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req; + + if (!tfcp_req) + /* abort has already been called */ + return; + + if (rport->targetport) + nvmet_fc_rcv_fcp_abort(rport->targetport, + &tfcp_req->tgt_fcp_req); + + /* break initiator/target relationship for io */ + spin_lock(&tfcp_req->reqlock); + inireq->tfcp_req = NULL; + tfcp_req->fcpreq = NULL; + spin_unlock(&tfcp_req->reqlock); + + /* post the aborted io completion */ + fcpreq->status = -ECANCELED; + schedule_work(&inireq->iniwork); } static void @@ -546,7 +666,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_SGL_SEGS 256 #define FCLOOP_DMABOUND_4G 0xFFFFFFFF -struct nvme_fc_port_template fctemplate = { +static struct nvme_fc_port_template fctemplate = { .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, @@ -563,20 +683,23 @@ struct nvme_fc_port_template fctemplate = { .local_priv_sz = sizeof(struct fcloop_lport), .remote_priv_sz = sizeof(struct fcloop_rport), .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), - .fcprqst_priv_sz = sizeof(struct fcloop_fcpreq), + .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), }; -struct nvmet_fc_target_template tgttemplate = { +static struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, .xmt_ls_rsp = fcloop_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, + .fcp_abort = fcloop_tgt_fcp_abort, + .fcp_req_release = fcloop_fcp_req_release, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ - .target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED, + .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | + NVMET_FCTGTFEAT_OPDONE_IN_ISR, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), }; diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 4195115c7e54..c77940d80fc8 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -180,11 +180,11 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) sector = le64_to_cpu(write_zeroes->slba) << (req->ns->blksize_shift - 9); - nr_sector = (((sector_t)le32_to_cpu(write_zeroes->length)) << + nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length)) << (req->ns->blksize_shift - 9)) + 1; if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, - GFP_KERNEL, &bio, true)) + GFP_KERNEL, &bio, 0)) status = NVME_SC_INTERNAL | NVME_SC_DNR; if (bio) { @@ -196,26 +196,19 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) } } -int nvmet_parse_io_cmd(struct nvmet_req *req) +u16 nvmet_parse_io_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; + u16 ret; - if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { - pr_err("nvmet: got io cmd %d while CC.EN == 0\n", - cmd->common.opcode); + ret = nvmet_check_ctrl_status(req, cmd); + if (unlikely(ret)) { req->ns = NULL; - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; - } - - if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { - pr_err("nvmet: got io cmd %d while CSTS.RDY == 0\n", - cmd->common.opcode); - req->ns = NULL; - return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + return ret; } req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); - if (!req->ns) + if (unlikely(!req->ns)) return NVME_SC_INVALID_NS | NVME_SC_DNR; switch (cmd->common.opcode) { @@ -230,14 +223,15 @@ int nvmet_parse_io_cmd(struct nvmet_req *req) return 0; case nvme_cmd_dsm: req->execute = nvmet_execute_dsm; - req->data_len = le32_to_cpu(cmd->dsm.nr + 1) * + req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * sizeof(struct nvme_dsm_range); return 0; case nvme_cmd_write_zeroes: req->execute = nvmet_execute_write_zeroes; return 0; default: - pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode); + pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + req->sq->qid); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index d1f06e7768ff..feb497134aee 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -13,12 +13,10 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/scatterlist.h> -#include <linux/delay.h> #include <linux/blk-mq.h> #include <linux/nvme.h> #include <linux/module.h> #include <linux/parser.h> -#include <linux/t10-pi.h> #include "nvmet.h" #include "../host/nvme.h" #include "../host/fabrics.h" @@ -93,31 +91,26 @@ static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue) static void nvme_loop_complete_rq(struct request *req) { struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); - int error = 0; nvme_cleanup_cmd(req); sg_free_table_chained(&iod->sg_table, true); + nvme_complete_rq(req); +} - if (unlikely(req->errors)) { - if (nvme_req_needs_retry(req, req->errors)) { - nvme_requeue_req(req); - return; - } - - if (blk_rq_is_passthrough(req)) - error = req->errors; - else - error = nvme_error_status(req->errors); - } +static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue) +{ + u32 queue_idx = nvme_loop_queue_idx(queue); - blk_mq_end_request(req, error); + if (queue_idx == 0) + return queue->ctrl->admin_tag_set.tags[queue_idx]; + return queue->ctrl->tag_set.tags[queue_idx - 1]; } static void nvme_loop_queue_response(struct nvmet_req *req) { - struct nvme_loop_iod *iod = - container_of(req, struct nvme_loop_iod, req); - struct nvme_completion *cqe = &iod->rsp; + struct nvme_loop_queue *queue = + container_of(req->sq, struct nvme_loop_queue, nvme_sq); + struct nvme_completion *cqe = req->rsp; /* * AEN requests are special as they don't time out and can @@ -125,15 +118,22 @@ static void nvme_loop_queue_response(struct nvmet_req *req) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvme_loop_queue_idx(iod->queue) == 0 && + if (unlikely(nvme_loop_queue_idx(queue) == 0 && cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) { - nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe->status, + nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); } else { - struct request *rq = blk_mq_rq_from_pdu(iod); + struct request *rq; + + rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id); + if (!rq) { + dev_err(queue->ctrl->ctrl.device, + "tag 0x%x on queue %d not found\n", + cqe->command_id, nvme_loop_queue_idx(queue)); + return; + } - iod->nvme_req.result = cqe->result; - blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); + nvme_end_request(rq, cqe->status, cqe->result); } } @@ -154,7 +154,7 @@ nvme_loop_timeout(struct request *rq, bool reserved) schedule_work(&iod->queue->ctrl->reset_work); /* fail with DNR on admin cmd timeout */ - rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR; + nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; return BLK_EH_HANDLED; } @@ -223,8 +223,6 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, struct nvme_loop_iod *iod, unsigned int queue_idx) { - BUG_ON(queue_idx >= ctrl->queue_count); - iod->req.cmd = &iod->cmd; iod->req.rsp = &iod->rsp; iod->queue = &ctrl->queues[queue_idx]; @@ -232,18 +230,19 @@ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, return 0; } -static int nvme_loop_init_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_loop_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { - return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), hctx_idx + 1); + return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), + hctx_idx + 1); } -static int nvme_loop_init_admin_request(void *data, struct request *req, - unsigned int hctx_idx, unsigned int rq_idx, - unsigned int numa_node) +static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) { - return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), 0); + return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0); } static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, @@ -270,7 +269,7 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } -static struct blk_mq_ops nvme_loop_mq_ops = { +static const struct blk_mq_ops nvme_loop_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_request, @@ -278,7 +277,7 @@ static struct blk_mq_ops nvme_loop_mq_ops = { .timeout = nvme_loop_timeout, }; -static struct blk_mq_ops nvme_loop_admin_mq_ops = { +static const struct blk_mq_ops nvme_loop_admin_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_admin_request, @@ -288,9 +287,9 @@ static struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { + nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); } static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) @@ -314,6 +313,56 @@ free_ctrl: kfree(ctrl); } +static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->queue_count; i++) + nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +} + +static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; + int ret, i; + + nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); + if (ret || !nr_io_queues) + return ret; + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); + + for (i = 1; i <= nr_io_queues; i++) { + ctrl->queues[i].ctrl = ctrl; + ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); + if (ret) + goto out_destroy_queues; + + ctrl->queue_count++; + } + + return 0; + +out_destroy_queues: + nvme_loop_destroy_io_queues(ctrl); + return ret; +} + +static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) +{ + int i, ret; + + for (i = 1; i < ctrl->queue_count; i++) { + ret = nvmf_connect_io_queue(&ctrl->ctrl, i); + if (ret) + return ret; + } + + return 0; +} + static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; @@ -357,7 +406,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (error) @@ -385,17 +434,13 @@ out_free_sq: static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) { - int i; - nvme_stop_keep_alive(&ctrl->ctrl); if (ctrl->queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); - - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); } if (ctrl->ctrl.state == NVME_CTRL_LIVE) @@ -459,7 +504,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) struct nvme_loop_ctrl *ctrl = container_of(work, struct nvme_loop_ctrl, reset_work); bool changed; - int i, ret; + int ret; nvme_loop_shutdown_ctrl(ctrl); @@ -467,20 +512,13 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_disable; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_free_queues; - - ctrl->queue_count++; - } + ret = nvme_loop_init_io_queues(ctrl); + if (ret) + goto out_destroy_admin; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - goto out_free_queues; - } + ret = nvme_loop_connect_io_queues(ctrl); + if (ret) + goto out_destroy_io; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); WARN_ON_ONCE(!changed); @@ -492,9 +530,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) return; -out_free_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +out_destroy_io: + nvme_loop_destroy_io_queues(ctrl); +out_destroy_admin: nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); @@ -533,25 +571,12 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; - int ret, i; + int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret || !opts->nr_io_queues) + ret = nvme_loop_init_io_queues(ctrl); + if (ret) return ret; - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - - for (i = 1; i <= opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_destroy_queues; - - ctrl->queue_count++; - } - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_loop_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; @@ -575,11 +600,9 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) goto out_free_tagset; } - for (i = 1; i <= opts->nr_io_queues; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - goto out_cleanup_connect_q; - } + ret = nvme_loop_connect_io_queues(ctrl); + if (ret) + goto out_cleanup_connect_q; return 0; @@ -588,8 +611,7 @@ out_cleanup_connect_q: out_free_tagset: blk_mq_free_tag_set(&ctrl->tag_set); out_destroy_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); return ret; } @@ -724,7 +746,12 @@ static int __init nvme_loop_init_module(void) ret = nvmet_register_transport(&nvme_loop_ops); if (ret) return ret; - return nvmf_register_transport(&nvme_loop_transport); + + ret = nvmf_register_transport(&nvme_loop_transport); + if (ret) + nvmet_unregister_transport(&nvme_loop_ops); + + return ret; } static void __exit nvme_loop_cleanup_module(void) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 1370eee0a3c0..7cb77ba5993b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -73,6 +73,7 @@ struct nvmet_sq { u16 qid; u16 size; struct completion free_done; + struct completion confirm_done; }; /** @@ -252,11 +253,11 @@ struct nvmet_async_event { u8 log_page; }; -int nvmet_parse_connect_cmd(struct nvmet_req *req); -int nvmet_parse_io_cmd(struct nvmet_req *req); -int nvmet_parse_admin_cmd(struct nvmet_req *req); -int nvmet_parse_discovery_cmd(struct nvmet_req *req); -int nvmet_parse_fabrics_cmd(struct nvmet_req *req); +u16 nvmet_parse_connect_cmd(struct nvmet_req *req); +u16 nvmet_parse_io_cmd(struct nvmet_req *req); +u16 nvmet_parse_admin_cmd(struct nvmet_req *req); +u16 nvmet_parse_discovery_cmd(struct nvmet_req *req); +u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops); @@ -277,6 +278,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, struct nvmet_req *req, struct nvmet_ctrl **ret); void nvmet_ctrl_put(struct nvmet_ctrl *ctrl); +u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd); struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, enum nvme_subsys_type type); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 9aa1da3778b3..99c69018a35f 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -703,11 +703,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, { u16 status; - cmd->queue = queue; - cmd->n_rdma = 0; - cmd->req.port = queue->port; - - ib_dma_sync_single_for_cpu(queue->dev->device, cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length, DMA_FROM_DEVICE); @@ -760,9 +755,12 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) cmd->queue = queue; rsp = nvmet_rdma_get_rsp(queue); + rsp->queue = queue; rsp->cmd = cmd; rsp->flags = 0; rsp->req.cmd = cmd->nvme_cmd; + rsp->req.port = queue->port; + rsp->n_rdma = 0; if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) { unsigned long flags; @@ -1201,6 +1199,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, } queue->port = cm_id->context; + if (queue->host_qid == 0) { + /* Let inflight controller teardown complete */ + flush_scheduled_work(); + } + ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); if (ret) goto release_queue; @@ -1429,12 +1432,16 @@ restart: static int nvmet_rdma_add_port(struct nvmet_port *port) { struct rdma_cm_id *cm_id; - struct sockaddr_in addr_in; - u16 port_in; + struct sockaddr_storage addr = { }; + __kernel_sa_family_t af; int ret; switch (port->disc_addr.adrfam) { case NVMF_ADDR_FAMILY_IP4: + af = AF_INET; + break; + case NVMF_ADDR_FAMILY_IP6: + af = AF_INET6; break; default: pr_err("address family %d not supported\n", @@ -1442,13 +1449,13 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return -EINVAL; } - ret = kstrtou16(port->disc_addr.trsvcid, 0, &port_in); - if (ret) + ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, + port->disc_addr.trsvcid, &addr); + if (ret) { + pr_err("malformed ip/port passed: %s:%s\n", + port->disc_addr.traddr, port->disc_addr.trsvcid); return ret; - - addr_in.sin_family = AF_INET; - addr_in.sin_addr.s_addr = in_aton(port->disc_addr.traddr); - addr_in.sin_port = htons(port_in); + } cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, RDMA_PS_TCP, IB_QPT_RC); @@ -1457,20 +1464,32 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return PTR_ERR(cm_id); } - ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr_in); + /* + * Allow both IPv4 and IPv6 sockets to bind a single port + * at the same time. + */ + ret = rdma_set_afonly(cm_id, 1); + if (ret) { + pr_err("rdma_set_afonly failed (%d)\n", ret); + goto out_destroy_id; + } + + ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); if (ret) { - pr_err("binding CM ID to %pISpc failed (%d)\n", &addr_in, ret); + pr_err("binding CM ID to %pISpcs failed (%d)\n", + (struct sockaddr *)&addr, ret); goto out_destroy_id; } ret = rdma_listen(cm_id, 128); if (ret) { - pr_err("listening to %pISpc failed (%d)\n", &addr_in, ret); + pr_err("listening to %pISpcs failed (%d)\n", + (struct sockaddr *)&addr, ret); goto out_destroy_id; } - pr_info("enabling port %d (%pISpc)\n", - le16_to_cpu(port->disc_addr.portid), &addr_in); + pr_info("enabling port %d (%pISpcs)\n", + le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr); port->priv = cm_id; return 0; |