summaryrefslogtreecommitdiff
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--drivers/nvme/host/core.c215
-rw-r--r--drivers/nvme/host/fabrics.c6
-rw-r--r--drivers/nvme/host/fabrics.h8
-rw-r--r--drivers/nvme/host/fc.c52
-rw-r--r--drivers/nvme/host/multipath.c63
-rw-r--r--drivers/nvme/host/nvme.h25
-rw-r--r--drivers/nvme/host/pci.c63
-rw-r--r--drivers/nvme/host/rdma.c44
-rw-r--r--drivers/nvme/host/tcp.c60
-rw-r--r--drivers/nvme/host/zns.c2
10 files changed, 373 insertions, 165 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7efb31b87f37..838b5e2058be 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -6,6 +6,7 @@
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
+#include <linux/blk-integrity.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/errno.h>
@@ -13,7 +14,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
-#include <linux/list_sort.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/pr.h>
@@ -119,25 +119,6 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
struct nvme_command *cmd);
-/*
- * Prepare a queue for teardown.
- *
- * This must forcibly unquiesce queues to avoid blocking dispatch, and only set
- * the capacity to 0 after that to avoid blocking dispatchers that may be
- * holding bd_butex. This will end buffered writers dirtying pages that can't
- * be synced.
- */
-static void nvme_set_queue_dying(struct nvme_ns *ns)
-{
- if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
- return;
-
- blk_set_queue_dying(ns->queue);
- blk_mq_unquiesce_queue(ns->queue);
-
- set_capacity_and_notify(ns->disk, 0);
-}
-
void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
/*
@@ -222,7 +203,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl)
{
dev_info(ctrl->device,
- "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn);
+ "Removing ctrl: NQN \"%s\"\n", nvmf_ctrl_subsysnqn(ctrl));
flush_work(&ctrl->reset_work);
nvme_stop_ctrl(ctrl);
@@ -346,15 +327,19 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
return RETRY;
}
-static inline void nvme_end_req(struct request *req)
+static inline void nvme_end_req_zoned(struct request *req)
{
- blk_status_t status = nvme_error_status(nvme_req(req)->status);
-
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
req_op(req) == REQ_OP_ZONE_APPEND)
req->__sector = nvme_lba_to_sect(req->q->queuedata,
le64_to_cpu(nvme_req(req)->result.u64));
+}
+static inline void nvme_end_req(struct request *req)
+{
+ blk_status_t status = nvme_error_status(nvme_req(req)->status);
+
+ nvme_end_req_zoned(req);
nvme_trace_bio_complete(req);
blk_mq_end_request(req, status);
}
@@ -381,6 +366,13 @@ void nvme_complete_rq(struct request *req)
}
EXPORT_SYMBOL_GPL(nvme_complete_rq);
+void nvme_complete_batch_req(struct request *req)
+{
+ nvme_cleanup_cmd(req);
+ nvme_end_req_zoned(req);
+}
+EXPORT_SYMBOL_GPL(nvme_complete_batch_req);
+
/*
* Called to unwind from ->queue_rq on a failed command submission so that the
* multipathing code gets called to potentially failover to another path.
@@ -632,7 +624,7 @@ static inline void nvme_init_request(struct request *req,
req->cmd_flags |= REQ_FAILFAST_DRIVER;
if (req->mq_hctx->type == HCTX_TYPE_POLL)
- req->cmd_flags |= REQ_HIPRI;
+ req->cmd_flags |= REQ_POLLED;
nvme_clear_nvme_request(req);
memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
}
@@ -823,6 +815,7 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
+ memset(cmnd, 0, sizeof(*cmnd));
cmnd->common.opcode = nvme_cmd_flush;
cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
}
@@ -874,6 +867,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
return BLK_STS_IOERR;
}
+ memset(cmnd, 0, sizeof(*cmnd));
cmnd->dsm.opcode = nvme_cmd_dsm;
cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->dsm.nr = cpu_to_le32(segments - 1);
@@ -890,6 +884,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
struct request *req, struct nvme_command *cmnd)
{
+ memset(cmnd, 0, sizeof(*cmnd));
+
if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
return nvme_setup_discard(ns, req, cmnd);
@@ -923,9 +919,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
cmnd->rw.opcode = op;
+ cmnd->rw.flags = 0;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
+ cmnd->rw.rsvd2 = 0;
+ cmnd->rw.metadata = 0;
cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ cmnd->rw.reftag = 0;
+ cmnd->rw.apptag = 0;
+ cmnd->rw.appmask = 0;
if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
@@ -979,12 +981,11 @@ EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
{
struct nvme_command *cmd = nvme_req(req)->cmd;
+ struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
blk_status_t ret = BLK_STS_OK;
- if (!(req->rq_flags & RQF_DONTPREP)) {
+ if (!(req->rq_flags & RQF_DONTPREP))
nvme_clear_nvme_request(req);
- memset(cmd, 0, sizeof(*cmd));
- }
switch (req_op(req)) {
case REQ_OP_DRV_IN:
@@ -1027,7 +1028,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
return BLK_STS_IOERR;
}
- nvme_req(req)->genctr++;
+ if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN))
+ nvme_req(req)->genctr++;
cmd->common.command_id = nvme_cid(req);
trace_nvme_setup_cmd(req, cmd);
return ret;
@@ -2599,6 +2601,24 @@ static ssize_t nvme_subsys_show_nqn(struct device *dev,
}
static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn);
+static ssize_t nvme_subsys_show_type(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_subsystem *subsys =
+ container_of(dev, struct nvme_subsystem, dev);
+
+ switch (subsys->subtype) {
+ case NVME_NQN_DISC:
+ return sysfs_emit(buf, "discovery\n");
+ case NVME_NQN_NVME:
+ return sysfs_emit(buf, "nvm\n");
+ default:
+ return sysfs_emit(buf, "reserved\n");
+ }
+}
+static SUBSYS_ATTR_RO(subsystype, S_IRUGO, nvme_subsys_show_type);
+
#define nvme_subsys_show_str_function(field) \
static ssize_t subsys_##field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -2619,6 +2639,7 @@ static struct attribute *nvme_subsys_attrs[] = {
&subsys_attr_serial.attr,
&subsys_attr_firmware_rev.attr,
&subsys_attr_subsysnqn.attr,
+ &subsys_attr_subsystype.attr,
#ifdef CONFIG_NVME_MULTIPATH
&subsys_attr_iopolicy.attr,
#endif
@@ -2689,6 +2710,21 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
+
+ /* Versions prior to 1.4 don't necessarily report a valid type */
+ if (id->cntrltype == NVME_CTRL_DISC ||
+ !strcmp(subsys->subnqn, NVME_DISC_SUBSYS_NAME))
+ subsys->subtype = NVME_NQN_DISC;
+ else
+ subsys->subtype = NVME_NQN_NVME;
+
+ if (nvme_discovery_ctrl(ctrl) && subsys->subtype != NVME_NQN_DISC) {
+ dev_err(ctrl->device,
+ "Subsystem %s is not a discovery controller",
+ subsys->subnqn);
+ kfree(subsys);
+ return -EINVAL;
+ }
subsys->awupf = le16_to_cpu(id->awupf);
#ifdef CONFIG_NVME_MULTIPATH
subsys->iopolicy = NVME_IOPOLICY_NUMA;
@@ -3524,7 +3560,9 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
lockdep_assert_held(&subsys->lock);
list_for_each_entry(h, &subsys->nsheads, entry) {
- if (h->ns_id == nsid && nvme_tryget_ns_head(h))
+ if (h->ns_id != nsid)
+ continue;
+ if (!list_empty(&h->list) && nvme_tryget_ns_head(h))
return h;
}
@@ -3547,10 +3585,15 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
return 0;
}
+static void nvme_cdev_rel(struct device *dev)
+{
+ ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(dev->devt));
+}
+
void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device)
{
cdev_device_del(cdev, cdev_device);
- ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(cdev_device->devt));
+ put_device(cdev_device);
}
int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
@@ -3563,14 +3606,14 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
return minor;
cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
cdev_device->class = nvme_ns_chr_class;
+ cdev_device->release = nvme_cdev_rel;
device_initialize(cdev_device);
cdev_init(cdev, fops);
cdev->owner = owner;
ret = cdev_device_add(cdev, cdev_device);
- if (ret) {
+ if (ret)
put_device(cdev_device);
- ida_simple_remove(&nvme_ns_chr_minor_ida, minor);
- }
+
return ret;
}
@@ -3602,11 +3645,9 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns)
ns->ctrl->instance, ns->head->instance);
if (ret)
return ret;
- ret = nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops,
- ns->ctrl->ops->module);
- if (ret)
- kfree_const(ns->cdev_device.kobj.name);
- return ret;
+
+ return nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops,
+ ns->ctrl->ops->module);
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
@@ -3714,15 +3755,6 @@ out_unlock:
return ret;
}
-static int ns_cmp(void *priv, const struct list_head *a,
- const struct list_head *b)
-{
- struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
- struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
-
- return nsa->head->ns_id - nsb->head->ns_id;
-}
-
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns, *ret = NULL;
@@ -3743,6 +3775,22 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
}
EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
+/*
+ * Add the namespace to the controller list while keeping the list ordered.
+ */
+static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
+{
+ struct nvme_ns *tmp;
+
+ list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) {
+ if (tmp->head->ns_id < ns->head->ns_id) {
+ list_add(&ns->list, &tmp->list);
+ return;
+ }
+ }
+ list_add(&ns->list, &ns->ctrl->namespaces);
+}
+
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_ns_ids *ids)
{
@@ -3793,9 +3841,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
goto out_unlink_ns;
down_write(&ctrl->namespaces_rwsem);
- list_add_tail(&ns->list, &ctrl->namespaces);
+ nvme_ns_add_to_ctrl_list(ns);
up_write(&ctrl->namespaces_rwsem);
-
nvme_get_ctrl(ctrl);
if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups))
@@ -3843,6 +3890,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
mutex_lock(&ns->ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
+ if (list_empty(&ns->head->list)) {
+ list_del_init(&ns->head->entry);
+ last_path = true;
+ }
mutex_unlock(&ns->ctrl->subsys->lock);
/* guarantee not available in head->list */
@@ -3856,20 +3907,11 @@ static void nvme_ns_remove(struct nvme_ns *ns)
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
del_gendisk(ns->disk);
blk_cleanup_queue(ns->queue);
- if (blk_get_integrity(ns->disk))
- blk_integrity_unregister(ns->disk);
down_write(&ns->ctrl->namespaces_rwsem);
list_del_init(&ns->list);
up_write(&ns->ctrl->namespaces_rwsem);
- /* Synchronize with nvme_init_ns_head() */
- mutex_lock(&ns->head->subsys->lock);
- if (list_empty(&ns->head->list)) {
- list_del_init(&ns->head->entry);
- last_path = true;
- }
- mutex_unlock(&ns->head->subsys->lock);
if (last_path)
nvme_mpath_shutdown_disk(ns->head);
nvme_put_ns(ns);
@@ -4083,10 +4125,6 @@ static void nvme_scan_work(struct work_struct *work)
if (nvme_scan_ns_list(ctrl) != 0)
nvme_scan_ns_sequential(ctrl);
mutex_unlock(&ctrl->scan_lock);
-
- down_write(&ctrl->namespaces_rwsem);
- list_sort(NULL, &ctrl->namespaces, ns_cmp);
- up_write(&ctrl->namespaces_rwsem);
}
/*
@@ -4470,6 +4508,37 @@ out:
}
EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+static void nvme_start_ns_queue(struct nvme_ns *ns)
+{
+ if (test_and_clear_bit(NVME_NS_STOPPED, &ns->flags))
+ blk_mq_unquiesce_queue(ns->queue);
+}
+
+static void nvme_stop_ns_queue(struct nvme_ns *ns)
+{
+ if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags))
+ blk_mq_quiesce_queue(ns->queue);
+}
+
+/*
+ * Prepare a queue for teardown.
+ *
+ * This must forcibly unquiesce queues to avoid blocking dispatch, and only set
+ * the capacity to 0 after that to avoid blocking dispatchers that may be
+ * holding bd_butex. This will end buffered writers dirtying pages that can't
+ * be synced.
+ */
+static void nvme_set_queue_dying(struct nvme_ns *ns)
+{
+ if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+ return;
+
+ blk_set_queue_dying(ns->queue);
+ nvme_start_ns_queue(ns);
+
+ set_capacity_and_notify(ns->disk, 0);
+}
+
/**
* nvme_kill_queues(): Ends all namespace queues
* @ctrl: the dead controller that needs to end
@@ -4485,7 +4554,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
/* Forcibly unquiesce queues to avoid blocking dispatch */
if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
- blk_mq_unquiesce_queue(ctrl->admin_q);
+ nvme_start_admin_queue(ctrl);
list_for_each_entry(ns, &ctrl->namespaces, list)
nvme_set_queue_dying(ns);
@@ -4548,7 +4617,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
- blk_mq_quiesce_queue(ns->queue);
+ nvme_stop_ns_queue(ns);
up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_stop_queues);
@@ -4559,11 +4628,25 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
- blk_mq_unquiesce_queue(ns->queue);
+ nvme_start_ns_queue(ns);
up_read(&ctrl->namespaces_rwsem);
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
+void nvme_stop_admin_queue(struct nvme_ctrl *ctrl)
+{
+ if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
+ blk_mq_quiesce_queue(ctrl->admin_q);
+}
+EXPORT_SYMBOL_GPL(nvme_stop_admin_queue);
+
+void nvme_start_admin_queue(struct nvme_ctrl *ctrl)
+{
+ if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
+ blk_mq_unquiesce_queue(ctrl->admin_q);
+}
+EXPORT_SYMBOL_GPL(nvme_start_admin_queue);
+
void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 668c6bb7a567..c5a2b71c5268 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -548,6 +548,7 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
{ NVMF_OPT_TOS, "tos=%d" },
{ NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" },
+ { NVMF_OPT_DISCOVERY, "discovery" },
{ NVMF_OPT_ERR, NULL }
};
@@ -823,6 +824,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
opts->tos = token;
break;
+ case NVMF_OPT_DISCOVERY:
+ opts->discovery_nqn = true;
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
@@ -949,7 +953,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
- NVMF_OPT_DISABLE_SQFLOW |\
+ NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\
NVMF_OPT_FAIL_FAST_TMO)
static struct nvme_ctrl *
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a146cb903869..c3203ff1c654 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -67,6 +67,7 @@ enum {
NVMF_OPT_TOS = 1 << 19,
NVMF_OPT_FAIL_FAST_TMO = 1 << 20,
NVMF_OPT_HOST_IFACE = 1 << 21,
+ NVMF_OPT_DISCOVERY = 1 << 22,
};
/**
@@ -178,6 +179,13 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
return true;
}
+static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl)
+{
+ if (!ctrl->subsys)
+ return ctrl->opts->subsysnqn;
+ return ctrl->subsys->subnqn;
+}
+
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b08a61ca283f..71b3108c22f0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -16,6 +16,7 @@
#include <linux/nvme-fc.h>
#include "fc.h"
#include <scsi/scsi_transport_fc.h>
+#include <linux/blk-mq-pci.h>
/* *************************** Data Structures/Defines ****************** */
@@ -2382,7 +2383,7 @@ nvme_fc_ctrl_free(struct kref *ref)
list_del(&ctrl->ctrl_list);
spin_unlock_irqrestore(&ctrl->rport->lock, flags);
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+ nvme_start_admin_queue(&ctrl->ctrl);
blk_cleanup_queue(ctrl->ctrl.admin_q);
blk_cleanup_queue(ctrl->ctrl.fabrics_q);
blk_mq_free_tag_set(&ctrl->admin_tag_set);
@@ -2487,6 +2488,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
*/
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
+ nvme_sync_io_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
@@ -2509,7 +2511,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
/*
* clean up the admin queue. Same thing as above.
*/
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+ nvme_stop_admin_queue(&ctrl->ctrl);
+ blk_sync_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
@@ -2839,6 +2842,28 @@ nvme_fc_complete_rq(struct request *rq)
nvme_fc_ctrl_put(ctrl);
}
+static int nvme_fc_map_queues(struct blk_mq_tag_set *set)
+{
+ struct nvme_fc_ctrl *ctrl = set->driver_data;
+ int i;
+
+ for (i = 0; i < set->nr_maps; i++) {
+ struct blk_mq_queue_map *map = &set->map[i];
+
+ if (!map->nr_queues) {
+ WARN_ON(i == HCTX_TYPE_DEFAULT);
+ continue;
+ }
+
+ /* Call LLDD map queue functionality if defined */
+ if (ctrl->lport->ops->map_queues)
+ ctrl->lport->ops->map_queues(&ctrl->lport->localport,
+ map);
+ else
+ blk_mq_map_queues(map);
+ }
+ return 0;
+}
static const struct blk_mq_ops nvme_fc_mq_ops = {
.queue_rq = nvme_fc_queue_rq,
@@ -2847,6 +2872,7 @@ static const struct blk_mq_ops nvme_fc_mq_ops = {
.exit_request = nvme_fc_exit_request,
.init_hctx = nvme_fc_init_hctx,
.timeout = nvme_fc_timeout,
+ .map_queues = nvme_fc_map_queues,
};
static int
@@ -2951,6 +2977,13 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
if (ctrl->ctrl.queue_count == 1)
return 0;
+ if (prior_ioq_cnt != nr_io_queues) {
+ dev_info(ctrl->ctrl.device,
+ "reconnect: revising io queue count from %d to %d\n",
+ prior_ioq_cnt, nr_io_queues);
+ blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+ }
+
ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_free_io_queues;
@@ -2959,15 +2992,6 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
if (ret)
goto out_delete_hw_queues;
- if (prior_ioq_cnt != nr_io_queues) {
- dev_info(ctrl->ctrl.device,
- "reconnect: revising io queue count from %d to %d\n",
- prior_ioq_cnt, nr_io_queues);
- nvme_wait_freeze(&ctrl->ctrl);
- blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
- nvme_unfreeze(&ctrl->ctrl);
- }
-
return 0;
out_delete_hw_queues:
@@ -3095,7 +3119,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments <<
(ilog2(SZ_4K) - 9);
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+ nvme_start_admin_queue(&ctrl->ctrl);
ret = nvme_init_ctrl_finish(&ctrl->ctrl);
if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
@@ -3249,7 +3273,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
nvme_fc_free_queue(&ctrl->queues[0]);
/* re-enable the admin_q so anything new can fast fail */
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+ nvme_start_admin_queue(&ctrl->ctrl);
/* resume the io queues so that things will fast fail */
nvme_start_queues(&ctrl->ctrl);
@@ -3572,7 +3596,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
- ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
+ ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl));
return &ctrl->ctrl;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 5d7bc58a27bd..7f2071f2460c 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -85,8 +85,13 @@ void nvme_failover_req(struct request *req)
}
spin_lock_irqsave(&ns->head->requeue_lock, flags);
- for (bio = req->bio; bio; bio = bio->bi_next)
+ for (bio = req->bio; bio; bio = bio->bi_next) {
bio_set_dev(bio, ns->head->disk->part0);
+ if (bio->bi_opf & REQ_POLLED) {
+ bio->bi_opf &= ~REQ_POLLED;
+ bio->bi_cookie = BLK_QC_T_NONE;
+ }
+ }
blk_steal_bios(&ns->head->requeue_list, req);
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
@@ -100,8 +105,11 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- if (ns->head->disk)
- kblockd_schedule_work(&ns->head->requeue_work);
+ if (!ns->head->disk)
+ continue;
+ kblockd_schedule_work(&ns->head->requeue_work);
+ if (ctrl->state == NVME_CTRL_LIVE)
+ disk_uevent(ns->head->disk, KOBJ_CHANGE);
}
up_read(&ctrl->namespaces_rwsem);
}
@@ -138,13 +146,12 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
- mutex_lock(&ctrl->scan_lock);
down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
- if (nvme_mpath_clear_current_path(ns))
- kblockd_schedule_work(&ns->head->requeue_work);
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ nvme_mpath_clear_current_path(ns);
+ kblockd_schedule_work(&ns->head->requeue_work);
+ }
up_read(&ctrl->namespaces_rwsem);
- mutex_unlock(&ctrl->scan_lock);
}
void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
@@ -312,12 +319,11 @@ static bool nvme_available_path(struct nvme_ns_head *head)
return false;
}
-static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
+static void nvme_ns_head_submit_bio(struct bio *bio)
{
struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data;
struct device *dev = disk_to_dev(head->disk);
struct nvme_ns *ns;
- blk_qc_t ret = BLK_QC_T_NONE;
int srcu_idx;
/*
@@ -334,7 +340,7 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
bio->bi_opf |= REQ_NVME_MPATH;
trace_block_bio_remap(bio, disk_devt(ns->head->disk),
bio->bi_iter.bi_sector);
- ret = submit_bio_noacct(bio);
+ submit_bio_noacct(bio);
} else if (nvme_available_path(head)) {
dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
@@ -349,7 +355,6 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
}
srcu_read_unlock(&head->srcu, srcu_idx);
- return ret;
}
static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
@@ -431,8 +436,6 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
return ret;
ret = nvme_cdev_add(&head->cdev, &head->cdev_device,
&nvme_ns_head_chr_fops, THIS_MODULE);
- if (ret)
- kfree_const(head->cdev_device.kobj.name);
return ret;
}
@@ -481,6 +484,15 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue);
+ /*
+ * This assumes all controllers that refer to a namespace either
+ * support poll queues or not. That is not a strict guarantee,
+ * but if the assumption is wrong the effect is only suboptimal
+ * performance but not correctness problem.
+ */
+ if (ctrl->tagset->nr_maps > HCTX_TYPE_POLL &&
+ ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues)
+ blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue);
/* set to a default value of 512 until the disk is validated */
blk_queue_logical_block_size(head->disk->queue, 512);
@@ -496,13 +508,23 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
static void nvme_mpath_set_live(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
+ int rc;
if (!head->disk)
return;
+ /*
+ * test_and_set_bit() is used because it is protecting against two nvme
+ * paths simultaneously calling device_add_disk() on the same namespace
+ * head.
+ */
if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
- device_add_disk(&head->subsys->dev, head->disk,
- nvme_ns_id_attr_groups);
+ rc = device_add_disk(&head->subsys->dev, head->disk,
+ nvme_ns_id_attr_groups);
+ if (rc) {
+ clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags);
+ return;
+ }
nvme_add_ns_head_cdev(head);
}
@@ -540,7 +562,7 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
return -EINVAL;
nr_nsids = le32_to_cpu(desc->nnsids);
- nsid_buf_size = nr_nsids * sizeof(__le32);
+ nsid_buf_size = flex_array_size(desc, nsids, nr_nsids);
if (WARN_ON_ONCE(desc->grpid == 0))
return -EINVAL;
@@ -600,14 +622,17 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- unsigned nsid = le32_to_cpu(desc->nsids[n]);
-
+ unsigned nsid;
+again:
+ nsid = le32_to_cpu(desc->nsids[n]);
if (ns->head->ns_id < nsid)
continue;
if (ns->head->ns_id == nsid)
nvme_update_ns_ana_state(desc, ns);
if (++n == nr_nsids)
break;
+ if (ns->head->ns_id > nsid)
+ goto again;
}
up_read(&ctrl->namespaces_rwsem);
return 0;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9871c0c9374c..b334af8aa264 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -138,6 +138,12 @@ enum nvme_quirks {
* 48 bits.
*/
NVME_QUIRK_DMA_ADDRESS_BITS_48 = (1 << 16),
+
+ /*
+ * The controller requires the command_id value be be limited, so skip
+ * encoding the generation sequence number.
+ */
+ NVME_QUIRK_SKIP_CID_GEN = (1 << 17),
};
/*
@@ -336,6 +342,7 @@ struct nvme_ctrl {
int nr_reconnects;
unsigned long flags;
#define NVME_CTRL_FAILFAST_EXPIRED 0
+#define NVME_CTRL_ADMIN_Q_STOPPED 1
struct nvmf_ctrl_options *opts;
struct page *discard_page;
@@ -366,6 +373,7 @@ struct nvme_subsystem {
char model[40];
char firmware_rev[8];
u8 cmic;
+ enum nvme_subsys_type subtype;
u16 vendor_id;
u16 awupf; /* 0's based awupf value. */
struct ida ns_ida;
@@ -457,6 +465,7 @@ struct nvme_ns {
#define NVME_NS_ANA_PENDING 2
#define NVME_NS_FORCE_RO 3
#define NVME_NS_READY 4
+#define NVME_NS_STOPPED 5
struct cdev cdev;
struct device cdev_device;
@@ -632,6 +641,20 @@ static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
}
void nvme_complete_rq(struct request *req);
+void nvme_complete_batch_req(struct request *req);
+
+static __always_inline void nvme_complete_batch(struct io_comp_batch *iob,
+ void (*fn)(struct request *rq))
+{
+ struct request *req;
+
+ rq_list_for_each(&iob->req_list, req) {
+ fn(req);
+ nvme_complete_batch_req(req);
+ }
+ blk_mq_end_request_batch(iob);
+}
+
blk_status_t nvme_host_path_error(struct request *req);
bool nvme_cancel_request(struct request *req, void *data, bool reserved);
void nvme_cancel_tagset(struct nvme_ctrl *ctrl);
@@ -659,6 +682,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
+void nvme_stop_admin_queue(struct nvme_ctrl *ctrl);
+void nvme_start_admin_queue(struct nvme_ctrl *ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl);
void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_sync_io_queues(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b82492cd7503..ca2ee806d74b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -10,6 +10,7 @@
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
+#include <linux/blk-integrity.h>
#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -244,8 +245,15 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
{
unsigned int mem_size = nvme_dbbuf_size(dev);
- if (dev->dbbuf_dbs)
+ if (dev->dbbuf_dbs) {
+ /*
+ * Clear the dbbuf memory so the driver doesn't observe stale
+ * values from the previous instantiation.
+ */
+ memset(dev->dbbuf_dbs, 0, mem_size);
+ memset(dev->dbbuf_eis, 0, mem_size);
return 0;
+ }
dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
&dev->dbbuf_dbs_dma_addr,
@@ -958,7 +966,7 @@ out_free_cmd:
return ret;
}
-static void nvme_pci_complete_rq(struct request *req)
+static __always_inline void nvme_pci_unmap_rq(struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_dev *dev = iod->nvmeq->dev;
@@ -968,9 +976,19 @@ static void nvme_pci_complete_rq(struct request *req)
rq_integrity_vec(req)->bv_len, rq_data_dir(req));
if (blk_rq_nr_phys_segments(req))
nvme_unmap_data(dev, req);
+}
+
+static void nvme_pci_complete_rq(struct request *req)
+{
+ nvme_pci_unmap_rq(req);
nvme_complete_rq(req);
}
+static void nvme_pci_complete_batch(struct io_comp_batch *iob)
+{
+ nvme_complete_batch(iob, nvme_pci_unmap_rq);
+}
+
/* We read the CQE phase first to check if the rest of the entry is valid */
static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
{
@@ -995,7 +1013,8 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
}
-static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
+ struct io_comp_batch *iob, u16 idx)
{
struct nvme_completion *cqe = &nvmeq->cqes[idx];
__u16 command_id = READ_ONCE(cqe->command_id);
@@ -1022,7 +1041,9 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
}
trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
- if (!nvme_try_complete_req(req, cqe->status, cqe->result))
+ if (!nvme_try_complete_req(req, cqe->status, cqe->result) &&
+ !blk_mq_add_to_batch(req, iob, nvme_req(req)->status,
+ nvme_pci_complete_batch))
nvme_pci_complete_rq(req);
}
@@ -1038,7 +1059,8 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
}
}
-static inline int nvme_process_cq(struct nvme_queue *nvmeq)
+static inline int nvme_poll_cq(struct nvme_queue *nvmeq,
+ struct io_comp_batch *iob)
{
int found = 0;
@@ -1049,7 +1071,7 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq)
* the cqe requires a full read memory barrier
*/
dma_rmb();
- nvme_handle_cqe(nvmeq, nvmeq->cq_head);
+ nvme_handle_cqe(nvmeq, iob, nvmeq->cq_head);
nvme_update_cq_head(nvmeq);
}
@@ -1061,9 +1083,13 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq)
static irqreturn_t nvme_irq(int irq, void *data)
{
struct nvme_queue *nvmeq = data;
+ DEFINE_IO_COMP_BATCH(iob);
- if (nvme_process_cq(nvmeq))
+ if (nvme_poll_cq(nvmeq, &iob)) {
+ if (!rq_list_empty(iob.req_list))
+ nvme_pci_complete_batch(&iob);
return IRQ_HANDLED;
+ }
return IRQ_NONE;
}
@@ -1087,11 +1113,11 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));
disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
- nvme_process_cq(nvmeq);
+ nvme_poll_cq(nvmeq, NULL);
enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
}
-static int nvme_poll(struct blk_mq_hw_ctx *hctx)
+static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct nvme_queue *nvmeq = hctx->driver_data;
bool found;
@@ -1100,7 +1126,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx)
return 0;
spin_lock(&nvmeq->cq_poll_lock);
- found = nvme_process_cq(nvmeq);
+ found = nvme_poll_cq(nvmeq, iob);
spin_unlock(&nvmeq->cq_poll_lock);
return found;
@@ -1273,7 +1299,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
* Did we miss an interrupt?
*/
if (test_bit(NVMEQ_POLLED, &nvmeq->flags))
- nvme_poll(req->mq_hctx);
+ nvme_poll(req->mq_hctx, NULL);
else
nvme_poll_irqdisable(nvmeq);
@@ -1330,7 +1356,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
iod->aborted = 1;
cmd.abort.opcode = nvme_admin_abort_cmd;
- cmd.abort.cid = req->tag;
+ cmd.abort.cid = nvme_cid(req);
cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
dev_warn(nvmeq->dev->ctrl.device,
@@ -1395,7 +1421,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
nvmeq->dev->online_queues--;
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
- blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
+ nvme_stop_admin_queue(&nvmeq->dev->ctrl);
if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags))
pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
return 0;
@@ -1433,7 +1459,7 @@ static void nvme_reap_pending_cqes(struct nvme_dev *dev)
for (i = dev->ctrl.queue_count - 1; i > 0; i--) {
spin_lock(&dev->queues[i].cq_poll_lock);
- nvme_process_cq(&dev->queues[i]);
+ nvme_poll_cq(&dev->queues[i], NULL);
spin_unlock(&dev->queues[i].cq_poll_lock);
}
}
@@ -1654,7 +1680,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
* user requests may be waiting on a stopped queue. Start the
* queue to flush these to completion.
*/
- blk_mq_unquiesce_queue(dev->ctrl.admin_q);
+ nvme_start_admin_queue(&dev->ctrl);
blk_cleanup_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset);
}
@@ -1688,7 +1714,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
return -ENODEV;
}
} else
- blk_mq_unquiesce_queue(dev->ctrl.admin_q);
+ nvme_start_admin_queue(&dev->ctrl);
return 0;
}
@@ -2623,7 +2649,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
if (shutdown) {
nvme_start_queues(&dev->ctrl);
if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
- blk_mq_unquiesce_queue(dev->ctrl.admin_q);
+ nvme_start_admin_queue(&dev->ctrl);
}
mutex_unlock(&dev->shutdown_lock);
}
@@ -3369,7 +3395,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
.driver_data = NVME_QUIRK_SINGLE_VECTOR |
NVME_QUIRK_128_BYTES_SQES |
- NVME_QUIRK_SHARED_TAGS },
+ NVME_QUIRK_SHARED_TAGS |
+ NVME_QUIRK_SKIP_CID_GEN },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ 0, }
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index a68704e39084..850f84d204d0 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -13,6 +13,7 @@
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-rdma.h>
+#include <linux/blk-integrity.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -656,8 +657,8 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
return;
- nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
+ nvme_rdma_destroy_queue_ib(queue);
mutex_destroy(&queue->queue_lock);
}
@@ -918,7 +919,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
else
ctrl->ctrl.max_integrity_segments = 0;
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+ nvme_start_admin_queue(&ctrl->ctrl);
error = nvme_init_ctrl_finish(&ctrl->ctrl);
if (error)
@@ -927,7 +928,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
return 0;
out_quiesce_queue:
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+ nvme_stop_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
out_stop_queue:
nvme_rdma_stop_queue(&ctrl->queues[0]);
@@ -1025,12 +1026,12 @@ out_free_io_queues:
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+ nvme_stop_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
nvme_cancel_admin_tagset(&ctrl->ctrl);
if (remove)
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+ nvme_start_admin_queue(&ctrl->ctrl);
nvme_rdma_destroy_admin_queue(ctrl, remove);
}
@@ -1095,11 +1096,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
return ret;
if (ctrl->ctrl.icdoff) {
+ ret = -EOPNOTSUPP;
dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
goto destroy_admin;
}
if (!(ctrl->ctrl.sgls & (1 << 2))) {
+ ret = -EOPNOTSUPP;
dev_err(ctrl->ctrl.device,
"Mandatory keyed sgls are not supported!\n");
goto destroy_admin;
@@ -1111,6 +1114,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
}
+ if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) {
+ dev_warn(ctrl->ctrl.device,
+ "ctrl sqsize %u > max queue size %u, clamping down\n",
+ ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE);
+ ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1;
+ }
+
if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
dev_warn(ctrl->ctrl.device,
"sqsize %u > ctrl maxcmd %u, clamping down\n",
@@ -1153,7 +1163,7 @@ destroy_io:
nvme_rdma_destroy_io_queues(ctrl, new);
}
destroy_admin:
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+ nvme_stop_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
nvme_cancel_admin_tagset(&ctrl->ctrl);
@@ -1193,7 +1203,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
nvme_rdma_teardown_io_queues(ctrl, false);
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_teardown_admin_queue(ctrl, false);
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+ nvme_start_admin_queue(&ctrl->ctrl);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
@@ -1815,14 +1825,10 @@ static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue)
for (i = 0; i < queue->queue_size; i++) {
ret = nvme_rdma_post_recv(queue, &queue->rsp_ring[i]);
if (ret)
- goto out_destroy_queue_ib;
+ return ret;
}
return 0;
-
-out_destroy_queue_ib:
- nvme_rdma_destroy_queue_ib(queue);
- return ret;
}
static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
@@ -1916,14 +1922,10 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
if (ret) {
dev_err(ctrl->ctrl.device,
"rdma_connect_locked failed (%d).\n", ret);
- goto out_destroy_queue_ib;
+ return ret;
}
return 0;
-
-out_destroy_queue_ib:
- nvme_rdma_destroy_queue_ib(queue);
- return ret;
}
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
@@ -1954,8 +1956,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
- nvme_rdma_destroy_queue_ib(queue);
- fallthrough;
case RDMA_CM_EVENT_ADDR_ERROR:
dev_dbg(queue->ctrl->ctrl.device,
"CM error event %d\n", ev->event);
@@ -2115,7 +2115,7 @@ unmap_qe:
return ret;
}
-static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
+static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct nvme_rdma_queue *queue = hctx->driver_data;
@@ -2241,7 +2241,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
cancel_delayed_work_sync(&ctrl->reconnect_work);
nvme_rdma_teardown_io_queues(ctrl, shutdown);
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+ nvme_stop_admin_queue(&ctrl->ctrl);
if (shutdown)
nvme_shutdown_ctrl(&ctrl->ctrl);
else
@@ -2395,7 +2395,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
goto out_uninit_ctrl;
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
- ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
+ nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);
mutex_lock(&nvme_rdma_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index e2ab12f3f51c..33bc83d8d992 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -274,6 +274,12 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
} while (ret > 0);
}
+static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+{
+ return !list_empty(&queue->send_list) ||
+ !llist_empty(&queue->req_list) || queue->more_requests;
+}
+
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
bool sync, bool last)
{
@@ -294,9 +300,10 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
nvme_tcp_send_all(queue);
queue->more_requests = false;
mutex_unlock(&queue->send_mutex);
- } else if (last) {
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}
+
+ if (last && nvme_tcp_queue_more(queue))
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -613,7 +620,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
data->ttag = pdu->ttag;
data->command_id = nvme_cid(rq);
- data->data_offset = cpu_to_le32(req->data_sent);
+ data->data_offset = pdu->r2t_offset;
data->data_length = cpu_to_le32(req->pdu_len);
return 0;
}
@@ -906,12 +913,6 @@ done:
read_unlock_bh(&sk->sk_callback_lock);
}
-static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
-{
- return !list_empty(&queue->send_list) ||
- !llist_empty(&queue->req_list) || queue->more_requests;
-}
-
static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
{
queue->request = NULL;
@@ -925,12 +926,14 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
+ int req_data_len = req->data_len;
while (true) {
struct page *page = nvme_tcp_req_cur_page(req);
size_t offset = nvme_tcp_req_cur_offset(req);
size_t len = nvme_tcp_req_cur_length(req);
bool last = nvme_tcp_pdu_last_send(req, len);
+ int req_data_sent = req->data_sent;
int ret, flags = MSG_DONTWAIT;
if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
@@ -952,7 +955,15 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
nvme_tcp_ddgst_update(queue->snd_hash, page,
offset, ret);
- /* fully successful last write*/
+ /*
+ * update the request iterator except for the last payload send
+ * in the request where we don't want to modify it as we may
+ * compete with the RX path completing the request.
+ */
+ if (req_data_sent + ret < req_data_len)
+ nvme_tcp_advance_req(req, ret);
+
+ /* fully successful last send in current PDU */
if (last && ret == len) {
if (queue->data_digest) {
nvme_tcp_ddgst_final(queue->snd_hash,
@@ -964,7 +975,6 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
}
return 1;
}
- nvme_tcp_advance_req(req, ret);
}
return -EAGAIN;
}
@@ -1040,10 +1050,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
+ size_t offset = req->offset;
int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = {
- .iov_base = &req->ddgst + req->offset,
+ .iov_base = (u8 *)&req->ddgst + req->offset,
.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
};
@@ -1056,7 +1067,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
if (unlikely(ret <= 0))
return ret;
- if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) {
+ if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
nvme_tcp_done_send_req(queue);
return 1;
}
@@ -1145,8 +1156,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
pending = true;
else if (unlikely(result < 0))
break;
- } else
- pending = !llist_empty(&queue->req_list);
+ }
result = nvme_tcp_try_recv(queue);
if (result > 0)
@@ -1908,7 +1918,7 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
if (error)
goto out_stop_queue;
- blk_mq_unquiesce_queue(ctrl->admin_q);
+ nvme_start_admin_queue(ctrl);
error = nvme_init_ctrl_finish(ctrl);
if (error)
@@ -1917,7 +1927,7 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
return 0;
out_quiesce_queue:
- blk_mq_quiesce_queue(ctrl->admin_q);
+ nvme_stop_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q);
out_stop_queue:
nvme_tcp_stop_queue(ctrl, 0);
@@ -1939,12 +1949,12 @@ out_free_queue:
static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
bool remove)
{
- blk_mq_quiesce_queue(ctrl->admin_q);
+ nvme_stop_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
nvme_cancel_admin_tagset(ctrl);
if (remove)
- blk_mq_unquiesce_queue(ctrl->admin_q);
+ nvme_start_admin_queue(ctrl);
nvme_tcp_destroy_admin_queue(ctrl, remove);
}
@@ -1953,7 +1963,7 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
{
if (ctrl->queue_count <= 1)
return;
- blk_mq_quiesce_queue(ctrl->admin_q);
+ nvme_stop_admin_queue(ctrl);
nvme_start_freeze(ctrl);
nvme_stop_queues(ctrl);
nvme_sync_io_queues(ctrl);
@@ -2048,7 +2058,7 @@ destroy_io:
nvme_tcp_destroy_io_queues(ctrl, new);
}
destroy_admin:
- blk_mq_quiesce_queue(ctrl->admin_q);
+ nvme_stop_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0);
nvme_cancel_admin_tagset(ctrl);
@@ -2091,7 +2101,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
/* unquiesce to fail fast pending requests */
nvme_start_queues(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, false);
- blk_mq_unquiesce_queue(ctrl->admin_q);
+ nvme_start_admin_queue(ctrl);
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */
@@ -2109,7 +2119,7 @@ static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
nvme_tcp_teardown_io_queues(ctrl, shutdown);
- blk_mq_quiesce_queue(ctrl->admin_q);
+ nvme_stop_admin_queue(ctrl);
if (shutdown)
nvme_shutdown_ctrl(ctrl);
else
@@ -2422,7 +2432,7 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
return 0;
}
-static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
+static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct nvme_tcp_queue *queue = hctx->driver_data;
struct sock *sk = queue->sock->sk;
@@ -2575,7 +2585,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
goto out_uninit_ctrl;
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
- ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
+ nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);
mutex_lock(&nvme_tcp_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index d95010481fce..bfc259e0d7b8 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -233,6 +233,8 @@ out_free:
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *c, enum nvme_zone_mgmt_action action)
{
+ memset(c, 0, sizeof(*c));
+
c->zms.opcode = nvme_cmd_zone_mgmt_send;
c->zms.nsid = cpu_to_le32(ns->head->ns_id);
c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));