summaryrefslogtreecommitdiff
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/core.c53
-rw-r--r--drivers/nvme/host/fabrics.c37
-rw-r--r--drivers/nvme/host/fabrics.h2
-rw-r--r--drivers/nvme/host/fc.c153
-rw-r--r--drivers/nvme/host/lightnvm.c137
-rw-r--r--drivers/nvme/host/multipath.c85
-rw-r--r--drivers/nvme/host/nvme.h36
-rw-r--r--drivers/nvme/host/pci.c107
-rw-r--r--drivers/nvme/host/rdma.c78
-rw-r--r--drivers/nvme/host/trace.h28
-rw-r--r--drivers/nvme/target/admin-cmd.c8
-rw-r--r--drivers/nvme/target/configfs.c47
-rw-r--r--drivers/nvme/target/core.c183
-rw-r--r--drivers/nvme/target/discovery.c6
-rw-r--r--drivers/nvme/target/fc.c136
-rw-r--r--drivers/nvme/target/fcloop.c1
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c12
-rw-r--r--drivers/nvme/target/io-cmd-file.c3
-rw-r--r--drivers/nvme/target/nvmet.h18
-rw-r--r--drivers/nvme/target/rdma.c68
20 files changed, 890 insertions, 308 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dd8ec1dd9219..2e65be8b1387 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -971,7 +971,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
uuid_copy(&ids->uuid, data + pos + sizeof(*cur));
break;
default:
- /* Skip unnkown types */
+ /* Skip unknown types */
len = cur->nidl;
break;
}
@@ -1132,7 +1132,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
return nvme_submit_user_cmd(ns->queue, &c,
(void __user *)(uintptr_t)io.addr, length,
- metadata, meta_len, io.slba, NULL, 0);
+ metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
}
static u32 nvme_known_admin_effects(u8 opcode)
@@ -2076,7 +2076,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct
nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
- strncpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
+ strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
return;
}
@@ -2729,11 +2729,19 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
return a->mode;
}
-const struct attribute_group nvme_ns_id_attr_group = {
+static const struct attribute_group nvme_ns_id_attr_group = {
.attrs = nvme_ns_id_attrs,
.is_visible = nvme_ns_id_attrs_are_visible,
};
+const struct attribute_group *nvme_ns_id_attr_groups[] = {
+ &nvme_ns_id_attr_group,
+#ifdef CONFIG_NVM
+ &nvme_nvm_attr_group,
+#endif
+ NULL,
+};
+
#define nvme_show_str_function(field) \
static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -2900,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
unsigned nsid, struct nvme_id_ns *id)
{
struct nvme_ns_head *head;
+ size_t size = sizeof(*head);
int ret = -ENOMEM;
- head = kzalloc(sizeof(*head), GFP_KERNEL);
+#ifdef CONFIG_NVME_MULTIPATH
+ size += num_possible_nodes() * sizeof(struct nvme_ns *);
+#endif
+
+ head = kzalloc(size, GFP_KERNEL);
if (!head)
goto out;
ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
@@ -3051,7 +3064,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns->queue = blk_mq_init_queue(ctrl->tagset);
if (IS_ERR(ns->queue))
goto out_free_ns;
+
blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
+ if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
+ blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
+
ns->queue->queuedata = ns;
ns->ctrl = ctrl;
@@ -3099,14 +3116,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_get_ctrl(ctrl);
- device_add_disk(ctrl->device, ns->disk);
- if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
- &nvme_ns_id_attr_group))
- pr_warn("%s: failed to create sysfs group for identification\n",
- ns->disk->disk_name);
- if (ns->ndev && nvme_nvm_register_sysfs(ns))
- pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
- ns->disk->disk_name);
+ device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
nvme_mpath_add_disk(ns, id);
nvme_fault_inject_init(ns);
@@ -3132,10 +3142,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
nvme_fault_inject_fini(ns);
if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
- sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
- &nvme_ns_id_attr_group);
- if (ns->ndev)
- nvme_nvm_unregister_sysfs(ns);
del_gendisk(ns->disk);
blk_cleanup_queue(ns->queue);
if (blk_get_integrity(ns->disk))
@@ -3143,8 +3149,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
}
mutex_lock(&ns->ctrl->subsys->lock);
- nvme_mpath_clear_current_path(ns);
list_del_rcu(&ns->siblings);
+ nvme_mpath_clear_current_path(ns);
mutex_unlock(&ns->ctrl->subsys->lock);
down_write(&ns->ctrl->namespaces_rwsem);
@@ -3411,16 +3417,21 @@ static void nvme_fw_act_work(struct work_struct *work)
static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
{
- switch ((result & 0xff00) >> 8) {
+ u32 aer_notice_type = (result & 0xff00) >> 8;
+
+ switch (aer_notice_type) {
case NVME_AER_NOTICE_NS_CHANGED:
+ trace_nvme_async_event(ctrl, aer_notice_type);
set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
+ trace_nvme_async_event(ctrl, aer_notice_type);
queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
+ trace_nvme_async_event(ctrl, aer_notice_type);
if (!ctrl->ana_log_buf)
break;
queue_work(nvme_wq, &ctrl->ana_work);
@@ -3435,11 +3446,12 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
volatile union nvme_result *res)
{
u32 result = le32_to_cpu(res->u32);
+ u32 aer_type = result & 0x07;
if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
return;
- switch (result & 0x7) {
+ switch (aer_type) {
case NVME_AER_NOTICE:
nvme_handle_aen_notice(ctrl, result);
break;
@@ -3447,6 +3459,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
case NVME_AER_SMART:
case NVME_AER_CSS:
case NVME_AER_VS:
+ trace_nvme_async_event(ctrl, aer_type);
ctrl->aen_result = result;
break;
default:
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 206d63cb1afc..bd0969db6225 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -552,8 +552,11 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
ctrl->state != NVME_CTRL_DEAD &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
- nvme_req(rq)->status = NVME_SC_ABORT_REQ;
- return BLK_STS_IOERR;
+
+ nvme_req(rq)->status = NVME_SC_HOST_PATH_ERROR;
+ blk_mq_start_request(rq);
+ nvme_complete_rq(rq);
+ return BLK_STS_OK;
}
EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
@@ -865,6 +868,36 @@ static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
return 0;
}
+bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
+ struct nvmf_ctrl_options *opts)
+{
+ if (!nvmf_ctlr_matches_baseopts(ctrl, opts) ||
+ strcmp(opts->traddr, ctrl->opts->traddr) ||
+ strcmp(opts->trsvcid, ctrl->opts->trsvcid))
+ return false;
+
+ /*
+ * Checking the local address is rough. In most cases, none is specified
+ * and the host port is selected by the stack.
+ *
+ * Assume no match if:
+ * - local address is specified and address is not the same
+ * - local address is not specified but remote is, or vice versa
+ * (admin using specific host_traddr when it matters).
+ */
+ if ((opts->mask & NVMF_OPT_HOST_TRADDR) &&
+ (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) {
+ if (strcmp(opts->host_traddr, ctrl->opts->host_traddr))
+ return false;
+ } else if ((opts->mask & NVMF_OPT_HOST_TRADDR) ||
+ (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) {
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(nvmf_ip_options_match);
+
static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts,
unsigned int allowed_opts)
{
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index aa2fdb2a2e8f..6ea6275f332a 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -166,6 +166,8 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *rq);
bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live);
+bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
+ struct nvmf_ctrl_options *opts);
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 611e70cae754..e52b9d3c0bd6 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -20,6 +20,7 @@
#include <uapi/scsi/fc/fc_fs.h>
#include <uapi/scsi/fc/fc_els.h>
#include <linux/delay.h>
+#include <linux/overflow.h>
#include "nvme.h"
#include "fabrics.h"
@@ -104,6 +105,12 @@ struct nvme_fc_fcp_op {
struct nvme_fc_ersp_iu rsp_iu;
};
+struct nvme_fcp_op_w_sgl {
+ struct nvme_fc_fcp_op op;
+ struct scatterlist sgl[SG_CHUNK_SIZE];
+ uint8_t priv[0];
+};
+
struct nvme_fc_lport {
struct nvme_fc_local_port localport;
@@ -122,6 +129,7 @@ struct nvme_fc_rport {
struct list_head endp_list; /* for lport->endp_list */
struct list_head ctrl_list;
struct list_head ls_req_list;
+ struct list_head disc_list;
struct device *dev; /* physical device for dma */
struct nvme_fc_lport *lport;
spinlock_t lock;
@@ -210,7 +218,6 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
* These items are short-term. They will eventually be moved into
* a generic FC class. See comments in module init.
*/
-static struct class *fc_class;
static struct device *fc_udev_device;
@@ -317,7 +324,7 @@ out_done:
* @template: LLDD entrypoints and operational parameters for the port
* @dev: physical hardware device node port corresponds to. Will be
* used for DMA mappings
- * @lport_p: pointer to a local port pointer. Upon success, the routine
+ * @portptr: pointer to a local port pointer. Upon success, the routine
* will allocate a nvme_fc_local_port structure and place its
* address in the local port pointer. Upon failure, local port
* pointer will be set to 0.
@@ -425,8 +432,7 @@ EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
* nvme_fc_unregister_localport - transport entry point called by an
* LLDD to deregister/remove a previously
* registered a NVME host FC port.
- * @localport: pointer to the (registered) local port that is to be
- * deregistered.
+ * @portptr: pointer to the (registered) local port that is to be deregistered.
*
* Returns:
* a completion status. Must be 0 upon success; a negative errno
@@ -507,6 +513,7 @@ nvme_fc_free_rport(struct kref *ref)
list_del(&rport->endp_list);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
+ WARN_ON(!list_empty(&rport->disc_list));
ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
kfree(rport);
@@ -631,7 +638,7 @@ __nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
* @localport: pointer to the (registered) local port that the remote
* subsystem port is connected to.
* @pinfo: pointer to information about the port to be registered
- * @rport_p: pointer to a remote port pointer. Upon success, the routine
+ * @portptr: pointer to a remote port pointer. Upon success, the routine
* will allocate a nvme_fc_remote_port structure and place its
* address in the remote port pointer. Upon failure, remote port
* pointer will be set to 0.
@@ -694,6 +701,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
INIT_LIST_HEAD(&newrec->endp_list);
INIT_LIST_HEAD(&newrec->ctrl_list);
INIT_LIST_HEAD(&newrec->ls_req_list);
+ INIT_LIST_HEAD(&newrec->disc_list);
kref_init(&newrec->ref);
atomic_set(&newrec->act_ctrl_cnt, 0);
spin_lock_init(&newrec->lock);
@@ -807,8 +815,8 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
* nvme_fc_unregister_remoteport - transport entry point called by an
* LLDD to deregister/remove a previously
* registered a NVME subsystem FC port.
- * @remoteport: pointer to the (registered) remote port that is to be
- * deregistered.
+ * @portptr: pointer to the (registered) remote port that is to be
+ * deregistered.
*
* Returns:
* a completion status. Must be 0 upon success; a negative errno
@@ -1385,7 +1393,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
__nvme_fc_finish_ls_req(lsop);
- /* fc-nvme iniator doesn't care about success or failure of cmd */
+ /* fc-nvme initiator doesn't care about success or failure of cmd */
kfree(lsop);
}
@@ -1685,6 +1693,8 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op,
struct request *rq, u32 rqno)
{
+ struct nvme_fcp_op_w_sgl *op_w_sgl =
+ container_of(op, typeof(*op_w_sgl), op);
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
int ret = 0;
@@ -1694,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
op->fcp_req.rspaddr = &op->rsp_iu;
op->fcp_req.rsplen = sizeof(op->rsp_iu);
op->fcp_req.done = nvme_fc_fcpio_done;
- op->fcp_req.first_sgl = (struct scatterlist *)&op[1];
op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
op->ctrl = ctrl;
op->queue = queue;
@@ -1733,12 +1742,17 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
unsigned int hctx_idx, unsigned int numa_node)
{
struct nvme_fc_ctrl *ctrl = set->driver_data;
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
+ struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq);
int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
+ int res;
nvme_req(rq)->ctrl = &ctrl->ctrl;
- return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
+ res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
+ if (res)
+ return res;
+ op->op.fcp_req.first_sgl = &op->sgl[0];
+ return res;
}
static int
@@ -1768,7 +1782,6 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
}
aen_op->flags = FCOP_FLAGS_AEN;
- aen_op->fcp_req.first_sgl = NULL; /* no sg list */
aen_op->fcp_req.private = private;
memset(sqe, 0, sizeof(*sqe));
@@ -2422,10 +2435,9 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
ctrl->tag_set.reserved_tags = 1; /* fabric connect */
ctrl->tag_set.numa_node = NUMA_NO_NODE;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
- ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
- (SG_CHUNK_SIZE *
- sizeof(struct scatterlist)) +
- ctrl->lport->ops->fcprqst_priv_sz;
+ ctrl->tag_set.cmd_size =
+ struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
+ ctrl->lport->ops->fcprqst_priv_sz);
ctrl->tag_set.driver_data = ctrl;
ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
@@ -3027,10 +3039,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
- ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
- (SG_CHUNK_SIZE *
- sizeof(struct scatterlist)) +
- ctrl->lport->ops->fcprqst_priv_sz;
+ ctrl->admin_tag_set.cmd_size =
+ struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
+ ctrl->lport->ops->fcprqst_priv_sz);
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
@@ -3159,7 +3170,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
substring_t wwn = { name, &name[sizeof(name)-1] };
int nnoffset, pnoffset;
- /* validate it string one of the 2 allowed formats */
+ /* validate if string is one of the 2 allowed formats */
if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
@@ -3254,6 +3265,90 @@ static struct nvmf_transport_ops nvme_fc_transport = {
.create_ctrl = nvme_fc_create_ctrl,
};
+/* Arbitrary successive failures max. With lots of subsystems could be high */
+#define DISCOVERY_MAX_FAIL 20
+
+static ssize_t nvme_fc_nvme_discovery_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ unsigned long flags;
+ LIST_HEAD(local_disc_list);
+ struct nvme_fc_lport *lport;
+ struct nvme_fc_rport *rport;
+ int failcnt = 0;
+
+ spin_lock_irqsave(&nvme_fc_lock, flags);
+restart:
+ list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
+ if (!nvme_fc_lport_get(lport))
+ continue;
+ if (!nvme_fc_rport_get(rport)) {
+ /*
+ * This is a temporary condition. Upon restart
+ * this rport will be gone from the list.
+ *
+ * Revert the lport put and retry. Anything
+ * added to the list already will be skipped (as
+ * they are no longer list_empty). Loops should
+ * resume at rports that were not yet seen.
+ */
+ nvme_fc_lport_put(lport);
+
+ if (failcnt++ < DISCOVERY_MAX_FAIL)
+ goto restart;
+
+ pr_err("nvme_discovery: too many reference "
+ "failures\n");
+ goto process_local_list;
+ }
+ if (list_empty(&rport->disc_list))
+ list_add_tail(&rport->disc_list,
+ &local_disc_list);
+ }
+ }
+
+process_local_list:
+ while (!list_empty(&local_disc_list)) {
+ rport = list_first_entry(&local_disc_list,
+ struct nvme_fc_rport, disc_list);
+ list_del_init(&rport->disc_list);
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+ lport = rport->lport;
+ /* signal discovery. Won't hurt if it repeats */
+ nvme_fc_signal_discovery_scan(lport, rport);
+ nvme_fc_rport_put(rport);
+ nvme_fc_lport_put(lport);
+
+ spin_lock_irqsave(&nvme_fc_lock, flags);
+ }
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+ return count;
+}
+static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
+
+static struct attribute *nvme_fc_attrs[] = {
+ &dev_attr_nvme_discovery.attr,
+ NULL
+};
+
+static struct attribute_group nvme_fc_attr_group = {
+ .attrs = nvme_fc_attrs,
+};
+
+static const struct attribute_group *nvme_fc_attr_groups[] = {
+ &nvme_fc_attr_group,
+ NULL
+};
+
+static struct class fc_class = {
+ .name = "fc",
+ .dev_groups = nvme_fc_attr_groups,
+ .owner = THIS_MODULE,
+};
+
static int __init nvme_fc_init_module(void)
{
int ret;
@@ -3272,16 +3367,16 @@ static int __init nvme_fc_init_module(void)
* put in place, this code will move to a more generic
* location for the class.
*/
- fc_class = class_create(THIS_MODULE, "fc");
- if (IS_ERR(fc_class)) {
+ ret = class_register(&fc_class);
+ if (ret) {
pr_err("couldn't register class fc\n");
- return PTR_ERR(fc_class);
+ return ret;
}
/*
* Create a device for the FC-centric udev events
*/
- fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL,
+ fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL,
"fc_udev_device");
if (IS_ERR(fc_udev_device)) {
pr_err("couldn't create fc_udev device!\n");
@@ -3296,9 +3391,9 @@ static int __init nvme_fc_init_module(void)
return 0;
out_destroy_device:
- device_destroy(fc_class, MKDEV(0, 0));
+ device_destroy(&fc_class, MKDEV(0, 0));
out_destroy_class:
- class_destroy(fc_class);
+ class_unregister(&fc_class);
return ret;
}
@@ -3313,8 +3408,8 @@ static void __exit nvme_fc_exit_module(void)
ida_destroy(&nvme_fc_local_port_cnt);
ida_destroy(&nvme_fc_ctrl_cnt);
- device_destroy(fc_class, MKDEV(0, 0));
- class_destroy(fc_class);
+ device_destroy(&fc_class, MKDEV(0, 0));
+ class_unregister(&fc_class);
}
module_init(nvme_fc_init_module);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 6fe5923c95d4..a4f3b263cd6c 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -567,13 +567,13 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
* Expect the lba in device format
*/
static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
- struct nvm_chk_meta *meta,
- sector_t slba, int nchks)
+ sector_t slba, int nchks,
+ struct nvm_chk_meta *meta)
{
struct nvm_geo *geo = &ndev->geo;
struct nvme_ns *ns = ndev->q->queuedata;
struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta;
+ struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off;
struct ppa_addr ppa;
size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
size_t log_pos, offset, len;
@@ -585,6 +585,10 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
*/
max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024);
+ dev_meta = kmalloc(max_len, GFP_KERNEL);
+ if (!dev_meta)
+ return -ENOMEM;
+
/* Normalize lba address space to obtain log offset */
ppa.ppa = slba;
ppa = dev_to_generic_addr(ndev, ppa);
@@ -598,6 +602,9 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
while (left) {
len = min_t(unsigned int, left, max_len);
+ memset(dev_meta, 0, max_len);
+ dev_meta_off = dev_meta;
+
ret = nvme_get_log(ctrl, ns->head->ns_id,
NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len,
offset);
@@ -607,21 +614,23 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
}
for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
- meta->state = dev_meta->state;
- meta->type = dev_meta->type;
- meta->wi = dev_meta->wi;
- meta->slba = le64_to_cpu(dev_meta->slba);
- meta->cnlb = le64_to_cpu(dev_meta->cnlb);
- meta->wp = le64_to_cpu(dev_meta->wp);
+ meta->state = dev_meta_off->state;
+ meta->type = dev_meta_off->type;
+ meta->wi = dev_meta_off->wi;
+ meta->slba = le64_to_cpu(dev_meta_off->slba);
+ meta->cnlb = le64_to_cpu(dev_meta_off->cnlb);
+ meta->wp = le64_to_cpu(dev_meta_off->wp);
meta++;
- dev_meta++;
+ dev_meta_off++;
}
offset += len;
left -= len;
}
+ kfree(dev_meta);
+
return ret;
}
@@ -968,6 +977,9 @@ void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
struct nvm_dev *ndev = ns->ndev;
struct nvm_geo *geo = &ndev->geo;
+ if (geo->version == NVM_OCSSD_SPEC_12)
+ return;
+
geo->csecs = 1 << ns->lba_shift;
geo->sos = ns->ms;
}
@@ -1190,10 +1202,29 @@ static NVM_DEV_ATTR_12_RO(multiplane_modes);
static NVM_DEV_ATTR_12_RO(media_capabilities);
static NVM_DEV_ATTR_12_RO(max_phys_secs);
-static struct attribute *nvm_dev_attrs_12[] = {
+/* 2.0 values */
+static NVM_DEV_ATTR_20_RO(groups);
+static NVM_DEV_ATTR_20_RO(punits);
+static NVM_DEV_ATTR_20_RO(chunks);
+static NVM_DEV_ATTR_20_RO(clba);
+static NVM_DEV_ATTR_20_RO(ws_min);
+static NVM_DEV_ATTR_20_RO(ws_opt);
+static NVM_DEV_ATTR_20_RO(maxoc);
+static NVM_DEV_ATTR_20_RO(maxocpu);
+static NVM_DEV_ATTR_20_RO(mw_cunits);
+static NVM_DEV_ATTR_20_RO(write_typ);
+static NVM_DEV_ATTR_20_RO(write_max);
+static NVM_DEV_ATTR_20_RO(reset_typ);
+static NVM_DEV_ATTR_20_RO(reset_max);
+
+static struct attribute *nvm_dev_attrs[] = {
+ /* version agnostic attrs */
&dev_attr_version.attr,
&dev_attr_capabilities.attr,
+ &dev_attr_read_typ.attr,
+ &dev_attr_read_max.attr,
+ /* 1.2 attrs */
&dev_attr_vendor_opcode.attr,
&dev_attr_device_mode.attr,
&dev_attr_media_manager.attr,
@@ -1208,8 +1239,6 @@ static struct attribute *nvm_dev_attrs_12[] = {
&dev_attr_page_size.attr,
&dev_attr_hw_sector_size.attr,
&dev_attr_oob_sector_size.attr,
- &dev_attr_read_typ.attr,
- &dev_attr_read_max.attr,
&dev_attr_prog_typ.attr,
&dev_attr_prog_max.attr,
&dev_attr_erase_typ.attr,
@@ -1218,33 +1247,7 @@ static struct attribute *nvm_dev_attrs_12[] = {
&dev_attr_media_capabilities.attr,
&dev_attr_max_phys_secs.attr,
- NULL,
-};
-
-static const struct attribute_group nvm_dev_attr_group_12 = {
- .name = "lightnvm",
- .attrs = nvm_dev_attrs_12,
-};
-
-/* 2.0 values */
-static NVM_DEV_ATTR_20_RO(groups);
-static NVM_DEV_ATTR_20_RO(punits);
-static NVM_DEV_ATTR_20_RO(chunks);
-static NVM_DEV_ATTR_20_RO(clba);
-static NVM_DEV_ATTR_20_RO(ws_min);
-static NVM_DEV_ATTR_20_RO(ws_opt);
-static NVM_DEV_ATTR_20_RO(maxoc);
-static NVM_DEV_ATTR_20_RO(maxocpu);
-static NVM_DEV_ATTR_20_RO(mw_cunits);
-static NVM_DEV_ATTR_20_RO(write_typ);
-static NVM_DEV_ATTR_20_RO(write_max);
-static NVM_DEV_ATTR_20_RO(reset_typ);
-static NVM_DEV_ATTR_20_RO(reset_max);
-
-static struct attribute *nvm_dev_attrs_20[] = {
- &dev_attr_version.attr,
- &dev_attr_capabilities.attr,
-
+ /* 2.0 attrs */
&dev_attr_groups.attr,
&dev_attr_punits.attr,
&dev_attr_chunks.attr,
@@ -1255,8 +1258,6 @@ static struct attribute *nvm_dev_attrs_20[] = {
&dev_attr_maxocpu.attr,
&dev_attr_mw_cunits.attr,
- &dev_attr_read_typ.attr,
- &dev_attr_read_max.attr,
&dev_attr_write_typ.attr,
&dev_attr_write_max.attr,
&dev_attr_reset_typ.attr,
@@ -1265,44 +1266,38 @@ static struct attribute *nvm_dev_attrs_20[] = {
NULL,
};
-static const struct attribute_group nvm_dev_attr_group_20 = {
- .name = "lightnvm",
- .attrs = nvm_dev_attrs_20,
-};
-
-int nvme_nvm_register_sysfs(struct nvme_ns *ns)
+static umode_t nvm_dev_attrs_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct gendisk *disk = dev_to_disk(dev);
+ struct nvme_ns *ns = disk->private_data;
struct nvm_dev *ndev = ns->ndev;
- struct nvm_geo *geo = &ndev->geo;
+ struct device_attribute *dev_attr =
+ container_of(attr, typeof(*dev_attr), attr);
if (!ndev)
- return -EINVAL;
-
- switch (geo->major_ver_id) {
- case 1:
- return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
- &nvm_dev_attr_group_12);
- case 2:
- return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
- &nvm_dev_attr_group_20);
- }
+ return 0;
- return -EINVAL;
-}
+ if (dev_attr->show == nvm_dev_attr_show)
+ return attr->mode;
-void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
-{
- struct nvm_dev *ndev = ns->ndev;
- struct nvm_geo *geo = &ndev->geo;
-
- switch (geo->major_ver_id) {
+ switch (ndev->geo.major_ver_id) {
case 1:
- sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
- &nvm_dev_attr_group_12);
+ if (dev_attr->show == nvm_dev_attr_show_12)
+ return attr->mode;
break;
case 2:
- sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
- &nvm_dev_attr_group_20);
+ if (dev_attr->show == nvm_dev_attr_show_20)
+ return attr->mode;
break;
}
+
+ return 0;
}
+
+const struct attribute_group nvme_nvm_attr_group = {
+ .name = "lightnvm",
+ .attrs = nvm_dev_attrs,
+ .is_visible = nvm_dev_attrs_visible,
+};
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 5a9562881d4e..5e3cc8c59a39 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -77,6 +77,13 @@ void nvme_failover_req(struct request *req)
queue_work(nvme_wq, &ns->ctrl->ana_work);
}
break;
+ case NVME_SC_HOST_PATH_ERROR:
+ /*
+ * Temporary transport disruption in talking to the controller.
+ * Try to send on a new path.
+ */
+ nvme_mpath_clear_current_path(ns);
+ break;
default:
/*
* Reset the controller for any non-ANA error as we don't know
@@ -110,29 +117,55 @@ static const char *nvme_ana_state_names[] = {
[NVME_ANA_CHANGE] = "change",
};
-static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
+void nvme_mpath_clear_current_path(struct nvme_ns *ns)
+{
+ struct nvme_ns_head *head = ns->head;
+ int node;
+
+ if (!head)
+ return;
+
+ for_each_node(node) {
+ if (ns == rcu_access_pointer(head->current_path[node]))
+ rcu_assign_pointer(head->current_path[node], NULL);
+ }
+}
+
+static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
{
- struct nvme_ns *ns, *fallback = NULL;
+ int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
+ struct nvme_ns *found = NULL, *fallback = NULL, *ns;
list_for_each_entry_rcu(ns, &head->list, siblings) {
if (ns->ctrl->state != NVME_CTRL_LIVE ||
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
continue;
+
+ distance = node_distance(node, dev_to_node(ns->ctrl->dev));
+
switch (ns->ana_state) {
case NVME_ANA_OPTIMIZED:
- rcu_assign_pointer(head->current_path, ns);
- return ns;
+ if (distance < found_distance) {
+ found_distance = distance;
+ found = ns;
+ }
+ break;
case NVME_ANA_NONOPTIMIZED:
- fallback = ns;
+ if (distance < fallback_distance) {
+ fallback_distance = distance;
+ fallback = ns;
+ }
break;
default:
break;
}
}
- if (fallback)
- rcu_assign_pointer(head->current_path, fallback);
- return fallback;
+ if (!found)
+ found = fallback;
+ if (found)
+ rcu_assign_pointer(head->current_path[node], found);
+ return found;
}
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
@@ -143,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
- struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);
+ int node = numa_node_id();
+ struct nvme_ns *ns;
+ ns = srcu_dereference(head->current_path[node], &head->srcu);
if (unlikely(!ns || !nvme_path_is_optimized(ns)))
- ns = __nvme_find_path(head);
+ ns = __nvme_find_path(head, node);
return ns;
}
@@ -193,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
int srcu_idx;
srcu_idx = srcu_read_lock(&head->srcu);
- ns = srcu_dereference(head->current_path, &head->srcu);
+ ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu);
if (likely(ns && nvme_path_is_optimized(ns)))
found = ns->queue->poll_fn(q, qc);
srcu_read_unlock(&head->srcu, srcu_idx);
@@ -282,12 +317,17 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
if (!head->disk)
return;
- if (!(head->disk->flags & GENHD_FL_UP)) {
- device_add_disk(&head->subsys->dev, head->disk);
- if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
- &nvme_ns_id_attr_group))
- dev_warn(&head->subsys->dev,
- "failed to create id group.\n");
+ if (!(head->disk->flags & GENHD_FL_UP))
+ device_add_disk(&head->subsys->dev, head->disk,
+ nvme_ns_id_attr_groups);
+
+ if (nvme_path_is_optimized(ns)) {
+ int node, srcu_idx;
+
+ srcu_idx = srcu_read_lock(&head->srcu);
+ for_each_node(node)
+ __nvme_find_path(head, node);
+ srcu_read_unlock(&head->srcu, srcu_idx);
}
kblockd_schedule_work(&ns->head->requeue_work);
@@ -494,11 +534,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
- if (head->disk->flags & GENHD_FL_UP) {
- sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
- &nvme_ns_id_attr_group);
+ if (head->disk->flags & GENHD_FL_UP)
del_gendisk(head->disk);
- }
blk_set_queue_dying(head->disk->queue);
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
@@ -537,8 +574,10 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
INIT_WORK(&ctrl->ana_work, nvme_ana_work);
ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
- if (!ctrl->ana_log_buf)
+ if (!ctrl->ana_log_buf) {
+ error = -ENOMEM;
goto out;
+ }
error = nvme_read_ana_log(ctrl, true);
if (error)
@@ -547,7 +586,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
out_free_ana_log_buf:
kfree(ctrl->ana_log_buf);
out:
- return -ENOMEM;
+ return error;
}
void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bb4a2003c097..cee79cb388af 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -277,14 +277,6 @@ struct nvme_ns_ids {
* only ever has a single entry for private namespaces.
*/
struct nvme_ns_head {
-#ifdef CONFIG_NVME_MULTIPATH
- struct gendisk *disk;
- struct nvme_ns __rcu *current_path;
- struct bio_list requeue_list;
- spinlock_t requeue_lock;
- struct work_struct requeue_work;
- struct mutex lock;
-#endif
struct list_head list;
struct srcu_struct srcu;
struct nvme_subsystem *subsys;
@@ -293,6 +285,14 @@ struct nvme_ns_head {
struct list_head entry;
struct kref ref;
int instance;
+#ifdef CONFIG_NVME_MULTIPATH
+ struct gendisk *disk;
+ struct bio_list requeue_list;
+ spinlock_t requeue_lock;
+ struct work_struct requeue_work;
+ struct mutex lock;
+ struct nvme_ns __rcu *current_path[];
+#endif
};
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
@@ -343,6 +343,7 @@ struct nvme_ctrl_ops {
unsigned int flags;
#define NVME_F_FABRICS (1 << 0)
#define NVME_F_METADATA_SUPPORTED (1 << 1)
+#define NVME_F_PCI_P2PDMA (1 << 2)
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
@@ -459,7 +460,7 @@ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
void *log, size_t size, u64 offset);
-extern const struct attribute_group nvme_ns_id_attr_group;
+extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH
@@ -474,14 +475,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
-
-static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
-{
- struct nvme_ns_head *head = ns->head;
-
- if (head && ns == rcu_access_pointer(head->current_path))
- rcu_assign_pointer(head->current_path, NULL);
-}
+void nvme_mpath_clear_current_path(struct nvme_ns *ns);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
@@ -551,8 +545,7 @@ static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
void nvme_nvm_update_nvm_info(struct nvme_ns *ns);
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
-int nvme_nvm_register_sysfs(struct nvme_ns *ns);
-void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
+extern const struct attribute_group nvme_nvm_attr_group;
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
#else
static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {};
@@ -563,11 +556,6 @@ static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
}
static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
-static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns)
-{
- return 0;
-}
-static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {};
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
unsigned long arg)
{
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d668682f91df..f30031945ee4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -30,6 +30,7 @@
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sed-opal.h>
+#include <linux/pci-p2pdma.h>
#include "nvme.h"
@@ -99,9 +100,8 @@ struct nvme_dev {
struct work_struct remove_work;
struct mutex shutdown_lock;
bool subsystem;
- void __iomem *cmb;
- pci_bus_addr_t cmb_bus_addr;
u64 cmb_size;
+ bool cmb_use_sqes;
u32 cmbsz;
u32 cmbloc;
struct nvme_ctrl ctrl;
@@ -158,7 +158,7 @@ struct nvme_queue {
struct nvme_dev *dev;
spinlock_t sq_lock;
struct nvme_command *sq_cmds;
- struct nvme_command __iomem *sq_cmds_io;
+ bool sq_cmds_is_io;
spinlock_t cq_lock ____cacheline_aligned_in_smp;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
@@ -447,11 +447,8 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
spin_lock(&nvmeq->sq_lock);
- if (nvmeq->sq_cmds_io)
- memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd,
- sizeof(*cmd));
- else
- memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+
+ memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
@@ -748,8 +745,13 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out;
ret = BLK_STS_RESOURCE;
- nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
- DMA_ATTR_NO_WARN);
+
+ if (is_pci_p2pdma_page(sg_page(iod->sg)))
+ nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents,
+ dma_dir);
+ else
+ nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
+ dma_dir, DMA_ATTR_NO_WARN);
if (!nr_mapped)
goto out;
@@ -772,10 +774,10 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
goto out_unmap;
- }
- if (blk_integrity_rq(req))
cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
+ }
+
return BLK_STS_OK;
out_unmap:
@@ -791,7 +793,10 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
DMA_TO_DEVICE : DMA_FROM_DEVICE;
if (iod->nents) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+ /* P2PDMA requests do not need to be unmapped */
+ if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+
if (blk_integrity_rq(req))
dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
}
@@ -1232,9 +1237,18 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
{
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
- if (nvmeq->sq_cmds)
- dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
- nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+
+ if (nvmeq->sq_cmds) {
+ if (nvmeq->sq_cmds_is_io)
+ pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev),
+ nvmeq->sq_cmds,
+ SQ_SIZE(nvmeq->q_depth));
+ else
+ dma_free_coherent(nvmeq->q_dmadev,
+ SQ_SIZE(nvmeq->q_depth),
+ nvmeq->sq_cmds,
+ nvmeq->sq_dma_addr);
+ }
}
static void nvme_free_queues(struct nvme_dev *dev, int lowest)
@@ -1249,7 +1263,7 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
/**
* nvme_suspend_queue - put queue into suspended state
- * @nvmeq - queue to suspend
+ * @nvmeq: queue to suspend
*/
static int nvme_suspend_queue(struct nvme_queue *nvmeq)
{
@@ -1323,12 +1337,21 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
- /* CMB SQEs will be mapped before creation */
- if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
- return 0;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+ nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
+ nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
+ nvmeq->sq_cmds);
+ nvmeq->sq_cmds_is_io = true;
+ }
+
+ if (!nvmeq->sq_cmds) {
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
+ nvmeq->sq_cmds_is_io = false;
+ }
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
return -ENOMEM;
return 0;
@@ -1405,13 +1428,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
int result;
s16 vector;
- if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
- unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
- dev->ctrl.page_size);
- nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
- nvmeq->sq_cmds_io = dev->cmb + offset;
- }
-
/*
* A queue's vector matches the queue identifier unless the controller
* has only one vector available.
@@ -1652,9 +1668,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
return;
dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
- if (!use_cmb_sqes)
- return;
-
size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev);
offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc);
bar = NVME_CMB_BIR(dev->cmbloc);
@@ -1671,11 +1684,18 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if (size > bar_size - offset)
size = bar_size - offset;
- dev->cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
- if (!dev->cmb)
+ if (pci_p2pdma_add_resource(pdev, bar, size, offset)) {
+ dev_warn(dev->ctrl.device,
+ "failed to register the CMB\n");
return;
- dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
+ }
+
dev->cmb_size = size;
+ dev->cmb_use_sqes = use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS);
+
+ if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
+ (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
+ pci_p2pmem_publish(pdev, true);
if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL))
@@ -1685,12 +1705,10 @@ static void nvme_map_cmb(struct nvme_dev *dev)
static inline void nvme_release_cmb(struct nvme_dev *dev)
{
- if (dev->cmb) {
- iounmap(dev->cmb);
- dev->cmb = NULL;
+ if (dev->cmb_size) {
sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL);
- dev->cmbsz = 0;
+ dev->cmb_size = 0;
}
}
@@ -1889,13 +1907,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (nr_io_queues == 0)
return 0;
- if (dev->cmb && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+ if (dev->cmb_use_sqes) {
result = nvme_cmb_qdepth(dev, nr_io_queues,
sizeof(struct nvme_command));
if (result > 0)
dev->q_depth = result;
else
- nvme_release_cmb(dev);
+ dev->cmb_use_sqes = false;
}
do {
@@ -2390,7 +2408,8 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
- .flags = NVME_F_METADATA_SUPPORTED,
+ .flags = NVME_F_METADATA_SUPPORTED |
+ NVME_F_PCI_P2PDMA,
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
@@ -2564,13 +2583,12 @@ static void nvme_remove(struct pci_dev *pdev)
struct nvme_dev *dev = pci_get_drvdata(pdev);
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
-
- cancel_work_sync(&dev->ctrl.reset_work);
pci_set_drvdata(pdev, NULL);
if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
nvme_dev_disable(dev, true);
+ nvme_dev_remove_admin(dev);
}
flush_work(&dev->ctrl.reset_work);
@@ -2649,7 +2667,6 @@ static void nvme_error_resume(struct pci_dev *pdev)
struct nvme_dev *dev = pci_get_drvdata(pdev);
flush_work(&dev->ctrl.reset_work);
- pci_cleanup_aer_uncorrect_error_status(pdev);
}
static const struct pci_error_handlers nvme_err_handler = {
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index dc042017c293..d181cafedc58 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -233,8 +233,15 @@ static void nvme_rdma_qp_event(struct ib_event *event, void *context)
static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue)
{
- wait_for_completion_interruptible_timeout(&queue->cm_done,
+ int ret;
+
+ ret = wait_for_completion_interruptible_timeout(&queue->cm_done,
msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -ETIMEDOUT;
+ WARN_ON_ONCE(queue->cm_error > 0);
return queue->cm_error;
}
@@ -1849,54 +1856,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.stop_ctrl = nvme_rdma_stop_ctrl,
};
-static inline bool
-__nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl,
- struct nvmf_ctrl_options *opts)
-{
- char *stdport = __stringify(NVME_RDMA_IP_PORT);
-
-
- if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) ||
- strcmp(opts->traddr, ctrl->ctrl.opts->traddr))
- return false;
-
- if (opts->mask & NVMF_OPT_TRSVCID &&
- ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
- if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid))
- return false;
- } else if (opts->mask & NVMF_OPT_TRSVCID) {
- if (strcmp(opts->trsvcid, stdport))
- return false;
- } else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
- if (strcmp(stdport, ctrl->ctrl.opts->trsvcid))
- return false;
- }
- /* else, it's a match as both have stdport. Fall to next checks */
-
- /*
- * checking the local address is rough. In most cases, one
- * is not specified and the host port is selected by the stack.
- *
- * Assume no match if:
- * local address is specified and address is not the same
- * local address is not specified but remote is, or vice versa
- * (admin using specific host_traddr when it matters).
- */
- if (opts->mask & NVMF_OPT_HOST_TRADDR &&
- ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) {
- if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr))
- return false;
- } else if (opts->mask & NVMF_OPT_HOST_TRADDR ||
- ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
- return false;
- /*
- * if neither controller had an host port specified, assume it's
- * a match as everything else matched.
- */
-
- return true;
-}
-
/*
* Fails a connection request if it matches an existing controller
* (association) with the same tuple:
@@ -1917,7 +1876,7 @@ nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
mutex_lock(&nvme_rdma_ctrl_mutex);
list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
- found = __nvme_rdma_options_match(ctrl, opts);
+ found = nvmf_ip_options_match(&ctrl->ctrl, opts);
if (found)
break;
}
@@ -1932,7 +1891,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
struct nvme_rdma_ctrl *ctrl;
int ret;
bool changed;
- char *port;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
if (!ctrl)
@@ -1940,15 +1898,21 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
ctrl->ctrl.opts = opts;
INIT_LIST_HEAD(&ctrl->list);
- if (opts->mask & NVMF_OPT_TRSVCID)
- port = opts->trsvcid;
- else
- port = __stringify(NVME_RDMA_IP_PORT);
+ if (!(opts->mask & NVMF_OPT_TRSVCID)) {
+ opts->trsvcid =
+ kstrdup(__stringify(NVME_RDMA_IP_PORT), GFP_KERNEL);
+ if (!opts->trsvcid) {
+ ret = -ENOMEM;
+ goto out_free_ctrl;
+ }
+ opts->mask |= NVMF_OPT_TRSVCID;
+ }
ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
- opts->traddr, port, &ctrl->addr);
+ opts->traddr, opts->trsvcid, &ctrl->addr);
if (ret) {
- pr_err("malformed address passed: %s:%s\n", opts->traddr, port);
+ pr_err("malformed address passed: %s:%s\n",
+ opts->traddr, opts->trsvcid);
goto out_free_ctrl;
}
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index a490790d6691..196d5bd56718 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -156,6 +156,34 @@ TRACE_EVENT(nvme_complete_rq,
);
+#define aer_name(aer) { aer, #aer }
+
+TRACE_EVENT(nvme_async_event,
+ TP_PROTO(struct nvme_ctrl *ctrl, u32 result),
+ TP_ARGS(ctrl, result),
+ TP_STRUCT__entry(
+ __field(int, ctrl_id)
+ __field(u32, result)
+ ),
+ TP_fast_assign(
+ __entry->ctrl_id = ctrl->instance;
+ __entry->result = result;
+ ),
+ TP_printk("nvme%d: NVME_AEN=%#08x [%s]",
+ __entry->ctrl_id, __entry->result,
+ __print_symbolic(__entry->result,
+ aer_name(NVME_AER_NOTICE_NS_CHANGED),
+ aer_name(NVME_AER_NOTICE_ANA),
+ aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
+ aer_name(NVME_AER_ERROR),
+ aer_name(NVME_AER_SMART),
+ aer_name(NVME_AER_CSS),
+ aer_name(NVME_AER_VS))
+ )
+);
+
+#undef aer_name
+
#endif /* _TRACE_NVME_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index a21caea1e080..1179f6314323 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -58,7 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid);
if (!ns) {
- pr_err("nvmet : Could not find namespace id : %d\n",
+ pr_err("Could not find namespace id : %d\n",
le32_to_cpu(req->cmd->get_log_page.nsid));
return NVME_SC_INVALID_NS;
}
@@ -245,6 +245,10 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
offset += len;
ngrps++;
}
+ for ( ; grpid <= NVMET_MAX_ANAGRPS; grpid++) {
+ if (nvmet_ana_group_enabled[grpid])
+ ngrps++;
+ }
hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
hdr.ngrps = cpu_to_le16(ngrps);
@@ -349,7 +353,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20);
- strcpy(id->subnqn, ctrl->subsys->subsysnqn);
+ strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
/* Max command capsule size is sqe + single page of in-capsule data */
id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index b37a8e3e3f80..d895579b6c5d 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -17,6 +17,8 @@
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/ctype.h>
+#include <linux/pci.h>
+#include <linux/pci-p2pdma.h>
#include "nvmet.h"
@@ -340,6 +342,48 @@ out_unlock:
CONFIGFS_ATTR(nvmet_ns_, device_path);
+#ifdef CONFIG_PCI_P2PDMA
+static ssize_t nvmet_ns_p2pmem_show(struct config_item *item, char *page)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+
+ return pci_p2pdma_enable_show(page, ns->p2p_dev, ns->use_p2pmem);
+}
+
+static ssize_t nvmet_ns_p2pmem_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ struct pci_dev *p2p_dev = NULL;
+ bool use_p2pmem;
+ int ret = count;
+ int error;
+
+ mutex_lock(&ns->subsys->lock);
+ if (ns->enabled) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ error = pci_p2pdma_enable_store(page, &p2p_dev, &use_p2pmem);
+ if (error) {
+ ret = error;
+ goto out_unlock;
+ }
+
+ ns->use_p2pmem = use_p2pmem;
+ pci_dev_put(ns->p2p_dev);
+ ns->p2p_dev = p2p_dev;
+
+out_unlock:
+ mutex_unlock(&ns->subsys->lock);
+
+ return ret;
+}
+
+CONFIGFS_ATTR(nvmet_ns_, p2pmem);
+#endif /* CONFIG_PCI_P2PDMA */
+
static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
{
return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
@@ -509,6 +553,9 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
+#ifdef CONFIG_PCI_P2PDMA
+ &nvmet_ns_attr_p2pmem,
+#endif
NULL,
};
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b5ec96abd048..f4efe289dc7b 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
+#include <linux/pci-p2pdma.h>
#include "nvmet.h"
@@ -365,9 +366,93 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
nvmet_file_ns_disable(ns);
}
+static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
+{
+ int ret;
+ struct pci_dev *p2p_dev;
+
+ if (!ns->use_p2pmem)
+ return 0;
+
+ if (!ns->bdev) {
+ pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
+ return -EINVAL;
+ }
+
+ if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
+ pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
+ ns->device_path);
+ return -EINVAL;
+ }
+
+ if (ns->p2p_dev) {
+ ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
+ if (ret < 0)
+ return -EINVAL;
+ } else {
+ /*
+ * Right now we just check that there is p2pmem available so
+ * we can report an error to the user right away if there
+ * is not. We'll find the actual device to use once we
+ * setup the controller when the port's device is available.
+ */
+
+ p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
+ if (!p2p_dev) {
+ pr_err("no peer-to-peer memory is available for %s\n",
+ ns->device_path);
+ return -EINVAL;
+ }
+
+ pci_dev_put(p2p_dev);
+ }
+
+ return 0;
+}
+
+/*
+ * Note: ctrl->subsys->lock should be held when calling this function
+ */
+static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
+ struct nvmet_ns *ns)
+{
+ struct device *clients[2];
+ struct pci_dev *p2p_dev;
+ int ret;
+
+ if (!ctrl->p2p_client)
+ return;
+
+ if (ns->p2p_dev) {
+ ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
+ if (ret < 0)
+ return;
+
+ p2p_dev = pci_dev_get(ns->p2p_dev);
+ } else {
+ clients[0] = ctrl->p2p_client;
+ clients[1] = nvmet_ns_dev(ns);
+
+ p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
+ if (!p2p_dev) {
+ pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
+ dev_name(ctrl->p2p_client), ns->device_path);
+ return;
+ }
+ }
+
+ ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
+ if (ret < 0)
+ pci_dev_put(p2p_dev);
+
+ pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
+ ns->nsid);
+}
+
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
+ struct nvmet_ctrl *ctrl;
int ret;
mutex_lock(&subsys->lock);
@@ -384,6 +469,13 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
if (ret)
goto out_unlock;
+ ret = nvmet_p2pmem_ns_enable(ns);
+ if (ret)
+ goto out_unlock;
+
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ nvmet_p2pmem_ns_add_p2p(ctrl, ns);
+
ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
0, GFP_KERNEL);
if (ret)
@@ -418,6 +510,9 @@ out_unlock:
mutex_unlock(&subsys->lock);
return ret;
out_dev_put:
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
+
nvmet_ns_dev_disable(ns);
goto out_unlock;
}
@@ -425,6 +520,7 @@ out_dev_put:
void nvmet_ns_disable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
+ struct nvmet_ctrl *ctrl;
mutex_lock(&subsys->lock);
if (!ns->enabled)
@@ -434,6 +530,10 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
list_del_rcu(&ns->dev_link);
if (ns->nsid == subsys->max_nsid)
subsys->max_nsid = nvmet_max_nsid(subsys);
+
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
+ pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
+
mutex_unlock(&subsys->lock);
/*
@@ -450,6 +550,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
percpu_ref_exit(&ns->ref);
mutex_lock(&subsys->lock);
+
subsys->nr_namespaces--;
nvmet_ns_changed(subsys, ns->nsid);
nvmet_ns_dev_disable(ns);
@@ -725,6 +826,51 @@ void nvmet_req_execute(struct nvmet_req *req)
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);
+int nvmet_req_alloc_sgl(struct nvmet_req *req)
+{
+ struct pci_dev *p2p_dev = NULL;
+
+ if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
+ if (req->sq->ctrl && req->ns)
+ p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
+ req->ns->nsid);
+
+ req->p2p_dev = NULL;
+ if (req->sq->qid && p2p_dev) {
+ req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
+ req->transfer_len);
+ if (req->sg) {
+ req->p2p_dev = p2p_dev;
+ return 0;
+ }
+ }
+
+ /*
+ * If no P2P memory was available we fallback to using
+ * regular memory
+ */
+ }
+
+ req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
+ if (!req->sg)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
+
+void nvmet_req_free_sgl(struct nvmet_req *req)
+{
+ if (req->p2p_dev)
+ pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
+ else
+ sgl_free(req->sg);
+
+ req->sg = NULL;
+ req->sg_cnt = 0;
+}
+EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
+
static inline bool nvmet_cc_en(u32 cc)
{
return (cc >> NVME_CC_EN_SHIFT) & 0x1;
@@ -921,6 +1067,37 @@ bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
return __nvmet_host_allowed(subsys, hostnqn);
}
+/*
+ * Note: ctrl->subsys->lock should be held when calling this function
+ */
+static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
+ struct nvmet_req *req)
+{
+ struct nvmet_ns *ns;
+
+ if (!req->p2p_client)
+ return;
+
+ ctrl->p2p_client = get_device(req->p2p_client);
+
+ list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
+ nvmet_p2pmem_ns_add_p2p(ctrl, ns);
+}
+
+/*
+ * Note: ctrl->subsys->lock should be held when calling this function
+ */
+static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
+ pci_dev_put(radix_tree_deref_slot(slot));
+
+ put_device(ctrl->p2p_client);
+}
+
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
@@ -962,6 +1139,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
INIT_LIST_HEAD(&ctrl->async_events);
+ INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
@@ -1026,6 +1204,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
mutex_lock(&subsys->lock);
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
+ nvmet_setup_p2p_ns_map(ctrl, req);
mutex_unlock(&subsys->lock);
*ctrlp = ctrl;
@@ -1053,6 +1232,7 @@ static void nvmet_ctrl_free(struct kref *ref)
struct nvmet_subsys *subsys = ctrl->subsys;
mutex_lock(&subsys->lock);
+ nvmet_release_p2p_ns_map(ctrl);
list_del(&ctrl->subsys_entry);
mutex_unlock(&subsys->lock);
@@ -1105,8 +1285,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
if (!port)
return NULL;
- if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
- NVMF_NQN_SIZE)) {
+ if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
return NULL;
return nvmet_disc_subsys;
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index eae29f493a07..bc0aa0bf1543 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -174,7 +174,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20);
- strcpy(id->subnqn, ctrl->subsys->subsysnqn);
+ strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
@@ -219,12 +219,10 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
default:
- pr_err("unsupported cmd %d\n", cmd->common.opcode);
+ pr_err("unhandled cmd %d\n", cmd->common.opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
- pr_err("unhandled cmd %d\n", cmd->common.opcode);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
int __init nvmet_init_discovery(void)
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 29b4b236afd8..409081a03b24 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -110,11 +110,19 @@ struct nvmet_fc_tgtport {
struct list_head ls_busylist;
struct list_head assoc_list;
struct ida assoc_cnt;
- struct nvmet_port *port;
+ struct nvmet_fc_port_entry *pe;
struct kref ref;
u32 max_sg_cnt;
};
+struct nvmet_fc_port_entry {
+ struct nvmet_fc_tgtport *tgtport;
+ struct nvmet_port *port;
+ u64 node_name;
+ u64 port_name;
+ struct list_head pe_list;
+};
+
struct nvmet_fc_defer_fcp_req {
struct list_head req_list;
struct nvmefc_tgt_fcp_req *fcp_req;
@@ -132,7 +140,6 @@ struct nvmet_fc_tgt_queue {
atomic_t zrspcnt;
atomic_t rsn;
spinlock_t qlock;
- struct nvmet_port *port;
struct nvmet_cq nvme_cq;
struct nvmet_sq nvme_sq;
struct nvmet_fc_tgt_assoc *assoc;
@@ -221,6 +228,7 @@ static DEFINE_SPINLOCK(nvmet_fc_tgtlock);
static LIST_HEAD(nvmet_fc_target_list);
static DEFINE_IDA(nvmet_fc_tgtport_cnt);
+static LIST_HEAD(nvmet_fc_portentry_list);
static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work);
@@ -645,7 +653,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
queue->qid = qid;
queue->sqsize = sqsize;
queue->assoc = assoc;
- queue->port = assoc->tgtport->port;
queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid);
INIT_LIST_HEAD(&queue->fod_list);
INIT_LIST_HEAD(&queue->avail_defer_list);
@@ -957,6 +964,83 @@ nvmet_fc_find_target_assoc(struct nvmet_fc_tgtport *tgtport,
return ret;
}
+static void
+nvmet_fc_portentry_bind(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_port_entry *pe,
+ struct nvmet_port *port)
+{
+ lockdep_assert_held(&nvmet_fc_tgtlock);
+
+ pe->tgtport = tgtport;
+ tgtport->pe = pe;
+
+ pe->port = port;
+ port->priv = pe;
+
+ pe->node_name = tgtport->fc_target_port.node_name;
+ pe->port_name = tgtport->fc_target_port.port_name;
+ INIT_LIST_HEAD(&pe->pe_list);
+
+ list_add_tail(&pe->pe_list, &nvmet_fc_portentry_list);
+}
+
+static void
+nvmet_fc_portentry_unbind(struct nvmet_fc_port_entry *pe)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
+ if (pe->tgtport)
+ pe->tgtport->pe = NULL;
+ list_del(&pe->pe_list);
+ spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+}
+
+/*
+ * called when a targetport deregisters. Breaks the relationship
+ * with the nvmet port, but leaves the port_entry in place so that
+ * re-registration can resume operation.
+ */
+static void
+nvmet_fc_portentry_unbind_tgt(struct nvmet_fc_tgtport *tgtport)
+{
+ struct nvmet_fc_port_entry *pe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
+ pe = tgtport->pe;
+ if (pe)
+ pe->tgtport = NULL;
+ tgtport->pe = NULL;
+ spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+}
+
+/*
+ * called when a new targetport is registered. Looks in the
+ * existing nvmet port_entries to see if the nvmet layer is
+ * configured for the targetport's wwn's. (the targetport existed,
+ * nvmet configured, the lldd unregistered the tgtport, and is now
+ * reregistering the same targetport). If so, set the nvmet port
+ * port entry on the targetport.
+ */
+static void
+nvmet_fc_portentry_rebind_tgt(struct nvmet_fc_tgtport *tgtport)
+{
+ struct nvmet_fc_port_entry *pe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
+ list_for_each_entry(pe, &nvmet_fc_portentry_list, pe_list) {
+ if (tgtport->fc_target_port.node_name == pe->node_name &&
+ tgtport->fc_target_port.port_name == pe->port_name) {
+ WARN_ON(pe->tgtport);
+ tgtport->pe = pe;
+ pe->tgtport = tgtport;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+}
/**
* nvme_fc_register_targetport - transport entry point called by an
@@ -1034,6 +1118,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
goto out_free_newrec;
}
+ nvmet_fc_portentry_rebind_tgt(newrec);
+
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
list_add_tail(&newrec->tgt_list, &nvmet_fc_target_list);
spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
@@ -1159,8 +1245,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
* nvme_fc_unregister_targetport - transport entry point called by an
* LLDD to deregister/remove a previously
* registered a local NVME subsystem FC port.
- * @tgtport: pointer to the (registered) target port that is to be
- * deregistered.
+ * @target_port: pointer to the (registered) target port that is to be
+ * deregistered.
*
* Returns:
* a completion status. Must be 0 upon success; a negative errno
@@ -1171,6 +1257,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port)
{
struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port);
+ nvmet_fc_portentry_unbind_tgt(tgtport);
+
/* terminate any outstanding associations */
__nvmet_fc_free_assocs(tgtport);
@@ -1661,7 +1749,7 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work)
*
* If this routine returns error, the LLDD should abort the exchange.
*
- * @tgtport: pointer to the (registered) target port the LS was
+ * @target_port: pointer to the (registered) target port the LS was
* received on.
* @lsreq: pointer to a lsreq request structure to be used to reference
* the exchange corresponding to the LS.
@@ -2147,7 +2235,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req)
/*
- * Actual processing routine for received FC-NVME LS Requests from the LLD
+ * Actual processing routine for received FC-NVME I/O Requests from the LLD
*/
static void
nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
@@ -2158,6 +2246,13 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
int ret;
/*
+ * if there is no nvmet mapping to the targetport there
+ * shouldn't be requests. just terminate them.
+ */
+ if (!tgtport->pe)
+ goto transport_error;
+
+ /*
* Fused commands are currently not supported in the linux
* implementation.
*
@@ -2184,7 +2279,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
fod->req.cmd = &fod->cmdiubuf.sqe;
fod->req.rsp = &fod->rspiubuf.cqe;
- fod->req.port = fod->queue->port;
+ fod->req.port = tgtport->pe->port;
/* clear any response payload */
memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
@@ -2468,7 +2563,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
substring_t wwn = { name, &name[sizeof(name)-1] };
int nnoffset, pnoffset;
- /* validate it string one of the 2 allowed formats */
+ /* validate if string is one of the 2 allowed formats */
if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
@@ -2508,6 +2603,7 @@ static int
nvmet_fc_add_port(struct nvmet_port *port)
{
struct nvmet_fc_tgtport *tgtport;
+ struct nvmet_fc_port_entry *pe;
struct nvmet_fc_traddr traddr = { 0L, 0L };
unsigned long flags;
int ret;
@@ -2524,24 +2620,40 @@ nvmet_fc_add_port(struct nvmet_port *port)
if (ret)
return ret;
+ pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+ if (!pe)
+ return -ENOMEM;
+
ret = -ENXIO;
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) {
if ((tgtport->fc_target_port.node_name == traddr.nn) &&
(tgtport->fc_target_port.port_name == traddr.pn)) {
- tgtport->port = port;
- ret = 0;
+ /* a FC port can only be 1 nvmet port id */
+ if (!tgtport->pe) {
+ nvmet_fc_portentry_bind(tgtport, pe, port);
+ ret = 0;
+ } else
+ ret = -EALREADY;
break;
}
}
spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+
+ if (ret)
+ kfree(pe);
+
return ret;
}
static void
nvmet_fc_remove_port(struct nvmet_port *port)
{
- /* nothing to do */
+ struct nvmet_fc_port_entry *pe = port->priv;
+
+ nvmet_fc_portentry_unbind(pe);
+
+ kfree(pe);
}
static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 5251689a1d9a..291f4121f516 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -648,6 +648,7 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
break;
/* Fall-Thru to RSP handling */
+ /* FALLTHRU */
case NVMET_FCOP_RSP:
if (fcpreq) {
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 7bc9f6240432..c1ec3475a140 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -58,7 +58,7 @@ static void nvmet_bio_done(struct bio *bio)
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
- struct bio *bio = &req->b.inline_bio;
+ struct bio *bio;
struct scatterlist *sg;
sector_t sector;
blk_qc_t cookie;
@@ -78,10 +78,18 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
op = REQ_OP_READ;
}
+ if (is_pci_p2pdma_page(sg_page(req->sg)))
+ op_flags |= REQ_NOMERGE;
+
sector = le64_to_cpu(req->cmd->rw.slba);
sector <<= (req->ns->blksize_shift - 9);
- bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+ if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) {
+ bio = &req->b.inline_bio;
+ bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+ } else {
+ bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+ }
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 81a9dc5290a8..39d972e2595f 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -246,7 +246,8 @@ static void nvmet_file_execute_discard(struct nvmet_req *req)
break;
offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
- len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
+ len = le32_to_cpu(range.nlb);
+ len <<= req->ns->blksize_shift;
if (offset + len > req->ns->size) {
ret = NVME_SC_LBA_RANGE | NVME_SC_DNR;
break;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index ec9af4ee03b6..c2b4d9ee6391 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -26,6 +26,7 @@
#include <linux/configfs.h>
#include <linux/rcupdate.h>
#include <linux/blkdev.h>
+#include <linux/radix-tree.h>
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
@@ -77,6 +78,9 @@ struct nvmet_ns {
struct completion disable_done;
mempool_t *bvec_pool;
struct kmem_cache *bvec_cache;
+
+ int use_p2pmem;
+ struct pci_dev *p2p_dev;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -84,6 +88,11 @@ static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
return container_of(to_config_group(item), struct nvmet_ns, group);
}
+static inline struct device *nvmet_ns_dev(struct nvmet_ns *ns)
+{
+ return ns->bdev ? disk_to_dev(ns->bdev->bd_disk) : NULL;
+}
+
struct nvmet_cq {
u16 qid;
u16 size;
@@ -184,6 +193,9 @@ struct nvmet_ctrl {
char subsysnqn[NVMF_NQN_FIELD_LEN];
char hostnqn[NVMF_NQN_FIELD_LEN];
+
+ struct device *p2p_client;
+ struct radix_tree_root p2p_ns_map;
};
struct nvmet_subsys {
@@ -264,6 +276,7 @@ struct nvmet_fabrics_ops {
};
#define NVMET_MAX_INLINE_BIOVEC 8
+#define NVMET_MAX_INLINE_DATA_LEN NVMET_MAX_INLINE_BIOVEC * PAGE_SIZE
struct nvmet_req {
struct nvme_command *cmd;
@@ -294,6 +307,9 @@ struct nvmet_req {
void (*execute)(struct nvmet_req *req);
const struct nvmet_fabrics_ops *ops;
+
+ struct pci_dev *p2p_dev;
+ struct device *p2p_client;
};
extern struct workqueue_struct *buffered_io_wq;
@@ -336,6 +352,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
void nvmet_req_uninit(struct nvmet_req *req);
void nvmet_req_execute(struct nvmet_req *req);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
+int nvmet_req_alloc_sgl(struct nvmet_req *req);
+void nvmet_req_free_sgl(struct nvmet_req *req);
void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
u16 size);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 3533e918ea37..ddce100be57a 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -66,6 +66,7 @@ struct nvmet_rdma_rsp {
struct nvmet_req req;
+ bool allocated;
u8 n_rdma;
u32 flags;
u32 invalidate_rkey;
@@ -121,6 +122,7 @@ struct nvmet_rdma_device {
int inline_page_count;
};
+static struct workqueue_struct *nvmet_rdma_delete_wq;
static bool nvmet_rdma_use_srq;
module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
@@ -174,11 +176,19 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
unsigned long flags;
spin_lock_irqsave(&queue->rsps_lock, flags);
- rsp = list_first_entry(&queue->free_rsps,
+ rsp = list_first_entry_or_null(&queue->free_rsps,
struct nvmet_rdma_rsp, free_list);
- list_del(&rsp->free_list);
+ if (likely(rsp))
+ list_del(&rsp->free_list);
spin_unlock_irqrestore(&queue->rsps_lock, flags);
+ if (unlikely(!rsp)) {
+ rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
+ if (unlikely(!rsp))
+ return NULL;
+ rsp->allocated = true;
+ }
+
return rsp;
}
@@ -187,6 +197,11 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
{
unsigned long flags;
+ if (rsp->allocated) {
+ kfree(rsp);
+ return;
+ }
+
spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
@@ -489,7 +504,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
}
if (rsp->req.sg != rsp->cmd->inline_sg)
- sgl_free(rsp->req.sg);
+ nvmet_req_free_sgl(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@@ -638,24 +653,24 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
{
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u64 addr = le64_to_cpu(sgl->addr);
- u32 len = get_unaligned_le24(sgl->length);
u32 key = get_unaligned_le32(sgl->key);
int ret;
+ rsp->req.transfer_len = get_unaligned_le24(sgl->length);
+
/* no data command? */
- if (!len)
+ if (!rsp->req.transfer_len)
return 0;
- rsp->req.sg = sgl_alloc(len, GFP_KERNEL, &rsp->req.sg_cnt);
- if (!rsp->req.sg)
- return NVME_SC_INTERNAL;
+ ret = nvmet_req_alloc_sgl(&rsp->req);
+ if (ret < 0)
+ goto error_out;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
nvmet_data_dir(&rsp->req));
if (ret < 0)
- return NVME_SC_INTERNAL;
- rsp->req.transfer_len += len;
+ goto error_out;
rsp->n_rdma += ret;
if (invalidate) {
@@ -664,6 +679,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
}
return 0;
+
+error_out:
+ rsp->req.transfer_len = 0;
+ return NVME_SC_INTERNAL;
}
static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
@@ -731,6 +750,8 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
cmd->send_sge.addr, cmd->send_sge.length,
DMA_TO_DEVICE);
+ cmd->req.p2p_client = &queue->dev->device->dev;
+
if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
&queue->nvme_sq, &nvmet_rdma_ops))
return;
@@ -776,6 +797,15 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
cmd->queue = queue;
rsp = nvmet_rdma_get_rsp(queue);
+ if (unlikely(!rsp)) {
+ /*
+ * we get here only under memory pressure,
+ * silently drop and have the host retry
+ * as we can't even fail it.
+ */
+ nvmet_rdma_post_recv(queue->dev, cmd);
+ return;
+ }
rsp->queue = queue;
rsp->cmd = cmd;
rsp->flags = 0;
@@ -1244,12 +1274,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
if (queue->host_qid == 0) {
/* Let inflight controller teardown complete */
- flush_scheduled_work();
+ flush_workqueue(nvmet_rdma_delete_wq);
}
ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
if (ret) {
- schedule_work(&queue->release_work);
+ queue_work(nvmet_rdma_delete_wq, &queue->release_work);
/* Destroying rdma_cm id is not needed here */
return 0;
}
@@ -1314,7 +1344,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
if (disconnect) {
rdma_disconnect(queue->cm_id);
- schedule_work(&queue->release_work);
+ queue_work(nvmet_rdma_delete_wq, &queue->release_work);
}
}
@@ -1344,7 +1374,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
mutex_unlock(&nvmet_rdma_queue_mutex);
pr_err("failed to connect queue %d\n", queue->idx);
- schedule_work(&queue->release_work);
+ queue_work(nvmet_rdma_delete_wq, &queue->release_work);
}
/**
@@ -1626,8 +1656,17 @@ static int __init nvmet_rdma_init(void)
if (ret)
goto err_ib_client;
+ nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvmet_rdma_delete_wq) {
+ ret = -ENOMEM;
+ goto err_unreg_transport;
+ }
+
return 0;
+err_unreg_transport:
+ nvmet_unregister_transport(&nvmet_rdma_ops);
err_ib_client:
ib_unregister_client(&nvmet_rdma_ib_client);
return ret;
@@ -1635,6 +1674,7 @@ err_ib_client:
static void __exit nvmet_rdma_exit(void)
{
+ destroy_workqueue(nvmet_rdma_delete_wq);
nvmet_unregister_transport(&nvmet_rdma_ops);
ib_unregister_client(&nvmet_rdma_ib_client);
WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list));