summary refs log tree commit diff
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- drivers/nvme/host/core.c 92
-rw-r--r-- drivers/nvme/host/ioctl.c 21
-rw-r--r-- drivers/nvme/host/multipath.c 3
-rw-r--r-- drivers/nvme/host/nvme.h 3
-rw-r--r-- drivers/nvme/host/pci.c 6
-rw-r--r-- drivers/nvme/host/tcp.c 24
6 files changed, 75 insertions, 74 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 93a8119ad5ca..88fec86b8baa 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -377,12 +377,12 @@ static void nvme_log_err_passthru(struct request *req)
nr->status & NVME_SC_MASK, /* Status Code */
nr->status & NVME_STATUS_MORE ? "MORE " : "",
nr->status & NVME_STATUS_DNR ? "DNR " : "",
- nr->cmd->common.cdw10,
- nr->cmd->common.cdw11,
- nr->cmd->common.cdw12,
- nr->cmd->common.cdw13,
- nr->cmd->common.cdw14,
- nr->cmd->common.cdw14);
+ le32_to_cpu(nr->cmd->common.cdw10),
+ le32_to_cpu(nr->cmd->common.cdw11),
+ le32_to_cpu(nr->cmd->common.cdw12),
+ le32_to_cpu(nr->cmd->common.cdw13),
+ le32_to_cpu(nr->cmd->common.cdw14),
+ le32_to_cpu(nr->cmd->common.cdw15));
}
enum nvme_disposition {
@@ -759,6 +759,10 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
+
+ if (!(rq->rq_flags & RQF_DONTPREP))
+ nvme_clear_nvme_request(rq);
+
return nvme_host_path_error(rq);
}
EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
@@ -1996,21 +2000,41 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
}
-static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns,
- struct nvme_id_ns *id, struct queue_limits *lim,
- u32 bs, u32 atomic_bs)
+static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
+ struct nvme_id_ns *id, struct queue_limits *lim, u32 bs)
{
- unsigned int boundary = 0;
+ u32 atomic_bs, boundary = 0;
- if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) {
- if (le16_to_cpu(id->nabspf))
+ /*
+ * We do not support an offset for the atomic boundaries.
+ */
+ if (id->nabo)
+ return bs;
+
+ if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) {
+ /*
+ * Use the per-namespace atomic write unit when available.
+ */
+ atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
+ if (id->nabspf)
boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+ } else {
+ /*
+ * Use the controller wide atomic write unit. This sucks
+ * because the limit is defined in terms of logical blocks while
+ * namespaces can have different formats, and because there is
+ * no clear language in the specification prohibiting different
+ * values for different controllers in the subsystem.
+ */
+ atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
}
+
lim->atomic_write_hw_max = atomic_bs;
lim->atomic_write_hw_boundary = boundary;
lim->atomic_write_hw_unit_min = bs;
lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs);
lim->features |= BLK_FEAT_ATOMIC_WRITES;
+ return atomic_bs;
}
static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
@@ -2048,34 +2072,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
valid = false;
}
- atomic_bs = phys_bs = bs;
- if (id->nabo == 0) {
- /*
- * Bit 1 indicates whether NAWUPF is defined for this namespace
- * and whether it should be used instead of AWUPF. If NAWUPF ==
- * 0 then AWUPF must be used instead.
- */
- if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
- atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
- else
- atomic_bs = (1 + ns->ctrl->awupf) * bs;
-
- /*
- * Set subsystem atomic bs.
- */
- if (ns->ctrl->subsys->atomic_bs) {
- if (atomic_bs != ns->ctrl->subsys->atomic_bs) {
- dev_err_ratelimited(ns->ctrl->device,
- "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
- ns->disk ? ns->disk->disk_name : "?",
- ns->ctrl->subsys->atomic_bs,
- atomic_bs);
- }
- } else
- ns->ctrl->subsys->atomic_bs = atomic_bs;
-
- nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
- }
+ phys_bs = bs;
+ atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs);
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
/* NPWG = Namespace Preferred Write Granularity */
@@ -2215,16 +2213,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (!nvme_update_disk_info(ns, id, &lim))
capacity = 0;
- /*
- * Validate the max atomic write size fits within the subsystem's
- * atomic write capabilities.
- */
- if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
- blk_mq_unfreeze_queue(ns->disk->queue, memflags);
- ret = -ENXIO;
- goto out;
- }
-
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ns->head->ids.csi == NVME_CSI_ZNS)
@@ -3040,6 +3028,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->model, id->mn, sizeof(subsys->model));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
+ subsys->awupf = le16_to_cpu(id->awupf);
/* Versions prior to 1.4 don't necessarily report a valid type */
if (id->cntrltype == NVME_CTRL_DISC ||
@@ -3464,7 +3453,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
- ctrl->awupf = le16_to_cpu(id->awupf);
out_free:
kfree(id);
return ret;
@@ -3896,7 +3884,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
return;
}
}
- list_add(&ns->list, &ns->ctrl->namespaces);
+ list_add_rcu(&ns->list, &ns->ctrl->namespaces);
}
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index f29107d95ff2..13aab3ca34f6 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -429,21 +429,14 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
/*
- * For iopoll, complete it directly. Note that using the uring_cmd
- * helper for this is safe only because we check blk_rq_is_poll().
- * As that returns false if we're NOT on a polled queue, then it's
- * safe to use the polled completion helper.
- *
- * Otherwise, move the completion to task work.
+ * IOPOLL could potentially complete this request directly, but
+ * if multiple rings are polling on the same queue, then it's possible
+ * for one ring to find completions for another ring. Punting the
+ * completion via task_work will always direct it to the right
+ * location, rather than potentially complete requests for ringA
+ * under iopoll invocations from ringB.
*/
- if (blk_rq_is_poll(req)) {
- if (pdu->bio)
- blk_rq_unmap_user(pdu->bio);
- io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status);
- } else {
- io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
- }
-
+ io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
return RQ_END_IO_FREE;
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index cf0ef4745564..700dfbd5a451 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -1050,7 +1050,8 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
*/
srcu_idx = srcu_read_lock(&head->srcu);
- list_for_each_entry_rcu(ns, &head->list, siblings) {
+ list_for_each_entry_srcu(ns, &head->list, siblings,
+ srcu_read_lock_held(&head->srcu)) {
/*
* Ensure that ns path disk node is already added otherwise we
* may get invalid kobj name for target
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 8fc4683418a3..d8c4e545f732 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -410,7 +410,6 @@ struct nvme_ctrl {
enum nvme_ctrl_type cntrltype;
enum nvme_dctype dctype;
- u16 awupf; /* 0's based value. */
};
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@@ -443,11 +442,11 @@ struct nvme_subsystem {
u8 cmic;
enum nvme_subsys_type subtype;
u16 vendor_id;
+ u16 awupf; /* 0's based value. */
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
#endif
- u32 atomic_bs;
};
/*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f1dd804151b1..776c867fb64d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2031,8 +2031,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
pci_p2pmem_publish(pdev, true);
-
- nvme_update_attrs(dev);
}
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
@@ -2969,6 +2967,8 @@ static void nvme_reset_work(struct work_struct *work)
if (result < 0)
goto out;
+ nvme_update_attrs(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out;
@@ -3305,6 +3305,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result < 0)
goto out_disable;
+ nvme_update_attrs(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out_disable;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index aba365f97cf6..b882ee6ef40f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -452,7 +452,8 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
return NULL;
}
- list_del(&req->entry);
+ list_del_init(&req->entry);
+ init_llist_node(&req->lentry);
return req;
}
@@ -560,6 +561,8 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
req->queue = queue;
nvme_req(rq)->ctrl = &ctrl->ctrl;
nvme_req(rq)->cmd = &pdu->cmd;
+ init_llist_node(&req->lentry);
+ INIT_LIST_HEAD(&req->entry);
return 0;
}
@@ -764,6 +767,14 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
return -EPROTO;
}
+ if (llist_on_list(&req->lentry) ||
+ !list_empty(&req->entry)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "req %d unexpected r2t while processing request\n",
+ rq->tag);
+ return -EPROTO;
+ }
+
req->pdu_len = 0;
req->h2cdata_left = r2t_length;
req->h2cdata_offset = r2t_offset;
@@ -1348,7 +1359,7 @@ static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
queue->nr_cqe = 0;
consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
release_sock(sk);
- return consumed;
+ return consumed == -EAGAIN ? 0 : consumed;
}
static void nvme_tcp_io_work(struct work_struct *w)
@@ -1376,6 +1387,11 @@ static void nvme_tcp_io_work(struct work_struct *w)
else if (unlikely(result < 0))
return;
+ /* did we get some space after spending time in recv? */
+ if (nvme_tcp_queue_has_pending(queue) &&
+ sk_stream_is_writeable(queue->sock->sk))
+ pending = true;
+
if (!pending || !queue->rd_enabled)
return;
@@ -2392,7 +2408,7 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
nvme_tcp_teardown_admin_queue(ctrl, false);
ret = nvme_tcp_configure_admin_queue(ctrl, false);
if (ret)
- return ret;
+ goto destroy_admin;
}
if (ctrl->icdoff) {
@@ -2636,6 +2652,8 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
ctrl->async_req.offset = 0;
ctrl->async_req.curr_bio = NULL;
ctrl->async_req.data_len = 0;
+ init_llist_node(&ctrl->async_req.lentry);
+ INIT_LIST_HEAD(&ctrl->async_req.entry);
nvme_tcp_queue_request(&ctrl->async_req, true, true);
}