summaryrefslogtreecommitdiff
path: root/drivers/nvme
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/core.c63
-rw-r--r--drivers/nvme/host/fabrics.c25
-rw-r--r--drivers/nvme/host/fault_inject.c2
-rw-r--r--drivers/nvme/host/multipath.c44
-rw-r--r--drivers/nvme/host/nvme.h10
-rw-r--r--drivers/nvme/host/pci.c147
-rw-r--r--drivers/nvme/host/tcp.c61
-rw-r--r--drivers/nvme/host/trace.h6
-rw-r--r--drivers/nvme/host/zns.c5
-rw-r--r--drivers/nvme/target/io-cmd-file.c4
-rw-r--r--drivers/nvme/target/passthru.c3
-rw-r--r--drivers/nvme/target/tcp.c53
12 files changed, 306 insertions, 117 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4b5de8f5435a..5e0bfda04bd7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -666,6 +666,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
struct request *rq)
{
if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
+ ctrl->state != NVME_CTRL_DELETING &&
ctrl->state != NVME_CTRL_DEAD &&
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
@@ -895,10 +896,19 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->write_zeroes.length =
cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
- if (nvme_ns_has_pi(ns))
+
+ if (nvme_ns_has_pi(ns)) {
cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
- else
- cmnd->write_zeroes.control = 0;
+
+ switch (ns->pi_type) {
+ case NVME_NS_DPS_PI_TYPE1:
+ case NVME_NS_DPS_PI_TYPE2:
+ cmnd->write_zeroes.reftag =
+ cpu_to_le32(t10_pi_ref_tag(req));
+ break;
+ }
+ }
+
return BLK_STS_OK;
}
@@ -981,7 +991,6 @@ EXPORT_SYMBOL_GPL(nvme_cleanup_cmd);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
{
struct nvme_command *cmd = nvme_req(req)->cmd;
- struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
blk_status_t ret = BLK_STS_OK;
if (!(req->rq_flags & RQF_DONTPREP))
@@ -1028,8 +1037,6 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
return BLK_STS_IOERR;
}
- if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN))
- nvme_req(req)->genctr++;
cmd->common.command_id = nvme_cid(req);
trace_nvme_setup_cmd(req, cmd);
return ret;
@@ -1047,7 +1054,7 @@ static int nvme_execute_rq(struct gendisk *disk, struct request *rq,
{
blk_status_t status;
- status = blk_execute_rq(disk, rq, at_head);
+ status = blk_execute_rq(rq, at_head);
if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
return -EINTR;
if (nvme_req(rq)->status)
@@ -1274,7 +1281,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
rq->timeout = ctrl->kato * HZ;
rq->end_io_data = ctrl;
- blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);
+ blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io);
}
static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -1740,9 +1747,20 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
*/
if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
return -EINVAL;
- if (ctrl->max_integrity_segments)
- ns->features |=
- (NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
+
+ ns->features |= NVME_NS_EXT_LBAS;
+
+ /*
+ * The current fabrics transport drivers support namespace
+ * metadata formats only if nvme_ns_has_pi() returns true.
+ * Suppress support for all other formats so the namespace will
+ * have a 0 capacity and not be usable through the block stack.
+ *
+ * Note, this check will need to be modified if any drivers
+ * gain the ability to use other metadata formats.
+ */
+ if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns))
+ ns->features |= NVME_NS_METADATA_SUPPORTED;
} else {
/*
* For PCIe controllers, we can't easily remap the separate
@@ -2469,6 +2487,20 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
.vid = 0x14a4,
.fr = "22301111",
.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
+ },
+ {
+ /*
+ * This Kioxia CD6-V Series / HPE PE8030 device times out and
+ * aborts I/O during any load, but more easily reproducible
+ * with discards (fstrim).
+ *
+ * The device is left in a state where it is also not possible
+ * to use "nvme set-feature" to disable APST, but booting with
+ * nvme_core.default_ps_max_latency=0 works.
+ */
+ .vid = 0x1e0f,
+ .mn = "KCD6XVUL6T40",
+ .quirks = NVME_QUIRK_NO_APST,
}
};
@@ -2673,8 +2705,9 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
if (tmp->cntlid == ctrl->cntlid) {
dev_err(ctrl->device,
- "Duplicate cntlid %u with %s, rejecting\n",
- ctrl->cntlid, dev_name(tmp->device));
+ "Duplicate cntlid %u with %s, subsys %s, rejecting\n",
+ ctrl->cntlid, dev_name(tmp->device),
+ subsys->subnqn);
return false;
}
@@ -2726,9 +2759,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
return -EINVAL;
}
subsys->awupf = le16_to_cpu(id->awupf);
-#ifdef CONFIG_NVME_MULTIPATH
- subsys->iopolicy = NVME_IOPOLICY_NUMA;
-#endif
+ nvme_mpath_default_iopolicy(subsys);
subsys->dev.class = nvme_subsys_class;
subsys->dev.release = nvme_release_subsystem;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index c5a2b71c5268..7ae041e2b3fb 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -698,6 +698,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
if (token >= 0)
pr_warn("I/O fail on reconnect controller after %d sec\n",
token);
+ else
+ token = -1;
+
opts->fast_io_fail_tmo = token;
break;
case NVMF_OPT_HOSTNQN:
@@ -1066,6 +1069,26 @@ out_unlock:
return ret ? ret : count;
}
+static void __nvmf_concat_opt_tokens(struct seq_file *seq_file)
+{
+ const struct match_token *tok;
+ int idx;
+
+ /*
+ * Add dummy entries for instance and cntlid to
+ * signal an invalid/non-existing controller
+ */
+ seq_puts(seq_file, "instance=-1,cntlid=-1");
+ for (idx = 0; idx < ARRAY_SIZE(opt_tokens); idx++) {
+ tok = &opt_tokens[idx];
+ if (tok->token == NVMF_OPT_ERR)
+ continue;
+ seq_puts(seq_file, ",");
+ seq_puts(seq_file, tok->pattern);
+ }
+ seq_puts(seq_file, "\n");
+}
+
static int nvmf_dev_show(struct seq_file *seq_file, void *private)
{
struct nvme_ctrl *ctrl;
@@ -1074,7 +1097,7 @@ static int nvmf_dev_show(struct seq_file *seq_file, void *private)
mutex_lock(&nvmf_dev_mutex);
ctrl = seq_file->private;
if (!ctrl) {
- ret = -EINVAL;
+ __nvmf_concat_opt_tokens(seq_file);
goto out_unlock;
}
diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c
index 1352159733b0..83d2e6860d38 100644
--- a/drivers/nvme/host/fault_inject.c
+++ b/drivers/nvme/host/fault_inject.c
@@ -56,7 +56,7 @@ void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inject)
void nvme_should_fail(struct request *req)
{
- struct gendisk *disk = req->rq_disk;
+ struct gendisk *disk = req->q->disk;
struct nvme_fault_inject *fault_inject = NULL;
u16 status;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 7f2071f2460c..f8bf6606eb2f 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -13,6 +13,42 @@ module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
"turn on native support for multiple controllers per subsystem");
+static const char *nvme_iopolicy_names[] = {
+ [NVME_IOPOLICY_NUMA] = "numa",
+ [NVME_IOPOLICY_RR] = "round-robin",
+};
+
+static int iopolicy = NVME_IOPOLICY_NUMA;
+
+static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp)
+{
+ if (!val)
+ return -EINVAL;
+ if (!strncmp(val, "numa", 4))
+ iopolicy = NVME_IOPOLICY_NUMA;
+ else if (!strncmp(val, "round-robin", 11))
+ iopolicy = NVME_IOPOLICY_RR;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nvme_get_iopolicy(char *buf, const struct kernel_param *kp)
+{
+ return sprintf(buf, "%s\n", nvme_iopolicy_names[iopolicy]);
+}
+
+module_param_call(iopolicy, nvme_set_iopolicy, nvme_get_iopolicy,
+ &iopolicy, 0644);
+MODULE_PARM_DESC(iopolicy,
+ "Default multipath I/O policy; 'numa' (default) or 'round-robin'");
+
+void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
+{
+ subsys->iopolicy = iopolicy;
+}
+
void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
struct nvme_ns_head *h;
@@ -706,11 +742,6 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl)
struct device_attribute subsys_attr_##_name = \
__ATTR(_name, _mode, _show, _store)
-static const char *nvme_iopolicy_names[] = {
- [NVME_IOPOLICY_NUMA] = "numa",
- [NVME_IOPOLICY_RR] = "round-robin",
-};
-
static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -866,7 +897,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
}
if (ana_log_size > ctrl->ana_log_size) {
nvme_mpath_stop(ctrl);
- kfree(ctrl->ana_log_buf);
+ nvme_mpath_uninit(ctrl);
ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL);
if (!ctrl->ana_log_buf)
return -ENOMEM;
@@ -886,4 +917,5 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
kfree(ctrl->ana_log_buf);
ctrl->ana_log_buf = NULL;
+ ctrl->ana_log_size = 0;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index b334af8aa264..a162f6c6da6e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -614,6 +614,10 @@ static inline bool nvme_try_complete_req(struct request *req, __le16 status,
union nvme_result result)
{
struct nvme_request *rq = nvme_req(req);
+ struct nvme_ctrl *ctrl = rq->ctrl;
+
+ if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN))
+ rq->genctr++;
rq->status = le16_to_cpu(status) >> 1;
rq->result = result;
@@ -709,7 +713,7 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
return true;
if (ctrl->ops->flags & NVME_F_FABRICS &&
ctrl->state == NVME_CTRL_DELETING)
- return true;
+ return queue_live;
return __nvme_check_ready(ctrl, rq, queue_live);
}
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
@@ -763,6 +767,7 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
+void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags);
void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
@@ -860,6 +865,9 @@ static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
}
+static inline void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
+{
+}
#endif /* CONFIG_NVME_MULTIPATH */
int nvme_revalidate_zones(struct nvme_ns *ns);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ca2ee806d74b..d8585df2c2fd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -500,22 +500,13 @@ static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
nvmeq->last_sq_tail = nvmeq->sq_tail;
}
-/**
- * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
- * @nvmeq: The queue to use
- * @cmd: The command to send
- * @write_sq: whether to write to the SQ doorbell
- */
-static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
- bool write_sq)
+static inline void nvme_sq_copy_cmd(struct nvme_queue *nvmeq,
+ struct nvme_command *cmd)
{
- spin_lock(&nvmeq->sq_lock);
memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
- cmd, sizeof(*cmd));
+ absolute_pointer(cmd), sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
- nvme_write_sq_db(nvmeq, write_sq);
- spin_unlock(&nvmeq->sq_lock);
}
static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
@@ -912,52 +903,32 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
return BLK_STS_OK;
}
-/*
- * NOTE: ns is NULL when called on the admin queue.
- */
-static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
+static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
{
- struct nvme_ns *ns = hctx->queue->queuedata;
- struct nvme_queue *nvmeq = hctx->driver_data;
- struct nvme_dev *dev = nvmeq->dev;
- struct request *req = bd->rq;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_command *cmnd = &iod->cmd;
blk_status_t ret;
iod->aborted = 0;
iod->npages = -1;
iod->nents = 0;
- /*
- * We should not need to do this, but we're still using this to
- * ensure we can drain requests on a dying queue.
- */
- if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
- return BLK_STS_IOERR;
-
- if (!nvme_check_ready(&dev->ctrl, req, true))
- return nvme_fail_nonready_command(&dev->ctrl, req);
-
- ret = nvme_setup_cmd(ns, req);
+ ret = nvme_setup_cmd(req->q->queuedata, req);
if (ret)
return ret;
if (blk_rq_nr_phys_segments(req)) {
- ret = nvme_map_data(dev, req, cmnd);
+ ret = nvme_map_data(dev, req, &iod->cmd);
if (ret)
goto out_free_cmd;
}
if (blk_integrity_rq(req)) {
- ret = nvme_map_metadata(dev, req, cmnd);
+ ret = nvme_map_metadata(dev, req, &iod->cmd);
if (ret)
goto out_unmap_data;
}
blk_mq_start_request(req);
- nvme_submit_cmd(nvmeq, cmnd, bd->last);
return BLK_STS_OK;
out_unmap_data:
nvme_unmap_data(dev, req);
@@ -966,6 +937,96 @@ out_free_cmd:
return ret;
}
+/*
+ * NOTE: ns is NULL when called on the admin queue.
+ */
+static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct nvme_queue *nvmeq = hctx->driver_data;
+ struct nvme_dev *dev = nvmeq->dev;
+ struct request *req = bd->rq;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ blk_status_t ret;
+
+ /*
+ * We should not need to do this, but we're still using this to
+ * ensure we can drain requests on a dying queue.
+ */
+ if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
+ return BLK_STS_IOERR;
+
+ if (unlikely(!nvme_check_ready(&dev->ctrl, req, true)))
+ return nvme_fail_nonready_command(&dev->ctrl, req);
+
+ ret = nvme_prep_rq(dev, req);
+ if (unlikely(ret))
+ return ret;
+ spin_lock(&nvmeq->sq_lock);
+ nvme_sq_copy_cmd(nvmeq, &iod->cmd);
+ nvme_write_sq_db(nvmeq, bd->last);
+ spin_unlock(&nvmeq->sq_lock);
+ return BLK_STS_OK;
+}
+
+static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct request **rqlist)
+{
+ spin_lock(&nvmeq->sq_lock);
+ while (!rq_list_empty(*rqlist)) {
+ struct request *req = rq_list_pop(rqlist);
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+ nvme_sq_copy_cmd(nvmeq, &iod->cmd);
+ }
+ nvme_write_sq_db(nvmeq, true);
+ spin_unlock(&nvmeq->sq_lock);
+}
+
+static bool nvme_prep_rq_batch(struct nvme_queue *nvmeq, struct request *req)
+{
+ /*
+ * We should not need to do this, but we're still using this to
+ * ensure we can drain requests on a dying queue.
+ */
+ if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
+ return false;
+ if (unlikely(!nvme_check_ready(&nvmeq->dev->ctrl, req, true)))
+ return false;
+
+ req->mq_hctx->tags->rqs[req->tag] = req;
+ return nvme_prep_rq(nvmeq->dev, req) == BLK_STS_OK;
+}
+
+static void nvme_queue_rqs(struct request **rqlist)
+{
+ struct request *req, *next, *prev = NULL;
+ struct request *requeue_list = NULL;
+
+ rq_list_for_each_safe(rqlist, req, next) {
+ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+
+ if (!nvme_prep_rq_batch(nvmeq, req)) {
+ /* detach 'req' and add to remainder list */
+ rq_list_move(rqlist, &requeue_list, req, prev);
+
+ req = prev;
+ if (!req)
+ continue;
+ }
+
+ if (!next || req->mq_hctx != next->mq_hctx) {
+ /* detach rest of list, and submit */
+ req->rq_next = NULL;
+ nvme_submit_cmds(nvmeq, rqlist);
+ *rqlist = next;
+ prev = NULL;
+ } else
+ prev = req;
+ }
+
+ *rqlist = requeue_list;
+}
+
static __always_inline void nvme_pci_unmap_rq(struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -1140,7 +1201,11 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
c.common.opcode = nvme_admin_async_event;
c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
- nvme_submit_cmd(nvmeq, &c, true);
+
+ spin_lock(&nvmeq->sq_lock);
+ nvme_sq_copy_cmd(nvmeq, &c);
+ nvme_write_sq_db(nvmeq, true);
+ spin_unlock(&nvmeq->sq_lock);
}
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1371,7 +1436,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
}
abort_req->end_io_data = NULL;
- blk_execute_rq_nowait(NULL, abort_req, 0, abort_endio);
+ blk_execute_rq_nowait(abort_req, false, abort_endio);
/*
* The aborted req will be completed on receiving the abort req.
@@ -1663,6 +1728,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
static const struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
+ .queue_rqs = nvme_queue_rqs,
.complete = nvme_pci_complete_rq,
.commit_rqs = nvme_commit_rqs,
.init_hctx = nvme_init_hctx,
@@ -2416,9 +2482,8 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
req->end_io_data = nvmeq;
init_completion(&nvmeq->delete_done);
- blk_execute_rq_nowait(NULL, req, false,
- opcode == nvme_admin_delete_cq ?
- nvme_del_cq_end : nvme_del_queue_end);
+ blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ?
+ nvme_del_cq_end : nvme_del_queue_end);
return 0;
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 33bc83d8d992..4ceb28675fdf 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -572,7 +572,7 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
return ret;
}
-static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
+static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
struct nvme_tcp_r2t_pdu *pdu)
{
struct nvme_tcp_data_pdu *data = req->pdu;
@@ -581,32 +581,11 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
u8 hdgst = nvme_tcp_hdgst_len(queue);
u8 ddgst = nvme_tcp_ddgst_len(queue);
+ req->state = NVME_TCP_SEND_H2C_PDU;
+ req->offset = 0;
req->pdu_len = le32_to_cpu(pdu->r2t_length);
req->pdu_sent = 0;
- if (unlikely(!req->pdu_len)) {
- dev_err(queue->ctrl->ctrl.device,
- "req %d r2t len is %u, probably a bug...\n",
- rq->tag, req->pdu_len);
- return -EPROTO;
- }
-
- if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
- dev_err(queue->ctrl->ctrl.device,
- "req %d r2t len %u exceeded data len %u (%zu sent)\n",
- rq->tag, req->pdu_len, req->data_len,
- req->data_sent);
- return -EPROTO;
- }
-
- if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
- dev_err(queue->ctrl->ctrl.device,
- "req %d unexpected r2t offset %u (expected %zu)\n",
- rq->tag, le32_to_cpu(pdu->r2t_offset),
- req->data_sent);
- return -EPROTO;
- }
-
memset(data, 0, sizeof(*data));
data->hdr.type = nvme_tcp_h2c_data;
data->hdr.flags = NVME_TCP_F_DATA_LAST;
@@ -622,7 +601,6 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
data->command_id = nvme_cid(rq);
data->data_offset = pdu->r2t_offset;
data->data_length = cpu_to_le32(req->pdu_len);
- return 0;
}
static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
@@ -630,7 +608,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
{
struct nvme_tcp_request *req;
struct request *rq;
- int ret;
+ u32 r2t_length = le32_to_cpu(pdu->r2t_length);
rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
@@ -641,13 +619,28 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
}
req = blk_mq_rq_to_pdu(rq);
- ret = nvme_tcp_setup_h2c_data_pdu(req, pdu);
- if (unlikely(ret))
- return ret;
+ if (unlikely(!r2t_length)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "req %d r2t len is %u, probably a bug...\n",
+ rq->tag, r2t_length);
+ return -EPROTO;
+ }
- req->state = NVME_TCP_SEND_H2C_PDU;
- req->offset = 0;
+ if (unlikely(req->data_sent + r2t_length > req->data_len)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "req %d r2t len %u exceeded data len %u (%zu sent)\n",
+ rq->tag, r2t_length, req->data_len, req->data_sent);
+ return -EPROTO;
+ }
+ if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "req %d unexpected r2t offset %u (expected %zu)\n",
+ rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent);
+ return -EPROTO;
+ }
+
+ nvme_tcp_setup_h2c_data_pdu(req, pdu);
nvme_tcp_queue_request(req, false, true);
return 0;
@@ -1232,6 +1225,7 @@ static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
{
+ struct page *page;
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
@@ -1241,6 +1235,11 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
if (queue->hdr_digest || queue->data_digest)
nvme_tcp_free_crypto(queue);
+ if (queue->pf_cache.va) {
+ page = virt_to_head_page(queue->pf_cache.va);
+ __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
+ queue->pf_cache.va = NULL;
+ }
sock_release(queue->sock);
kfree(queue->pdu);
mutex_destroy(&queue->send_mutex);
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index 35bac7a25422..b5f85259461a 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -68,7 +68,7 @@ TRACE_EVENT(nvme_setup_cmd,
__entry->nsid = le32_to_cpu(cmd->common.nsid);
__entry->metadata = !!blk_integrity_rq(req);
__entry->fctype = cmd->fabrics.fctype;
- __assign_disk_name(__entry->disk, req->rq_disk);
+ __assign_disk_name(__entry->disk, req->q->disk);
memcpy(__entry->cdw10, &cmd->common.cdw10,
sizeof(__entry->cdw10));
),
@@ -103,7 +103,7 @@ TRACE_EVENT(nvme_complete_rq,
__entry->retries = nvme_req(req)->retries;
__entry->flags = nvme_req(req)->flags;
__entry->status = nvme_req(req)->status;
- __assign_disk_name(__entry->disk, req->rq_disk);
+ __assign_disk_name(__entry->disk, req->q->disk);
),
TP_printk("nvme%d: %sqid=%d, cmdid=%u, res=%#llx, retries=%u, flags=0x%x, status=%#x",
__entry->ctrl_id, __print_disk_name(__entry->disk),
@@ -153,7 +153,7 @@ TRACE_EVENT(nvme_sq,
),
TP_fast_assign(
__entry->ctrl_id = nvme_req(req)->ctrl->instance;
- __assign_disk_name(__entry->disk, req->rq_disk);
+ __assign_disk_name(__entry->disk, req->q->disk);
__entry->qid = nvme_req_qid(req);
__entry->sq_head = le16_to_cpu(sq_head);
__entry->sq_tail = sq_tail;
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index bfc259e0d7b8..9f81beb4df4e 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -166,7 +166,10 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
zone.len = ns->zsze;
zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
- zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
+ if (zone.cond == BLK_ZONE_COND_FULL)
+ zone.wp = zone.start + zone.len;
+ else
+ zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
return cb(&zone, idx, data);
}
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 6aa30f30b572..6be6e59d273b 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -8,6 +8,7 @@
#include <linux/uio.h>
#include <linux/falloc.h>
#include <linux/file.h>
+#include <linux/fs.h>
#include "nvmet.h"
#define NVMET_MAX_MPOOL_BVEC 16
@@ -266,7 +267,8 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
if (req->ns->buffered_io) {
if (likely(!req->f.mpool_alloc) &&
- nvmet_file_execute_io(req, IOCB_NOWAIT))
+ (req->ns->file->f_mode & FMODE_NOWAIT) &&
+ nvmet_file_execute_io(req, IOCB_NOWAIT))
return;
nvmet_file_submit_buffered_io(req);
} else
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index f0efb3537989..9e5b89ae29df 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -284,8 +284,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
schedule_work(&req->p.work);
} else {
rq->end_io_data = req;
- blk_execute_rq_nowait(ns ? ns->disk : NULL, rq, 0,
- nvmet_passthru_req_done);
+ blk_execute_rq_nowait(rq, false, nvmet_passthru_req_done);
}
if (ns)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 84c387e4bf43..7c1c43ce466b 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -166,6 +166,8 @@ static struct workqueue_struct *nvmet_tcp_wq;
static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
+static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);
+static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd);
static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd)
@@ -297,6 +299,16 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
return 0;
}
+static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd)
+{
+ WARN_ON(unlikely(cmd->nr_mapped > 0));
+
+ kfree(cmd->iov);
+ sgl_free(cmd->req.sg);
+ cmd->iov = NULL;
+ cmd->req.sg = NULL;
+}
+
static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{
struct scatterlist *sg;
@@ -306,6 +318,8 @@ static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
for (i = 0; i < cmd->nr_mapped; i++)
kunmap(sg_page(&sg[i]));
+
+ cmd->nr_mapped = 0;
}
static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
@@ -387,7 +401,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
return 0;
err:
- sgl_free(cmd->req.sg);
+ nvmet_tcp_free_cmd_buffers(cmd);
return NVME_SC_INTERNAL;
}
@@ -632,10 +646,8 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
}
}
- if (queue->nvme_sq.sqhd_disabled) {
- kfree(cmd->iov);
- sgl_free(cmd->req.sg);
- }
+ if (queue->nvme_sq.sqhd_disabled)
+ nvmet_tcp_free_cmd_buffers(cmd);
return 1;
@@ -664,8 +676,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
if (left)
return -EAGAIN;
- kfree(cmd->iov);
- sgl_free(cmd->req.sg);
+ nvmet_tcp_free_cmd_buffers(cmd);
cmd->queue->snd_cmd = NULL;
nvmet_tcp_put_cmd(cmd);
return 1;
@@ -700,10 +711,11 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
struct nvmet_tcp_queue *queue = cmd->queue;
+ int left = NVME_TCP_DIGEST_LENGTH - cmd->offset;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = {
.iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
- .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
+ .iov_len = left
};
int ret;
@@ -717,6 +729,10 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
return ret;
cmd->offset += ret;
+ left -= ret;
+
+ if (left)
+ return -EAGAIN;
if (queue->nvme_sq.sqhd_disabled) {
cmd->queue->snd_cmd = NULL;
@@ -906,7 +922,14 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
int ret;
- if (!nvme_is_write(cmd->req.cmd) ||
+ /*
+ * This command has not been processed yet, hence we are trying to
+ * figure out if there is still pending data left to receive. If
+ * we don't, we can simply prepare for the next pdu and bail out,
+ * otherwise we will need to prepare a buffer and receive the
+ * stale data before continuing forward.
+ */
+ if (!nvme_is_write(cmd->req.cmd) || !data_len ||
data_len > cmd->req.port->inline_data_size) {
nvmet_prepare_receive_pdu(queue);
return;
@@ -1406,8 +1429,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
{
nvmet_req_uninit(&cmd->req);
nvmet_tcp_unmap_pdu_iovec(cmd);
- kfree(cmd->iov);
- sgl_free(cmd->req.sg);
+ nvmet_tcp_free_cmd_buffers(cmd);
}
static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
@@ -1417,7 +1439,10 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
for (i = 0; i < queue->nr_cmds; i++, cmd++) {
if (nvmet_tcp_need_data_in(cmd))
- nvmet_tcp_finish_cmd(cmd);
+ nvmet_req_uninit(&cmd->req);
+
+ nvmet_tcp_unmap_pdu_iovec(cmd);
+ nvmet_tcp_free_cmd_buffers(cmd);
}
if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
@@ -1437,7 +1462,9 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
mutex_unlock(&nvmet_tcp_queue_mutex);
nvmet_tcp_restore_socket_callbacks(queue);
- flush_work(&queue->io_work);
+ cancel_work_sync(&queue->io_work);
+ /* stop accepting incoming data */
+ queue->rcv_state = NVMET_TCP_RECV_ERR;
nvmet_tcp_uninit_data_in_cmds(queue);
nvmet_sq_destroy(&queue->nvme_sq);