From 8a82dbf19129dde9e6fc9ab25a00dbc7569abe6a Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 9 Oct 2017 13:39:44 -0700 Subject: nvme-fc: fix iowait hang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing iowait head initialization. Fix irqsave vs irq: wait_event_lock_irq() doesn't do irq save/restore Fixes: 36715cf4b366 ("nvme_fc: replace ioabort msleep loop with completion”) Cc: # 4.13 Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Tested-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index af075e998944..8182b1999f49 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2545,10 +2545,10 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) nvme_fc_abort_aen_ops(ctrl); /* wait for all io that had to be aborted */ - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irq(&ctrl->lock); wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); ctrl->flags &= ~FCCTRL_TERMIO; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irq(&ctrl->lock); nvme_fc_term_aen_ops(ctrl); @@ -2760,6 +2760,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->rport = rport; ctrl->dev = lport->dev; ctrl->cnum = idx; + init_waitqueue_head(&ctrl->ioabort_wait); get_device(ctrl->dev); kref_init(&ctrl->ref); -- cgit v1.2.3 From 17c4dc6eb7e1b2fb1ce6a52467e3be635224606e Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 9 Oct 2017 16:39:22 -0700 Subject: nvme-fc: retry initial controller connections 3 times Currently, if a frame is lost of command fails as part of initial association create for a new controller, the new controller connection request will immediately fail. Add in an immediate 3 retry loop before giving up. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8182b1999f49..be49d0f79381 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2734,7 +2734,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, { struct nvme_fc_ctrl *ctrl; unsigned long flags; - int ret, idx; + int ret, idx, retry; if (!(rport->remoteport.port_role & (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { @@ -2826,9 +2826,37 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - ret = nvme_fc_create_association(ctrl); + /* + * It's possible that transactions used to create the association + * may fail. Examples: CreateAssociation LS or CreateIOConnection + * LS gets dropped/corrupted/fails; or a frame gets dropped or a + * command times out for one of the actions to init the controller + * (Connect, Get/Set_Property, Set_Features, etc). Many of these + * transport errors (frame drop, LS failure) inherently must kill + * the association. The transport is coded so that any command used + * to create the association (prior to a LIVE state transition + * while NEW or RECONNECTING) will fail if it completes in error or + * times out. + * + * As such: as the connect request was mostly likely due to a + * udev event that discovered the remote port, meaning there is + * not an admin or script there to restart if the connect + * request fails, retry the initial connection creation up to + * three times before giving up and declaring failure. + */ + for (retry = 0; retry < 3; retry++) { + ret = nvme_fc_create_association(ctrl); + if (!ret) + break; + } + if (ret) { + /* couldn't schedule retry - fail out */ + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); + ctrl->ctrl.opts = NULL; + /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); -- cgit v1.2.3 From f9cf2a64912d67c9cf49c316a0a0ada0ea7ed1da Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 18 Oct 2017 14:33:59 -0700 Subject: nvmet: synchronize sqhd update In testing target io in read write mix, we did indeed get into cases where sqhd didn't update properly and slowly missed enough updates to shutdown the queue. Protect the updating sqhd by using cmpxchg, and for that turn the sqhd field into a u32 so that cmpxchg works on it for all architectures. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 15 ++++++++++++--- drivers/nvme/target/nvmet.h | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 1b208beeef50..645ba7eee35d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -387,12 +387,21 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) static void __nvmet_req_complete(struct nvmet_req *req, u16 status) { + u32 old_sqhd, new_sqhd; + u16 sqhd; + if (status) nvmet_set_status(req, status); - if (req->sq->size) - req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size; - req->rsp->sq_head = cpu_to_le16(req->sq->sqhd); + if (req->sq->size) { + do { + old_sqhd = req->sq->sqhd; + new_sqhd = (old_sqhd + 1) % req->sq->size; + } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != + old_sqhd); + } + sqhd = req->sq->sqhd & 0x0000FFFF; + req->rsp->sq_head = cpu_to_le16(sqhd); req->rsp->sq_id = cpu_to_le16(req->sq->qid); req->rsp->command_id = req->cmd->common.command_id; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7b8e20adf760..87e429bfcd8a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -74,7 +74,7 @@ struct nvmet_sq { struct percpu_ref ref; u16 qid; u16 size; - u16 sqhd; + u32 sqhd; struct completion free_done; struct completion confirm_done; }; -- cgit v1.2.3 From bd9f07590a17f3158b51fb869dca723f1f606bdc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 19 Oct 2017 16:00:30 +0300 Subject: nvme-rdma: Fix possible double free in reconnect flow The fact that we free the async event buffer in nvme_rdma_destroy_admin_queue can cause us to free it more than once because this happens in every reconnect attempt since commit 31fdf1840170. we rely on the queue state flags DELETING to avoid this for other resources. A more complete fix is to not destroy the admin/io queues unconditionally on every reconnect attempt, but its a bit more extensive and will go in the next release. Fixes: 31fdf1840170 ("nvme-rdma: reuse configure/destroy_admin_queue") Reported-by: Yi Zhang Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 92a03ff5fb4d..4dbee893a047 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -571,6 +571,12 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; + if (nvme_rdma_queue_idx(queue) == 0) { + nvme_rdma_free_qe(queue->device->dev, + &queue->ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); + } + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); } @@ -739,8 +745,6 @@ out: static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, - sizeof(struct nvme_command), DMA_TO_DEVICE); nvme_rdma_stop_queue(&ctrl->queues[0]); if (remove) { blk_cleanup_queue(ctrl->ctrl.admin_q); -- cgit v1.2.3 From f04b9cc87b5fc466b1b7231ba7b078e885956c5b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 19 Oct 2017 18:10:53 +0300 Subject: nvme-rdma: Fix error status return in tagset allocation failure We should make sure to escelate allocation failures to prevent a use-after-free in nvmf_create_ctrl. Fixes: b28a308ee777 ("nvme-rdma: move tagset allocation to a dedicated routine") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4dbee893a047..87bac27ec64b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -769,8 +769,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (new) { ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); - if (IS_ERR(ctrl->ctrl.admin_tagset)) + if (IS_ERR(ctrl->ctrl.admin_tagset)) { + error = PTR_ERR(ctrl->ctrl.admin_tagset); goto out_free_queue; + } ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { @@ -850,8 +852,10 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) if (new) { ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false); - if (IS_ERR(ctrl->ctrl.tagset)) + if (IS_ERR(ctrl->ctrl.tagset)) { + ret = PTR_ERR(ctrl->ctrl.tagset); goto out_free_io_queues; + } ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); if (IS_ERR(ctrl->ctrl.connect_q)) { -- cgit v1.2.3