From 43efdb8e870ee0f58633fd579aa5b5185bf5d39e Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Mon, 12 Oct 2020 16:10:40 +0800 Subject: nvme-rdma: fix crash when connect rejected A crash can happened when a connect is rejected. The host establishes the connection after received ConnectReply, and then continues to send the fabrics Connect command. If the controller does not receive the ReadyToUse capsule, host may receive a ConnectReject reply. Call nvme_rdma_destroy_queue_ib after the host received the RDMA_CM_EVENT_REJECTED event. Then when the fabrics Connect command times out, nvme_rdma_timeout calls nvme_rdma_complete_rq to fail the request. A crash happenes due to use after free in nvme_rdma_complete_rq. nvme_rdma_destroy_queue_ib is redundant when handling the RDMA_CM_EVENT_REJECTED event as nvme_rdma_destroy_queue_ib is already called in connection failure handler. Signed-off-by: Chao Leng Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9e378d0a0c01..116902b1b2c3 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1926,7 +1926,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, complete(&queue->cm_done); return 0; case RDMA_CM_EVENT_REJECTED: - nvme_rdma_destroy_queue_ib(queue); cm_error = nvme_rdma_conn_rejected(queue, ev); break; case RDMA_CM_EVENT_ROUTE_ERROR: -- cgit v1.2.3 From a87da50f39d467f2ea4c1f98decb72ef6d87a31e Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Mon, 12 Oct 2020 16:55:37 +0800 Subject: nvme-rdma: fix crash due to incorrect cqe A crash happened due to injecting error test. When a CQE has incorrect command id due do an error injection, the host may find a request which is already freed. Dereferencing req->mr->rkey causes a crash in nvme_rdma_process_nvme_rsp because the mr is already freed. Add a check for the mr to fix it. Signed-off-by: Chao Leng Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 116902b1b2c3..aad829a2b50d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1730,10 +1730,11 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, req->result = cqe->result; if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { - if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) { + if (unlikely(!req->mr || + wc->ex.invalidate_rkey != req->mr->rkey)) { dev_err(queue->ctrl->ctrl.device, "Bogus remote invalidation for rkey %#x\n", - req->mr->rkey); + req->mr ? req->mr->rkey : 0); nvme_rdma_error_recovery(queue->ctrl); } } else if (req->mr) { -- cgit v1.2.3 From 643c476d6f78cf0349fb8e07334962dd056a3c90 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 15 Oct 2020 11:36:29 -0700 Subject: nvme: use queuedata for nvme_req_qid The request's rq_disk isn't set for passthrough IO commands, so tracing uses qid 0 for these which incorrectly decodes as an admin command. Use the request_queue's queuedata instead since that value is always set for the IO queues, and never set for the admin queue. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index e7c88b40f5bb..cc111136a981 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -176,7 +176,7 @@ static inline struct nvme_request *nvme_req(struct request *req) static inline u16 nvme_req_qid(struct request *req) { - if (!req->rq_disk) + if (!req->q->queuedata) return 0; return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1; } -- cgit v1.2.3 From 02ca079c99319c4308c6bb892613f29119c1a9f9 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 13 Oct 2020 16:34:45 +0800 Subject: nvme-pci: disable Write Zeroes on Sandisk Skyhawk Like commit 5611ec2b9814 ("nvme-pci: prevent SK hynix PC400 from using Write Zeroes command"), Sandisk Skyhawk has the same issue: [ 6305.633887] blk_update_request: operation not supported error, dev nvme0n1, sector 340812032 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 So also disable Write Zeroes command on Sandisk Skyhawk. BugLink: https://bugs.launchpad.net/bugs/1899503 Signed-off-by: Kai-Heng Feng Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e5b02242f3ca..df8f3612107f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3185,6 +3185,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, -- cgit v1.2.3 From 85bd23f3dc09a2ae9e56885420e52c54bf983713 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Thu, 15 Oct 2020 09:51:40 +0800 Subject: nvmet: fix uninitialized work for zero kato When connecting a controller with a zero kato value using the following command line nvme connect -t tcp -n NQN -a ADDR -s PORT --keep-alive-tmo=0 the warning below can be reproduced: WARNING: CPU: 1 PID: 241 at kernel/workqueue.c:1627 __queue_delayed_work+0x6d/0x90 with trace: mod_delayed_work_on+0x59/0x90 nvmet_update_cc+0xee/0x100 [nvmet] nvmet_execute_prop_set+0x72/0x80 [nvmet] nvmet_tcp_try_recv_pdu+0x2f7/0x770 [nvmet_tcp] nvmet_tcp_io_work+0x63f/0xb2d [nvmet_tcp] ... This is caused by queuing up an uninitialized work. Althrough the keep-alive timer is disabled during allocating the controller (fixed in 0d3b6a8d213a), ka_work still has a chance to run (called by nvmet_start_ctrl). Fixes: 0d3b6a8d213a ("nvmet: Disable keep-alive timer when kato is cleared to 0h") Signed-off-by: zhenwei pi Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 25d62d867563..aafcbc424b7a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1126,7 +1126,8 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) * in case a host died before it enabled the controller. Hence, simply * reset the keep alive timer when the controller is enabled. */ - mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); + if (ctrl->kato) + mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); } static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) -- cgit v1.2.3 From df06047d54276f73782c9d97882b305fca745d3f Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 16 Oct 2020 16:19:04 -0600 Subject: nvmet: limit passthru MTDS by BIO_MAX_PAGES nvmet_passthru_map_sg() only supports mapping a single BIO, not a chain so the effective maximum transfer should also be limitted by BIO_MAX_PAGES (presently this works out to 1MB). For PCI passthru devices the max_sectors would typically be more limitting than BIO_MAX_PAGES, but this may not be true for all passthru devices. Fixes: c1fef73f793b ("nvmet: add passthru code to process commands") Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/passthru.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 56c571052216..323046e1d67f 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl; u16 status = NVME_SC_SUCCESS; struct nvme_id_ctrl *id; - u32 max_hw_sectors; + int max_hw_sectors; int page_shift; id = kzalloc(sizeof(*id), GFP_KERNEL); @@ -48,6 +48,13 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9), pctrl->max_hw_sectors); + /* + * nvmet_passthru_map_sg is limitted to using a single bio so limit + * the mdts based on BIO_MAX_PAGES as well + */ + max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9), + max_hw_sectors); + page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; id->mdts = ilog2(max_hw_sectors) + 9 - page_shift; -- cgit v1.2.3 From 5e063101ffacf7c14797f5185c58a967ca83c79f Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 16 Oct 2020 16:19:05 -0600 Subject: nvmet: cleanup nvmet_passthru_map_sg() Clean up some confusing elements of nvmet_passthru_map_sg() by returning early if the request is greater than the maximum bio size. This allows us to drop the sg_cnt variable. This should not result in any functional change but makes the code clearer and more understandable. The original code allocated a truncated bio then would return EINVAL when bio_add_pc_page() filled that bio. The new code just returns EINVAL early if this would happen. Fixes: c1fef73f793b ("nvmet: add passthru code to process commands") Signed-off-by: Logan Gunthorpe Suggested-by: Douglas Gilbert Reviewed-by: Sagi Grimberg Cc: Christoph Hellwig Cc: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/passthru.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 323046e1d67f..0814cba8298a 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -187,18 +187,20 @@ static void nvmet_passthru_req_done(struct request *rq, static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) { - int sg_cnt = req->sg_cnt; struct scatterlist *sg; int op_flags = 0; struct bio *bio; int i, ret; + if (req->sg_cnt > BIO_MAX_PAGES) + return -EINVAL; + if (req->cmd->common.opcode == nvme_cmd_flush) op_flags = REQ_FUA; else if (nvme_is_write(req->cmd)) op_flags = REQ_SYNC | REQ_IDLE; - bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + bio = bio_alloc(GFP_KERNEL, req->sg_cnt); bio->bi_end_io = bio_put; bio->bi_opf = req_op(rq) | op_flags; @@ -208,7 +210,6 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) bio_put(bio); return -EINVAL; } - sg_cnt--; } ret = blk_rq_append_bio(rq, &bio); -- cgit v1.2.3 From 150dfb6c834c9e0e92db7794530b09fd2b9f05c8 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 20 Oct 2020 16:14:04 -0700 Subject: nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default, we set the passthru request allocation flag such that it returns the error in the following code path and we fail the I/O when BLK_MQ_REQ_NOWAIT is used for request allocation :- nvme_alloc_request()  blk_mq_alloc_request()   blk_mq_queue_enter()    if (flag & BLK_MQ_REQ_NOWAIT)         return -EBUSY; <-- return if busy. On some controllers using BLK_MQ_REQ_NOWAIT ends up in I/O error where the controller is perfectly healthy and not in a degraded state. Block layer request allocation does allow us to wait instead of immediately returning the error when we BLK_MQ_REQ_NOWAIT flag is not used. This has shown to fix the I/O error problem reported under heavy random write workload. Remove the BLK_MQ_REQ_NOWAIT parameter for passthru request allocation which resolves this issue. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- drivers/nvme/target/passthru.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 0814cba8298a..8ee94f056898 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -244,7 +244,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) q = ns->queue; } - rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); + rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY); if (IS_ERR(rq)) { status = NVME_SC_INTERNAL; goto out_put_ns; -- cgit v1.2.3 From 52793d62a696e9188092eb0817fb1219ee5729ff Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 16 Oct 2020 14:06:27 -0700 Subject: nvme-fc: fix io timeout to abort I/O Currently, an I/O timeout unconditionally invokes nvme_fc_error_recovery() which checks for LIVE or CONNECTING state. If live, the routine resets the controller which initiates a reconnect - which is valid. If CONNECTING, err_work is scheduled. Err_work then calls the terminate_io routine, which also checks for CONNECTING and noops any further action on outstanding I/O. The result is nothing happened to the timed out io. As such, if the command was dropped on the wire, it will never timeout / complete, and the connect process will hang. Change the behavior of the io timeout routine to unconditionally abort the I/O. I/O completion handling will note that an io failed due to an abort and will terminate the connection / association as needed. If the abort was unable to happen, continue with a call to nvme_fc_error_recovery(). To ensure something different happens in nvme_fc_error_recovery() rework it so at it will abort all I/Os on the association to force a failure. As I/O aborts now may occur outside of delete_association, counting for completion must be wary and only count those aborted during delete_association when TERMIO is set on the controller. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 108 +++++++++++++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 39 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e2e09e25c056..3e72b7d74df3 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1837,8 +1837,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); if (opstate != FCPOP_STATE_ACTIVE) atomic_set(&op->state, opstate); - else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) + else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { + op->flags |= FCOP_FLAGS_TERMIO; ctrl->iocnt++; + } spin_unlock_irqrestore(&ctrl->lock, flags); if (opstate != FCPOP_STATE_ACTIVE) @@ -1874,7 +1876,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, if (opstate == FCPOP_STATE_ABORTED) { spin_lock_irqsave(&ctrl->lock, flags); - if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { + if (test_bit(FCCTRL_TERMIO, &ctrl->flags) && + op->flags & FCOP_FLAGS_TERMIO) { if (!--ctrl->iocnt) wake_up(&ctrl->ioabort_wait); } @@ -2446,15 +2449,20 @@ nvme_fc_timeout(struct request *rq, bool reserved) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; + struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; + struct nvme_command *sqe = &cmdiu->sqe; /* - * we can't individually ABTS an io without affecting the queue, - * thus killing the queue, and thus the association. - * So resolve by performing a controller reset, which will stop - * the host/io stack, terminate the association on the link, - * and recreate an association on the link. + * Attempt to abort the offending command. Command completion + * will detect the aborted io and will fail the connection. */ - nvme_fc_error_recovery(ctrl, "io timeout error"); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: " + "x%08x/x%08x\n", + ctrl->cnum, op->queue->qnum, sqe->common.opcode, + sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11); + if (__nvme_fc_abort_op(ctrl, op)) + nvme_fc_error_recovery(ctrl, "io timeout abort failed"); /* * the io abort has been initiated. Have the reset timer @@ -2726,6 +2734,7 @@ nvme_fc_complete_rq(struct request *rq) struct nvme_fc_ctrl *ctrl = op->ctrl; atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags &= ~FCOP_FLAGS_TERMIO; nvme_fc_unmap_data(ctrl, rq, op); nvme_complete_rq(rq); @@ -3090,26 +3099,19 @@ out_free_queue: return ret; } + /* - * This routine stops operation of the controller on the host side. - * On the host os stack side: Admin and IO queues are stopped, - * outstanding ios on them terminated via FC ABTS. - * On the link side: the association is terminated. + * This routine runs through all outstanding commands on the association + * and aborts them. This routine is typically be called by the + * delete_association routine. It is also called due to an error during + * reconnect. In that scenario, it is most likely a command that initializes + * the controller, including fabric Connect commands on io queues, that + * may have timed out or failed thus the io must be killed for the connect + * thread to see the error. */ static void -nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) +__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) { - struct nvmefc_ls_rcv_op *disls = NULL; - unsigned long flags; - - if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) - return; - - spin_lock_irqsave(&ctrl->lock, flags); - set_bit(FCCTRL_TERMIO, &ctrl->flags); - ctrl->iocnt = 0; - spin_unlock_irqrestore(&ctrl->lock, flags); - /* * If io queues are present, stop them and terminate all outstanding * ios on them. As FC allocates FC exchange for each io, the @@ -3127,6 +3129,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->tag_set); + if (start_queues) + nvme_start_queues(&ctrl->ctrl); } /* @@ -3143,13 +3147,34 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) /* * clean up the admin queue. Same thing as above. - * use blk_mq_tagset_busy_itr() and the transport routine to - * terminate the exchanges. */ blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); +} + +/* + * This routine stops operation of the controller on the host side. + * On the host os stack side: Admin and IO queues are stopped, + * outstanding ios on them terminated via FC ABTS. + * On the link side: the association is terminated. + */ +static void +nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) +{ + struct nvmefc_ls_rcv_op *disls = NULL; + unsigned long flags; + + if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) + return; + + spin_lock_irqsave(&ctrl->lock, flags); + set_bit(FCCTRL_TERMIO, &ctrl->flags); + ctrl->iocnt = 0; + spin_unlock_irqrestore(&ctrl->lock, flags); + + __nvme_fc_abort_outstanding_ios(ctrl, false); /* kill the aens as they are a separate path */ nvme_fc_abort_aen_ops(ctrl); @@ -3263,22 +3288,27 @@ static void __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) { /* - * if state is connecting - the error occurred as part of a - * reconnect attempt. The create_association error paths will - * clean up any outstanding io. - * - * if it's a different state - ensure all pending io is - * terminated. Given this can delay while waiting for the - * aborted io to return, we recheck adapter state below - * before changing state. + * if state is CONNECTING - the error occurred as part of a + * reconnect attempt. Abort any ios on the association and + * let the create_association error paths resolve things. */ - if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) { - nvme_stop_keep_alive(&ctrl->ctrl); - - /* will block will waiting for io to terminate */ - nvme_fc_delete_association(ctrl); + if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { + __nvme_fc_abort_outstanding_ios(ctrl, true); + return; } + /* + * For any other state, kill the association. As this routine + * is a common io abort routine for resetting and such, after + * the association is terminated, ensure that the state is set + * to CONNECTING. + */ + + nvme_stop_keep_alive(&ctrl->ctrl); + + /* will block will waiting for io to terminate */ + nvme_fc_delete_association(ctrl); + if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) dev_err(ctrl->ctrl.device, -- cgit v1.2.3 From 514a6dc9ecfd2fe4e1deebcb7a63e3de23e6c38b Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 16 Oct 2020 14:06:04 -0700 Subject: nvme-fc: fix error loop in create_hw_io_queues The loop that backs out of hw io queue creation continues through index 0, which corresponds to the admin queue as well. Fix the loop so it only proceeds through indexes 1..n which correspond to I/O queues. Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 3e72b7d74df3..108130f140d0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2317,7 +2317,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) return 0; delete_queues: - for (; i >= 0; i--) + for (; i > 0; i--) __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); return ret; } @@ -2436,7 +2436,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) return; dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: transport association error detected: %s\n", + "NVME-FC{%d}: transport association event: %s\n", ctrl->cnum, errmsg); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: resetting controller\n", ctrl->cnum); -- cgit v1.2.3 From 88e837ed0f1fddd34a19092aaa7098d579e6c506 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 16 Oct 2020 14:17:24 -0700 Subject: nvme-fc: wait for queues to freeze before calling update_hr_hw_queues On reconnect, the code currently does not freeze the controller before possibly updating the number hw queues for the controller. Add the freeze before updating the number of hw queues. Note: the queues are already started and remain started through the reconnect. Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 108130f140d0..5f1d09640c40 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2885,11 +2885,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) if (ret) goto out_delete_hw_queues; - if (prior_ioq_cnt != nr_io_queues) + if (prior_ioq_cnt != nr_io_queues) { dev_info(ctrl->ctrl.device, "reconnect: revising io queue count from %d to %d\n", prior_ioq_cnt, nr_io_queues); - blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); + nvme_wait_freeze(&ctrl->ctrl); + blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); + nvme_unfreeze(&ctrl->ctrl); + } return 0; -- cgit v1.2.3 From f673714a1247669bc90322dfb14a5cf553833796 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 16 Oct 2020 14:29:28 -0700 Subject: nvme-fc: shorten reconnect delay if possible for FC We've had several complaints about a 10s reconnect delay (the default) when there was an error while there is connectivity to a subsystem. The max_reconnects and reconnect_delay are set in common code prior to calling the transport to create the controller. This change checks if the default reconnect delay is being used, and if so, it adjusts it to a shorter period (2s) for the nvme-fc transport. It does so by calculating the controller loss tmo window, changing the value of the reconnect delay, and then recalculating the maximum number of reconnect attempts allowed. Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5f1d09640c40..3c002bdcace3 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -26,6 +26,10 @@ enum nvme_fc_queue_flags { }; #define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */ +#define NVME_FC_DEFAULT_RECONNECT_TMO 2 /* delay between reconnects + * when connected and a + * connection failure. + */ struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; @@ -3436,7 +3440,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, { struct nvme_fc_ctrl *ctrl; unsigned long flags; - int ret, idx; + int ret, idx, ctrl_loss_tmo; if (!(rport->remoteport.port_role & (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { @@ -3462,6 +3466,19 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_free_ctrl; } + /* + * if ctrl_loss_tmo is being enforced and the default reconnect delay + * is being used, change to a shorter reconnect delay for FC. + */ + if (opts->max_reconnects != -1 && + opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY && + opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) { + ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay; + opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO; + opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, + opts->reconnect_delay); + } + ctrl->ctrl.opts = opts; ctrl->ctrl.nr_reconnects = 0; if (lport->dev) -- cgit v1.2.3