From 43efdb8e870ee0f58633fd579aa5b5185bf5d39e Mon Sep 17 00:00:00 2001
From: Chao Leng <lengchao@huawei.com>
Date: Mon, 12 Oct 2020 16:10:40 +0800
Subject: nvme-rdma: fix crash when connect rejected

A crash can happened when a connect is rejected.   The host establishes
the connection after received ConnectReply, and then continues to send
the fabrics Connect command.  If the controller does not receive the
ReadyToUse capsule, host may receive a ConnectReject reply.

Call nvme_rdma_destroy_queue_ib after the host received the
RDMA_CM_EVENT_REJECTED event.  Then when the fabrics Connect command
times out, nvme_rdma_timeout calls nvme_rdma_complete_rq to fail the
request.  A crash happenes due to use after free in
nvme_rdma_complete_rq.

nvme_rdma_destroy_queue_ib is redundant when handling the
RDMA_CM_EVENT_REJECTED event as nvme_rdma_destroy_queue_ib is already
called in connection failure handler.

Signed-off-by: Chao Leng <lengchao@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 9e378d0a0c01..116902b1b2c3 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1926,7 +1926,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		complete(&queue->cm_done);
 		return 0;
 	case RDMA_CM_EVENT_REJECTED:
-		nvme_rdma_destroy_queue_ib(queue);
 		cm_error = nvme_rdma_conn_rejected(queue, ev);
 		break;
 	case RDMA_CM_EVENT_ROUTE_ERROR:
-- 
cgit v1.2.3


From a87da50f39d467f2ea4c1f98decb72ef6d87a31e Mon Sep 17 00:00:00 2001
From: Chao Leng <lengchao@huawei.com>
Date: Mon, 12 Oct 2020 16:55:37 +0800
Subject: nvme-rdma: fix crash due to incorrect cqe

A crash happened due to injecting error test.
When a CQE has incorrect command id due do an error injection, the host
may find a request which is already freed.  Dereferencing req->mr->rkey
causes a crash in nvme_rdma_process_nvme_rsp because the mr is already
freed.

Add a check for the mr to fix it.

Signed-off-by: Chao Leng <lengchao@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 116902b1b2c3..aad829a2b50d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1730,10 +1730,11 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	req->result = cqe->result;
 
 	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
-		if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
+		if (unlikely(!req->mr ||
+			     wc->ex.invalidate_rkey != req->mr->rkey)) {
 			dev_err(queue->ctrl->ctrl.device,
 				"Bogus remote invalidation for rkey %#x\n",
-				req->mr->rkey);
+				req->mr ? req->mr->rkey : 0);
 			nvme_rdma_error_recovery(queue->ctrl);
 		}
 	} else if (req->mr) {
-- 
cgit v1.2.3


From 643c476d6f78cf0349fb8e07334962dd056a3c90 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Thu, 15 Oct 2020 11:36:29 -0700
Subject: nvme: use queuedata for nvme_req_qid

The request's rq_disk isn't set for passthrough IO commands, so tracing
uses qid 0 for these which incorrectly decodes as an admin command. Use
the request_queue's queuedata instead since that value is always set for
the IO queues, and never set for the admin queue.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/nvme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e7c88b40f5bb..cc111136a981 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -176,7 +176,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
 
 static inline u16 nvme_req_qid(struct request *req)
 {
-	if (!req->rq_disk)
+	if (!req->q->queuedata)
 		return 0;
 	return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
 }
-- 
cgit v1.2.3


From 02ca079c99319c4308c6bb892613f29119c1a9f9 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 13 Oct 2020 16:34:45 +0800
Subject: nvme-pci: disable Write Zeroes on Sandisk Skyhawk

Like commit 5611ec2b9814 ("nvme-pci: prevent SK hynix PC400 from using
Write Zeroes command"), Sandisk Skyhawk has the same issue:
[ 6305.633887] blk_update_request: operation not supported error, dev nvme0n1, sector 340812032 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0

So also disable Write Zeroes command on Sandisk Skyhawk.

BugLink: https://bugs.launchpad.net/bugs/1899503
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e5b02242f3ca..df8f3612107f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3185,6 +3185,8 @@ static const struct pci_device_id nvme_id_table[] = {
 				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_DEVICE(0x1c5c, 0x1504),   /* SK Hynix PC400 */
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+	{ PCI_DEVICE(0x15b7, 0x2001),   /*  Sandisk Skyhawk */
+		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
 		.driver_data = NVME_QUIRK_SINGLE_VECTOR },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
-- 
cgit v1.2.3


From 85bd23f3dc09a2ae9e56885420e52c54bf983713 Mon Sep 17 00:00:00 2001
From: zhenwei pi <pizhenwei@bytedance.com>
Date: Thu, 15 Oct 2020 09:51:40 +0800
Subject: nvmet: fix uninitialized work for zero kato

When connecting a controller with a zero kato value using the following
command line

   nvme connect -t tcp -n NQN -a ADDR -s PORT --keep-alive-tmo=0

the warning below can be reproduced:

WARNING: CPU: 1 PID: 241 at kernel/workqueue.c:1627 __queue_delayed_work+0x6d/0x90
with trace:
  mod_delayed_work_on+0x59/0x90
  nvmet_update_cc+0xee/0x100 [nvmet]
  nvmet_execute_prop_set+0x72/0x80 [nvmet]
  nvmet_tcp_try_recv_pdu+0x2f7/0x770 [nvmet_tcp]
  nvmet_tcp_io_work+0x63f/0xb2d [nvmet_tcp]
  ...

This is caused by queuing up an uninitialized work.  Althrough the
keep-alive timer is disabled during allocating the controller (fixed in
0d3b6a8d213a), ka_work still has a chance to run (called by
nvmet_start_ctrl).

Fixes: 0d3b6a8d213a ("nvmet: Disable keep-alive timer when kato is cleared to 0h")
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 25d62d867563..aafcbc424b7a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1126,7 +1126,8 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 	 * in case a host died before it enabled the controller.  Hence, simply
 	 * reset the keep alive timer when the controller is enabled.
 	 */
-	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
+	if (ctrl->kato)
+		mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
-- 
cgit v1.2.3


From df06047d54276f73782c9d97882b305fca745d3f Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Fri, 16 Oct 2020 16:19:04 -0600
Subject: nvmet: limit passthru MTDS by BIO_MAX_PAGES

nvmet_passthru_map_sg() only supports mapping a single BIO, not a chain
so the effective maximum transfer should also be limitted by
BIO_MAX_PAGES (presently this works out to 1MB).

For PCI passthru devices the max_sectors would typically be more
limitting than BIO_MAX_PAGES, but this may not be true for all passthru
devices.

Fixes: c1fef73f793b ("nvmet: add passthru code to process commands")
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/passthru.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 56c571052216..323046e1d67f 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
 	struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
 	u16 status = NVME_SC_SUCCESS;
 	struct nvme_id_ctrl *id;
-	u32 max_hw_sectors;
+	int max_hw_sectors;
 	int page_shift;
 
 	id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -48,6 +48,13 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
 	max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
 				      pctrl->max_hw_sectors);
 
+	/*
+	 * nvmet_passthru_map_sg is limitted to using a single bio so limit
+	 * the mdts based on BIO_MAX_PAGES as well
+	 */
+	max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
+				      max_hw_sectors);
+
 	page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
 
 	id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;
-- 
cgit v1.2.3


From 5e063101ffacf7c14797f5185c58a967ca83c79f Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Fri, 16 Oct 2020 16:19:05 -0600
Subject: nvmet: cleanup nvmet_passthru_map_sg()

Clean up some confusing elements of nvmet_passthru_map_sg() by returning
early if the request is greater than the maximum bio size. This allows
us to drop the sg_cnt variable.

This should not result in any functional change but makes the code
clearer and more understandable. The original code allocated a truncated
bio then would return EINVAL when bio_add_pc_page() filled that bio. The
new code just returns EINVAL early if this would happen.

Fixes: c1fef73f793b ("nvmet: add passthru code to process commands")
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Suggested-by: Douglas Gilbert <dgilbert@interlog.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/passthru.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 323046e1d67f..0814cba8298a 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -187,18 +187,20 @@ static void nvmet_passthru_req_done(struct request *rq,
 
 static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 {
-	int sg_cnt = req->sg_cnt;
 	struct scatterlist *sg;
 	int op_flags = 0;
 	struct bio *bio;
 	int i, ret;
 
+	if (req->sg_cnt > BIO_MAX_PAGES)
+		return -EINVAL;
+
 	if (req->cmd->common.opcode == nvme_cmd_flush)
 		op_flags = REQ_FUA;
 	else if (nvme_is_write(req->cmd))
 		op_flags = REQ_SYNC | REQ_IDLE;
 
-	bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+	bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
 	bio->bi_end_io = bio_put;
 	bio->bi_opf = req_op(rq) | op_flags;
 
@@ -208,7 +210,6 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 			bio_put(bio);
 			return -EINVAL;
 		}
-		sg_cnt--;
 	}
 
 	ret = blk_rq_append_bio(rq, &bio);
-- 
cgit v1.2.3


From 150dfb6c834c9e0e92db7794530b09fd2b9f05c8 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Tue, 20 Oct 2020 16:14:04 -0700
Subject: nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

By default, we set the passthru request allocation flag such that it
returns the error in the following code path and we fail the I/O when
BLK_MQ_REQ_NOWAIT is used for request allocation :-

nvme_alloc_request()
 blk_mq_alloc_request()
  blk_mq_queue_enter()
   if (flag & BLK_MQ_REQ_NOWAIT)
        return -EBUSY; <-- return if busy.

On some controllers using BLK_MQ_REQ_NOWAIT ends up in I/O error where
the controller is perfectly healthy and not in a degraded state.

Block layer request allocation does allow us to wait instead of
immediately returning the error when we BLK_MQ_REQ_NOWAIT flag is not
used. This has shown to fix the I/O error problem reported under
heavy random write workload.

Remove the BLK_MQ_REQ_NOWAIT parameter for passthru request allocation
which resolves this issue.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/passthru.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 0814cba8298a..8ee94f056898 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -244,7 +244,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
 		q = ns->queue;
 	}
 
-	rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
+	rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
 	if (IS_ERR(rq)) {
 		status = NVME_SC_INTERNAL;
 		goto out_put_ns;
-- 
cgit v1.2.3


From 52793d62a696e9188092eb0817fb1219ee5729ff Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Fri, 16 Oct 2020 14:06:27 -0700
Subject: nvme-fc: fix io timeout to abort I/O

Currently, an I/O timeout unconditionally invokes
nvme_fc_error_recovery() which checks for LIVE or CONNECTING state.  If
live, the routine resets the controller which initiates a reconnect -
which is valid.  If CONNECTING, err_work is scheduled.  Err_work then
calls the terminate_io routine, which also checks for CONNECTING and
noops any further action on outstanding I/O.  The result is nothing
happened to the timed out io.  As such, if the command was dropped on
the wire, it will never timeout / complete, and the connect process
will hang.

Change the behavior of the io timeout routine to unconditionally abort
the I/O.  I/O completion handling will note that an io failed due to an
abort and will terminate the connection / association as needed.  If the
abort was unable to happen, continue with a call to
nvme_fc_error_recovery(). To ensure something different happens in
nvme_fc_error_recovery() rework it so at it will abort all I/Os on the
association to force a failure.

As I/O aborts now may occur outside of delete_association, counting for
completion must be wary and only count those aborted during
delete_association when TERMIO is set on the controller.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 108 +++++++++++++++++++++++++++++++------------------
 1 file changed, 69 insertions(+), 39 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e2e09e25c056..3e72b7d74df3 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1837,8 +1837,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
 	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
 	if (opstate != FCPOP_STATE_ACTIVE)
 		atomic_set(&op->state, opstate);
-	else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
+	else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
+		op->flags |= FCOP_FLAGS_TERMIO;
 		ctrl->iocnt++;
+	}
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
 	if (opstate != FCPOP_STATE_ACTIVE)
@@ -1874,7 +1876,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 
 	if (opstate == FCPOP_STATE_ABORTED) {
 		spin_lock_irqsave(&ctrl->lock, flags);
-		if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
+		if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
+		    op->flags & FCOP_FLAGS_TERMIO) {
 			if (!--ctrl->iocnt)
 				wake_up(&ctrl->ioabort_wait);
 		}
@@ -2446,15 +2449,20 @@ nvme_fc_timeout(struct request *rq, bool reserved)
 {
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
+	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
+	struct nvme_command *sqe = &cmdiu->sqe;
 
 	/*
-	 * we can't individually ABTS an io without affecting the queue,
-	 * thus killing the queue, and thus the association.
-	 * So resolve by performing a controller reset, which will stop
-	 * the host/io stack, terminate the association on the link,
-	 * and recreate an association on the link.
+	 * Attempt to abort the offending command. Command completion
+	 * will detect the aborted io and will fail the connection.
 	 */
-	nvme_fc_error_recovery(ctrl, "io timeout error");
+	dev_info(ctrl->ctrl.device,
+		"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
+		"x%08x/x%08x\n",
+		ctrl->cnum, op->queue->qnum, sqe->common.opcode,
+		sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
+	if (__nvme_fc_abort_op(ctrl, op))
+		nvme_fc_error_recovery(ctrl, "io timeout abort failed");
 
 	/*
 	 * the io abort has been initiated. Have the reset timer
@@ -2726,6 +2734,7 @@ nvme_fc_complete_rq(struct request *rq)
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
 
 	atomic_set(&op->state, FCPOP_STATE_IDLE);
+	op->flags &= ~FCOP_FLAGS_TERMIO;
 
 	nvme_fc_unmap_data(ctrl, rq, op);
 	nvme_complete_rq(rq);
@@ -3090,26 +3099,19 @@ out_free_queue:
 	return ret;
 }
 
+
 /*
- * This routine stops operation of the controller on the host side.
- * On the host os stack side: Admin and IO queues are stopped,
- *   outstanding ios on them terminated via FC ABTS.
- * On the link side: the association is terminated.
+ * This routine runs through all outstanding commands on the association
+ * and aborts them.  This routine is typically be called by the
+ * delete_association routine. It is also called due to an error during
+ * reconnect. In that scenario, it is most likely a command that initializes
+ * the controller, including fabric Connect commands on io queues, that
+ * may have timed out or failed thus the io must be killed for the connect
+ * thread to see the error.
  */
 static void
-nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
+__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
 {
-	struct nvmefc_ls_rcv_op *disls = NULL;
-	unsigned long flags;
-
-	if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
-		return;
-
-	spin_lock_irqsave(&ctrl->lock, flags);
-	set_bit(FCCTRL_TERMIO, &ctrl->flags);
-	ctrl->iocnt = 0;
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-
 	/*
 	 * If io queues are present, stop them and terminate all outstanding
 	 * ios on them. As FC allocates FC exchange for each io, the
@@ -3127,6 +3129,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
+		if (start_queues)
+			nvme_start_queues(&ctrl->ctrl);
 	}
 
 	/*
@@ -3143,13 +3147,34 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 
 	/*
 	 * clean up the admin queue. Same thing as above.
-	 * use blk_mq_tagset_busy_itr() and the transport routine to
-	 * terminate the exchanges.
 	 */
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
+}
+
+/*
+ * This routine stops operation of the controller on the host side.
+ * On the host os stack side: Admin and IO queues are stopped,
+ *   outstanding ios on them terminated via FC ABTS.
+ * On the link side: the association is terminated.
+ */
+static void
+nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
+{
+	struct nvmefc_ls_rcv_op *disls = NULL;
+	unsigned long flags;
+
+	if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
+		return;
+
+	spin_lock_irqsave(&ctrl->lock, flags);
+	set_bit(FCCTRL_TERMIO, &ctrl->flags);
+	ctrl->iocnt = 0;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+
+	__nvme_fc_abort_outstanding_ios(ctrl, false);
 
 	/* kill the aens as they are a separate path */
 	nvme_fc_abort_aen_ops(ctrl);
@@ -3263,22 +3288,27 @@ static void
 __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
 {
 	/*
-	 * if state is connecting - the error occurred as part of a
-	 * reconnect attempt. The create_association error paths will
-	 * clean up any outstanding io.
-	 *
-	 * if it's a different state - ensure all pending io is
-	 * terminated. Given this can delay while waiting for the
-	 * aborted io to return, we recheck adapter state below
-	 * before changing state.
+	 * if state is CONNECTING - the error occurred as part of a
+	 * reconnect attempt. Abort any ios on the association and
+	 * let the create_association error paths resolve things.
 	 */
-	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
-		nvme_stop_keep_alive(&ctrl->ctrl);
-
-		/* will block will waiting for io to terminate */
-		nvme_fc_delete_association(ctrl);
+	if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
+		__nvme_fc_abort_outstanding_ios(ctrl, true);
+		return;
 	}
 
+	/*
+	 * For any other state, kill the association. As this routine
+	 * is a common io abort routine for resetting and such, after
+	 * the association is terminated, ensure that the state is set
+	 * to CONNECTING.
+	 */
+
+	nvme_stop_keep_alive(&ctrl->ctrl);
+
+	/* will block will waiting for io to terminate */
+	nvme_fc_delete_association(ctrl);
+
 	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
 	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
 		dev_err(ctrl->ctrl.device,
-- 
cgit v1.2.3


From 514a6dc9ecfd2fe4e1deebcb7a63e3de23e6c38b Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Fri, 16 Oct 2020 14:06:04 -0700
Subject: nvme-fc: fix error loop in create_hw_io_queues

The loop that backs out of hw io queue creation continues through index
0, which corresponds to the admin queue as well.

Fix the loop so it only proceeds through indexes 1..n which correspond to
I/O queues.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 3e72b7d74df3..108130f140d0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2317,7 +2317,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 	return 0;
 
 delete_queues:
-	for (; i >= 0; i--)
+	for (; i > 0; i--)
 		__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
 	return ret;
 }
@@ -2436,7 +2436,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 		return;
 
 	dev_warn(ctrl->ctrl.device,
-		"NVME-FC{%d}: transport association error detected: %s\n",
+		"NVME-FC{%d}: transport association event: %s\n",
 		ctrl->cnum, errmsg);
 	dev_warn(ctrl->ctrl.device,
 		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
-- 
cgit v1.2.3


From 88e837ed0f1fddd34a19092aaa7098d579e6c506 Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Fri, 16 Oct 2020 14:17:24 -0700
Subject: nvme-fc: wait for queues to freeze before calling update_hr_hw_queues

On reconnect, the code currently does not freeze the controller before
possibly updating the number hw queues for the controller.

Add the freeze before updating the number of hw queues.  Note: the queues
are already started and remain started through the reconnect.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 108130f140d0..5f1d09640c40 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2885,11 +2885,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_delete_hw_queues;
 
-	if (prior_ioq_cnt != nr_io_queues)
+	if (prior_ioq_cnt != nr_io_queues) {
 		dev_info(ctrl->ctrl.device,
 			"reconnect: revising io queue count from %d to %d\n",
 			prior_ioq_cnt, nr_io_queues);
-	blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+		nvme_wait_freeze(&ctrl->ctrl);
+		blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+		nvme_unfreeze(&ctrl->ctrl);
+	}
 
 	return 0;
 
-- 
cgit v1.2.3


From f673714a1247669bc90322dfb14a5cf553833796 Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Fri, 16 Oct 2020 14:29:28 -0700
Subject: nvme-fc: shorten reconnect delay if possible for FC

We've had several complaints about a 10s reconnect delay (the default)
when there was an error while there is connectivity to a subsystem.
The max_reconnects and reconnect_delay are set in common code prior to
calling the transport to create the controller.

This change checks if the default reconnect delay is being used, and if
so, it adjusts it to a shorter period (2s) for the nvme-fc transport.
It does so by calculating the controller loss tmo window, changing the
value of the reconnect delay, and then recalculating the maximum number
of reconnect attempts allowed.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5f1d09640c40..3c002bdcace3 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -26,6 +26,10 @@ enum nvme_fc_queue_flags {
 };
 
 #define NVME_FC_DEFAULT_DEV_LOSS_TMO	60	/* seconds */
+#define NVME_FC_DEFAULT_RECONNECT_TMO	2	/* delay between reconnects
+						 * when connected and a
+						 * connection failure.
+						 */
 
 struct nvme_fc_queue {
 	struct nvme_fc_ctrl	*ctrl;
@@ -3436,7 +3440,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 {
 	struct nvme_fc_ctrl *ctrl;
 	unsigned long flags;
-	int ret, idx;
+	int ret, idx, ctrl_loss_tmo;
 
 	if (!(rport->remoteport.port_role &
 	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3462,6 +3466,19 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 		goto out_free_ctrl;
 	}
 
+	/*
+	 * if ctrl_loss_tmo is being enforced and the default reconnect delay
+	 * is being used, change to a shorter reconnect delay for FC.
+	 */
+	if (opts->max_reconnects != -1 &&
+	    opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
+	    opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
+		ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
+		opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
+		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
+						opts->reconnect_delay);
+	}
+
 	ctrl->ctrl.opts = opts;
 	ctrl->ctrl.nr_reconnects = 0;
 	if (lport->dev)
-- 
cgit v1.2.3