Diffstat (limited to 'drivers/nvme/host/fc.c')
-rw-r--r--	drivers/nvme/host/fc.c	931
1 file changed, 616 insertions, 315 deletions
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b7118db11ea7..5aa52863ba54 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -19,6 +19,7 @@ #include <linux/parser.h> #include <uapi/scsi/fc/fc_fs.h> #include <uapi/scsi/fc/fc_els.h> +#include <linux/delay.h> #include "nvme.h" #include "fabrics.h" @@ -44,6 +45,8 @@ enum nvme_fc_queue_flags { #define NVMEFC_QUEUE_DELAY 3 /* ms units */ +#define NVME_FC_MAX_CONNECT_ATTEMPTS 1 + struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; @@ -137,19 +140,17 @@ struct nvme_fc_rport { struct kref ref; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_state { - FCCTRL_INIT = 0, - FCCTRL_ACTIVE = 1, +enum nvme_fcctrl_flags { + FCCTRL_TERMIO = (1 << 0), }; struct nvme_fc_ctrl { spinlock_t lock; struct nvme_fc_queue *queues; - u32 queue_count; - struct device *dev; struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + u32 queue_count; u32 cnum; u64 association_id; @@ -162,8 +163,14 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set tag_set; struct work_struct delete_work; + struct work_struct reset_work; + struct delayed_work connect_work; + int reconnect_delay; + int connect_attempts; + struct kref ref; - int state; + u32 flags; + u32 iocnt; struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; @@ -1204,7 +1211,10 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) continue; spin_lock_irqsave(&ctrl->lock, flags); - aen_op->flags |= FCOP_FLAGS_TERMIO; + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + aen_op->flags |= FCOP_FLAGS_TERMIO; + } spin_unlock_irqrestore(&ctrl->lock, flags); ret = __nvme_fc_abort_op(ctrl, aen_op); @@ -1217,6 +1227,8 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) /* back out the flags/counters */ spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; aen_op->flags &= ~FCOP_FLAGS_TERMIO; spin_unlock_irqrestore(&ctrl->lock, flags); return; @@ -1232,6 +1244,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, bool complete_rq = false; spin_lock_irqsave(&ctrl->lock, flags); + if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + } if (op->flags & FCOP_FLAGS_RELEASED) complete_rq = true; else @@ -1447,19 +1463,29 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; struct nvme_fc_cmd_iu *cmdiu; struct nvme_command *sqe; + void *private; int i, ret; aen_op = ctrl->aen_ops; for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, + GFP_KERNEL); + if (!private) + return -ENOMEM; + cmdiu = &aen_op->cmd_iu; sqe = &cmdiu->sqe; ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], aen_op, (struct request *)NULL, (AEN_CMDID_BASE + i)); - if (ret) + if (ret) { + kfree(private); return ret; + } aen_op->flags = FCOP_FLAGS_AEN; + aen_op->fcp_req.first_sgl = NULL; /* no sg list */ + aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); sqe->common.opcode = nvme_admin_async_event; @@ -1469,6 +1495,23 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) return 0; } +static void +nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_fcp_op *aen_op; + int i; + + aen_op = ctrl->aen_ops; + for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + if (!aen_op->fcp_req.private) + continue; + + __nvme_fc_exit_request(ctrl, aen_op); + + kfree(aen_op->fcp_req.private); + aen_op->fcp_req.private = NULL; + } +} static inline void __nvme_fc_init_hctx(struct 
blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, @@ -1568,15 +1611,6 @@ __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, } static void -nvme_fc_destroy_admin_queue(struct nvme_fc_ctrl *ctrl) -{ - __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvme_fc_free_queue(&ctrl->queues[0]); -} - -static void nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) { int i; @@ -1663,17 +1697,24 @@ nvme_fc_ctrl_free(struct kref *ref) container_of(ref, struct nvme_fc_ctrl, ref); unsigned long flags; - if (ctrl->state != FCCTRL_INIT) { - /* remove from rport list */ - spin_lock_irqsave(&ctrl->rport->lock, flags); - list_del(&ctrl->ctrl_list); - spin_unlock_irqrestore(&ctrl->rport->lock, flags); + if (ctrl->ctrl.tagset) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); } + /* remove from rport list */ + spin_lock_irqsave(&ctrl->rport->lock, flags); + list_del(&ctrl->ctrl_list); + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + + blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_mq_free_tag_set(&ctrl->admin_tag_set); + + kfree(ctrl->queues); + put_device(ctrl->dev); nvme_fc_rport_put(ctrl->rport); - kfree(ctrl->queues); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); @@ -1696,32 +1737,35 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) * controller. Called after last nvme_put_ctrl() call */ static void -nvme_fc_free_nvme_ctrl(struct nvme_ctrl *nctrl) +nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); WARN_ON(nctrl != &ctrl->ctrl); - /* - * Tear down the association, which will generate link - * traffic to terminate connections - */ - - if (ctrl->state != FCCTRL_INIT) { - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); + nvme_fc_ctrl_put(ctrl); +} - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_fc_delete_hw_io_queues(ctrl); - nvme_fc_free_io_queues(ctrl); - } +static void +nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) +{ + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport association error detected: %s\n", + ctrl->cnum, errmsg); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: resetting controller\n", ctrl->cnum); - nvme_fc_destroy_admin_queue(ctrl); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Couldn't change state " + "to RECONNECTING\n", ctrl->cnum); + return; } - nvme_fc_ctrl_put(ctrl); + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Failed to schedule " + "reset work\n", ctrl->cnum); } enum blk_eh_timer_return @@ -1740,11 +1784,13 @@ nvme_fc_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; /* - * TODO: force a controller reset - * when that happens, queues will be torn down and outstanding - * ios will be terminated, and the above abort, on a single io - * will no longer be needed. + * we can't individually ABTS an io without affecting the queue, + * thus killing the queue, adn thus the association. + * So resolve by performing a controller reset, which will stop + * the host/io stack, terminate the association on the link, + * and recreate an association on the link. 
*/ + nvme_fc_error_recovery(ctrl, "io timeout error"); return BLK_EH_HANDLED; } @@ -1838,6 +1884,13 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u32 csn; int ret; + /* + * before attempting to send the io, check to see if we believe + * the target device is present + */ + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return BLK_MQ_RQ_QUEUE_ERROR; + if (!nvme_fc_ctrl_get(ctrl)) return BLK_MQ_RQ_QUEUE_ERROR; @@ -1885,8 +1938,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (!(op->flags & FCOP_FLAGS_AEN)) { ret = nvme_fc_map_data(ctrl, op->rq, op); if (ret < 0) { - dev_err(queue->ctrl->ctrl.device, - "Failed to map data (%d)\n", ret); nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); return (ret == -ENOMEM || ret == -EAGAIN) ? @@ -1907,9 +1958,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, queue->lldd_handle, &op->fcp_req); if (ret) { - dev_err(ctrl->dev, - "Send nvme command failed - lldd returned %d.\n", ret); - if (op->rq) { /* normal request */ nvme_fc_unmap_data(ctrl, op->rq, op); nvme_cleanup_cmd(op->rq); @@ -1979,12 +2027,8 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) struct nvme_fc_fcp_op *op; req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); - if (!req) { - dev_err(queue->ctrl->ctrl.device, - "tag 0x%x on QNum %#x not found\n", - tag, queue->qnum); + if (!req) return 0; - } op = blk_mq_rq_to_pdu(req); @@ -2001,11 +2045,21 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; + unsigned long flags; + bool terminating = false; int ret; if (aer_idx > NVME_FC_NR_AEN_COMMANDS) return; + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + terminating = true; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (terminating) + return; + aen_op = &ctrl->aen_ops[aer_idx]; ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, @@ -2059,6 +2113,57 @@ nvme_fc_complete_rq(struct request *rq) __nvme_fc_final_op_cleanup(rq); } +/* + * This routine is used by the transport when it needs to find active + * io on a queue that is to be terminated. The transport uses + * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke + * this routine to kill them on a 1 by 1 basis. + * + * As FC allocates FC exchange for each io, the transport must contact + * the LLDD to terminate the exchange, thus releasing the FC exchange. + * After terminating the exchange the LLDD will call the transport's + * normal io done path for the request, but it will have an aborted + * status. The done path will return the io request back to the block + * layer with an error status. + */ +static void +nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +{ + struct nvme_ctrl *nctrl = data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + unsigned long flags; + int status; + + if (!blk_mq_request_started(req)) + return; + + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + op->flags |= FCOP_FLAGS_TERMIO; + } + spin_unlock_irqrestore(&ctrl->lock, flags); + + status = __nvme_fc_abort_op(ctrl, op); + if (status) { + /* + * if __nvme_fc_abort_op failed the io wasn't + * active. Thus this call path is running in + * parallel to the io complete. Treat as non-error. 
+ */ + + /* back out the flags/counters */ + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + op->flags &= ~FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + return; + } +} + + static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, .complete = nvme_fc_complete_rq, @@ -2070,152 +2175,275 @@ static const struct blk_mq_ops nvme_fc_mq_ops = { .timeout = nvme_fc_timeout, }; -static const struct blk_mq_ops nvme_fc_admin_mq_ops = { - .queue_rq = nvme_fc_queue_rq, - .complete = nvme_fc_complete_rq, - .init_request = nvme_fc_init_admin_request, - .exit_request = nvme_fc_exit_request, - .reinit_request = nvme_fc_reinit_request, - .init_hctx = nvme_fc_init_admin_hctx, - .timeout = nvme_fc_timeout, -}; - static int -nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl) +nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) { - u32 segs; - int error; + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; - nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + "set_queue_count failed: %d\n", ret); + return ret; + } - error = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], - NVME_FC_AQ_BLKMQ_DEPTH, - (NVME_FC_AQ_BLKMQ_DEPTH / 4)); - if (error) - return error; + ctrl->queue_count = opts->nr_io_queues + 1; + if (!opts->nr_io_queues) + return 0; - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", + opts->nr_io_queues); + + nvme_fc_init_io_queues(ctrl); + + memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); + ctrl->tag_set.ops = &nvme_fc_mq_ops; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; + ctrl->tag_set.reserved_tags = 1; /* fabric connect */ + ctrl->tag_set.numa_node = NUMA_NO_NODE; + ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + (SG_CHUNK_SIZE * sizeof(struct scatterlist)) + ctrl->lport->ops->fcprqst_priv_sz; - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (error) - goto out_free_queue; + ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + if (ret) + return ret; - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; + ctrl->ctrl.tagset = &ctrl->tag_set; + + ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); + if (IS_ERR(ctrl->ctrl.connect_q)) { + ret = PTR_ERR(ctrl->ctrl.connect_q); + goto out_free_tag_set; + } + + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_cleanup_blk_queue; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; + + return 0; + +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_cleanup_blk_queue: + nvme_stop_keep_alive(&ctrl->ctrl); + 
blk_cleanup_queue(ctrl->ctrl.connect_q); +out_free_tag_set: + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_fc_free_io_queues(ctrl); + + /* force put free routine to ignore io queues */ + ctrl->ctrl.tagset = NULL; + + return ret; +} + +static int +nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; + + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + "set_queue_count failed: %d\n", ret); + return ret; } - error = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, + /* check for io queues existing */ + if (ctrl->queue_count == 1) + return 0; + + dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n", + opts->nr_io_queues); + + nvme_fc_init_io_queues(ctrl); + + ret = blk_mq_reinit_tagset(&ctrl->tag_set); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; + + return 0; + +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_free_io_queues: + nvme_fc_free_io_queues(ctrl); + return ret; +} + +/* + * This routine restarts the controller on the host side, and + * on the link side, recreates the controller association. + */ +static int +nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + u32 segs; + int ret; + bool changed; + + ctrl->connect_attempts++; + + /* + * Create the admin queue + */ + + nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + + ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, NVME_FC_AQ_BLKMQ_DEPTH); - if (error) - goto out_cleanup_queue; + if (ret) + goto out_free_queue; - error = nvmf_connect_admin_queue(&ctrl->ctrl); - if (error) + ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], + NVME_FC_AQ_BLKMQ_DEPTH, + (NVME_FC_AQ_BLKMQ_DEPTH / 4)); + if (ret) goto out_delete_hw_queue; - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); - if (error) { + if (ctrl->ctrl.state != NVME_CTRL_NEW) + blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); + + ret = nvmf_connect_admin_queue(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* + * Check controller capabilities + * + * todo:- add code to check if ctrl attributes changed from + * prior connection values + */ + + ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + if (ret) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); - goto out_delete_hw_queue; + goto out_disconnect_admin_queue; } ctrl->ctrl.sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); - if (error) - goto out_delete_hw_queue; + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + if (ret) + goto out_disconnect_admin_queue; segs = min_t(u32, NVME_FC_MAX_SEGMENTS, ctrl->lport->ops->max_sgl_segments); ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); - error = nvme_init_identify(&ctrl->ctrl); - if (error) - goto out_delete_hw_queue; + ret = nvme_init_identify(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* sanity checks */ + + /* FC-NVME does not have other data in the capsule */ + if (ctrl->ctrl.icdoff) { + dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", + ctrl->ctrl.icdoff); + goto out_disconnect_admin_queue; + } nvme_start_keep_alive(&ctrl->ctrl); - return 0; + /* FC-NVME supports 
normal SGL Data Block Descriptors */ + + if (opts->queue_size > ctrl->ctrl.maxcmd) { + /* warn if maxcmd is lower than queue_size */ + dev_warn(ctrl->ctrl.device, + "queue_size %zu > ctrl maxcmd %u, reducing " + "to queue_size\n", + opts->queue_size, ctrl->ctrl.maxcmd); + opts->queue_size = ctrl->ctrl.maxcmd; + } + ret = nvme_fc_init_aen_ops(ctrl); + if (ret) + goto out_term_aen_ops; + + /* + * Create the io queues + */ + + if (ctrl->queue_count > 1) { + if (ctrl->ctrl.state == NVME_CTRL_NEW) + ret = nvme_fc_create_io_queues(ctrl); + else + ret = nvme_fc_reinit_io_queues(ctrl); + if (ret) + goto out_term_aen_ops; + } + + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + WARN_ON_ONCE(!changed); + + ctrl->connect_attempts = 0; + + kref_get(&ctrl->ctrl.kref); + + if (ctrl->queue_count > 1) { + nvme_start_queues(&ctrl->ctrl); + nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); + } + + return 0; /* Success */ + +out_term_aen_ops: + nvme_fc_term_aen_ops(ctrl); + nvme_stop_keep_alive(&ctrl->ctrl); +out_disconnect_admin_queue: + /* send a Disconnect(association) LS to fc-nvme target */ + nvme_fc_xmt_disconnect_assoc(ctrl); out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); -out_cleanup_queue: - blk_cleanup_queue(ctrl->ctrl.admin_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->admin_tag_set); out_free_queue: nvme_fc_free_queue(&ctrl->queues[0]); - return error; + + return ret; } /* - * This routine is used by the transport when it needs to find active - * io on a queue that is to be terminated. The transport uses - * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke - * this routine to kill them on a 1 by 1 basis. - * - * As FC allocates FC exchange for each io, the transport must contact - * the LLDD to terminate the exchange, thus releasing the FC exchange. - * After terminating the exchange the LLDD will call the transport's - * normal io done path for the request, but it will have an aborted - * status. The done path will return the io request back to the block - * layer with an error status. + * This routine stops operation of the controller on the host side. + * On the host os stack side: Admin and IO queues are stopped, + * outstanding ios on them terminated via FC ABTS. + * On the link side: the association is terminated. */ static void -nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { - struct nvme_ctrl *nctrl = data; - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); unsigned long flags; - int status; - if (!blk_mq_request_started(req)) - return; + nvme_stop_keep_alive(&ctrl->ctrl); spin_lock_irqsave(&ctrl->lock, flags); - op->flags |= FCOP_FLAGS_TERMIO; + ctrl->flags |= FCCTRL_TERMIO; + ctrl->iocnt = 0; spin_unlock_irqrestore(&ctrl->lock, flags); - status = __nvme_fc_abort_op(ctrl, op); - if (status) { - /* - * if __nvme_fc_abort_op failed the io wasn't - * active. Thus this call path is running in - * parallel to the io complete. Treat as non-error. - */ - - /* back out the flags/counters */ - spin_lock_irqsave(&ctrl->lock, flags); - op->flags &= ~FCOP_FLAGS_TERMIO; - spin_unlock_irqrestore(&ctrl->lock, flags); - return; - } -} - -/* - * This routine stops operation of the controller. Admin and IO queues - * are stopped, outstanding ios on them terminated, and the nvme ctrl - * is shutdown. 
- */ -static void -nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) -{ /* * If io queues are present, stop them and terminate all outstanding * ios on them. As FC allocates FC exchange for each io, the @@ -2234,11 +2462,20 @@ nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) nvme_fc_terminate_exchange, &ctrl->ctrl); } - if (ctrl->ctrl.state == NVME_CTRL_LIVE) - nvme_shutdown_ctrl(&ctrl->ctrl); + /* + * Other transports, which don't have link-level contexts bound + * to sqe's, would try to gracefully shutdown the controller by + * writing the registers for shutdown and polling (call + * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially + * just aborted and we will wait on those contexts, and given + * there was no indication of how live the controlelr is on the + * link, don't send more io to create more contexts for the + * shutdown. Let the controller fail via keepalive failure if + * its still present. + */ /* - * now clean up the admin queue. Same thing as above. + * clean up the admin queue. Same thing as above. * use blk_mq_tagset_busy_itr() and the transport routine to * terminate the exchanges. */ @@ -2248,24 +2485,56 @@ nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) /* kill the aens as they are a separate path */ nvme_fc_abort_aen_ops(ctrl); + + /* wait for all io that had to be aborted */ + spin_lock_irqsave(&ctrl->lock, flags); + while (ctrl->iocnt) { + spin_unlock_irqrestore(&ctrl->lock, flags); + msleep(1000); + spin_lock_irqsave(&ctrl->lock, flags); + } + ctrl->flags &= ~FCCTRL_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + + nvme_fc_term_aen_ops(ctrl); + + /* + * send a Disconnect(association) LS to fc-nvme target + * Note: could have been sent at top of process, but + * cleaner on link traffic if after the aborts complete. + * Note: if association doesn't exist, association_id will be 0 + */ + if (ctrl->association_id) + nvme_fc_xmt_disconnect_assoc(ctrl); + + if (ctrl->ctrl.tagset) { + nvme_fc_delete_hw_io_queues(ctrl); + nvme_fc_free_io_queues(ctrl); + } + + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); + nvme_fc_free_queue(&ctrl->queues[0]); } -/* - * Called to teardown an association. - * May be called with association fully in place or partially in place. - */ static void -__nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) +nvme_fc_delete_ctrl_work(struct work_struct *work) { - nvme_stop_keep_alive(&ctrl->ctrl); + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, delete_work); - /* stop and terminate ios on admin and io queues */ - nvme_fc_shutdown_ctrl(ctrl); + cancel_work_sync(&ctrl->reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + + /* + * kill the association on the link side. this will block + * waiting for io to terminate + */ + nvme_fc_delete_association(ctrl); /* * tear down the controller * This will result in the last reference on the nvme ctrl to - * expire, calling the transport nvme_fc_free_nvme_ctrl() callback. + * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback. * From there, the transport will tear down it's logical queues and * association. 
*/ @@ -2274,15 +2543,6 @@ __nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) nvme_put_ctrl(&ctrl->ctrl); } -static void -nvme_fc_del_ctrl_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, delete_work); - - __nvme_fc_remove_ctrl(ctrl); -} - static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) { @@ -2302,25 +2562,85 @@ static int nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_rport *rport = ctrl->rport; - unsigned long flags; int ret; - spin_lock_irqsave(&rport->lock, flags); + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; + ret = __nvme_fc_del_ctrl(ctrl); - spin_unlock_irqrestore(&rport->lock, flags); - if (ret) - return ret; - flush_work(&ctrl->delete_work); + if (!ret) + flush_workqueue(nvme_fc_wq); - return 0; + nvme_put_ctrl(&ctrl->ctrl); + + return ret; } +static void +nvme_fc_reset_ctrl_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, reset_work); + int ret; + + /* will block will waiting for io to terminate */ + nvme_fc_delete_association(ctrl); + + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } + + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); +} + +/* + * called by the nvme core layer, for sysfs interface that requests + * a reset of the nvme controller + */ static int nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) { - return -EIO; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) + return -EBUSY; + + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + return -EBUSY; + + flush_work(&ctrl->reset_work); + + return 0; } static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { @@ -2331,95 +2651,75 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, .reset_ctrl = nvme_fc_reset_nvme_ctrl, - .free_ctrl = nvme_fc_free_nvme_ctrl, + .free_ctrl = nvme_fc_nvme_ctrl_freed, .submit_async_event = nvme_fc_submit_async_event, .delete_ctrl = nvme_fc_del_nvme_ctrl, .get_subsysnqn = nvmf_get_subsysnqn, .get_address = nvmf_get_address, }; -static int -nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) +static void +nvme_fc_connect_ctrl_work(struct work_struct *work) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret) { - dev_info(ctrl->ctrl.device, - "set_queue_count failed: %d\n", ret); - return ret; - } - - ctrl->queue_count = opts->nr_io_queues + 1; - if 
(!opts->nr_io_queues) - return 0; - - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - - nvme_fc_init_io_queues(ctrl); - - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_fc_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); - if (ret) - return ret; - - ctrl->ctrl.tagset = &ctrl->tag_set; - - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); - goto out_free_tag_set; - } - - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); - if (ret) - goto out_cleanup_blk_queue; + struct nvme_fc_ctrl *ctrl = + container_of(to_delayed_work(work), + struct nvme_fc_ctrl, connect_work); - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); - if (ret) - goto out_delete_hw_queues; + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } - return 0; + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } -out_delete_hw_queues: - nvme_fc_delete_hw_io_queues(ctrl); -out_cleanup_blk_queue: - nvme_stop_keep_alive(&ctrl->ctrl); - blk_cleanup_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_fc_free_io_queues(ctrl); + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reconnect complete\n", + ctrl->cnum); +} - /* force put free routine to ignore io queues */ - ctrl->ctrl.tagset = NULL; - return ret; -} +static const struct blk_mq_ops nvme_fc_admin_mq_ops = { + .queue_rq = nvme_fc_queue_rq, + .complete = nvme_fc_complete_rq, + .init_request = nvme_fc_init_admin_request, + .exit_request = nvme_fc_exit_request, + .reinit_request = nvme_fc_reinit_request, + .init_hctx = nvme_fc_init_admin_hctx, + .timeout = nvme_fc_timeout, +}; static struct nvme_ctrl * -__nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, +nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) { struct nvme_fc_ctrl *ctrl; unsigned long flags; int ret, idx; - bool changed; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { @@ -2438,17 +2738,15 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->lport = lport; ctrl->rport = rport; ctrl->dev = lport->dev; - ctrl->state = FCCTRL_INIT; ctrl->cnum = idx; - ret = 
nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); - if (ret) - goto out_free_ida; - get_device(ctrl->dev); kref_init(&ctrl->ref); - INIT_WORK(&ctrl->delete_work, nvme_fc_del_ctrl_work); + INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); + INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); + INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + ctrl->reconnect_delay = opts->reconnect_delay; spin_lock_init(&ctrl->lock); /* io queue count */ @@ -2465,87 +2763,86 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), GFP_KERNEL); if (!ctrl->queues) - goto out_uninit_ctrl; - - ret = nvme_fc_configure_admin_queue(ctrl); - if (ret) - goto out_uninit_ctrl; - - /* sanity checks */ - - /* FC-NVME does not have other data in the capsule */ - if (ctrl->ctrl.icdoff) { - dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", - ctrl->ctrl.icdoff); - goto out_remove_admin_queue; - } - - /* FC-NVME supports normal SGL Data Block Descriptors */ + goto out_free_ida; - if (opts->queue_size > ctrl->ctrl.maxcmd) { - /* warn if maxcmd is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl maxcmd %u, reducing " - "to queue_size\n", - opts->queue_size, ctrl->ctrl.maxcmd); - opts->queue_size = ctrl->ctrl.maxcmd; - } + memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); + ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; + ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; + ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ + ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + (SG_CHUNK_SIZE * + sizeof(struct scatterlist)) + + ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.nr_hw_queues = 1; + ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - ret = nvme_fc_init_aen_ops(ctrl); + ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (ret) - goto out_stop_keep_alive; + goto out_free_queues; - if (ctrl->queue_count > 1) { - ret = nvme_fc_create_io_queues(ctrl); - if (ret) - goto out_stop_keep_alive; + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + ret = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_admin_tag_set; } - spin_lock_irqsave(&ctrl->lock, flags); - ctrl->state = FCCTRL_ACTIVE; - spin_unlock_irqrestore(&ctrl->lock, flags); - - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - WARN_ON_ONCE(!changed); + /* + * Would have been nice to init io queues tag set as well. + * However, we require interaction from the controller + * for max io queue count before we can do so. + * Defer this to the connect path. 
+ */ - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn); + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); + if (ret) + goto out_cleanup_admin_q; - kref_get(&ctrl->ctrl.kref); + /* at this point, teardown path changes to ref counting on nvme ctrl */ spin_lock_irqsave(&rport->lock, flags); list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (opts->nr_io_queues) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); + ret = nvme_fc_create_association(ctrl); + if (ret) { + /* initiate nvme ctrl ref counting teardown */ + nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); + + /* as we're past the point where we transition to the ref + * counting teardown path, if we return a bad pointer here, + * the calling routine, thinking it's prior to the + * transition, will do an rport put. Since the teardown + * path also does a rport put, we do an extra get here to + * so proper order/teardown happens. + */ + nvme_fc_rport_get(rport); + + if (ret > 0) + ret = -EIO; + return ERR_PTR(ret); } - return &ctrl->ctrl; + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", + ctrl->cnum, ctrl->ctrl.opts->subsysnqn); -out_stop_keep_alive: - nvme_stop_keep_alive(&ctrl->ctrl); -out_remove_admin_queue: - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); - nvme_stop_keep_alive(&ctrl->ctrl); - nvme_fc_destroy_admin_queue(ctrl); -out_uninit_ctrl: - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); - if (ret > 0) - ret = -EIO; - /* exit via here will follow ctlr ref point callbacks to free */ - return ERR_PTR(ret); + return &ctrl->ctrl; +out_cleanup_admin_q: + blk_cleanup_queue(ctrl->ctrl.admin_q); +out_free_admin_tag_set: + blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_free_queues: + kfree(ctrl->queues); out_free_ida: + put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); out_free_ctrl: kfree(ctrl); out_fail: - nvme_fc_rport_put(rport); /* exit via here doesn't follow ctlr ref points */ return ERR_PTR(ret); } @@ -2617,6 +2914,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + struct nvme_ctrl *ctrl; struct nvmet_fc_traddr laddr = { 0L, 0L }; struct nvmet_fc_traddr raddr = { 0L, 0L }; unsigned long flags; @@ -2648,7 +2946,10 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) spin_unlock_irqrestore(&nvme_fc_lock, flags); - return __nvme_fc_create_ctrl(dev, opts, lport, rport); + ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); + if (IS_ERR(ctrl)) + nvme_fc_rport_put(rport); + return ctrl; } } spin_unlock_irqrestore(&nvme_fc_lock, flags); |
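
The teardown path added above (nvme_fc_delete_association()) uses a small synchronization pattern: set FCCTRL_TERMIO under ctrl->lock, count in ctrl->iocnt each in-flight io whose abort was actually issued, then sleep-poll until that count drains to zero before freeing the queues. The sketch below is a minimal userspace model of that flag-plus-counter drain using pthreads; the names (struct ctx, begin_abort(), abort_done(), wait_for_aborts()) are illustrative stand-ins, not symbols from the patch.

```c
/* Minimal userspace model of the FCCTRL_TERMIO/iocnt drain pattern
 * used by nvme_fc_delete_association().  Names are illustrative only.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct ctx {
	pthread_mutex_t	lock;
	bool		terminating;	/* analogue of FCCTRL_TERMIO */
	unsigned int	iocnt;		/* aborts still outstanding */
};

/* called for each busy request whose abort is being issued */
static void begin_abort(struct ctx *c)
{
	pthread_mutex_lock(&c->lock);
	if (c->terminating)
		c->iocnt++;
	pthread_mutex_unlock(&c->lock);
}

/* called from the completion path once the abort finishes */
static void abort_done(struct ctx *c)
{
	pthread_mutex_lock(&c->lock);
	if (c->terminating && c->iocnt)
		c->iocnt--;
	pthread_mutex_unlock(&c->lock);
}

/* teardown: poll until every counted abort has completed */
static void wait_for_aborts(struct ctx *c)
{
	pthread_mutex_lock(&c->lock);
	while (c->iocnt) {
		pthread_mutex_unlock(&c->lock);
		usleep(1000);		/* kernel code uses msleep(1000) */
		pthread_mutex_lock(&c->lock);
	}
	c->terminating = false;		/* analogue of clearing FCCTRL_TERMIO */
	pthread_mutex_unlock(&c->lock);
}

static void *io_thread(void *arg)
{
	struct ctx *c = arg;

	begin_abort(c);
	usleep(5000);			/* pretend the LLDD aborts the exchange */
	abort_done(c);
	return NULL;
}

int main(void)
{
	struct ctx c = { .lock = PTHREAD_MUTEX_INITIALIZER, .terminating = true };
	pthread_t t;

	pthread_create(&t, NULL, io_thread, &c);
	wait_for_aborts(&c);
	pthread_join(&t, NULL);
	printf("all aborted i/o drained\n");
	return 0;
}
```

The kernel version does the same thing with spin_lock_irqsave() and msleep(1000); the property that matters is that iocnt is only ever touched while FCCTRL_TERMIO is set, so the waiter cannot miss a decrement.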
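
Both nvme_fc_reset_ctrl_work() and nvme_fc_connect_ctrl_work() end with the same decision after a failed nvme_fc_create_association(): reschedule connect_work after reconnect_delay seconds, or, once connect_attempts reaches NVME_FC_MAX_CONNECT_ATTEMPTS, move the controller to DELETING and queue delete_work. Below is a compact sketch of that shared retry policy; retry_policy() and the RETRY_* values are invented for illustration and are not part of the patch.

```c
/* Illustrative model of the reconnect policy shared by the reset and
 * connect work functions.  RETRY_* names are invented for this sketch.
 */
#include <stdio.h>

#define NVME_FC_MAX_CONNECT_ATTEMPTS	1	/* as defined in the patch */

enum retry_action {
	RETRY_DONE,		/* association created, controller goes LIVE */
	RETRY_RESCHEDULE,	/* queue connect_work after reconnect_delay */
	RETRY_GIVE_UP,		/* mark DELETING and queue delete_work */
};

static enum retry_action retry_policy(int create_ret, int *connect_attempts,
				      int reconnect_delay)
{
	if (!create_ret) {
		*connect_attempts = 0;
		return RETRY_DONE;
	}

	if (*connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS)
		return RETRY_GIVE_UP;

	printf("reconnect attempt in %d seconds\n", reconnect_delay);
	return RETRY_RESCHEDULE;
}

int main(void)
{
	int attempts = 0;

	/* nvme_fc_create_association() increments the counter at its top;
	 * with the default limit of 1, a single failure removes the ctrl */
	attempts++;
	if (retry_policy(-5 /* EIO-style failure */, &attempts, 2) == RETRY_GIVE_UP)
		printf("max reconnect attempts reached, removing controller\n");
	return 0;
}
```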
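
The error-recovery flow leans on the nvme core state machine: nvme_fc_error_recovery() moves a LIVE controller to RECONNECTING and queues reset_work, which deletes and recreates the association and, on success, transitions back to LIVE. The snippet below is a reduced model of those transitions under a simplified validity table; the state names mirror the core's, but change_state() here is only a sketch and not the core's nvme_change_ctrl_state().

```c
/* Reduced model of the controller state flow this patch relies on.
 * The transition table is a simplified sketch, not the core's checker.
 */
#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { CTRL_NEW, CTRL_LIVE, CTRL_RESETTING, CTRL_RECONNECTING, CTRL_DELETING };

static bool change_state(enum ctrl_state *cur, enum ctrl_state next)
{
	bool ok = false;

	switch (next) {
	case CTRL_LIVE:		/* create_association() succeeded */
		ok = (*cur == CTRL_NEW || *cur == CTRL_RESETTING ||
		      *cur == CTRL_RECONNECTING);
		break;
	case CTRL_RESETTING:	/* sysfs-requested reset */
		ok = (*cur == CTRL_LIVE);
		break;
	case CTRL_RECONNECTING:	/* transport error / io timeout */
		ok = (*cur == CTRL_LIVE || *cur == CTRL_RESETTING);
		break;
	case CTRL_DELETING:	/* delete request or max reconnects reached */
		ok = (*cur != CTRL_DELETING);
		break;
	default:
		break;
	}

	if (ok)
		*cur = next;
	return ok;
}

int main(void)
{
	enum ctrl_state st = CTRL_NEW;

	change_state(&st, CTRL_LIVE);		/* initial association created */
	change_state(&st, CTRL_RECONNECTING);	/* nvme_fc_error_recovery() */
	change_state(&st, CTRL_LIVE);		/* reset work reconnected */
	printf("final state: %d\n", st);
	return 0;
}
```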