1 files changed, 1479 insertions, 812 deletions
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 2242e9b3ca12..d0817facdae3 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -78,12 +78,13 @@ static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *,
 				      struct hbq_dmabuf *);
 static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
 					  struct hbq_dmabuf *dmabuf);
-static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *,
-				    struct lpfc_cqe *);
+static bool lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba,
+				   struct lpfc_queue *cq, struct lpfc_cqe *cqe);
 static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *,
 				       int);
 static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba,
-				     struct lpfc_eqe *eqe, uint32_t qidx);
+				     struct lpfc_queue *eq,
+				     struct lpfc_eqe *eqe);
 static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba);
 static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba);
 static int lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba,
@@ -160,7 +161,7 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe128 *wqe)
 	}
 	q->WQ_posted++;
 	/* set consumption flag every once in a while */
-	if (!((q->host_index + 1) % q->entry_repost))
+	if (!((q->host_index + 1) % q->notify_interval))
 		bf_set(wqe_wqec, &wqe->generic.wqe_com, 1);
 	else
 		bf_set(wqe_wqec, &wqe->generic.wqe_com, 0);
@@ -325,29 +326,16 @@ lpfc_sli4_mq_release(struct lpfc_queue *q)
 static struct lpfc_eqe *
 lpfc_sli4_eq_get(struct lpfc_queue *q)
 {
-	struct lpfc_hba *phba;
 	struct lpfc_eqe *eqe;
-	uint32_t idx;
 
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	phba = q->phba;
-	eqe = q->qe[q->hba_index].eqe;
+	eqe = q->qe[q->host_index].eqe;
 
 	/* If the next EQE is not valid then we are done */
 	if (bf_get_le32(lpfc_eqe_valid, eqe) != q->qe_valid)
 		return NULL;
-	/* If the host has not yet processed the next entry then we are done */
-	idx = ((q->hba_index + 1) % q->entry_count);
-	if (idx == q->host_index)
-		return NULL;
-
-	q->hba_index = idx;
-	/* if the index wrapped around, toggle the valid bit */
-	if (phba->sli4_hba.pc_sli4_params.eqav && !q->hba_index)
-		q->qe_valid = (q->qe_valid) ? 0 : 1;
-
 
 	/*
 	 * insert barrier for instruction interlock : data from the hardware
@@ -397,44 +385,25 @@ lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q)
 }
 
 /**
- * lpfc_sli4_eq_release - Indicates the host has finished processing an EQ
+ * lpfc_sli4_write_eq_db - write EQ DB for eqe's consumed or arm state
+ * @phba: adapter with EQ
  * @q: The Event Queue that the host has completed processing for.
+ * @count: Number of elements that have been consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Event Queue Entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of EQEs that were popped.
+ * This routine rings the doorbell to tell the HBA that @count EQEs have
+ * been processed; @arm selects whether the queue is rearmed as well. No
+ * doorbell is written when @q is NULL or when @count is 0 and @arm is false.
  **/
-uint32_t
-lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+		     uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_eqe *temp_eqe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.eqav) {
-			temp_eqe = q->qe[q->host_index].eqe;
-			bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
@@ -442,7 +411,7 @@ lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
 		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
 		bf_set(lpfc_eqcq_doorbell_eqci, &doorbell, 1);
 	}
-	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_EVENT);
 	bf_set(lpfc_eqcq_doorbell_eqid_hi, &doorbell,
 			(q->queue_id >> LPFC_EQID_HI_FIELD_SHIFT));
@@ -451,60 +420,112 @@ lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
 	/* PCI read to flush PCI pipeline on re-arming for INTx mode */
 	if ((q->phba->intr_type == INTx) && (arm == LPFC_QUEUE_REARM))
 		readl(q->phba->sli4_hba.EQDBregaddr);
-	return released;
 }
 
 /**
- * lpfc_sli4_if6_eq_release - Indicates the host has finished processing an EQ
+ * lpfc_sli4_if6_write_eq_db - write EQ DB for eqe's consumed or arm state
+ * @phba: adapter with EQ
  * @q: The Event Queue that the host has completed processing for.
+ * @count: Number of elements that have been consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Event Queue Entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of EQEs that were popped.
+ * This routine rings the doorbell to tell the HBA that @count EQEs have
+ * been processed; @arm selects whether the queue is rearmed as well. No
+ * doorbell is written when @q is NULL or when @count is 0 and @arm is false.
  **/
-uint32_t
-lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_if6_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			  uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_eqe *temp_eqe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.eqav) {
-			temp_eqe = q->qe[q->host_index].eqe;
-			bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
 	if (arm)
 		bf_set(lpfc_if6_eq_doorbell_arm, &doorbell, 1);
-	bf_set(lpfc_if6_eq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_if6_eq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_if6_eq_doorbell_eqid, &doorbell, q->queue_id);
 	writel(doorbell.word0, q->phba->sli4_hba.EQDBregaddr);
 	/* PCI read to flush PCI pipeline on re-arming for INTx mode */
 	if ((q->phba->intr_type == INTx) && (arm == LPFC_QUEUE_REARM))
 		readl(q->phba->sli4_hba.EQDBregaddr);
-	return released;
+}
+/* Retire @eqe: clear its valid bit unless eqav is set, then advance @eq. */
+static void
+__lpfc_sli4_consume_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq,
+			struct lpfc_eqe *eqe)
+{
+	if (!phba->sli4_hba.pc_sli4_params.eqav)
+		bf_set_le32(lpfc_eqe_valid, eqe, 0);
+	/* step host_index to the next entry, wrapping at entry_count */
+	eq->host_index = ((eq->host_index + 1) % eq->entry_count);
+
+	/* with eqav, flip the expected valid value whenever the index wraps */
+	if (phba->sli4_hba.pc_sli4_params.eqav && !eq->host_index)
+		eq->qe_valid = (eq->qe_valid) ? 0 : 1;
+}
+/* Discard every valid EQE on @eq without handling it, then re-arm the EQ. */
+static void
+lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	struct lpfc_eqe *eqe;
+	uint32_t count = 0;
+
+	/* consume entries until lpfc_sli4_eq_get() finds no valid EQE */
+	eqe = lpfc_sli4_eq_get(eq);
+	while (eqe) {
+		__lpfc_sli4_consume_eqe(phba, eq, eqe);
+		count++;
+		eqe = lpfc_sli4_eq_get(eq);
+	}
+
+	/* report the discarded count through the doorbell and re-arm */
+	phba->sli4_hba.sli4_write_eq_db(phba, eq, count, LPFC_QUEUE_REARM);
+}
+/* Process @eq under its queue_claimed flag, re-arm; returns EQEs handled. */
+static int
+lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+	struct lpfc_eqe *eqe;
+	int count = 0, consumed = 0;
+
+	if (cmpxchg(&eq->queue_claimed, 0, 1) != 0)
+		goto rearm_and_exit;
+
+	eqe = lpfc_sli4_eq_get(eq);
+	while (eqe) {
+		lpfc_sli4_hba_handle_eqe(phba, eq, eqe);
+		__lpfc_sli4_consume_eqe(phba, eq, eqe);
+		/* stop once max_proc_limit entries were handled in this call */
+		consumed++;
+		if (!(++count % eq->max_proc_limit))
+			break;
+		/* every notify_interval entries, release them, no re-arm */
+		if (!(count % eq->notify_interval)) {
+			phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed,
+							LPFC_QUEUE_NOARM);
+			consumed = 0;
+		}
+
+		eqe = lpfc_sli4_eq_get(eq);
+	}
+	eq->EQ_processed += count;
+
+	/* Track the max number of EQEs processed in 1 intr */
+	if (count > eq->EQ_max_eqe)
+		eq->EQ_max_eqe = count;
+	/* drop the claim taken by the cmpxchg at entry */
+	eq->queue_claimed = 0;
+
+rearm_and_exit:
+	/* Always re-arm the EQ, releasing any EQEs not yet reported */
+	phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, LPFC_QUEUE_REARM);
+
+	return count;
+}
 
 /**
@@ -519,28 +540,16 @@ lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
 static struct lpfc_cqe *
 lpfc_sli4_cq_get(struct lpfc_queue *q)
 {
-	struct lpfc_hba *phba;
 	struct lpfc_cqe *cqe;
-	uint32_t idx;
 
 	/* sanity check on queue memory */
 	if (unlikely(!q))
 		return NULL;
-	phba = q->phba;
-	cqe = q->qe[q->hba_index].cqe;
+	cqe = q->qe[q->host_index].cqe;
 
 	/* If the next CQE is not valid then we are done */
 	if (bf_get_le32(lpfc_cqe_valid, cqe) != q->qe_valid)
 		return NULL;
-	/* If the host has not yet processed the next entry then we are done */
-	idx = ((q->hba_index + 1) % q->entry_count);
-	if (idx == q->host_index)
-		return NULL;
-
-	q->hba_index = idx;
-	/* if the index wrapped around, toggle the valid bit */
-	if (phba->sli4_hba.pc_sli4_params.cqav && !q->hba_index)
-		q->qe_valid = (q->qe_valid) ? 0 : 1;
 
 	/*
 	 * insert barrier for instruction interlock : data from the hardware
@@ -554,107 +563,81 @@ lpfc_sli4_cq_get(struct lpfc_queue *q)
 	return cqe;
 }
 
+static void
+__lpfc_sli4_consume_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			struct lpfc_cqe *cqe)
+{
+	if (!phba->sli4_hba.pc_sli4_params.cqav)
+		bf_set_le32(lpfc_cqe_valid, cqe, 0);
+	/* CQE is consumed: step host_index forward, wrapping at entry_count */
+	cq->host_index = ((cq->host_index + 1) % cq->entry_count);
+
+	/* with cqav, flip the expected valid value whenever the index wraps */
+	if (phba->sli4_hba.pc_sli4_params.cqav && !cq->host_index)
+		cq->qe_valid = (cq->qe_valid) ? 0 : 1;
+}
+
 /**
- * lpfc_sli4_cq_release - Indicates the host has finished processing a CQ
+ * lpfc_sli4_write_cq_db - write cq DB for entries consumed or arm state.
+ * @phba: the adapter with the CQ
  * @q: The Completion Queue that the host has completed processing for.
+ * @count: the number of elements that were consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Completion queue entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of CQEs that were released.
+ * This routine rings the doorbell to tell the HBA that @count CQEs have
+ * been processed; @arm specifies whether the queue is rearmed as well. No
+ * doorbell is written when @q is NULL or when @count is 0 and @arm is false.
  **/
-uint32_t
-lpfc_sli4_cq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+		     uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_cqe *temp_qe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.cqav) {
-			temp_qe = q->qe[q->host_index].cqe;
-			bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
 	if (arm)
 		bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
-	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_COMPLETION);
 	bf_set(lpfc_eqcq_doorbell_cqid_hi, &doorbell,
 			(q->queue_id >> LPFC_CQID_HI_FIELD_SHIFT));
 	bf_set(lpfc_eqcq_doorbell_cqid_lo, &doorbell, q->queue_id);
 	writel(doorbell.word0, q->phba->sli4_hba.CQDBregaddr);
-	return released;
 }
 
 /**
- * lpfc_sli4_if6_cq_release - Indicates the host has finished processing a CQ
+ * lpfc_sli4_if6_write_cq_db - write cq DB for entries consumed or arm state.
+ * @phba: the adapter with the CQ
  * @q: The Completion Queue that the host has completed processing for.
+ * @count: the number of elements that were consumed
  * @arm: Indicates whether the host wants to arms this CQ.
  *
- * This routine will mark all Completion queue entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of CQEs that were released.
+ * This routine rings the doorbell to tell the HBA that @count CQEs have
+ * been processed; @arm specifies whether the queue is rearmed as well. No
+ * doorbell is written when @q is NULL or when @count is 0 and @arm is false.
  **/
-uint32_t
-lpfc_sli4_if6_cq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_if6_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+			 uint32_t count, bool arm)
 {
-	uint32_t released = 0;
-	struct lpfc_hba *phba;
-	struct lpfc_cqe *temp_qe;
 	struct lpfc_register doorbell;
 
 	/* sanity check on queue memory */
-	if (unlikely(!q))
-		return 0;
-	phba = q->phba;
-
-	/* while there are valid entries */
-	while (q->hba_index != q->host_index) {
-		if (!phba->sli4_hba.pc_sli4_params.cqav) {
-			temp_qe = q->qe[q->host_index].cqe;
-			bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
-		}
-		released++;
-		q->host_index = ((q->host_index + 1) % q->entry_count);
-	}
-	if (unlikely(released == 0 && !arm))
-		return 0;
+	if (unlikely(!q || (count == 0 && !arm)))
+		return;
 
 	/* ring doorbell for number popped */
 	doorbell.word0 = 0;
 	if (arm)
 		bf_set(lpfc_if6_cq_doorbell_arm, &doorbell, 1);
-	bf_set(lpfc_if6_cq_doorbell_num_released, &doorbell, released);
+	bf_set(lpfc_if6_cq_doorbell_num_released, &doorbell, count);
 	bf_set(lpfc_if6_cq_doorbell_cqid, &doorbell, q->queue_id);
 	writel(doorbell.word0, q->phba->sli4_hba.CQDBregaddr);
-	return released;
 }
 
 /**
@@ -703,15 +686,15 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
 	hq->RQ_buf_posted++;
 
 	/* Ring The Header Receive Queue Doorbell */
-	if (!(hq->host_index % hq->entry_repost)) {
+	if (!(hq->host_index % hq->notify_interval)) {
 		doorbell.word0 = 0;
 		if (hq->db_format == LPFC_DB_RING_FORMAT) {
 			bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell,
-			       hq->entry_repost);
+			       hq->notify_interval);
 			bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id);
 		} else if (hq->db_format == LPFC_DB_LIST_FORMAT) {
 			bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell,
-			       hq->entry_repost);
+			       hq->notify_interval);
 			bf_set(lpfc_rq_db_list_fm_index, &doorbell,
 			       hq->host_index);
 			bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id);
@@ -1025,7 +1008,7 @@ lpfc_test_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
 	if (!ndlp->active_rrqs_xri_bitmap)
 		return 0;
 	if (test_bit(xritag, ndlp->active_rrqs_xri_bitmap))
-			return 1;
+		return 1;
 	else
 		return 0;
 }
@@ -1133,14 +1116,14 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq)
 	struct list_head *lpfc_els_sgl_list = &phba->sli4_hba.lpfc_els_sgl_list;
 	struct lpfc_sglq *sglq = NULL;
 	struct lpfc_sglq *start_sglq = NULL;
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 	struct lpfc_nodelist *ndlp;
 	int found = 0;
 
 	lockdep_assert_held(&phba->hbalock);
 
 	if (piocbq->iocb_flag &  LPFC_IO_FCP) {
-		lpfc_cmd = (struct lpfc_scsi_buf *) piocbq->context1;
+		lpfc_cmd = (struct lpfc_io_buf *) piocbq->context1;
 		ndlp = lpfc_cmd->rdata->pnode;
 	} else  if ((piocbq->iocb.ulpCommand == CMD_GEN_REQUEST64_CR) &&
 			!(piocbq->iocb_flag & LPFC_IO_LIBDFC)) {
@@ -1596,6 +1579,7 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 
 	list_add_tail(&piocb->list, &pring->txcmplq);
 	piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ;
+	pring->txcmplq_cnt++;
 
 	if ((unlikely(pring->ringno == LPFC_ELS_RING)) &&
 	   (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
@@ -3008,6 +2992,7 @@ lpfc_sli_iocbq_lookup(struct lpfc_hba *phba,
 			/* remove from txcmpl queue list */
 			list_del_init(&cmd_iocb->list);
 			cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ;
+			pring->txcmplq_cnt--;
 			return cmd_iocb;
 		}
 	}
@@ -3045,6 +3030,7 @@ lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba,
 			/* remove from txcmpl queue list */
 			list_del_init(&cmd_iocb->list);
 			cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ;
+			pring->txcmplq_cnt--;
 			return cmd_iocb;
 		}
 	}
@@ -3981,8 +3967,8 @@ lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba)
 
 	/* Look on all the FCP Rings for the iotag */
 	if (phba->sli_rev >= LPFC_SLI_REV4) {
-		for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
-			pring = phba->sli4_hba.fcp_wq[i]->pring;
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
 			lpfc_sli_abort_iocb_ring(phba, pring);
 		}
 	} else {
@@ -4006,12 +3992,13 @@ lpfc_sli_abort_nvme_rings(struct lpfc_hba *phba)
 	struct lpfc_sli_ring  *pring;
 	uint32_t i;
 
-	if (phba->sli_rev < LPFC_SLI_REV4)
+	if ((phba->sli_rev < LPFC_SLI_REV4) ||
+	    !(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 		return;
 
 	/* Abort all IO on each NVME ring. */
-	for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-		pring = phba->sli4_hba.nvme_wq[i]->pring;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
 		lpfc_sli_abort_wqe_ring(phba, pring);
 	}
 }
@@ -4044,8 +4031,8 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba)
 
 	/* Look on all the FCP Rings for the iotag */
 	if (phba->sli_rev >= LPFC_SLI_REV4) {
-		for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
-			pring = phba->sli4_hba.fcp_wq[i]->pring;
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
 
 			spin_lock_irq(&pring->ring_lock);
 			/* Retrieve everything on txq */
@@ -4110,7 +4097,8 @@ lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba)
 	uint32_t i;
 	struct lpfc_iocbq *piocb, *next_iocb;
 
-	if (phba->sli_rev < LPFC_SLI_REV4)
+	if ((phba->sli_rev < LPFC_SLI_REV4) ||
+	    !(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
 		return;
 
 	/* Hint to other driver operations that a flush is in progress. */
@@ -4122,8 +4110,8 @@ lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba)
 	 * a local driver reason code.  This is a flush so no
 	 * abort exchange to FW.
 	 */
-	for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-		pring = phba->sli4_hba.nvme_wq[i]->pring;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
 
 		spin_lock_irq(&pring->ring_lock);
 		list_for_each_entry_safe(piocb, next_iocb,
@@ -5564,41 +5552,35 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
 {
 	int qidx;
 	struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
+	struct lpfc_sli4_hdw_queue *qp;
 
-	sli4_hba->sli4_cq_release(sli4_hba->mbx_cq, LPFC_QUEUE_REARM);
-	sli4_hba->sli4_cq_release(sli4_hba->els_cq, LPFC_QUEUE_REARM);
+	sli4_hba->sli4_write_cq_db(phba, sli4_hba->mbx_cq, 0, LPFC_QUEUE_REARM);
+	sli4_hba->sli4_write_cq_db(phba, sli4_hba->els_cq, 0, LPFC_QUEUE_REARM);
 	if (sli4_hba->nvmels_cq)
-		sli4_hba->sli4_cq_release(sli4_hba->nvmels_cq,
-						LPFC_QUEUE_REARM);
+		sli4_hba->sli4_write_cq_db(phba, sli4_hba->nvmels_cq, 0,
+					   LPFC_QUEUE_REARM);
 
-	if (sli4_hba->fcp_cq)
-		for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++)
-			sli4_hba->sli4_cq_release(sli4_hba->fcp_cq[qidx],
-						LPFC_QUEUE_REARM);
-
-	if (sli4_hba->nvme_cq)
-		for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++)
-			sli4_hba->sli4_cq_release(sli4_hba->nvme_cq[qidx],
-						LPFC_QUEUE_REARM);
-
-	if (phba->cfg_fof)
-		sli4_hba->sli4_cq_release(sli4_hba->oas_cq, LPFC_QUEUE_REARM);
+	qp = sli4_hba->hdwq;
+	if (sli4_hba->hdwq) {
+		for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+			sli4_hba->sli4_write_cq_db(phba, qp[qidx].fcp_cq, 0,
+						   LPFC_QUEUE_REARM);
+			sli4_hba->sli4_write_cq_db(phba, qp[qidx].nvme_cq, 0,
+						   LPFC_QUEUE_REARM);
+		}
 
-	if (sli4_hba->hba_eq)
-		for (qidx = 0; qidx < phba->io_channel_irqs; qidx++)
-			sli4_hba->sli4_eq_release(sli4_hba->hba_eq[qidx],
-							LPFC_QUEUE_REARM);
+		for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++)
+			sli4_hba->sli4_write_eq_db(phba, qp[qidx].hba_eq,
+						0, LPFC_QUEUE_REARM);
+	}
 
 	if (phba->nvmet_support) {
 		for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) {
-			sli4_hba->sli4_cq_release(
-				sli4_hba->nvmet_cqset[qidx],
+			sli4_hba->sli4_write_cq_db(phba,
+				sli4_hba->nvmet_cqset[qidx], 0,
 				LPFC_QUEUE_REARM);
 		}
 	}
-
-	if (phba->cfg_fof)
-		sli4_hba->sli4_eq_release(sli4_hba->fof_eq, LPFC_QUEUE_REARM);
 }
 
 /**
@@ -6027,11 +6009,8 @@ lpfc_sli4_alloc_extent(struct lpfc_hba *phba, uint16_t type)
 		list_add_tail(&rsrc_blks->list, ext_blk_list);
 		rsrc_start = rsrc_id;
 		if ((type == LPFC_RSC_TYPE_FCOE_XRI) && (j == 0)) {
-			phba->sli4_hba.scsi_xri_start = rsrc_start +
+			phba->sli4_hba.io_xri_start = rsrc_start +
 				lpfc_sli4_get_iocb_cnt(phba);
-			phba->sli4_hba.nvme_xri_start =
-				phba->sli4_hba.scsi_xri_start +
-				phba->sli4_hba.scsi_xri_max;
 		}
 
 		while (rsrc_id < (rsrc_start + rsrc_size)) {
@@ -7056,6 +7035,38 @@ lpfc_sli4_repost_sgl_list(struct lpfc_hba *phba,
 	return total_cnt;
 }
 
+/**
+ * lpfc_sli4_repost_io_sgl_list - Repost all the allocated IO buffer sgls
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine collects the allocated IO buffers on a local list with
+ * lpfc_io_buf_flush() and reposts their sgls to the port using SGL block
+ * post. This is needed after a pci_function_reset/warm_start or start.
+ * NOTE(review): the lpfc_hba_down_post_s4 routine presumably still moves
+ * aborted buffers back beforehand - confirm against the new io buf lists.
+ *
+ * Returns: 0 = success or nothing to repost, -EIO if no sgl was posted.
+ **/
+int
+lpfc_sli4_repost_io_sgl_list(struct lpfc_hba *phba)
+{
+	LIST_HEAD(post_nblist);
+	int num_posted, rc = 0;
+
+	/* move all IO buffers that need reposting onto a local list */
+	lpfc_io_buf_flush(phba, &post_nblist);
+
+	/* post the collected buffer sgls to the port, if there are any */
+	if (!list_empty(&post_nblist)) {
+		num_posted = lpfc_sli4_post_io_sgl_list(
+			phba, &post_nblist, phba->sli4_hba.io_xri_cnt);
+		/* nothing was posted at all: report an I/O error */
+		if (num_posted == 0)
+			rc = -EIO;
+	}
+	return rc;
+}
+
 void
 lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
 {
@@ -7144,7 +7155,7 @@ lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 int
 lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 {
-	int rc, i, cnt;
+	int rc, i, cnt, len;
 	LPFC_MBOXQ_t *mboxq;
 	struct lpfc_mqe *mqe;
 	uint8_t *vpd;
@@ -7517,24 +7528,26 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 		/* We need 1 iocbq for every SGL, for IO processing */
 		cnt += phba->sli4_hba.nvmet_xri_cnt;
 	} else {
-		/* update host scsi xri-sgl sizes and mappings */
-		rc = lpfc_sli4_scsi_sgl_update(phba);
+		/* update host common xri-sgl sizes and mappings */
+		rc = lpfc_sli4_io_sgl_update(phba);
 		if (unlikely(rc)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-					"6309 Failed to update scsi-sgl size "
+					"6082 Failed to update nvme-sgl size "
 					"and mapping: %d\n", rc);
 			goto out_destroy_queue;
 		}
 
-		/* update host nvme xri-sgl sizes and mappings */
-		rc = lpfc_sli4_nvme_sgl_update(phba);
+		/* register the allocated common sgl pool to the port */
+		rc = lpfc_sli4_repost_io_sgl_list(phba);
 		if (unlikely(rc)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-					"6082 Failed to update nvme-sgl size "
-					"and mapping: %d\n", rc);
+					"6116 Error %d during nvme sgl post "
+					"operation\n", rc);
+			/* Some NVME buffers were moved to abort nvme list */
+			/* A pci function reset will repost them */
+			rc = -ENODEV;
 			goto out_destroy_queue;
 		}
-
 		cnt = phba->cfg_iocb_cnt * 1024;
 	}
 
@@ -7571,36 +7584,6 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 		}
 	}
 
-	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
-		/* register the allocated scsi sgl pool to the port */
-		rc = lpfc_sli4_repost_scsi_sgl_list(phba);
-		if (unlikely(rc)) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-					"0383 Error %d during scsi sgl post "
-					"operation\n", rc);
-			/* Some Scsi buffers were moved to abort scsi list */
-			/* A pci function reset will repost them */
-			rc = -ENODEV;
-			goto out_destroy_queue;
-		}
-	}
-
-	if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
-	    (phba->nvmet_support == 0)) {
-
-		/* register the allocated nvme sgl pool to the port */
-		rc = lpfc_repost_nvme_sgl_list(phba);
-		if (unlikely(rc)) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
-					"6116 Error %d during nvme sgl post "
-					"operation\n", rc);
-			/* Some NVME buffers were moved to abort nvme list */
-			/* A pci function reset will repost them */
-			rc = -ENODEV;
-			goto out_destroy_queue;
-		}
-	}
-
 	/* Post the rpi header region to the device. */
 	rc = lpfc_sli4_post_all_rpi_hdrs(phba);
 	if (unlikely(rc)) {
@@ -7650,6 +7633,25 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 		lpfc_sli_read_link_ste(phba);
 	}
 
+	/* Don't post more new bufs if repost already recovered
+	 * the nvme sgls.
+	 */
+	if (phba->nvmet_support == 0) {
+		if (phba->sli4_hba.io_xri_cnt == 0) {
+			len = lpfc_new_io_buf(
+					      phba, phba->sli4_hba.io_xri_max);
+			if (len == 0) {
+				rc = -ENOMEM;
+				goto out_unset_queue;
+			}
+
+			if (phba->cfg_xri_rebalancing)
+				lpfc_create_multixri_pools(phba);
+		}
+	} else {
+		phba->cfg_xri_rebalancing = 0;
+	}
+
 	/* Arm the CQs and then EQs on device */
 	lpfc_sli4_arm_cqeq_intr(phba);
 
@@ -7678,6 +7680,11 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 	phba->hb_outstanding = 0;
 	phba->last_completion_time = jiffies;
 
+	/* start eq_delay heartbeat */
+	if (phba->cfg_auto_imax)
+		queue_delayed_work(phba->wq, &phba->eq_delay_work,
+				   msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+
 	/* Start error attention (ERATT) polling timer */
 	mod_timer(&phba->eratt_poll,
 		  jiffies + msecs_to_jiffies(1000 * phba->eratt_poll_interval));
@@ -7729,18 +7736,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_SLI,
 					"3104 Adapter failed to issue "
 					"DOWN_LINK mbox cmd, rc:x%x\n", rc);
-			goto out_unset_queue;
+			goto out_io_buff_free;
 		}
 	} else if (phba->cfg_suppress_link_up == LPFC_INITIALIZE_LINK) {
 		/* don't perform init_link on SLI4 FC port loopback test */
 		if (!(phba->link_flag & LS_LOOPBACK_MODE)) {
 			rc = phba->lpfc_hba_init_link(phba, MBX_NOWAIT);
 			if (rc)
-				goto out_unset_queue;
+				goto out_io_buff_free;
 		}
 	}
 	mempool_free(mboxq, phba->mbox_mem_pool);
 	return rc;
+out_io_buff_free:
+	/* Free allocated IO Buffers */
+	lpfc_io_free(phba);
 out_unset_queue:
 	/* Unset all the queues set up in this routine when error out */
 	lpfc_sli4_queue_unset(phba);
@@ -7846,7 +7856,6 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
 	struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
 	uint32_t eqidx;
 	struct lpfc_queue *fpeq = NULL;
-	struct lpfc_eqe *eqe;
 	bool mbox_pending;
 
 	if (unlikely(!phba) || (phba->sli_rev != LPFC_SLI_REV4))
@@ -7854,11 +7863,11 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
 
 	/* Find the eq associated with the mcq */
 
-	if (sli4_hba->hba_eq)
-		for (eqidx = 0; eqidx < phba->io_channel_irqs; eqidx++)
-			if (sli4_hba->hba_eq[eqidx]->queue_id ==
+	if (sli4_hba->hdwq)
+		for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++)
+			if (sli4_hba->hdwq[eqidx].hba_eq->queue_id ==
 			    sli4_hba->mbx_cq->assoc_qid) {
-				fpeq = sli4_hba->hba_eq[eqidx];
+				fpeq = sli4_hba->hdwq[eqidx].hba_eq;
 				break;
 			}
 	if (!fpeq)
@@ -7880,14 +7889,11 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
 	 */
 
 	if (mbox_pending)
-		while ((eqe = lpfc_sli4_eq_get(fpeq))) {
-			lpfc_sli4_hba_handle_eqe(phba, eqe, eqidx);
-			fpeq->EQ_processed++;
-		}
-
-	/* Always clear and re-arm the EQ */
-
-	sli4_hba->sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+		/* process and rearm the EQ */
+		lpfc_sli4_process_eq(phba, fpeq);
+	else
+		/* Always clear and re-arm the EQ */
+		sli4_hba->sli4_write_eq_db(phba, fpeq, 0, LPFC_QUEUE_REARM);
 
 	return mbox_pending;
 
@@ -8557,7 +8563,6 @@ lpfc_sli4_post_sync_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
 	rc = lpfc_sli4_wait_bmbx_ready(phba, mboxq);
 	if (rc)
 		goto exit;
-
 	/*
 	 * Initialize the bootstrap memory region to avoid stale data areas
 	 * in the mailbox post.  Then copy the caller's mailbox contents to
@@ -9476,7 +9481,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			bf_set(wqe_pbde, &wqe->fcp_iwrite.wqe_com, 0);
 
 		if (phba->fcp_embed_io) {
-			struct lpfc_scsi_buf *lpfc_cmd;
+			struct lpfc_io_buf *lpfc_cmd;
 			struct sli4_sge *sgl;
 			struct fcp_cmnd *fcp_cmnd;
 			uint32_t *ptr;
@@ -9484,7 +9489,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			/* 128 byte wqe support here */
 
 			lpfc_cmd = iocbq->context1;
-			sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+			sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
 			fcp_cmnd = lpfc_cmd->fcp_cmnd;
 
 			/* Word 0-2 - FCP_CMND */
@@ -9540,7 +9545,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			bf_set(wqe_pbde, &wqe->fcp_iread.wqe_com, 0);
 
 		if (phba->fcp_embed_io) {
-			struct lpfc_scsi_buf *lpfc_cmd;
+			struct lpfc_io_buf *lpfc_cmd;
 			struct sli4_sge *sgl;
 			struct fcp_cmnd *fcp_cmnd;
 			uint32_t *ptr;
@@ -9548,7 +9553,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			/* 128 byte wqe support here */
 
 			lpfc_cmd = iocbq->context1;
-			sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+			sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
 			fcp_cmnd = lpfc_cmd->fcp_cmnd;
 
 			/* Word 0-2 - FCP_CMND */
@@ -9597,7 +9602,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 		/* Note, word 10 is already initialized to 0 */
 
 		if (phba->fcp_embed_io) {
-			struct lpfc_scsi_buf *lpfc_cmd;
+			struct lpfc_io_buf *lpfc_cmd;
 			struct sli4_sge *sgl;
 			struct fcp_cmnd *fcp_cmnd;
 			uint32_t *ptr;
@@ -9605,7 +9610,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
 			/* 128 byte wqe support here */
 
 			lpfc_cmd = iocbq->context1;
-			sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+			sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
 			fcp_cmnd = lpfc_cmd->fcp_cmnd;
 
 			/* Word 0-2 - FCP_CMND */
@@ -9864,10 +9869,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
 	/* Get the WQ */
 	if ((piocb->iocb_flag & LPFC_IO_FCP) ||
 	    (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
-		if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS)))
-			wq = phba->sli4_hba.fcp_wq[piocb->hba_wqidx];
-		else
-			wq = phba->sli4_hba.oas_wq;
+		wq = phba->sli4_hba.hdwq[piocb->hba_wqidx].fcp_wq;
 	} else {
 		wq = phba->sli4_hba.els_wq;
 	}
@@ -10001,29 +10003,20 @@ lpfc_sli_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
 struct lpfc_sli_ring *
 lpfc_sli4_calc_ring(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
 {
+	struct lpfc_io_buf *lpfc_cmd;
+
 	if (piocb->iocb_flag & (LPFC_IO_FCP | LPFC_USE_FCPWQIDX)) {
-		if (!(phba->cfg_fof) ||
-		    (!(piocb->iocb_flag & LPFC_IO_FOF))) {
-			if (unlikely(!phba->sli4_hba.fcp_wq))
-				return NULL;
-			/*
-			 * for abort iocb hba_wqidx should already
-			 * be setup based on what work queue we used.
-			 */
-			if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
-				piocb->hba_wqidx =
-					lpfc_sli4_scmd_to_wqidx_distr(phba,
-							      piocb->context1);
-				piocb->hba_wqidx = piocb->hba_wqidx %
-					phba->cfg_fcp_io_channel;
-			}
-			return phba->sli4_hba.fcp_wq[piocb->hba_wqidx]->pring;
-		} else {
-			if (unlikely(!phba->sli4_hba.oas_wq))
-				return NULL;
-			piocb->hba_wqidx = 0;
-			return phba->sli4_hba.oas_wq->pring;
+		if (unlikely(!phba->sli4_hba.hdwq))
+			return NULL;
+		/*
+		 * for abort iocb hba_wqidx should already
+		 * be setup based on what work queue we used.
+		 */
+		if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
+			lpfc_cmd = (struct lpfc_io_buf *)piocb->context1;
+			piocb->hba_wqidx = lpfc_cmd->hdwq_no;
 		}
+		return phba->sli4_hba.hdwq[piocb->hba_wqidx].fcp_wq->pring;
 	} else {
 		if (unlikely(!phba->sli4_hba.els_wq))
 			return NULL;
@@ -10049,12 +10042,9 @@ int
 lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
 		    struct lpfc_iocbq *piocb, uint32_t flag)
 {
-	struct lpfc_hba_eq_hdl *hba_eq_hdl;
 	struct lpfc_sli_ring *pring;
-	struct lpfc_queue *fpeq;
-	struct lpfc_eqe *eqe;
 	unsigned long iflags;
-	int rc, idx;
+	int rc;
 
 	if (phba->sli_rev == LPFC_SLI_REV4) {
 		pring = lpfc_sli4_calc_ring(phba, piocb);
@@ -10064,34 +10054,6 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
 		spin_lock_irqsave(&pring->ring_lock, iflags);
 		rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
 		spin_unlock_irqrestore(&pring->ring_lock, iflags);
-
-		if (lpfc_fcp_look_ahead && (piocb->iocb_flag &  LPFC_IO_FCP)) {
-			idx = piocb->hba_wqidx;
-			hba_eq_hdl = &phba->sli4_hba.hba_eq_hdl[idx];
-
-			if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) {
-
-				/* Get associated EQ with this index */
-				fpeq = phba->sli4_hba.hba_eq[idx];
-
-				/* Turn off interrupts from this EQ */
-				phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-
-				/*
-				 * Process all the events on FCP EQ
-				 */
-				while ((eqe = lpfc_sli4_eq_get(fpeq))) {
-					lpfc_sli4_hba_handle_eqe(phba,
-						eqe, idx);
-					fpeq->EQ_processed++;
-				}
-
-				/* Always clear and re-arm the EQ */
-				phba->sli4_hba.sli4_eq_release(fpeq,
-					LPFC_QUEUE_REARM);
-			}
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-		}
 	} else {
 		/* For now, SLI2/3 will still use hbalock */
 		spin_lock_irqsave(&phba->hbalock, iflags);
@@ -10506,19 +10468,11 @@ lpfc_sli4_queue_init(struct lpfc_hba *phba)
 	INIT_LIST_HEAD(&psli->mboxq);
 	INIT_LIST_HEAD(&psli->mboxq_cmpl);
 	/* Initialize list headers for txq and txcmplq as double linked lists */
-	for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
-		pring = phba->sli4_hba.fcp_wq[i]->pring;
-		pring->flag = 0;
-		pring->ringno = LPFC_FCP_RING;
-		INIT_LIST_HEAD(&pring->txq);
-		INIT_LIST_HEAD(&pring->txcmplq);
-		INIT_LIST_HEAD(&pring->iocb_continueq);
-		spin_lock_init(&pring->ring_lock);
-	}
-	for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
-		pring = phba->sli4_hba.nvme_wq[i]->pring;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
 		pring->flag = 0;
 		pring->ringno = LPFC_FCP_RING;
+		pring->txcmplq_cnt = 0;
 		INIT_LIST_HEAD(&pring->txq);
 		INIT_LIST_HEAD(&pring->txcmplq);
 		INIT_LIST_HEAD(&pring->iocb_continueq);
@@ -10527,25 +10481,27 @@ lpfc_sli4_queue_init(struct lpfc_hba *phba)
 	pring = phba->sli4_hba.els_wq->pring;
 	pring->flag = 0;
 	pring->ringno = LPFC_ELS_RING;
+	pring->txcmplq_cnt = 0;
 	INIT_LIST_HEAD(&pring->txq);
 	INIT_LIST_HEAD(&pring->txcmplq);
 	INIT_LIST_HEAD(&pring->iocb_continueq);
 	spin_lock_init(&pring->ring_lock);
 
-	if (phba->cfg_nvme_io_channel) {
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
+			pring->flag = 0;
+			pring->ringno = LPFC_FCP_RING;
+			pring->txcmplq_cnt = 0;
+			INIT_LIST_HEAD(&pring->txq);
+			INIT_LIST_HEAD(&pring->txcmplq);
+			INIT_LIST_HEAD(&pring->iocb_continueq);
+			spin_lock_init(&pring->ring_lock);
+		}
 		pring = phba->sli4_hba.nvmels_wq->pring;
 		pring->flag = 0;
 		pring->ringno = LPFC_ELS_RING;
-		INIT_LIST_HEAD(&pring->txq);
-		INIT_LIST_HEAD(&pring->txcmplq);
-		INIT_LIST_HEAD(&pring->iocb_continueq);
-		spin_lock_init(&pring->ring_lock);
-	}
-
-	if (phba->cfg_fof) {
-		pring = phba->sli4_hba.oas_wq->pring;
-		pring->flag = 0;
-		pring->ringno = LPFC_FCP_RING;
+		pring->txcmplq_cnt = 0;
 		INIT_LIST_HEAD(&pring->txq);
 		INIT_LIST_HEAD(&pring->txcmplq);
 		INIT_LIST_HEAD(&pring->iocb_continueq);
@@ -11327,6 +11283,7 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	struct lpfc_iocbq *abtsiocbp;
 	union lpfc_wqe128 *abts_wqe;
 	int retval;
+	int idx = cmdiocb->hba_wqidx;
 
 	/*
 	 * There are certain command types we don't want to abort.  And we
@@ -11382,7 +11339,8 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 	abtsiocbp->iocb_flag |= LPFC_IO_NVME;
 	abtsiocbp->vport = vport;
 	abtsiocbp->wqe_cmpl = lpfc_nvme_abort_fcreq_cmpl;
-	retval = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abtsiocbp);
+	retval = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[idx],
+				     abtsiocbp);
 	if (retval) {
 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
 				 "6147 Failed abts issue_wqe with status x%x "
@@ -11457,7 +11415,7 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
 			   uint16_t tgt_id, uint64_t lun_id,
 			   lpfc_ctx_cmd ctx_cmd)
 {
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 	int rc = 1;
 
 	if (iocbq->vport != vport)
@@ -11467,7 +11425,7 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
 	    !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ))
 		return rc;
 
-	lpfc_cmd = container_of(iocbq, struct lpfc_scsi_buf, cur_iocbq);
+	lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
 
 	if (lpfc_cmd->pCmd == NULL)
 		return rc;
@@ -11694,14 +11652,14 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 			uint16_t tgt_id, uint64_t lun_id, lpfc_ctx_cmd cmd)
 {
 	struct lpfc_hba *phba = vport->phba;
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 	struct lpfc_iocbq *abtsiocbq;
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_iocbq *iocbq;
 	IOCB_t *icmd;
 	int sum, i, ret_val;
 	unsigned long iflags;
-	struct lpfc_sli_ring *pring_s4;
+	struct lpfc_sli_ring *pring_s4 = NULL;
 
 	spin_lock_irqsave(&phba->hbalock, iflags);
 
@@ -11719,17 +11677,46 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 					       cmd) != 0)
 			continue;
 
+		/* Guard against IO completion being called at same time */
+		lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
+		spin_lock(&lpfc_cmd->buf_lock);
+
+		if (!lpfc_cmd->pCmd) {
+			spin_unlock(&lpfc_cmd->buf_lock);
+			continue;
+		}
+
+		if (phba->sli_rev == LPFC_SLI_REV4) {
+			pring_s4 =
+			    phba->sli4_hba.hdwq[iocbq->hba_wqidx].fcp_wq->pring;
+			if (!pring_s4) {
+				spin_unlock(&lpfc_cmd->buf_lock);
+				continue;
+			}
+			/* Note: both hbalock and ring_lock must be set here */
+			spin_lock(&pring_s4->ring_lock);
+		}
+
 		/*
 		 * If the iocbq is already being aborted, don't take a second
 		 * action, but do count it.
 		 */
-		if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
+		if ((iocbq->iocb_flag & LPFC_DRIVER_ABORTED) ||
+		    !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
+			if (phba->sli_rev == LPFC_SLI_REV4)
+				spin_unlock(&pring_s4->ring_lock);
+			spin_unlock(&lpfc_cmd->buf_lock);
 			continue;
+		}
 
 		/* issue ABTS for this IOCB based on iotag */
 		abtsiocbq = __lpfc_sli_get_iocbq(phba);
-		if (abtsiocbq == NULL)
+		if (!abtsiocbq) {
+			if (phba->sli_rev == LPFC_SLI_REV4)
+				spin_unlock(&pring_s4->ring_lock);
+			spin_unlock(&lpfc_cmd->buf_lock);
 			continue;
+		}
 
 		icmd = &iocbq->iocb;
 		abtsiocbq->iocb.un.acxri.abortType = ABORT_TYPE_ABTS;
@@ -11750,7 +11737,6 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 		if (iocbq->iocb_flag & LPFC_IO_FOF)
 			abtsiocbq->iocb_flag |= LPFC_IO_FOF;
 
-		lpfc_cmd = container_of(iocbq, struct lpfc_scsi_buf, cur_iocbq);
 		ndlp = lpfc_cmd->rdata->pnode;
 
 		if (lpfc_is_link_up(phba) &&
@@ -11769,11 +11755,6 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 		iocbq->iocb_flag |= LPFC_DRIVER_ABORTED;
 
 		if (phba->sli_rev == LPFC_SLI_REV4) {
-			pring_s4 = lpfc_sli4_calc_ring(phba, abtsiocbq);
-			if (!pring_s4)
-				continue;
-			/* Note: both hbalock and ring_lock must be set here */
-			spin_lock(&pring_s4->ring_lock);
 			ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
 							abtsiocbq, 0);
 			spin_unlock(&pring_s4->ring_lock);
@@ -11782,6 +11763,7 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
 							abtsiocbq, 0);
 		}
 
+		spin_unlock(&lpfc_cmd->buf_lock);
 
 		if (ret_val == IOCB_ERROR)
 			__lpfc_sli_release_iocbq(phba, abtsiocbq);
@@ -11816,7 +11798,7 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba,
 {
 	wait_queue_head_t *pdone_q;
 	unsigned long iflags;
-	struct lpfc_scsi_buf *lpfc_cmd;
+	struct lpfc_io_buf *lpfc_cmd;
 
 	spin_lock_irqsave(&phba->hbalock, iflags);
 	if (cmdiocbq->iocb_flag & LPFC_IO_WAKE_TMO) {
@@ -11845,7 +11827,7 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba,
 	/* Set the exchange busy flag for task management commands */
 	if ((cmdiocbq->iocb_flag & LPFC_IO_FCP) &&
 		!(cmdiocbq->iocb_flag & LPFC_IO_LIBDFC)) {
-		lpfc_cmd = container_of(cmdiocbq, struct lpfc_scsi_buf,
+		lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf,
 			cur_iocbq);
 		lpfc_cmd->exch_busy = rspiocbq->iocb_flag & LPFC_EXCHANGE_BUSY;
 	}
@@ -12919,35 +12901,6 @@ lpfc_sli_intr_handler(int irq, void *dev_id)
 }  /* lpfc_sli_intr_handler */
 
 /**
- * lpfc_sli4_fcp_xri_abort_event_proc - Process fcp xri abort event
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine is invoked by the worker thread to process all the pending
- * SLI4 FCP abort XRI events.
- **/
-void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba)
-{
-	struct lpfc_cq_event *cq_event;
-
-	/* First, declare the fcp xri abort event has been handled */
-	spin_lock_irq(&phba->hbalock);
-	phba->hba_flag &= ~FCP_XRI_ABORT_EVENT;
-	spin_unlock_irq(&phba->hbalock);
-	/* Now, handle all the fcp xri abort events */
-	while (!list_empty(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue)) {
-		/* Get the first event from the head of the event queue */
-		spin_lock_irq(&phba->hbalock);
-		list_remove_head(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue,
-				 cq_event, struct lpfc_cq_event, list);
-		spin_unlock_irq(&phba->hbalock);
-		/* Notify aborted XRI for FCP work queue */
-		lpfc_sli4_fcp_xri_aborted(phba, &cq_event->cqe.wcqe_axri);
-		/* Free the event processed back to the free pool */
-		lpfc_sli4_cq_event_release(phba, cq_event);
-	}
-}
-
-/**
  * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
  * @phba: pointer to lpfc hba data structure.
  *
@@ -13320,11 +13273,14 @@ out_no_mqe_complete:
  * Return: true if work posted to worker thread, otherwise false.
  **/
 static bool
-lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
+lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+			 struct lpfc_cqe *cqe)
 {
 	struct lpfc_mcqe mcqe;
 	bool workposted;
 
+	cq->CQ_mbox++;
+
 	/* Copy the mailbox MCQE and convert endian order as needed */
 	lpfc_sli4_pcimem_bcopy(cqe, &mcqe, sizeof(struct lpfc_mcqe));
 
@@ -13443,17 +13399,8 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
 
 	switch (cq->subtype) {
 	case LPFC_FCP:
-		cq_event = lpfc_cq_event_setup(
-			phba, wcqe, sizeof(struct sli4_wcqe_xri_aborted));
-		if (!cq_event)
-			return false;
-		spin_lock_irqsave(&phba->hbalock, iflags);
-		list_add_tail(&cq_event->list,
-			      &phba->sli4_hba.sp_fcp_xri_aborted_work_queue);
-		/* Set the fcp xri abort event flag */
-		phba->hba_flag |= FCP_XRI_ABORT_EVENT;
-		spin_unlock_irqrestore(&phba->hbalock, iflags);
-		workposted = true;
+		lpfc_sli4_fcp_xri_aborted(phba, wcqe, cq->hdwq);
+		workposted = false;
 		break;
 	case LPFC_NVME_LS: /* NVME LS uses ELS resources */
 	case LPFC_ELS:
@@ -13461,6 +13408,7 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
 			phba, wcqe, sizeof(struct sli4_wcqe_xri_aborted));
 		if (!cq_event)
 			return false;
+		cq_event->hdwq = cq->hdwq;
 		spin_lock_irqsave(&phba->hbalock, iflags);
 		list_add_tail(&cq_event->list,
 			      &phba->sli4_hba.sp_els_xri_aborted_work_queue);
@@ -13474,7 +13422,7 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
 		if (phba->nvmet_support)
 			lpfc_sli4_nvmet_xri_aborted(phba, wcqe);
 		else
-			lpfc_sli4_nvme_xri_aborted(phba, wcqe);
+			lpfc_sli4_nvme_xri_aborted(phba, wcqe, cq->hdwq);
 
 		workposted = false;
 		break;
@@ -13592,7 +13540,7 @@ out:
  * lpfc_sli4_sp_handle_cqe - Process a slow path completion queue entry
  * @phba: Pointer to HBA context object.
  * @cq: Pointer to the completion queue.
- * @wcqe: Pointer to a completion queue entry.
+ * @cqe: Pointer to a completion queue entry.
  *
  * This routine process a slow-path work-queue or receive queue completion queue
  * entry.
@@ -13684,7 +13632,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 	/* Save EQ associated with this CQ */
 	cq->assoc_qp = speq;
 
-	if (!queue_work(phba->wq, &cq->spwork))
+	if (!queue_work_on(cq->chann, phba->wq, &cq->spwork))
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"0390 Cannot schedule soft IRQ "
 				"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
@@ -13692,60 +13640,129 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 }
 
 /**
- * lpfc_sli4_sp_process_cq - Process a slow-path event queue entry
+ * __lpfc_sli4_process_cq - Process elements of a CQ
  * @phba: Pointer to HBA context object.
+ * @cq: Pointer to CQ to be processed
+ * @handler: Routine to process each cqe
+ * @delay: Pointer to delay (in jiffies) to set if the handler is rescheduled
  *
- * This routine process a event queue entry from the slow-path event queue.
- * It will check the MajorCode and MinorCode to determine this is for a
- * completion event on a completion queue, if not, an error shall be logged
- * and just return. Otherwise, it will get to the corresponding completion
- * queue and process all the entries on that completion queue, rearm the
- * completion queue, and then return.
+ * This routine processes completion queue entries in a CQ. While a valid
+ * queue element is found, the handler is called. During processing checks
+ * are made for periodic doorbell writes to let the hardware know of
+ * element consumption.
+ *
+ * If the max limit on cqes to process is hit, or there are no more valid
+ * entries, the loop stops. If we processed a sufficient number of elements,
+ * meaning there is sufficient load, rather than rearming and generating
+ * another interrupt, a cq rescheduling delay will be set. A delay of 0
+ * indicates no rescheduling.
  *
+ * Returns True if work scheduled, False otherwise.
  **/
-static void
-lpfc_sli4_sp_process_cq(struct work_struct *work)
+static bool
+__lpfc_sli4_process_cq(struct lpfc_hba *phba, struct lpfc_queue *cq,
+	bool (*handler)(struct lpfc_hba *, struct lpfc_queue *,
+			struct lpfc_cqe *), unsigned long *delay)
 {
-	struct lpfc_queue *cq =
-		container_of(work, struct lpfc_queue, spwork);
-	struct lpfc_hba *phba = cq->phba;
 	struct lpfc_cqe *cqe;
 	bool workposted = false;
-	int ccount = 0;
+	int count = 0, consumed = 0;
+	bool arm = true;
+
+	/* default - no reschedule */
+	*delay = 0;
+
+	if (cmpxchg(&cq->queue_claimed, 0, 1) != 0)
+		goto rearm_and_exit;
 
 	/* Process all the entries to the CQ */
+	cqe = lpfc_sli4_cq_get(cq);
+	while (cqe) {
+#if defined(CONFIG_SCSI_LPFC_DEBUG_FS) && defined(BUILD_NVME)
+		if (phba->ktime_on)
+			cq->isr_timestamp = ktime_get_ns();
+		else
+			cq->isr_timestamp = 0;
+#endif
+		workposted |= handler(phba, cq, cqe);
+		__lpfc_sli4_consume_cqe(phba, cq, cqe);
+
+		consumed++;
+		if (!(++count % cq->max_proc_limit))
+			break;
+
+		if (!(count % cq->notify_interval)) {
+			phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed,
+						LPFC_QUEUE_NOARM);
+			consumed = 0;
+		}
+
+		cqe = lpfc_sli4_cq_get(cq);
+	}
+	if (count >= phba->cfg_cq_poll_threshold) {
+		*delay = 1;
+		arm = false;
+	}
+
+	/* Track the max number of CQEs processed in 1 EQ */
+	if (count > cq->CQ_max_cqe)
+		cq->CQ_max_cqe = count;
+
+	cq->assoc_qp->EQ_cqe_cnt += count;
+
+	/* Catch the no cq entry condition */
+	if (unlikely(count == 0))
+		lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+				"0369 No entry from completion queue "
+				"qid=%d\n", cq->queue_id);
+
+	cq->queue_claimed = 0;
+
+rearm_and_exit:
+	phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed,
+			arm ?  LPFC_QUEUE_REARM : LPFC_QUEUE_NOARM);
+
+	return workposted;
+}
+
+/**
+ * __lpfc_sli4_sp_process_cq - Process a slow-path event queue entry
+ * @cq: pointer to CQ to process
+ *
+ * This routine calls the cq processing routine with a handler specific
+ * to the type of queue bound to it.
+ *
+ * The CQ routine returns two values: the first is the calling status,
+ * which indicates whether work was queued to the background discovery
+ * thread. If true, the routine should wake up the discovery thread;
+ * the second is the delay parameter. If non-zero, rather than rearming
+ * the CQ and taking yet another interrupt, the CQ handler should be queued
+ * so that it is processed in a subsequent polling action. The value of
+ * the delay indicates when to reschedule it.
+ **/
+static void
+__lpfc_sli4_sp_process_cq(struct lpfc_queue *cq)
+{
+	struct lpfc_hba *phba = cq->phba;
+	unsigned long delay;
+	bool workposted = false;
+
+	/* Process and rearm the CQ */
 	switch (cq->type) {
 	case LPFC_MCQ:
-		while ((cqe = lpfc_sli4_cq_get(cq))) {
-			workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe);
-			if (!(++ccount % cq->entry_repost))
-				break;
-			cq->CQ_mbox++;
-		}
+		workposted |= __lpfc_sli4_process_cq(phba, cq,
+						lpfc_sli4_sp_handle_mcqe,
+						&delay);
 		break;
 	case LPFC_WCQ:
-		while ((cqe = lpfc_sli4_cq_get(cq))) {
-			if (cq->subtype == LPFC_FCP ||
-			    cq->subtype == LPFC_NVME) {
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-				if (phba->ktime_on)
-					cq->isr_timestamp = ktime_get_ns();
-				else
-					cq->isr_timestamp = 0;
-#endif
-				workposted |= lpfc_sli4_fp_handle_cqe(phba, cq,
-								       cqe);
-			} else {
-				workposted |= lpfc_sli4_sp_handle_cqe(phba, cq,
-								      cqe);
-			}
-			if (!(++ccount % cq->entry_repost))
-				break;
-		}
-
-		/* Track the max number of CQEs processed in 1 EQ */
-		if (ccount > cq->CQ_max_cqe)
-			cq->CQ_max_cqe = ccount;
+		if (cq->subtype == LPFC_FCP || cq->subtype == LPFC_NVME)
+			workposted |= __lpfc_sli4_process_cq(phba, cq,
+						lpfc_sli4_fp_handle_cqe,
+						&delay);
+		else
+			workposted |= __lpfc_sli4_process_cq(phba, cq,
+						lpfc_sli4_sp_handle_cqe,
+						&delay);
 		break;
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
@@ -13754,14 +13771,14 @@ lpfc_sli4_sp_process_cq(struct work_struct *work)
 		return;
 	}
 
-	/* Catch the no cq entry condition, log an error */
-	if (unlikely(ccount == 0))
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"0371 No entry from the CQ: identifier "
-				"(x%x), type (%d)\n", cq->queue_id, cq->type);
-
-	/* In any case, flash and re-arm the RCQ */
-	phba->sli4_hba.sli4_cq_release(cq, LPFC_QUEUE_REARM);
+	if (delay) {
+		if (!queue_delayed_work_on(cq->chann, phba->wq,
+					   &cq->sched_spwork, delay))
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0394 Cannot schedule soft IRQ "
+				"for cqid=%d on CPU %d\n",
+				cq->queue_id, cq->chann);
+	}
 
 	/* wake up worker thread if there are works to be done */
 	if (workposted)
@@ -13769,6 +13786,36 @@ lpfc_sli4_sp_process_cq(struct work_struct *work)
 }
 
 /**
+ * lpfc_sli4_sp_process_cq - slow-path work handler when started by
+ *   interrupt
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the slow-path handler.
+ **/
+static void
+lpfc_sli4_sp_process_cq(struct work_struct *work)
+{
+	struct lpfc_queue *cq = container_of(work, struct lpfc_queue, spwork);
+
+	__lpfc_sli4_sp_process_cq(cq);
+}
+
+/**
+ * lpfc_sli4_dly_sp_process_cq - slow-path work handler when started by timer
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the slow-path handler.
+ **/
+static void
+lpfc_sli4_dly_sp_process_cq(struct work_struct *work)
+{
+	struct lpfc_queue *cq = container_of(to_delayed_work(work),
+					struct lpfc_queue, sched_spwork);
+
+	__lpfc_sli4_sp_process_cq(cq);
+}
+
+/**
  * lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry
  * @phba: Pointer to HBA context object.
  * @cq: Pointer to associated CQ
@@ -13999,13 +14046,16 @@ out:
 
 /**
  * lpfc_sli4_fp_handle_cqe - Process fast-path work queue completion entry
+ * @phba: adapter with cq
  * @cq: Pointer to the completion queue.
  * @eqe: Pointer to fast-path completion queue entry.
  *
  * This routine process a fast-path work queue completion entry from fast-path
  * event queue for FCP command response completion.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
  **/
-static int
+static bool
 lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 			 struct lpfc_cqe *cqe)
 {
@@ -14072,10 +14122,11 @@ lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
  * completion queue, and then return.
  **/
 static void
-lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
-			uint32_t qidx)
+lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq,
+			 struct lpfc_eqe *eqe)
 {
 	struct lpfc_queue *cq = NULL;
+	uint32_t qidx = eq->hdwq;
 	uint16_t cqid, id;
 
 	if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) {
@@ -14090,6 +14141,14 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 	/* Get the reference to the corresponding CQ */
 	cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
 
+	/* Use the fast lookup method first */
+	if (cqid <= phba->sli4_hba.cq_max) {
+		cq = phba->sli4_hba.cq_lookup[cqid];
+		if (cq)
+			goto  work_cq;
+	}
+
+	/* Next check for NVMET completion */
 	if (phba->cfg_nvmet_mrq && phba->sli4_hba.nvmet_cqset) {
 		id = phba->sli4_hba.nvmet_cqset[0]->queue_id;
 		if ((cqid >= id) && (cqid < (id + phba->cfg_nvmet_mrq))) {
@@ -14099,20 +14158,6 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 		}
 	}
 
-	if (phba->sli4_hba.nvme_cq_map &&
-	    (cqid == phba->sli4_hba.nvme_cq_map[qidx])) {
-		/* Process NVME / NVMET command completion */
-		cq = phba->sli4_hba.nvme_cq[qidx];
-		goto  process_cq;
-	}
-
-	if (phba->sli4_hba.fcp_cq_map &&
-	    (cqid == phba->sli4_hba.fcp_cq_map[qidx])) {
-		/* Process FCP command completion */
-		cq = phba->sli4_hba.fcp_cq[qidx];
-		goto  process_cq;
-	}
-
 	if (phba->sli4_hba.nvmels_cq &&
 	    (cqid == phba->sli4_hba.nvmels_cq->queue_id)) {
 		/* Process NVME unsol rcv */
@@ -14121,7 +14166,8 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 
 	/* Otherwise this is a Slow path event */
 	if (cq == NULL) {
-		lpfc_sli4_sp_handle_eqe(phba, eqe, phba->sli4_hba.hba_eq[qidx]);
+		lpfc_sli4_sp_handle_eqe(phba, eqe,
+					phba->sli4_hba.hdwq[qidx].hba_eq);
 		return;
 	}
 
@@ -14134,10 +14180,8 @@ process_cq:
 		return;
 	}
 
-	/* Save EQ associated with this CQ */
-	cq->assoc_qp = phba->sli4_hba.hba_eq[qidx];
-
-	if (!queue_work(phba->wq, &cq->irqwork))
+work_cq:
+	if (!queue_work_on(cq->chann, phba->wq, &cq->irqwork))
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"0363 Cannot schedule soft IRQ "
 				"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
@@ -14145,219 +14189,73 @@ process_cq:
 }
 
 /**
- * lpfc_sli4_hba_process_cq - Process a fast-path event queue entry
- * @phba: Pointer to HBA context object.
- * @eqe: Pointer to fast-path event queue entry.
+ * __lpfc_sli4_hba_process_cq - Process a fast-path event queue entry
+ * @cq: Pointer to CQ to be processed
  *
- * This routine process a event queue entry from the fast-path event queue.
- * It will check the MajorCode and MinorCode to determine this is for a
- * completion event on a completion queue, if not, an error shall be logged
- * and just return. Otherwise, it will get to the corresponding completion
- * queue and process all the entries on the completion queue, rearm the
- * completion queue, and then return.
+ * This routine calls the cq processing routine with the handler for
+ * fast path CQEs.
+ *
+ * The CQ routine returns two values: the first is the calling status,
+ * which indicates whether work was queued to the background discovery
+ * thread. If true, the routine should wake up the discovery thread;
+ * the second is the delay parameter. If non-zero, rather than rearming
+ * the CQ and taking yet another interrupt, the CQ handler should be queued
+ * so that it is processed in a subsequent polling action. The value of
+ * the delay indicates when to reschedule it.
  **/
 static void
-lpfc_sli4_hba_process_cq(struct work_struct *work)
+__lpfc_sli4_hba_process_cq(struct lpfc_queue *cq)
 {
-	struct lpfc_queue *cq =
-		container_of(work, struct lpfc_queue, irqwork);
 	struct lpfc_hba *phba = cq->phba;
-	struct lpfc_cqe *cqe;
+	unsigned long delay;
 	bool workposted = false;
-	int ccount = 0;
-
-	/* Process all the entries to the CQ */
-	while ((cqe = lpfc_sli4_cq_get(cq))) {
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
-		if (phba->ktime_on)
-			cq->isr_timestamp = ktime_get_ns();
-		else
-			cq->isr_timestamp = 0;
-#endif
-		workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
-		if (!(++ccount % cq->entry_repost))
-			break;
-	}
 
-	/* Track the max number of CQEs processed in 1 EQ */
-	if (ccount > cq->CQ_max_cqe)
-		cq->CQ_max_cqe = ccount;
-	cq->assoc_qp->EQ_cqe_cnt += ccount;
+	/* process and rearm the CQ */
+	workposted |= __lpfc_sli4_process_cq(phba, cq, lpfc_sli4_fp_handle_cqe,
+					     &delay);
 
-	/* Catch the no cq entry condition */
-	if (unlikely(ccount == 0))
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"0369 No entry from fast-path completion "
-				"queue fcpcqid=%d\n", cq->queue_id);
-
-	/* In any case, flash and re-arm the CQ */
-	phba->sli4_hba.sli4_cq_release(cq, LPFC_QUEUE_REARM);
+	if (delay) {
+		if (!queue_delayed_work_on(cq->chann, phba->wq,
+					   &cq->sched_irqwork, delay))
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+				"0367 Cannot schedule soft IRQ "
+				"for cqid=%d on CPU %d\n",
+				cq->queue_id, cq->chann);
+	}
 
 	/* wake up worker thread if there are works to be done */
 	if (workposted)
 		lpfc_worker_wake_up(phba);
 }
 
-static void
-lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
-{
-	struct lpfc_eqe *eqe;
-
-	/* walk all the EQ entries and drop on the floor */
-	while ((eqe = lpfc_sli4_eq_get(eq)))
-		;
-
-	/* Clear and re-arm the EQ */
-	phba->sli4_hba.sli4_eq_release(eq, LPFC_QUEUE_REARM);
-}
-
-
 /**
- * lpfc_sli4_fof_handle_eqe - Process a Flash Optimized Fabric event queue
- *			     entry
- * @phba: Pointer to HBA context object.
- * @eqe: Pointer to fast-path event queue entry.
+ * lpfc_sli4_hba_process_cq - fast-path work handler when started by
+ *   interrupt
+ * @work: pointer to work element
  *
- * This routine process a event queue entry from the Flash Optimized Fabric
- * event queue.  It will check the MajorCode and MinorCode to determine this
- * is for a completion event on a completion queue, if not, an error shall be
- * logged and just return. Otherwise, it will get to the corresponding
- * completion queue and process all the entries on the completion queue, rearm
- * the completion queue, and then return.
+ * translates from the work handler and calls the fast-path handler.
  **/
 static void
-lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
+lpfc_sli4_hba_process_cq(struct work_struct *work)
 {
-	struct lpfc_queue *cq;
-	uint16_t cqid;
+	struct lpfc_queue *cq = container_of(work, struct lpfc_queue, irqwork);
 
-	if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"9147 Not a valid completion "
-				"event: majorcode=x%x, minorcode=x%x\n",
-				bf_get_le32(lpfc_eqe_major_code, eqe),
-				bf_get_le32(lpfc_eqe_minor_code, eqe));
-		return;
-	}
-
-	/* Get the reference to the corresponding CQ */
-	cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
-
-	/* Next check for OAS */
-	cq = phba->sli4_hba.oas_cq;
-	if (unlikely(!cq)) {
-		if (phba->sli.sli_flag & LPFC_SLI_ACTIVE)
-			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-					"9148 OAS completion queue "
-					"does not exist\n");
-		return;
-	}
-
-	if (unlikely(cqid != cq->queue_id)) {
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"9149 Miss-matched fast-path compl "
-				"queue id: eqcqid=%d, fcpcqid=%d\n",
-				cqid, cq->queue_id);
-		return;
-	}
-
-	/* Save EQ associated with this CQ */
-	cq->assoc_qp = phba->sli4_hba.fof_eq;
-
-	/* CQ work will be processed on CPU affinitized to this IRQ */
-	if (!queue_work(phba->wq, &cq->irqwork))
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"0367 Cannot schedule soft IRQ "
-				"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
-				cqid, cq->queue_id, smp_processor_id());
+	__lpfc_sli4_hba_process_cq(cq);
 }
 
 /**
- * lpfc_sli4_fof_intr_handler - HBA interrupt handler to SLI-4 device
- * @irq: Interrupt number.
- * @dev_id: The device context pointer.
+ * lpfc_sli4_dly_hba_process_cq - fast-path work handler when started by timer
+ * @work: pointer to work element
  *
- * This function is directly called from the PCI layer as an interrupt
- * service routine when device with SLI-4 interface spec is enabled with
- * MSI-X multi-message interrupt mode and there is a Flash Optimized Fabric
- * IOCB ring event in the HBA. However, when the device is enabled with either
- * MSI or Pin-IRQ interrupt mode, this function is called as part of the
- * device-level interrupt handler. When the PCI slot is in error recovery
- * or the HBA is undergoing initialization, the interrupt handler will not
- * process the interrupt. The Flash Optimized Fabric ring event are handled in
- * the intrrupt context. This function is called without any lock held.
- * It gets the hbalock to access and update SLI data structures. Note that,
- * the EQ to CQ are one-to-one map such that the EQ index is
- * equal to that of CQ index.
- *
- * This function returns IRQ_HANDLED when interrupt is handled else it
- * returns IRQ_NONE.
+ * translates from the work handler and calls the fast-path handler.
  **/
-irqreturn_t
-lpfc_sli4_fof_intr_handler(int irq, void *dev_id)
+static void
+lpfc_sli4_dly_hba_process_cq(struct work_struct *work)
 {
-	struct lpfc_hba *phba;
-	struct lpfc_hba_eq_hdl *hba_eq_hdl;
-	struct lpfc_queue *eq;
-	struct lpfc_eqe *eqe;
-	unsigned long iflag;
-	int ecount = 0;
-
-	/* Get the driver's phba structure from the dev_id */
-	hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id;
-	phba = hba_eq_hdl->phba;
-
-	if (unlikely(!phba))
-		return IRQ_NONE;
-
-	/* Get to the EQ struct associated with this vector */
-	eq = phba->sli4_hba.fof_eq;
-	if (unlikely(!eq))
-		return IRQ_NONE;
-
-	/* Check device state for handling interrupt */
-	if (unlikely(lpfc_intr_state_check(phba))) {
-		/* Check again for link_state with lock held */
-		spin_lock_irqsave(&phba->hbalock, iflag);
-		if (phba->link_state < LPFC_LINK_DOWN)
-			/* Flush, clear interrupt, and rearm the EQ */
-			lpfc_sli4_eq_flush(phba, eq);
-		spin_unlock_irqrestore(&phba->hbalock, iflag);
-		return IRQ_NONE;
-	}
-
-	/*
-	 * Process all the event on FCP fast-path EQ
-	 */
-	while ((eqe = lpfc_sli4_eq_get(eq))) {
-		lpfc_sli4_fof_handle_eqe(phba, eqe);
-		if (!(++ecount % eq->entry_repost))
-			break;
-		eq->EQ_processed++;
-	}
+	struct lpfc_queue *cq = container_of(to_delayed_work(work),
+					struct lpfc_queue, sched_irqwork);
 
-	/* Track the max number of EQEs processed in 1 intr */
-	if (ecount > eq->EQ_max_eqe)
-		eq->EQ_max_eqe = ecount;
-
-
-	if (unlikely(ecount == 0)) {
-		eq->EQ_no_entry++;
-
-		if (phba->intr_type == MSIX)
-			/* MSI-X treated interrupt served as no EQ share INT */
-			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
-					"9145 MSI-X interrupt with no EQE\n");
-		else {
-			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-					"9146 ISR interrupt with no EQE\n");
-			/* Non MSI-X treated on interrupt as EQ share INT */
-			return IRQ_NONE;
-		}
-	}
-	/* Always clear and re-arm the fast-path EQ */
-	phba->sli4_hba.sli4_eq_release(eq, LPFC_QUEUE_REARM);
-	return IRQ_HANDLED;
+	__lpfc_sli4_hba_process_cq(cq);
 }
 
 /**
@@ -14392,10 +14290,11 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 	struct lpfc_hba *phba;
 	struct lpfc_hba_eq_hdl *hba_eq_hdl;
 	struct lpfc_queue *fpeq;
-	struct lpfc_eqe *eqe;
 	unsigned long iflag;
 	int ecount = 0;
 	int hba_eqidx;
+	struct lpfc_eq_intr_info *eqi;
+	uint32_t icnt;
 
 	/* Get the driver's phba structure from the dev_id */
 	hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id;
@@ -14404,23 +14303,14 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 
 	if (unlikely(!phba))
 		return IRQ_NONE;
-	if (unlikely(!phba->sli4_hba.hba_eq))
+	if (unlikely(!phba->sli4_hba.hdwq))
 		return IRQ_NONE;
 
 	/* Get to the EQ struct associated with this vector */
-	fpeq = phba->sli4_hba.hba_eq[hba_eqidx];
+	fpeq = phba->sli4_hba.hdwq[hba_eqidx].hba_eq;
 	if (unlikely(!fpeq))
 		return IRQ_NONE;
 
-	if (lpfc_fcp_look_ahead) {
-		if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use))
-			phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-		else {
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-			return IRQ_NONE;
-		}
-	}
-
 	/* Check device state for handling interrupt */
 	if (unlikely(lpfc_intr_state_check(phba))) {
 		/* Check again for link_state with lock held */
@@ -14429,36 +14319,25 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 			/* Flush, clear interrupt, and rearm the EQ */
 			lpfc_sli4_eq_flush(phba, fpeq);
 		spin_unlock_irqrestore(&phba->hbalock, iflag);
-		if (lpfc_fcp_look_ahead)
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
 		return IRQ_NONE;
 	}
 
-	/*
-	 * Process all the event on FCP fast-path EQ
-	 */
-	while ((eqe = lpfc_sli4_eq_get(fpeq))) {
-		lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx);
-		if (!(++ecount % fpeq->entry_repost))
-			break;
-		fpeq->EQ_processed++;
-	}
+	eqi = phba->sli4_hba.eq_info;
+	icnt = this_cpu_inc_return(eqi->icnt);
+	fpeq->last_cpu = smp_processor_id();
 
-	/* Track the max number of EQEs processed in 1 intr */
-	if (ecount > fpeq->EQ_max_eqe)
-		fpeq->EQ_max_eqe = ecount;
+	if (icnt > LPFC_EQD_ISR_TRIGGER &&
+	    phba->cfg_irq_chann == 1 &&
+	    phba->cfg_auto_imax &&
+	    fpeq->q_mode != LPFC_MAX_AUTO_EQ_DELAY &&
+	    phba->sli.sli_flag & LPFC_SLI_USE_EQDR)
+		lpfc_sli4_mod_hba_eq_delay(phba, fpeq, LPFC_MAX_AUTO_EQ_DELAY);
 
-	/* Always clear and re-arm the fast-path EQ */
-	phba->sli4_hba.sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+	/* process and rearm the EQ */
+	ecount = lpfc_sli4_process_eq(phba, fpeq);
 
 	if (unlikely(ecount == 0)) {
 		fpeq->EQ_no_entry++;
-
-		if (lpfc_fcp_look_ahead) {
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-			return IRQ_NONE;
-		}
-
 		if (phba->intr_type == MSIX)
 			/* MSI-X treated interrupt served as no EQ share INT */
 			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
@@ -14468,9 +14347,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 			return IRQ_NONE;
 	}
 
-	if (lpfc_fcp_look_ahead)
-		atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-
 	return IRQ_HANDLED;
 } /* lpfc_sli4_fp_intr_handler */
 
@@ -14508,20 +14384,13 @@ lpfc_sli4_intr_handler(int irq, void *dev_id)
 	/*
 	 * Invoke fast-path host attention interrupt handling as appropriate.
 	 */
-	for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) {
+	for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
 		hba_irq_rc = lpfc_sli4_hba_intr_handler(irq,
 					&phba->sli4_hba.hba_eq_hdl[qidx]);
 		if (hba_irq_rc == IRQ_HANDLED)
 			hba_handled |= true;
 	}
 
-	if (phba->cfg_fof) {
-		hba_irq_rc = lpfc_sli4_fof_intr_handler(irq,
-					&phba->sli4_hba.hba_eq_hdl[qidx]);
-		if (hba_irq_rc == IRQ_HANDLED)
-			hba_handled |= true;
-	}
-
 	return (hba_handled == true) ? IRQ_HANDLED : IRQ_NONE;
 } /* lpfc_sli4_intr_handler */
 
@@ -14553,6 +14422,9 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
 		kfree(queue->rqbp);
 	}
 
+	if (!list_empty(&queue->cpu_list))
+		list_del(&queue->cpu_list);
+
 	if (!list_empty(&queue->wq_list))
 		list_del(&queue->wq_list);
 
@@ -14601,6 +14473,7 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	INIT_LIST_HEAD(&queue->wqfull_list);
 	INIT_LIST_HEAD(&queue->page_list);
 	INIT_LIST_HEAD(&queue->child_list);
+	INIT_LIST_HEAD(&queue->cpu_list);
 
 	/* Set queue parameters now.  If the system cannot provide memory
 	 * resources, the free routine needs to know what was allocated.
@@ -14633,8 +14506,10 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 	}
 	INIT_WORK(&queue->irqwork, lpfc_sli4_hba_process_cq);
 	INIT_WORK(&queue->spwork, lpfc_sli4_sp_process_cq);
+	INIT_DELAYED_WORK(&queue->sched_irqwork, lpfc_sli4_dly_hba_process_cq);
+	INIT_DELAYED_WORK(&queue->sched_spwork, lpfc_sli4_dly_sp_process_cq);
 
-	/* entry_repost will be set during q creation */
+	/* notify_interval will be set during q creation */
 
 	return queue;
 out_fail:
@@ -14671,43 +14546,76 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
 }
 
 /**
- * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on FCP EQs
- * @phba: HBA structure that indicates port to create a queue on.
- * @startq: The starting FCP EQ to modify
+ * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on EQs
+ * @phba: HBA structure that EQs are on.
+ * @startq: The starting EQ index to modify
+ * @numq: The number of EQs (consecutive indexes) to modify
+ * @usdelay: amount of delay
  *
- * This function sends an MODIFY_EQ_DELAY mailbox command to the HBA.
- * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ ID's to be
- * updated in one mailbox command.
+ * This function revises the EQ delay on 1 or more EQs. The EQ delay
+ * is set either by writing to a register (if supported by the SLI Port)
+ * or by mailbox command. The mailbox command allows several EQs to be
+ * updated at once.
  *
- * The @phba struct is used to send mailbox command to HBA. The @startq
- * is used to get the starting FCP EQ to change.
- * This function is asynchronous and will wait for the mailbox
- * command to finish before continuing.
+ * The @phba struct is used to send a mailbox command to HBA. The @startq
+ * is used to get the starting EQ index to change. The @numq value is
+ * used to specify how many consecutive EQ indexes, starting at EQ index,
+ * are to be changed. This function is asynchronous and will wait for any
+ * mailbox commands to finish before returning.
  *
- * On success this function will return a zero. If unable to allocate enough
- * memory this function will return -ENOMEM. If the queue create mailbox command
- * fails this function will return -ENXIO.
+ * This function does not return a value. If the mailbox command buffer
+ * cannot be allocated, or the mailbox command fails, the failure is
+ * logged and the function returns. Note: on a mailbox failure, some EQs
+ * may have had their delay multiplier changed.
  **/
-int
+void
 lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
-			 uint32_t numq, uint32_t imax)
+			 uint32_t numq, uint32_t usdelay)
 {
 	struct lpfc_mbx_modify_eq_delay *eq_delay;
 	LPFC_MBOXQ_t *mbox;
 	struct lpfc_queue *eq;
-	int cnt, rc, length, status = 0;
+	int cnt = 0, rc, length;
 	uint32_t shdr_status, shdr_add_status;
-	uint32_t result, val;
+	uint32_t dmult;
 	int qidx;
 	union lpfc_sli4_cfg_shdr *shdr;
-	uint16_t dmult;
 
-	if (startq >= phba->io_channel_irqs)
-		return 0;
+	if (startq >= phba->cfg_irq_chann)
+		return;
+
+	if (usdelay > 0xFFFF) {
+		lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP | LOG_NVME,
+				"6429 usdelay %d too large. Scaled down to "
+				"0xFFFF.\n", usdelay);
+		usdelay = 0xFFFF;
+	}
+
+	/* set values by EQ_DELAY register if supported */
+	if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
+		for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
+			eq = phba->sli4_hba.hdwq[qidx].hba_eq;
+			if (!eq)
+				continue;
+
+			lpfc_sli4_mod_hba_eq_delay(phba, eq, usdelay);
+
+			if (++cnt >= numq)
+				break;
+		}
+
+		return;
+	}
+
+	/* Otherwise, set values by mailbox cmd */
 
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
-	if (!mbox)
-		return -ENOMEM;
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_FCP | LOG_NVME,
+				"6428 Failed allocating mailbox cmd buffer."
+				" EQ delay was not set.\n");
+		return;
+	}
 	length = (sizeof(struct lpfc_mbx_modify_eq_delay) -
 		  sizeof(struct lpfc_sli4_cfg_mhdr));
 	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
@@ -14716,45 +14624,22 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 	eq_delay = &mbox->u.mqe.un.eq_delay;
 
 	/* Calculate delay multiper from maximum interrupt per second */
-	result = imax / phba->io_channel_irqs;
-	if (result > LPFC_DMULT_CONST || result == 0)
-		dmult = 0;
-	else
-		dmult = LPFC_DMULT_CONST/result - 1;
+	dmult = (usdelay * LPFC_DMULT_CONST) / LPFC_SEC_TO_USEC;
+	if (dmult)
+		dmult--;
 	if (dmult > LPFC_DMULT_MAX)
 		dmult = LPFC_DMULT_MAX;
 
-	cnt = 0;
-	for (qidx = startq; qidx < phba->io_channel_irqs; qidx++) {
-		eq = phba->sli4_hba.hba_eq[qidx];
+	for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
+		eq = phba->sli4_hba.hdwq[qidx].hba_eq;
 		if (!eq)
 			continue;
-		eq->q_mode = imax;
+		eq->q_mode = usdelay;
 		eq_delay->u.request.eq[cnt].eq_id = eq->queue_id;
 		eq_delay->u.request.eq[cnt].phase = 0;
 		eq_delay->u.request.eq[cnt].delay_multi = dmult;
-		cnt++;
-
-		/* q_mode is only used for auto_imax */
-		if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
-			/* Use EQ Delay Register method for q_mode */
-
-			/* Convert for EQ Delay register */
-			val =  phba->cfg_fcp_imax;
-			if (val) {
-				/* First, interrupts per sec per EQ */
-				val = phba->cfg_fcp_imax /
-					phba->io_channel_irqs;
-
-				/* us delay between each interrupt */
-				val = LPFC_SEC_TO_USEC / val;
-			}
-			eq->q_mode = val;
-		} else {
-			eq->q_mode = imax;
-		}
 
-		if (cnt >= numq)
+		if (++cnt >= numq)
 			break;
 	}
 	eq_delay->u.request.num_eq = cnt;
@@ -14772,10 +14657,9 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 				"2512 MODIFY_EQ_DELAY mailbox failed with "
 				"status x%x add_status x%x, mbx status x%x\n",
 				shdr_status, shdr_add_status, rc);
-		status = -ENXIO;
 	}
 	mempool_free(mbox, phba->mbox_mem_pool);
-	return status;
+	return;
 }
 
 /**
@@ -14900,8 +14784,8 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
 	if (eq->queue_id == 0xFFFF)
 		status = -ENXIO;
 	eq->host_index = 0;
-	eq->hba_index = 0;
-	eq->entry_repost = LPFC_EQ_REPOST;
+	eq->notify_interval = LPFC_EQ_NOTIFY_INTRVL;
+	eq->max_proc_limit = LPFC_EQ_MAX_PROC_LIMIT;
 
 	mempool_free(mbox, phba->mbox_mem_pool);
 	return status;
@@ -15039,10 +14923,13 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	cq->subtype = subtype;
 	cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
 	cq->assoc_qid = eq->queue_id;
+	cq->assoc_qp = eq;
 	cq->host_index = 0;
-	cq->hba_index = 0;
-	cq->entry_repost = LPFC_CQ_REPOST;
+	cq->notify_interval = LPFC_CQ_NOTIFY_INTRVL;
+	cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit, cq->entry_count);
 
+	if (cq->queue_id > phba->sli4_hba.cq_max)
+		phba->sli4_hba.cq_max = cq->queue_id;
 out:
 	mempool_free(mbox, phba->mbox_mem_pool);
 	return status;
@@ -15052,7 +14939,7 @@ out:
  * lpfc_cq_create_set - Create a set of Completion Queues on the HBA for MRQ
  * @phba: HBA structure that indicates port to create a queue on.
  * @cqp: The queue structure array to use to create the completion queues.
- * @eqp: The event queue array to bind these completion queues to.
+ * @hdwq: The hardware queue array with the EQ to bind completion queues to.
  *
  * This function creates a set of  completion queue, s to support MRQ
  * as detailed in @cqp, on a port,
@@ -15072,7 +14959,8 @@ out:
  **/
 int
 lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
-		   struct lpfc_queue **eqp, uint32_t type, uint32_t subtype)
+		   struct lpfc_sli4_hdw_queue *hdwq, uint32_t type,
+		   uint32_t subtype)
 {
 	struct lpfc_queue *cq;
 	struct lpfc_queue *eq;
@@ -15087,7 +14975,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 
 	/* sanity check on queue memory */
 	numcq = phba->cfg_nvmet_mrq;
-	if (!cqp || !eqp || !numcq)
+	if (!cqp || !hdwq || !numcq)
 		return -ENODEV;
 
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -15114,7 +15002,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 
 	for (idx = 0; idx < numcq; idx++) {
 		cq = cqp[idx];
-		eq = eqp[idx];
+		eq = hdwq[idx].hba_eq;
 		if (!cq || !eq) {
 			status = -ENOMEM;
 			goto out;
@@ -15247,9 +15135,11 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 		cq->type = type;
 		cq->subtype = subtype;
 		cq->assoc_qid = eq->queue_id;
+		cq->assoc_qp = eq;
 		cq->host_index = 0;
-		cq->hba_index = 0;
-		cq->entry_repost = LPFC_CQ_REPOST;
+		cq->notify_interval = LPFC_CQ_NOTIFY_INTRVL;
+		cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit,
+					 cq->entry_count);
 		cq->chann = idx;
 
 		rc = 0;
@@ -15287,6 +15177,8 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 	for (idx = 0; idx < numcq; idx++) {
 		cq = cqp[idx];
 		cq->queue_id = rc + idx;
+		if (cq->queue_id > phba->sli4_hba.cq_max)
+			phba->sli4_hba.cq_max = cq->queue_id;
 	}
 
 out:
@@ -15499,7 +15391,6 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq,
 	mq->subtype = subtype;
 	mq->host_index = 0;
 	mq->hba_index = 0;
-	mq->entry_repost = LPFC_MQ_REPOST;
 
 	/* link the mq onto the parent cq child list */
 	list_add_tail(&mq->list, &cq->child_list);
@@ -15765,7 +15656,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
 	wq->subtype = subtype;
 	wq->host_index = 0;
 	wq->hba_index = 0;
-	wq->entry_repost = LPFC_RELEASE_NOTIFICATION_INTERVAL;
+	wq->notify_interval = LPFC_WQ_NOTIFY_INTRVL;
 
 	/* link the wq onto the parent cq child list */
 	list_add_tail(&wq->list, &cq->child_list);
@@ -15959,7 +15850,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	hrq->subtype = subtype;
 	hrq->host_index = 0;
 	hrq->hba_index = 0;
-	hrq->entry_repost = LPFC_RQ_REPOST;
+	hrq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 	/* now create the data queue */
 	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
@@ -16052,7 +15943,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
 	drq->subtype = subtype;
 	drq->host_index = 0;
 	drq->hba_index = 0;
-	drq->entry_repost = LPFC_RQ_REPOST;
+	drq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 	/* link the header and data RQs onto the parent cq child list */
 	list_add_tail(&hrq->list, &cq->child_list);
@@ -16210,7 +16101,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		hrq->subtype = subtype;
 		hrq->host_index = 0;
 		hrq->hba_index = 0;
-		hrq->entry_repost = LPFC_RQ_REPOST;
+		hrq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 		drq->db_format = LPFC_DB_RING_FORMAT;
 		drq->db_regaddr = phba->sli4_hba.RQDBregaddr;
@@ -16219,7 +16110,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
 		drq->subtype = subtype;
 		drq->host_index = 0;
 		drq->hba_index = 0;
-		drq->entry_repost = LPFC_RQ_REPOST;
+		drq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
 
 		list_add_tail(&hrq->list, &cq->child_list);
 		list_add_tail(&drq->list, &cq->child_list);
@@ -16279,6 +16170,7 @@ lpfc_eq_destroy(struct lpfc_hba *phba, struct lpfc_queue *eq)
 	/* sanity check on queue memory */
 	if (!eq)
 		return -ENODEV;
+
 	mbox = mempool_alloc(eq->phba->mbox_mem_pool, GFP_KERNEL);
 	if (!mbox)
 		return -ENOMEM;
@@ -16828,22 +16720,21 @@ lpfc_sli4_post_sgl_list(struct lpfc_hba *phba,
 }
 
 /**
- * lpfc_sli4_post_scsi_sgl_block - post a block of scsi sgl list to firmware
+ * lpfc_sli4_post_io_sgl_block - post a block of nvme sgl list to firmware
  * @phba: pointer to lpfc hba data structure.
- * @sblist: pointer to scsi buffer list.
+ * @nblist: pointer to nvme buffer list.
  * @count: number of scsi buffers on the list.
  *
  * This routine is invoked to post a block of @count scsi sgl pages from a
- * SCSI buffer list @sblist to the HBA using non-embedded mailbox command.
+ * SCSI buffer list @nblist to the HBA using non-embedded mailbox command.
  * No Lock is held.
  *
  **/
-int
-lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
-			      struct list_head *sblist,
-			      int count)
+static int
+lpfc_sli4_post_io_sgl_block(struct lpfc_hba *phba, struct list_head *nblist,
+			    int count)
 {
-	struct lpfc_scsi_buf *psb;
+	struct lpfc_io_buf *lpfc_ncmd;
 	struct lpfc_mbx_post_uembed_sgl_page1 *sgl;
 	struct sgl_page_pairs *sgl_pg_pairs;
 	void *viraddr;
@@ -16861,25 +16752,25 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 		 sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t);
 	if (reqlen > SLI4_PAGE_SIZE) {
 		lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-				"0217 Block sgl registration required DMA "
+				"6118 Block sgl registration required DMA "
 				"size (%d) great than a page\n", reqlen);
 		return -ENOMEM;
 	}
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (!mbox) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"0283 Failed to allocate mbox cmd memory\n");
+				"6119 Failed to allocate mbox cmd memory\n");
 		return -ENOMEM;
 	}
 
 	/* Allocate DMA memory and set up the non-embedded mailbox command */
 	alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
-				LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen,
-				LPFC_SLI4_MBX_NEMBED);
+				    LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES,
+				    reqlen, LPFC_SLI4_MBX_NEMBED);
 
 	if (alloclen < reqlen) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-				"2561 Allocated DMA memory size (%d) is "
+				"6120 Allocated DMA memory size (%d) is "
 				"less than the requested DMA memory "
 				"size (%d)\n", alloclen, reqlen);
 		lpfc_sli4_mbox_cmd_free(phba, mbox);
@@ -16894,14 +16785,15 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 	sgl_pg_pairs = &sgl->sgl_pg_pairs;
 
 	pg_pairs = 0;
-	list_for_each_entry(psb, sblist, list) {
+	list_for_each_entry(lpfc_ncmd, nblist, list) {
 		/* Set up the sge entry */
 		sgl_pg_pairs->sgl_pg0_addr_lo =
-			cpu_to_le32(putPaddrLow(psb->dma_phys_bpl));
+			cpu_to_le32(putPaddrLow(lpfc_ncmd->dma_phys_sgl));
 		sgl_pg_pairs->sgl_pg0_addr_hi =
-			cpu_to_le32(putPaddrHigh(psb->dma_phys_bpl));
+			cpu_to_le32(putPaddrHigh(lpfc_ncmd->dma_phys_sgl));
 		if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
-			pdma_phys_bpl1 = psb->dma_phys_bpl + SGL_PAGE_SIZE;
+			pdma_phys_bpl1 = lpfc_ncmd->dma_phys_sgl +
+						SGL_PAGE_SIZE;
 		else
 			pdma_phys_bpl1 = 0;
 		sgl_pg_pairs->sgl_pg1_addr_lo =
@@ -16910,7 +16802,7 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 			cpu_to_le32(putPaddrHigh(pdma_phys_bpl1));
 		/* Keep the first xritag on the list */
 		if (pg_pairs == 0)
-			xritag_start = psb->cur_iocbq.sli4_xritag;
+			xritag_start = lpfc_ncmd->cur_iocbq.sli4_xritag;
 		sgl_pg_pairs++;
 		pg_pairs++;
 	}
@@ -16919,20 +16811,20 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 	/* Perform endian conversion if necessary */
 	sgl->word0 = cpu_to_le32(sgl->word0);
 
-	if (!phba->sli4_hba.intr_enable)
+	if (!phba->sli4_hba.intr_enable) {
 		rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
-	else {
+	} else {
 		mbox_tmo = lpfc_mbox_tmo_val(phba, mbox);
 		rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
 	}
-	shdr = (union lpfc_sli4_cfg_shdr *) &sgl->cfg_shdr;
+	shdr = (union lpfc_sli4_cfg_shdr *)&sgl->cfg_shdr;
 	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
 	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
 	if (rc != MBX_TIMEOUT)
 		lpfc_sli4_mbox_cmd_free(phba, mbox);
 	if (shdr_status || shdr_add_status || rc) {
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"2564 POST_SGL_BLOCK mailbox command failed "
+				"6125 POST_SGL_BLOCK mailbox command failed "
 				"status x%x add_status x%x mbx status x%x\n",
 				shdr_status, shdr_add_status, rc);
 		rc = -ENXIO;
@@ -16941,6 +16833,134 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
 }
 
 /**
+ * lpfc_sli4_post_io_sgl_list - Post blocks of nvme buffer sgls from a list
+ * @phba: pointer to lpfc hba data structure.
+ * @post_nblist: pointer to the nvme buffer list.
+ *
+ * This routine walks a list of nvme buffers that was passed in. It attempts
+ * to construct blocks of nvme buffer sgls which contains contiguous xris and
+ * uses the non-embedded SGL block post mailbox commands to post to the port.
+ * For single NVME buffer sgl with non-contiguous xri, if any, it shall use
+ * embedded SGL post mailbox command for posting. The @post_nblist passed in
+ * must be local list, thus no lock is needed when manipulate the list.
+ *
+ * Returns: -EINVAL if @sb_count <= 0, else number of buffers posted (0 = none).
+ **/
+int
+lpfc_sli4_post_io_sgl_list(struct lpfc_hba *phba,
+			   struct list_head *post_nblist, int sb_count)
+{
+	struct lpfc_io_buf *lpfc_ncmd, *lpfc_ncmd_next;
+	int status, sgl_size;
+	int post_cnt = 0, block_cnt = 0, num_posting = 0, num_posted = 0;
+	dma_addr_t pdma_phys_sgl1;
+	int last_xritag = NO_XRI;
+	int cur_xritag;
+	LIST_HEAD(prep_nblist);
+	LIST_HEAD(blck_nblist);
+	LIST_HEAD(nvme_nblist);
+
+	/* sanity check */
+	if (sb_count <= 0)
+		return -EINVAL;
+
+	sgl_size = phba->cfg_sg_dma_buf_size;
+	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, post_nblist, list) {
+		list_del_init(&lpfc_ncmd->list);
+		block_cnt++;
+		if ((last_xritag != NO_XRI) &&
+		    (lpfc_ncmd->cur_iocbq.sli4_xritag != last_xritag + 1)) {
+			/* a hole in xri block, form a sgl posting block */
+			list_splice_init(&prep_nblist, &blck_nblist);
+			post_cnt = block_cnt - 1;
+			/* prepare list for next posting block */
+			list_add_tail(&lpfc_ncmd->list, &prep_nblist);
+			block_cnt = 1;
+		} else {
+			/* prepare list for next posting block */
+			list_add_tail(&lpfc_ncmd->list, &prep_nblist);
+			/* enough sgls for non-embed sgl mbox command */
+			if (block_cnt == LPFC_NEMBED_MBOX_SGL_CNT) {
+				list_splice_init(&prep_nblist, &blck_nblist);
+				post_cnt = block_cnt;
+				block_cnt = 0;
+			}
+		}
+		num_posting++;
+		last_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag;
+
+		/* end of repost sgl list condition for NVME buffers */
+		if (num_posting == sb_count) {
+			if (post_cnt == 0) {
+				/* last sgl posting block */
+				list_splice_init(&prep_nblist, &blck_nblist);
+				post_cnt = block_cnt;
+			} else if (block_cnt == 1) {
+				/* last single sgl with non-contiguous xri */
+				if (sgl_size > SGL_PAGE_SIZE)
+					pdma_phys_sgl1 =
+						lpfc_ncmd->dma_phys_sgl +
+						SGL_PAGE_SIZE;
+				else
+					pdma_phys_sgl1 = 0;
+				cur_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag;
+				status = lpfc_sli4_post_sgl(
+						phba, lpfc_ncmd->dma_phys_sgl,
+						pdma_phys_sgl1, cur_xritag);
+				if (status) {
+					/* Post error.  Buffer unavailable. */
+					lpfc_ncmd->flags |=
+						LPFC_SBUF_NOT_POSTED;
+				} else {
+					/* Post success. Buffer available. */
+					lpfc_ncmd->flags &=
+						~LPFC_SBUF_NOT_POSTED;
+					lpfc_ncmd->status = IOSTAT_SUCCESS;
+					num_posted++;
+				}
+				/* posted or not, put on NVME buffer sgl list */
+				list_add_tail(&lpfc_ncmd->list, &nvme_nblist);
+			}
+		}
+
+		/* continue until a nembed page worth of sgls */
+		if (post_cnt == 0)
+			continue;
+
+		/* post block of NVME buffer list sgls */
+		status = lpfc_sli4_post_io_sgl_block(phba, &blck_nblist,
+						     post_cnt);
+
+		/* don't reset xritag due to hole in xri block */
+		if (block_cnt == 0)
+			last_xritag = NO_XRI;
+
+		/* reset NVME buffer post count for next round of posting */
+		post_cnt = 0;
+
+		/* put posted NVME buffer-sgl on NVME buffer sgl list */
+		while (!list_empty(&blck_nblist)) {
+			list_remove_head(&blck_nblist, lpfc_ncmd,
+					 struct lpfc_io_buf, list);
+			if (status) {
+				/* Post error.  Mark buffer unavailable. */
+				lpfc_ncmd->flags |= LPFC_SBUF_NOT_POSTED;
+			} else {
+				/* Post success, Mark buffer available. */
+				lpfc_ncmd->flags &= ~LPFC_SBUF_NOT_POSTED;
+				lpfc_ncmd->status = IOSTAT_SUCCESS;
+				num_posted++;
+			}
+			list_add_tail(&lpfc_ncmd->list, &nvme_nblist);
+		}
+	}
+	/* Push NVME buffers with sgl posted to the available list */
+	lpfc_io_buf_replenish(phba, &nvme_nblist);
+
+	return num_posted;
+}
+
+/**
  * lpfc_fc_frame_check - Check that this frame is a valid frame to handle
  * @phba: pointer to lpfc_hba struct that the frame was received on
  * @fc_hdr: A pointer to the FC Header data (In Big Endian Format)
@@ -19500,7 +19520,7 @@ lpfc_drain_txq(struct lpfc_hba *phba)
 
 	if (phba->link_flag & LS_MDS_LOOPBACK) {
 		/* MDS WQE are posted only to first WQ*/
-		wq = phba->sli4_hba.fcp_wq[0];
+		wq = phba->sli4_hba.hdwq[0].fcp_wq;
 		if (unlikely(!wq))
 			return 0;
 		pring = wq->pring;
@@ -19708,7 +19728,7 @@ lpfc_wqe_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeq,
  * @pwqe: Pointer to command WQE.
  **/
 int
-lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
+lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
 		    struct lpfc_iocbq *pwqe)
 {
 	union lpfc_wqe128 *wqe = &pwqe->wqe;
@@ -19722,7 +19742,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 	/* NVME_LS and NVME_LS ABTS requests. */
 	if (pwqe->iocb_flag & LPFC_IO_NVME_LS) {
 		pring =  phba->sli4_hba.nvmels_wq->pring;
-		spin_lock_irqsave(&pring->ring_lock, iflags);
+		lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+					  qp, wq_access);
 		sglq = __lpfc_sli_get_els_sglq(phba, pwqe);
 		if (!sglq) {
 			spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19750,12 +19771,13 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 	/* NVME_FCREQ and NVME_ABTS requests */
 	if (pwqe->iocb_flag & LPFC_IO_NVME) {
 		/* Get the IO distribution (hba_wqidx) for WQ assignment. */
-		pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring;
+		wq = qp->nvme_wq;
+		pring = wq->pring;
 
-		spin_lock_irqsave(&pring->ring_lock, iflags);
-		wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx];
-		bf_set(wqe_cqid, &wqe->generic.wqe_com,
-		      phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id);
+		bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
+
+		lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+					  qp, wq_access);
 		ret = lpfc_sli4_wq_put(wq, wqe);
 		if (ret) {
 			spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19769,9 +19791,9 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 	/* NVMET requests */
 	if (pwqe->iocb_flag & LPFC_IO_NVMET) {
 		/* Get the IO distribution (hba_wqidx) for WQ assignment. */
-		pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring;
+		wq = qp->nvme_wq;
+		pring = wq->pring;
 
-		spin_lock_irqsave(&pring->ring_lock, iflags);
 		ctxp = pwqe->context2;
 		sglq = ctxp->ctxbuf->sglq;
 		if (pwqe->sli4_xritag ==  NO_XRI) {
@@ -19780,9 +19802,10 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 		}
 		bf_set(wqe_xri_tag, &pwqe->wqe.xmit_bls_rsp.wqe_com,
 		       pwqe->sli4_xritag);
-		wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx];
-		bf_set(wqe_cqid, &wqe->generic.wqe_com,
-		      phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id);
+		bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
+
+		lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+					  qp, wq_access);
 		ret = lpfc_sli4_wq_put(wq, wqe);
 		if (ret) {
 			spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19794,3 +19817,647 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
 	}
 	return WQE_ERROR;
 }
+
+#ifdef LPFC_MXP_STAT
+/**
+ * lpfc_snapshot_mxp - Snapshot pbl, pvt and busy count
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: index of the HWQ whose multi-XRI pool is sampled.
+ *
+ * The purpose of this routine is to take a snapshot of pbl, pvt and busy count
+ * 15 seconds after a test case starts running. Busy count is the txcmplq_cnt
+ * of the HWQ's FCP WQ plus, if present, that of its NVME WQ.
+ *
+ * The user should call lpfc_debugfs_multixripools_write before a test case to
+ * clear stat_snapshot_taken. While the test case runs, stat_snapshot_taken is
+ * incremented on every call (made from the heartbeat timer); the snapshot is
+ * taken on the call that finds it equal to LPFC_MXP_SNAPSHOT_TAKEN.
+ **/
+void lpfc_snapshot_mxp(struct lpfc_hba *phba, u32 hwqid)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_pbl_pool *pbl_pool;
+	u32 txcmplq_cnt;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	multixri_pool = qp->p_multixri_pool;
+	if (!multixri_pool)
+		return;
+
+	if (multixri_pool->stat_snapshot_taken == LPFC_MXP_SNAPSHOT_TAKEN) {
+		pvt_pool = &qp->p_multixri_pool->pvt_pool;
+		pbl_pool = &qp->p_multixri_pool->pbl_pool;
+		txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+		if (qp->nvme_wq)
+			txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+
+		multixri_pool->stat_pbl_count = pbl_pool->count;
+		multixri_pool->stat_pvt_count = pvt_pool->count;
+		multixri_pool->stat_busy_count = txcmplq_cnt;
+	}
+
+	multixri_pool->stat_snapshot_taken++;
+}
+#endif
+
+/**
+ * lpfc_adjust_pvt_pool_count - Adjust private pool count
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: index of the HWQ whose private pool is examined.
+ *
+ * If io_req_count changed since the previous call the pool is busy and only
+ * the count is recorded; otherwise XRIs are moved from private to public pool.
+ **/
+void lpfc_adjust_pvt_pool_count(struct lpfc_hba *phba, u32 hwqid)
+{
+	struct lpfc_multixri_pool *multixri_pool;
+	u32 io_req_count;
+	u32 prev_io_req_count;
+
+	multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+	if (!multixri_pool)
+		return;
+	io_req_count = multixri_pool->io_req_count;
+	prev_io_req_count = multixri_pool->prev_io_req_count;
+
+	if (prev_io_req_count != io_req_count) {
+		/* Private pool is busy */
+		multixri_pool->prev_io_req_count = io_req_count;
+	} else {
+		/* Private pool is not busy.
+		 * Move XRIs from private to public pool.
+		 */
+		lpfc_move_xri_pvt_to_pbl(phba, hwqid);
+	}
+}
+
+/**
+ * lpfc_adjust_high_watermark - Adjust high watermark
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: index of the HWQ whose private pool is adjusted.
+ *
+ * Sets pvt_pool high watermark to the outstanding XRI count (txcmplq_cnt plus
+ * abts_*_io_bufs of the HWQ), clamped to the range [xri_limit / 2, xri_limit].
+ **/
+void lpfc_adjust_high_watermark(struct lpfc_hba *phba, u32 hwqid)
+{
+	u32 new_watermark;
+	u32 watermark_max;
+	u32 watermark_min;
+	u32 xri_limit;
+	u32 txcmplq_cnt;
+	u32 abts_io_bufs;
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_sli4_hdw_queue *qp;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	multixri_pool = qp->p_multixri_pool;
+	if (!multixri_pool)
+		return;
+	xri_limit = multixri_pool->xri_limit;
+
+	watermark_max = xri_limit;
+	watermark_min = xri_limit / 2;
+
+	txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+	abts_io_bufs = qp->abts_scsi_io_bufs;
+	if (qp->nvme_wq) {
+		txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+		abts_io_bufs += qp->abts_nvme_io_bufs;
+	}
+
+	new_watermark = txcmplq_cnt + abts_io_bufs;
+	new_watermark = min(watermark_max, new_watermark);
+	new_watermark = max(watermark_min, new_watermark);
+	multixri_pool->pvt_pool.high_watermark = new_watermark;
+
+#ifdef LPFC_MXP_STAT
+	multixri_pool->stat_max_hwm = max(multixri_pool->stat_max_hwm,
+					  new_watermark);
+#endif
+}
+
+/**
+ * lpfc_move_xri_pvt_to_pbl - Move some XRIs from private to public pool
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: index of the HWQ whose pools are rebalanced.
+ *
+ * This routine is called from heartbeat timer when pvt_pool is idle.
+ * Each call does one of two steps while holding pbl_pool and pvt_pool locks.
+ * Step 1 (count > low_watermark): keep low_watermark (min 1) XRIs, move rest.
+ * Step 2 (otherwise): move all remaining XRIs to the public pool.
+ **/
+void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid)
+{
+	struct lpfc_pbl_pool *pbl_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+	struct list_head tmp_list;
+	u32 tmp_count;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	pbl_pool = &qp->p_multixri_pool->pbl_pool;
+	pvt_pool = &qp->p_multixri_pool->pvt_pool;
+	tmp_count = 0;
+
+	lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag, qp, mv_to_pub_pool);
+	lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_from_pvt_pool);
+
+	if (pvt_pool->count > pvt_pool->low_watermark) {
+		/* Step 1: move (all - low_watermark) from pvt_pool
+		 * to pbl_pool
+		 */
+
+		/* Park the first low_watermark bufs of pvt_pool on tmp_list */
+		INIT_LIST_HEAD(&tmp_list);
+		list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+					 &pvt_pool->list, list) {
+			list_move_tail(&lpfc_ncmd->list, &tmp_list);
+			tmp_count++;
+			if (tmp_count >= pvt_pool->low_watermark)
+				break;
+		}
+
+		/* Move the remaining bufs from pvt_pool to pbl_pool */
+		list_splice_init(&pvt_pool->list, &pbl_pool->list);
+
+		/* Put the parked bufs back on the now-empty pvt_pool list */
+		list_splice(&tmp_list, &pvt_pool->list);
+
+		pbl_pool->count += (pvt_pool->count - tmp_count);
+		pvt_pool->count = tmp_count;
+	} else {
+		/* Step 2: move the rest from pvt_pool to pbl_pool */
+		list_splice_init(&pvt_pool->list, &pbl_pool->list);
+		pbl_pool->count += pvt_pool->count;
+		pvt_pool->count = 0;
+	}
+
+	spin_unlock(&pvt_pool->lock);
+	spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+}
+
+/**
+ * _lpfc_move_xri_pbl_to_pvt - Move some XRIs from public to private pool
+ * @phba: pointer to lpfc hba data structure
+ * @qp: HWQ handed to the lpfc_qp_spin_lock() wrapper taking pvt_pool->lock
+ * @pbl_pool: specified public free XRI pool
+ * @pvt_pool: specified private free XRI pool
+ * @count: number of XRIs to move
+ *
+ * This routine tries to move up to count free common bufs from the specified
+ * pbl_pool to the specified pvt_pool. pbl_pool->lock is only try-locked, so a
+ * busy pool is skipped. count must be non-zero (0 wraps and moves every buf).
+ *
+ * Return:
+ *   true - if pbl_pool->count was non-zero and bufs were moved to pvt_pool
+ *   false - if the specified pbl_pool is empty or locked by someone else
+ **/
+static bool
+_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
+			  struct lpfc_pbl_pool *pbl_pool,
+			  struct lpfc_pvt_pool *pvt_pool, u32 count)
+{
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+	int ret;
+
+	ret = spin_trylock_irqsave(&pbl_pool->lock, iflag);
+	if (ret) {
+		if (pbl_pool->count) {
+			/* Move a batch of XRIs from public to private pool */
+			lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_to_pvt_pool);
+			list_for_each_entry_safe(lpfc_ncmd,
+						 lpfc_ncmd_next,
+						 &pbl_pool->list,
+						 list) {
+				list_move_tail(&lpfc_ncmd->list,
+					       &pvt_pool->list);
+				pvt_pool->count++;
+				pbl_pool->count--;
+				count--;
+				if (count == 0)
+					break;
+			}
+
+			spin_unlock(&pvt_pool->lock);
+			spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+			return true;
+		}
+		spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+	}
+
+	return false;
+}
+
+/**
+ * lpfc_move_xri_pbl_to_pvt - Move some XRIs from public to private pool
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: index of the HWQ whose private pool is refilled.
+ * @count: number of XRIs to move
+ *
+ * This routine tries to find some free common bufs in one of public pools with
+ * Round Robin method. The local pbl_pool is tried first, then the HWQs after
+ * rrb_next_hwqid (where the last search stopped), wrapping around once. From
+ * the first public pool that is unlocked and non-empty, up to count free
+ * common bufs are moved to the private pool on hwqid.
+ **/
+void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 count)
+{
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_multixri_pool *next_multixri_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_pbl_pool *pbl_pool;
+	struct lpfc_sli4_hdw_queue *qp;
+	u32 next_hwqid;
+	u32 hwq_count;
+	int ret;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	multixri_pool = qp->p_multixri_pool;
+	pvt_pool = &multixri_pool->pvt_pool;
+	pbl_pool = &multixri_pool->pbl_pool;
+
+	/* Check if local pbl_pool is available */
+	ret = _lpfc_move_xri_pbl_to_pvt(phba, qp, pbl_pool, pvt_pool, count);
+	if (ret) {
+#ifdef LPFC_MXP_STAT
+		multixri_pool->local_pbl_hit_count++;
+#endif
+		return;
+	}
+
+	hwq_count = phba->cfg_hdw_queue;
+
+	/* Get the next hwqid which was found last time */
+	next_hwqid = multixri_pool->rrb_next_hwqid;
+
+	do {
+		/* Go to next hwq */
+		next_hwqid = (next_hwqid + 1) % hwq_count;
+
+		next_multixri_pool =
+			phba->sli4_hba.hdwq[next_hwqid].p_multixri_pool;
+		pbl_pool = &next_multixri_pool->pbl_pool;
+
+		/* Check if the public free xri pool is available */
+		ret = _lpfc_move_xri_pbl_to_pvt(
+			phba, qp, pbl_pool, pvt_pool, count);
+
+		/* Exit do-while loop on success or once all hwqids are checked */
+	} while (!ret && next_hwqid != multixri_pool->rrb_next_hwqid);
+
+	/* Starting point for the next time */
+	multixri_pool->rrb_next_hwqid = next_hwqid;
+
+	if (!ret) {
+		/* stats: no public pool supplied XRIs (all empty or locked) */
+		multixri_pool->pbl_empty_count++;
+	}
+
+#ifdef LPFC_MXP_STAT
+	if (ret) {
+		if (next_hwqid == hwqid)
+			multixri_pool->local_pbl_hit_count++;
+		else
+			multixri_pool->other_pbl_hit_count++;
+	}
+#endif
+}
+
+/**
+ * lpfc_keep_pvt_pool_above_lowwm - Keep pvt_pool above low watermark
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: index of the HWQ whose private pool is checked.
+ *
+ * This routine requests a batch (XRI_BATCH) of XRIs from the public pools if
+ * the pvt_pool count is below its low watermark.
+ **/
+void lpfc_keep_pvt_pool_above_lowwm(struct lpfc_hba *phba, u32 hwqid)
+{
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+
+	multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+	pvt_pool = &multixri_pool->pvt_pool;
+
+	if (pvt_pool->count < pvt_pool->low_watermark)
+		lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
+}
+
+/**
+ * lpfc_release_io_buf - Return one IO buf back to free pool
+ * @phba: pointer to lpfc hba data structure.
+ * @lpfc_ncmd: IO buf to be returned.
+ * @qp: HWQ whose pools or put list receive the IO buf.
+ *
+ * This routine returns one IO buf back to free pool. With cfg_xri_rebalancing
+ * set, an expedite IO buf goes to the expedite pool and any other IO buf to
+ * pbl_pool or pvt_pool based on watermark and xri_limit (or to no list at all
+ * if the HWQ has no multixri pool). Otherwise the IO buf is returned to
+ * lpfc_io_buf_list_put.
+ **/
+void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
+			 struct lpfc_sli4_hdw_queue *qp)
+{
+	unsigned long iflag;
+	struct lpfc_pbl_pool *pbl_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_epd_pool *epd_pool;
+	u32 txcmplq_cnt;
+	u32 xri_owned;
+	u32 xri_limit;
+	u32 abts_io_bufs;
+
+	/* MUST zero fields if buffer is reused by another protocol */
+	lpfc_ncmd->nvmeCmd = NULL;
+	lpfc_ncmd->cur_iocbq.wqe_cmpl = NULL;
+	lpfc_ncmd->cur_iocbq.iocb_cmpl = NULL;
+
+	if (phba->cfg_xri_rebalancing) {
+		if (lpfc_ncmd->expedite) {
+			/* Return to expedite pool */
+			epd_pool = &phba->epd_pool;
+			spin_lock_irqsave(&epd_pool->lock, iflag);
+			list_add_tail(&lpfc_ncmd->list, &epd_pool->list);
+			epd_pool->count++;
+			spin_unlock_irqrestore(&epd_pool->lock, iflag);
+			return;
+		}
+
+		/* Avoid invalid access if an IO sneaks in and is being rejected
+		 * just _after_ xri pools are destroyed in lpfc_offline.
+		 * Nothing much can be done at this point.
+		 */
+		if (!qp->p_multixri_pool)
+			return;
+
+		pbl_pool = &qp->p_multixri_pool->pbl_pool;
+		pvt_pool = &qp->p_multixri_pool->pvt_pool;
+
+		txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+		abts_io_bufs = qp->abts_scsi_io_bufs;
+		if (qp->nvme_wq) {
+			txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+			abts_io_bufs += qp->abts_nvme_io_bufs;
+		}
+
+		xri_owned = pvt_pool->count + txcmplq_cnt + abts_io_bufs;
+		xri_limit = qp->p_multixri_pool->xri_limit;
+
+#ifdef LPFC_MXP_STAT
+		if (xri_owned <= xri_limit)
+			qp->p_multixri_pool->below_limit_count++;
+		else
+			qp->p_multixri_pool->above_limit_count++;
+#endif
+
+		/* pvt_pool if below low watermark, or if xri_owned < xri_limit
+		 * and below high watermark; pbl_pool otherwise.
+		 */
+		if ((pvt_pool->count < pvt_pool->low_watermark) ||
+		    (xri_owned < xri_limit &&
+		     pvt_pool->count < pvt_pool->high_watermark)) {
+			lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag,
+						  qp, free_pvt_pool);
+			list_add_tail(&lpfc_ncmd->list,
+				      &pvt_pool->list);
+			pvt_pool->count++;
+			spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+		} else {
+			lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag,
+						  qp, free_pub_pool);
+			list_add_tail(&lpfc_ncmd->list,
+				      &pbl_pool->list);
+			pbl_pool->count++;
+			spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+		}
+	} else {
+		lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag,
+					  qp, free_xri);
+		list_add_tail(&lpfc_ncmd->list,
+			      &qp->lpfc_io_buf_list_put);
+		qp->put_io_bufs++;
+		spin_unlock_irqrestore(&qp->io_buf_list_put_lock,
+				       iflag);
+	}
+}
+
+/**
+ * lpfc_get_io_buf_from_private_pool - Get one free IO buf from private pool
+ * @phba: pointer to lpfc hba data structure.
+ * @qp: HWQ handed to the lpfc_qp_spin_lock_irqsave() wrapper on pvt_pool->lock.
+ * @pvt_pool: pointer to private pool data structure.
+ * @ndlp: pointer to lpfc nodelist data structure.
+ *
+ * Takes the first IO buf in private pool for which lpfc_test_rrq_active()
+ * returns zero for @ndlp and the buf's sli4_lxritag.
+ *
+ * Return: pointer to one free IO buf, or NULL if no such buf is in the pool
+ **/
+static struct lpfc_io_buf *
+lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
+				  struct lpfc_sli4_hdw_queue *qp,
+				  struct lpfc_pvt_pool *pvt_pool,
+				  struct lpfc_nodelist *ndlp)
+{
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+
+	lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag, qp, alloc_pvt_pool);
+	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+				 &pvt_pool->list, list) {
+		if (lpfc_test_rrq_active(
+			phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag))
+			continue;
+		list_del(&lpfc_ncmd->list);
+		pvt_pool->count--;
+		spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+		return lpfc_ncmd;
+	}
+	spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+
+	return NULL;
+}
+
+/**
+ * lpfc_get_io_buf_from_expedite_pool - Get one free IO buf from expedite pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine removes the first IO buf from phba->epd_pool under its lock.
+ *
+ * Return:
+ *   pointer to one free IO buf - if expedite pool is not empty
+ *   NULL - if expedite pool is empty
+ **/
+static struct lpfc_io_buf *
+lpfc_get_io_buf_from_expedite_pool(struct lpfc_hba *phba)
+{
+	struct lpfc_io_buf *lpfc_ncmd;
+	struct lpfc_io_buf *lpfc_ncmd_next;
+	unsigned long iflag;
+	struct lpfc_epd_pool *epd_pool;
+
+	epd_pool = &phba->epd_pool;
+	lpfc_ncmd = NULL;
+
+	spin_lock_irqsave(&epd_pool->lock, iflag);
+	if (epd_pool->count > 0) {
+		list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+					 &epd_pool->list, list) {
+			list_del(&lpfc_ncmd->list);
+			epd_pool->count--;
+			break;	/* only the head entry is wanted */
+		}
+	}
+	spin_unlock_irqrestore(&epd_pool->lock, iflag);
+
+	return lpfc_ncmd;
+}
+
+/**
+ * lpfc_get_io_buf_from_multixri_pools - Get one free IO buf
+ * @phba: pointer to lpfc hba data structure.
+ * @ndlp: pointer to lpfc nodelist data structure.
+ * @hwqid: index of the HWQ whose multi-XRI pool is used.
+ * @expedite: 1 means this request is urgent.
+ *
+ * This routine will do the following actions and then return a pointer to
+ * one free IO buf.
+ *
+ * 1. If the private pool is empty, move up to XRI_BATCH XRIs from public to
+ *    private pool.
+ * 2. Get one XRI from private free xri pool.
+ * 3. If we fail to get one from pvt_pool and this is an expedite request,
+ *    get one free xri from expedite pool.
+ *
+ * Note: ndlp is only used on SCSI side for RRQ testing.
+ *       The caller should pass NULL for ndlp on NVME side.
+ *
+ * Return:
+ *   pointer to one free IO buf - taken from private or expedite pool
+ *   NULL - if neither pool could supply an IO buf
+ **/
+static struct lpfc_io_buf *
+lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba,
+				    struct lpfc_nodelist *ndlp,
+				    int hwqid, int expedite)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_multixri_pool *multixri_pool;
+	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_io_buf *lpfc_ncmd;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	lpfc_ncmd = NULL;
+	multixri_pool = qp->p_multixri_pool;
+	pvt_pool = &multixri_pool->pvt_pool;
+	multixri_pool->io_req_count++;
+
+	/* If pvt_pool is empty, move some XRIs from public to private pool */
+	if (pvt_pool->count == 0)
+		lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
+
+	/* Get one XRI from private free xri pool */
+	lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, qp, pvt_pool, ndlp);
+
+	if (lpfc_ncmd) {
+		lpfc_ncmd->hdwq = qp;
+		lpfc_ncmd->hdwq_no = hwqid;
+	} else if (expedite) {
+		/* pvt_pool had nothing usable: an expedite request falls back
+		 * to the expedite pool (hdwq/hdwq_no are not updated here).
+		 */
+		lpfc_ncmd = lpfc_get_io_buf_from_expedite_pool(phba);
+	}
+
+	return lpfc_ncmd;
+}
+/* lpfc_io_buf - first usable IO buf on HWQ idx's get list, or NULL if none */
+static inline struct lpfc_io_buf *
+lpfc_io_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, int idx)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	struct lpfc_io_buf *lpfc_cmd, *lpfc_cmd_next;
+
+	qp = &phba->sli4_hba.hdwq[idx];
+	list_for_each_entry_safe(lpfc_cmd, lpfc_cmd_next,
+				 &qp->lpfc_io_buf_list_get, list) {
+		if (lpfc_test_rrq_active(phba, ndlp,
+					 lpfc_cmd->cur_iocbq.sli4_lxritag))
+			continue;
+		/* Bufs flagged LPFC_SBUF_NOT_POSTED are skipped as well */
+		if (lpfc_cmd->flags & LPFC_SBUF_NOT_POSTED)
+			continue;
+
+		list_del_init(&lpfc_cmd->list);
+		qp->get_io_bufs--;
+		lpfc_cmd->hdwq = qp;
+		lpfc_cmd->hdwq_no = idx;
+		return lpfc_cmd;
+	}
+	return NULL;
+}
+
+/**
+ * lpfc_get_io_buf - Get one IO buffer from free pool
+ * @phba: The HBA for which this call is being executed.
+ * @ndlp: pointer to lpfc nodelist data structure.
+ * @hwqid: index of the HWQ to allocate from.
+ * @expedite: 1 means this request is urgent.
+ *
+ * This routine gets one IO buffer from free pool. If cfg_xri_rebalancing==1,
+ * removes an IO buffer from multiXRI pools. If cfg_xri_rebalancing==0, removes
+ * an IO buffer from the HWQ's get list; if that fails the put list is spliced
+ * onto the get list and the attempt is repeated. Only an expedite request may
+ * take a buffer when LPFC_NVME_EXPEDITE_XRICNT or fewer are on the get list.
+ *
+ * Note: ndlp is only used on SCSI side for RRQ testing.
+ *       The caller should pass NULL for ndlp on NVME side.
+ *
+ * Return: pointer to lpfc_io_buf on success, NULL if no buffer is available
+ **/
+struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
+				    struct lpfc_nodelist *ndlp,
+				    u32 hwqid, int expedite)
+{
+	struct lpfc_sli4_hdw_queue *qp;
+	unsigned long iflag;
+	struct lpfc_io_buf *lpfc_cmd;
+
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	lpfc_cmd = NULL;
+
+	if (phba->cfg_xri_rebalancing)
+		lpfc_cmd = lpfc_get_io_buf_from_multixri_pools(
+			phba, ndlp, hwqid, expedite);
+	else {
+		lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag,
+					  qp, alloc_xri_get);
+		if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
+			lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
+		if (!lpfc_cmd) {
+			lpfc_qp_spin_lock(&qp->io_buf_list_put_lock,
+					  qp, alloc_xri_put);
+			list_splice(&qp->lpfc_io_buf_list_put,
+				    &qp->lpfc_io_buf_list_get);
+			qp->get_io_bufs += qp->put_io_bufs;
+			INIT_LIST_HEAD(&qp->lpfc_io_buf_list_put);
+			qp->put_io_bufs = 0;
+			spin_unlock(&qp->io_buf_list_put_lock);
+			if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT ||
+			    expedite)
+				lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
+		}
+		spin_unlock_irqrestore(&qp->io_buf_list_get_lock, iflag);
+	}
+
+	return lpfc_cmd;
+}