-rw-r--r-- | drivers/scsi/lpfc/lpfc.h          |   7
-rw-r--r-- | drivers/scsi/lpfc/lpfc_attr.c     |  96
-rw-r--r-- | drivers/scsi/lpfc/lpfc_crtn.h     |   1
-rw-r--r-- | drivers/scsi/lpfc/lpfc_debugfs.c  | 303
-rw-r--r-- | drivers/scsi/lpfc/lpfc_debugfs.h  |   3
-rw-r--r-- | drivers/scsi/lpfc/lpfc_hw4.h      |   3
-rw-r--r-- | drivers/scsi/lpfc/lpfc_init.c     | 503
-rw-r--r-- | drivers/scsi/lpfc/lpfc_nvme.c     |  18
-rw-r--r-- | drivers/scsi/lpfc/lpfc_scsi.c     |  28
-rw-r--r-- | drivers/scsi/lpfc/lpfc_sli.c      | 148
-rw-r--r-- | drivers/scsi/lpfc/lpfc_sli4.h     |  64
11 files changed, 831 insertions, 343 deletions
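The central relationship this patch introduces is between the new lpfc_irq_chann parameter (number of EQ/MSI-X vectors) and the existing lpfc_hdw_queue parameter (number of CQ/WQ pairs). Before the per-file diffs, here is a minimal sketch of the clamping rules visible in the lpfc_get_cfgparam() and lpfc_nvme_mod_param_dep() hunks below: 0 means "use the number of present CPUs", and the IRQ channel count may never exceed either the CPU count or the hardware queue count. The standalone helper name clamp_irq_chann and its signature are hypothetical, used only to illustrate the checks; the patch itself applies them inline on the phba fields.

/*
 * Illustrative sketch only -- not part of the commit.  Mirrors the checks
 * added in lpfc_get_cfgparam() and lpfc_nvme_mod_param_dep() in
 * lpfc_attr.c: cfg_irq_chann defaults to the present-CPU count and is
 * capped by both the CPU count and cfg_hdw_queue.
 */
static unsigned int
clamp_irq_chann(unsigned int irq_chann, unsigned int hdw_queue,
		unsigned int num_present_cpu)
{
	if (irq_chann == 0)
		irq_chann = num_present_cpu;	/* 0 = one channel per CPU */
	if (irq_chann > num_present_cpu)
		irq_chann = num_present_cpu;	/* never more vectors than CPUs */
	if (irq_chann > hdw_queue)
		irq_chann = hdw_queue;		/* never more EQs than hardware queues */
	return irq_chann;
}

When fewer IRQ channels than hardware queues result, the queue-creation hunks in lpfc_init.c below share one EQ among multiple hardware queues (see the lpfc_find_eq_handle() usage in lpfc_sli4_queue_create()).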
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 0f8964fdfecf..9fd2811ffa8b 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -84,8 +84,6 @@ struct lpfc_sli2_slim; #define LPFC_HB_MBOX_INTERVAL 5 /* Heart beat interval in seconds. */ #define LPFC_HB_MBOX_TIMEOUT 30 /* Heart beat timeout in seconds. */ -#define LPFC_LOOK_AHEAD_OFF 0 /* Look ahead logic is turned off */ - /* Error Attention event polling interval */ #define LPFC_ERATT_POLL_INTERVAL 5 /* EATT poll interval in seconds */ @@ -821,6 +819,7 @@ struct lpfc_hba { uint32_t cfg_fcp_imax; uint32_t cfg_fcp_cpu_map; uint32_t cfg_hdw_queue; + uint32_t cfg_irq_chann; uint32_t cfg_suppress_rsp; uint32_t cfg_nvme_oas; uint32_t cfg_nvme_embed_cmd; @@ -1042,6 +1041,9 @@ struct lpfc_hba { struct dentry *debug_nvmeio_trc; struct lpfc_debugfs_nvmeio_trc *nvmeio_trc; struct dentry *debug_hdwqinfo; +#ifdef LPFC_HDWQ_LOCK_STAT + struct dentry *debug_lockstat; +#endif atomic_t nvmeio_trc_cnt; uint32_t nvmeio_trc_size; uint32_t nvmeio_trc_output_idx; @@ -1161,6 +1163,7 @@ struct lpfc_hba { #define LPFC_CHECK_NVME_IO 1 #define LPFC_CHECK_NVMET_RCV 2 #define LPFC_CHECK_NVMET_IO 4 +#define LPFC_CHECK_SCSI_IO 8 uint16_t ktime_on; uint64_t ktime_data_samples; uint64_t ktime_status_samples; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 787812dd57a9..fc7f80d68638 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4958,7 +4958,7 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr, phba->cfg_fcp_imax = (uint32_t)val; phba->initial_imax = phba->cfg_fcp_imax; - for (i = 0; i < phba->cfg_hdw_queue; i += LPFC_MAX_EQ_DELAY_EQID_CNT) + for (i = 0; i < phba->cfg_irq_chann; i += LPFC_MAX_EQ_DELAY_EQID_CNT) lpfc_modify_hba_eq_delay(phba, i, LPFC_MAX_EQ_DELAY_EQID_CNT, val); @@ -5059,13 +5059,6 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, phba->cfg_fcp_cpu_map, phba->sli4_hba.num_online_cpu); break; - case 2: - len += snprintf(buf + len, PAGE_SIZE-len, - "fcp_cpu_map: Driver centric mapping (%d): " - "%d online CPUs\n", - phba->cfg_fcp_cpu_map, - phba->sli4_hba.num_online_cpu); - break; } while (phba->sli4_hba.curr_disp_cpu < phba->sli4_hba.num_present_cpu) { @@ -5076,35 +5069,35 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, len += snprintf( buf + len, PAGE_SIZE - len, "CPU %02d hdwq None " - "physid %d coreid %d\n", + "physid %d coreid %d ht %d\n", phba->sli4_hba.curr_disp_cpu, cpup->phys_id, - cpup->core_id); + cpup->core_id, cpup->hyper); else len += snprintf( buf + len, PAGE_SIZE - len, - "CPU %02d hdwq %04d " - "physid %d coreid %d\n", + "CPU %02d EQ %04d hdwq %04d " + "physid %d coreid %d ht %d\n", phba->sli4_hba.curr_disp_cpu, - cpup->hdwq, cpup->phys_id, - cpup->core_id); + cpup->eq, cpup->hdwq, cpup->phys_id, + cpup->core_id, cpup->hyper); } else { if (cpup->hdwq == LPFC_VECTOR_MAP_EMPTY) len += snprintf( buf + len, PAGE_SIZE - len, "CPU %02d hdwq None " - "physid %d coreid %d IRQ %d\n", + "physid %d coreid %d ht %d IRQ %d\n", phba->sli4_hba.curr_disp_cpu, cpup->phys_id, - cpup->core_id, cpup->irq); + cpup->core_id, cpup->hyper, cpup->irq); else len += snprintf( buf + len, PAGE_SIZE - len, - "CPU %02d hdwq %04d " - "physid %d coreid %d IRQ %d\n", + "CPU %02d EQ %04d hdwq %04d " + "physid %d coreid %d ht %d IRQ %d\n", phba->sli4_hba.curr_disp_cpu, - cpup->hdwq, cpup->phys_id, - cpup->core_id, cpup->irq); + cpup->eq, cpup->hdwq, cpup->phys_id, + cpup->core_id, cpup->hyper, 
cpup->irq); } phba->sli4_hba.curr_disp_cpu++; @@ -5146,14 +5139,13 @@ lpfc_fcp_cpu_map_store(struct device *dev, struct device_attribute *attr, # lpfc_fcp_cpu_map: Defines how to map CPUs to IRQ vectors # for the HBA. # -# Value range is [0 to 2]. Default value is LPFC_DRIVER_CPU_MAP (2). +# Value range is [0 to 1]. Default value is LPFC_HBA_CPU_MAP (1). # 0 - Do not affinitze IRQ vectors # 1 - Affintize HBA vectors with respect to each HBA # (start with CPU0 for each HBA) -# 2 - Affintize HBA vectors with respect to the entire driver -# (round robin thru all CPUs across all HBAs) +# This also defines how Hardware Queues are mapped to specific CPUs. */ -static int lpfc_fcp_cpu_map = LPFC_DRIVER_CPU_MAP; +static int lpfc_fcp_cpu_map = LPFC_HBA_CPU_MAP; module_param(lpfc_fcp_cpu_map, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(lpfc_fcp_cpu_map, "Defines how to map CPUs to IRQ vectors per HBA"); @@ -5187,7 +5179,7 @@ lpfc_fcp_cpu_map_init(struct lpfc_hba *phba, int val) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3326 lpfc_fcp_cpu_map: %d out of range, using " "default\n", val); - phba->cfg_fcp_cpu_map = LPFC_DRIVER_CPU_MAP; + phba->cfg_fcp_cpu_map = LPFC_HBA_CPU_MAP; return 0; } @@ -5308,7 +5300,7 @@ LPFC_ATTR_R(xri_rebalancing, 1, 0, 1, "Enable/Disable XRI rebalancing"); * CPU. Otherwise, the default 0 (Round Robin) scheduling of FCP/NVME I/Os * through WQs will be used. */ -LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_HDWQ, +LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_BY_CPU, LPFC_FCP_SCHED_BY_HDWQ, LPFC_FCP_SCHED_BY_CPU, "Determine scheduling algorithm for " @@ -5474,18 +5466,18 @@ LPFC_ATTR_RW(nvme_embed_cmd, 1, 0, 2, "Embed NVME Command in WQE"); /* - * lpfc_hdw_queue: Set the number of IO channels the driver + * lpfc_hdw_queue: Set the number of Hardware Queues the driver * will advertise it supports to the NVME and SCSI layers. This also - * will map to the number of EQ/CQ/WQs the driver will create. + * will map to the number of CQ/WQ pairs the driver will create. * * The NVME Layer will try to create this many, plus 1 administrative * hardware queue. The administrative queue will always map to WQ 0 - * A hardware IO queue maps (qidx) to a specific driver WQ. + * A hardware IO queue maps (qidx) to a specific driver CQ/WQ. * * 0 = Configure the number of hdw queues to the number of active CPUs. - * 1,64 = Manually specify how many hdw queues to use. + * 1,128 = Manually specify how many hdw queues to use. * - * Value range is [0,64]. Default value is 0. + * Value range is [0,128]. Default value is 0. */ LPFC_ATTR_R(hdw_queue, LPFC_HBA_HDWQ_DEF, @@ -5493,6 +5485,22 @@ LPFC_ATTR_R(hdw_queue, "Set the number of I/O Hardware Queues"); /* + * lpfc_irq_chann: Set the number of IRQ vectors that are available + * for Hardware Queues to utilize. This also will map to the number + * of EQ / MSI-X vectors the driver will create. This should never be + * more than the number of Hardware Queues + * + * 0 = Configure number of IRQ Channels to the number of active CPUs. + * 1,128 = Manually specify how many IRQ Channels to use. + * + * Value range is [0,128]. Default value is 0. + */ +LPFC_ATTR_R(irq_chann, + LPFC_HBA_HDWQ_DEF, + LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX, + "Set the number of I/O IRQ Channels"); + +/* # lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware. 
# 0 = HBA resets disabled # 1 = HBA resets enabled (default) @@ -5533,16 +5541,6 @@ LPFC_ATTR_RW(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature."); LPFC_ATTR_R(enable_bg, 0, 0, 1, "Enable BlockGuard Support"); /* -# lpfc_fcp_look_ahead: Look ahead for completions in FCP start routine -# 0 = disabled (default) -# 1 = enabled -# Value range is [0,1]. Default value is 0. -# -# This feature in under investigation and may be supported in the future. -*/ -unsigned int lpfc_fcp_look_ahead = LPFC_LOOK_AHEAD_OFF; - -/* # lpfc_prot_mask: i # - Bit mask of host protection capabilities used to register with the # SCSI mid-layer @@ -5788,6 +5786,7 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_fcp_imax, &dev_attr_lpfc_fcp_cpu_map, &dev_attr_lpfc_hdw_queue, + &dev_attr_lpfc_irq_chann, &dev_attr_lpfc_suppress_rsp, &dev_attr_lpfc_nvmet_mrq, &dev_attr_lpfc_nvmet_mrq_post, @@ -6867,6 +6866,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb); lpfc_nvmet_fb_size_init(phba, lpfc_nvmet_fb_size); lpfc_hdw_queue_init(phba, lpfc_hdw_queue); + lpfc_irq_chann_init(phba, lpfc_irq_chann); lpfc_enable_bbcr_init(phba, lpfc_enable_bbcr); lpfc_enable_dpp_init(phba, lpfc_enable_dpp); @@ -6891,6 +6891,10 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) /* A value of 0 means use the number of CPUs found in the system */ if (phba->cfg_hdw_queue == 0) phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu; + if (phba->cfg_irq_chann == 0) + phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu; + if (phba->cfg_irq_chann > phba->cfg_hdw_queue) + phba->cfg_irq_chann = phba->cfg_hdw_queue; phba->cfg_soft_wwnn = 0L; phba->cfg_soft_wwpn = 0L; @@ -6933,6 +6937,10 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) { if (phba->cfg_hdw_queue > phba->sli4_hba.num_present_cpu) phba->cfg_hdw_queue = phba->sli4_hba.num_present_cpu; + if (phba->cfg_irq_chann > phba->sli4_hba.num_present_cpu) + phba->cfg_irq_chann = phba->sli4_hba.num_present_cpu; + if (phba->cfg_irq_chann > phba->cfg_hdw_queue) + phba->cfg_irq_chann = phba->cfg_hdw_queue; if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME && phba->nvmet_support) { @@ -6953,11 +6961,11 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) } if (!phba->cfg_nvmet_mrq) - phba->cfg_nvmet_mrq = phba->cfg_hdw_queue; + phba->cfg_nvmet_mrq = phba->cfg_irq_chann; /* Adjust lpfc_nvmet_mrq to avoid running out of WQE slots */ - if (phba->cfg_nvmet_mrq > phba->cfg_hdw_queue) { - phba->cfg_nvmet_mrq = phba->cfg_hdw_queue; + if (phba->cfg_nvmet_mrq > phba->cfg_irq_chann) { + phba->cfg_nvmet_mrq = phba->cfg_irq_chann; lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, "6018 Adjust lpfc_nvmet_mrq to %d\n", phba->cfg_nvmet_mrq); diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 726cd6a7c452..982401c31c12 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -440,7 +440,6 @@ extern spinlock_t _dump_buf_lock; extern int _dump_buf_done; extern spinlock_t pgcnt_lock; extern unsigned int pgcnt; -extern unsigned int lpfc_fcp_look_ahead; /* Interface exported by fabric iocb scheduler */ void lpfc_fabric_abort_nport(struct lpfc_nodelist *); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 2a2c46766eb6..72076b2cd4ff 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -378,6 +378,67 @@ skipit: return len; } +static int lpfc_debugfs_last_xripool; + +/** + * lpfc_debugfs_common_xri_data - Dump Hardware Queue info to a buffer + * 
@phba: The HBA to gather host buffer info from. + * @buf: The buffer to dump log into. + * @size: The maximum amount of data to process. + * + * Description: + * This routine dumps the Hardware Queue info from the @phba to @buf up to + * @size number of bytes. A header that describes the current hdwq state will be + * dumped to @buf first and then info on each hdwq entry will be dumped to @buf + * until @size bytes have been dumped or all the hdwq info has been dumped. + * + * Notes: + * This routine will rotate through each configured Hardware Queue each + * time called. + * + * Return Value: + * This routine returns the amount of bytes that were dumped into @buf and will + * not exceed @size. + **/ +static int +lpfc_debugfs_commonxripools_data(struct lpfc_hba *phba, char *buf, int size) +{ + struct lpfc_sli4_hdw_queue *qp; + int len = 0; + int i, out; + unsigned long iflag; + + for (i = 0; i < phba->cfg_hdw_queue; i++) { + if (len > (LPFC_DUMP_MULTIXRIPOOL_SIZE - 80)) + break; + qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_xripool]; + + len += snprintf(buf + len, size - len, "HdwQ %d Info ", i); + spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag); + spin_lock(&qp->abts_nvme_buf_list_lock); + spin_lock(&qp->io_buf_list_get_lock); + spin_lock(&qp->io_buf_list_put_lock); + out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs + + qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs); + len += snprintf(buf + len, size - len, + "tot:%d get:%d put:%d mt:%d " + "ABTS scsi:%d nvme:%d Out:%d\n", + qp->total_io_bufs, qp->get_io_bufs, qp->put_io_bufs, + qp->empty_io_bufs, qp->abts_scsi_io_bufs, + qp->abts_nvme_io_bufs, out); + spin_unlock(&qp->io_buf_list_put_lock); + spin_unlock(&qp->io_buf_list_get_lock); + spin_unlock(&qp->abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag); + + lpfc_debugfs_last_xripool++; + if (lpfc_debugfs_last_xripool >= phba->cfg_hdw_queue) + lpfc_debugfs_last_xripool = 0; + } + + return len; +} + /** * lpfc_debugfs_multixripools_data - Display multi-XRI pools information * @phba: The HBA to gather host buffer info from. @@ -405,6 +466,17 @@ lpfc_debugfs_multixripools_data(struct lpfc_hba *phba, char *buf, int size) u32 txcmplq_cnt; char tmp[LPFC_DEBUG_OUT_LINE_SZ] = {0}; + if (phba->sli_rev != LPFC_SLI_REV4) + return 0; + + if (!phba->sli4_hba.hdwq) + return 0; + + if (!phba->cfg_xri_rebalancing) { + i = lpfc_debugfs_commonxripools_data(phba, buf, size); + return i; + } + /* * Pbl: Current number of free XRIs in public pool * Pvt: Current number of free XRIs in private pool @@ -498,10 +570,12 @@ lpfc_debugfs_multixripools_data(struct lpfc_hba *phba, char *buf, int size) return strnlen(buf, size); } -static int lpfc_debugfs_last_hdwq; + +#ifdef LPFC_HDWQ_LOCK_STAT +static int lpfc_debugfs_last_lock; /** - * lpfc_debugfs_hdwqinfo_data - Dump Hardware Queue info to a buffer + * lpfc_debugfs_lockstat_data - Dump Hardware Queue info to a buffer * @phba: The HBA to gather host buffer info from. * @buf: The buffer to dump log into. * @size: The maximum amount of data to process. @@ -521,12 +595,11 @@ static int lpfc_debugfs_last_hdwq; * not exceed @size. 
**/ static int -lpfc_debugfs_hdwqinfo_data(struct lpfc_hba *phba, char *buf, int size) +lpfc_debugfs_lockstat_data(struct lpfc_hba *phba, char *buf, int size) { struct lpfc_sli4_hdw_queue *qp; int len = 0; - int i, out; - unsigned long iflag; + int i; if (phba->sli_rev != LPFC_SLI_REV4) return 0; @@ -535,35 +608,40 @@ lpfc_debugfs_hdwqinfo_data(struct lpfc_hba *phba, char *buf, int size) return 0; for (i = 0; i < phba->cfg_hdw_queue; i++) { - if (len > (LPFC_HDWQINFO_SIZE - 80)) + if (len > (LPFC_HDWQINFO_SIZE - 100)) break; - qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_hdwq]; + qp = &phba->sli4_hba.hdwq[lpfc_debugfs_last_lock]; - len += snprintf(buf + len, size - len, "HdwQ %d Info ", i); - spin_lock_irqsave(&qp->abts_scsi_buf_list_lock, iflag); - spin_lock(&qp->abts_nvme_buf_list_lock); - spin_lock(&qp->io_buf_list_get_lock); - spin_lock(&qp->io_buf_list_put_lock); - out = qp->total_io_bufs - (qp->get_io_bufs + qp->put_io_bufs + - qp->abts_scsi_io_bufs + qp->abts_nvme_io_bufs); - len += snprintf(buf + len, size - len, - "tot:%d get:%d put:%d mt:%d " - "ABTS scsi:%d nvme:%d Out:%d\n", - qp->total_io_bufs, qp->get_io_bufs, qp->put_io_bufs, - qp->empty_io_bufs, qp->abts_scsi_io_bufs, - qp->abts_nvme_io_bufs, out); - spin_unlock(&qp->io_buf_list_put_lock); - spin_unlock(&qp->io_buf_list_get_lock); - spin_unlock(&qp->abts_nvme_buf_list_lock); - spin_unlock_irqrestore(&qp->abts_scsi_buf_list_lock, iflag); + len += snprintf(buf + len, size - len, "HdwQ %03d Lock ", i); + if (phba->cfg_xri_rebalancing) { + len += snprintf(buf + len, size - len, + "get_pvt:%d mv_pvt:%d " + "mv2pub:%d mv2pvt:%d " + "put_pvt:%d put_pub:%d wq:%d\n", + qp->lock_conflict.alloc_pvt_pool, + qp->lock_conflict.mv_from_pvt_pool, + qp->lock_conflict.mv_to_pub_pool, + qp->lock_conflict.mv_to_pvt_pool, + qp->lock_conflict.free_pvt_pool, + qp->lock_conflict.free_pub_pool, + qp->lock_conflict.wq_access); + } else { + len += snprintf(buf + len, size - len, + "get:%d put:%d free:%d wq:%d\n", + qp->lock_conflict.alloc_xri_get, + qp->lock_conflict.alloc_xri_put, + qp->lock_conflict.free_xri, + qp->lock_conflict.wq_access); + } - lpfc_debugfs_last_hdwq++; - if (lpfc_debugfs_last_hdwq >= phba->cfg_hdw_queue) - lpfc_debugfs_last_hdwq = 0; + lpfc_debugfs_last_lock++; + if (lpfc_debugfs_last_lock >= phba->cfg_hdw_queue) + lpfc_debugfs_last_lock = 0; } return len; } +#endif static int lpfc_debugfs_last_hba_slim_off; @@ -964,7 +1042,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) struct lpfc_nvme_lport *lport; uint64_t data1, data2, data3; uint64_t tot, totin, totout; - int cnt, i, maxch; + int cnt, i; int len = 0; if (phba->nvmet_support) { @@ -1106,10 +1184,6 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) atomic_read(&lport->fc4NvmeLsRequests), atomic_read(&lport->fc4NvmeLsCmpls)); - if (phba->cfg_hdw_queue < LPFC_HBA_HDWQ_MAX) - maxch = phba->cfg_hdw_queue; - else - maxch = LPFC_HBA_HDWQ_MAX; totin = 0; totout = 0; for (i = 0; i < phba->cfg_hdw_queue; i++) { @@ -1547,7 +1621,7 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; struct lpfc_sli4_hdw_queue *qp; - int i, j; + int i, j, max_cnt; int len = 0; uint32_t tot_xmt; uint32_t tot_rcv; @@ -1565,6 +1639,7 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) } else { len += snprintf(buf + len, PAGE_SIZE - len, "\n"); } + max_cnt = size - LPFC_DEBUG_OUT_LINE_SZ; for (i = 0; i < phba->cfg_hdw_queue; i++) { qp = &phba->sli4_hba.hdwq[i]; @@ 
-1606,6 +1681,11 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) } len += snprintf(buf + len, PAGE_SIZE - len, "Total: %x\n", tot_xmt); + if (len >= max_cnt) { + len += snprintf(buf + len, PAGE_SIZE - len, + "Truncated ...\n"); + return len; + } } return len; } @@ -1904,11 +1984,8 @@ lpfc_debugfs_multixripools_open(struct inode *inode, struct file *file) goto out; } - if (phba->cfg_xri_rebalancing) - debug->len = lpfc_debugfs_multixripools_data( - phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE); - else - debug->len = 0; + debug->len = lpfc_debugfs_multixripools_data( + phba, debug->buffer, LPFC_DUMP_MULTIXRIPOOL_SIZE); debug->i_private = inode->i_private; file->private_data = debug; @@ -1918,8 +1995,9 @@ out: return rc; } +#ifdef LPFC_HDWQ_LOCK_STAT /** - * lpfc_debugfs_hdwqinfo_open - Open the hdwqinfo debugfs buffer + * lpfc_debugfs_lockstat_open - Open the lockstat debugfs buffer * @inode: The inode pointer that contains a vport pointer. * @file: The file pointer to attach the log output. * @@ -1934,7 +2012,7 @@ out: * error value. **/ static int -lpfc_debugfs_hdwqinfo_open(struct inode *inode, struct file *file) +lpfc_debugfs_lockstat_open(struct inode *inode, struct file *file) { struct lpfc_hba *phba = inode->i_private; struct lpfc_debug *debug; @@ -1951,7 +2029,7 @@ lpfc_debugfs_hdwqinfo_open(struct inode *inode, struct file *file) goto out; } - debug->len = lpfc_debugfs_hdwqinfo_data(phba, debug->buffer, + debug->len = lpfc_debugfs_lockstat_data(phba, debug->buffer, LPFC_HBQINFO_SIZE); file->private_data = debug; @@ -1960,6 +2038,48 @@ out: return rc; } +static ssize_t +lpfc_debugfs_lockstat_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private; + struct lpfc_sli4_hdw_queue *qp; + char mybuf[64]; + char *pbuf; + int i; + + /* Protect copy from user */ + if (!access_ok(buf, nbytes)) + return -EFAULT; + + memset(mybuf, 0, sizeof(mybuf)); + + if (copy_from_user(mybuf, buf, nbytes)) + return -EFAULT; + pbuf = &mybuf[0]; + + if ((strncmp(pbuf, "reset", strlen("reset")) == 0) || + (strncmp(pbuf, "zero", strlen("zero")) == 0)) { + for (i = 0; i < phba->cfg_hdw_queue; i++) { + qp = &phba->sli4_hba.hdwq[i]; + qp->lock_conflict.alloc_xri_get = 0; + qp->lock_conflict.alloc_xri_put = 0; + qp->lock_conflict.free_xri = 0; + qp->lock_conflict.wq_access = 0; + qp->lock_conflict.alloc_pvt_pool = 0; + qp->lock_conflict.mv_from_pvt_pool = 0; + qp->lock_conflict.mv_to_pub_pool = 0; + qp->lock_conflict.mv_to_pvt_pool = 0; + qp->lock_conflict.free_pvt_pool = 0; + qp->lock_conflict.free_pub_pool = 0; + qp->lock_conflict.wq_access = 0; + } + } + return nbytes; +} +#endif + /** * lpfc_debugfs_dumpHBASlim_open - Open the Dump HBA SLIM debugfs buffer * @inode: The inode pointer that contains a vport pointer. 
@@ -2816,7 +2936,7 @@ lpfc_debugfs_cpucheck_open(struct inode *inode, struct file *file) } debug->len = lpfc_debugfs_cpucheck_data(vport, debug->buffer, - LPFC_NVMEKTIME_SIZE); + LPFC_CPUCHECK_SIZE); debug->i_private = inode->i_private; file->private_data = debug; @@ -2851,8 +2971,18 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf, if (phba->nvmet_support) phba->cpucheck_on |= LPFC_CHECK_NVMET_IO; else + phba->cpucheck_on |= (LPFC_CHECK_NVME_IO | + LPFC_CHECK_SCSI_IO); + return strlen(pbuf); + } else if ((strncmp(pbuf, "nvme_on", sizeof("nvme_on") - 1) == 0)) { + if (phba->nvmet_support) + phba->cpucheck_on |= LPFC_CHECK_NVMET_IO; + else phba->cpucheck_on |= LPFC_CHECK_NVME_IO; return strlen(pbuf); + } else if ((strncmp(pbuf, "scsi_on", sizeof("scsi_on") - 1) == 0)) { + phba->cpucheck_on |= LPFC_CHECK_SCSI_IO; + return strlen(pbuf); } else if ((strncmp(pbuf, "rcv", sizeof("rcv") - 1) == 0)) { if (phba->nvmet_support) @@ -3732,46 +3862,38 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, int *len, int max_cnt, int eqidx, int eq_id) { struct lpfc_queue *qp; - int qidx, rc; + int rc; - for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { - qp = phba->sli4_hba.hdwq[qidx].fcp_cq; - if (qp->assoc_qid != eq_id) - continue; + qp = phba->sli4_hba.hdwq[eqidx].fcp_cq; - *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len); + *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len); - /* Reset max counter */ - qp->CQ_max_cqe = 0; + /* Reset max counter */ + qp->CQ_max_cqe = 0; - if (*len >= max_cnt) - return 1; + if (*len >= max_cnt) + return 1; - rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len, - max_cnt, qp->queue_id); - if (rc) - return 1; - } + rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len, + max_cnt, qp->queue_id); + if (rc) + return 1; if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { - qp = phba->sli4_hba.hdwq[qidx].nvme_cq; - if (qp->assoc_qid != eq_id) - continue; + qp = phba->sli4_hba.hdwq[eqidx].nvme_cq; - *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len); + *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len); - /* Reset max counter */ - qp->CQ_max_cqe = 0; + /* Reset max counter */ + qp->CQ_max_cqe = 0; - if (*len >= max_cnt) - return 1; + if (*len >= max_cnt) + return 1; - rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len, - max_cnt, qp->queue_id); - if (rc) - return 1; - } + rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len, + max_cnt, qp->queue_id); + if (rc) + return 1; } if ((eqidx < phba->cfg_nvmet_mrq) && phba->nvmet_support) { @@ -3812,9 +3934,10 @@ __lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype, (unsigned long long)qp->q_cnt_4, qp->q_mode); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "EQID[%02d], QE-CNT[%04d], QE-SZ[%04d], " - "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d]", + "HST-IDX[%04d], PRT-IDX[%04d], PST[%03d] AFFIN[%03d]", qp->queue_id, qp->entry_count, qp->entry_size, - qp->host_index, qp->hba_index, qp->entry_repost); + qp->host_index, qp->hba_index, qp->entry_repost, + qp->chann); len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); return len; @@ -3869,7 +3992,7 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, phba->lpfc_idiag_last_eq = 0; len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, - "EQ %d out of %d HBA EQs\n", + "HDWQ %d out of %d HBA HDWQs\n", x, phba->cfg_hdw_queue); /* Fast-path EQ */ @@ -5299,14 +5422,17 @@ static const struct file_operations 
lpfc_debugfs_op_hbqinfo = { .release = lpfc_debugfs_release, }; -#undef lpfc_debugfs_op_hdwqinfo -static const struct file_operations lpfc_debugfs_op_hdwqinfo = { +#ifdef LPFC_HDWQ_LOCK_STAT +#undef lpfc_debugfs_op_lockstat +static const struct file_operations lpfc_debugfs_op_lockstat = { .owner = THIS_MODULE, - .open = lpfc_debugfs_hdwqinfo_open, + .open = lpfc_debugfs_lockstat_open, .llseek = lpfc_debugfs_lseek, .read = lpfc_debugfs_read, + .write = lpfc_debugfs_lockstat_write, .release = lpfc_debugfs_release, }; +#endif #undef lpfc_debugfs_op_dumpHBASlim static const struct file_operations lpfc_debugfs_op_dumpHBASlim = { @@ -5756,17 +5882,19 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) phba->hba_debugfs_root, phba, &lpfc_debugfs_op_hbqinfo); - /* Setup hdwqinfo */ - snprintf(name, sizeof(name), "hdwqinfo"); - phba->debug_hdwqinfo = +#ifdef LPFC_HDWQ_LOCK_STAT + /* Setup lockstat */ + snprintf(name, sizeof(name), "lockstat"); + phba->debug_lockstat = debugfs_create_file(name, S_IFREG | 0644, phba->hba_debugfs_root, - phba, &lpfc_debugfs_op_hdwqinfo); - if (!phba->debug_hdwqinfo) { + phba, &lpfc_debugfs_op_lockstat); + if (!phba->debug_lockstat) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, - "0511 Cant create debugfs hdwqinfo\n"); + "0913 Cant create debugfs lockstat\n"); goto debug_failed; } +#endif /* Setup dumpHBASlim */ if (phba->sli_rev < LPFC_SLI_REV4) { @@ -6006,7 +6134,7 @@ nvmeio_off: vport, &lpfc_debugfs_op_scsistat); if (!vport->debug_scsistat) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, - "0811 Cannot create debugfs scsistat\n"); + "0914 Cannot create debugfs scsistat\n"); goto debug_failed; } @@ -6171,9 +6299,10 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */ phba->debug_hbqinfo = NULL; - debugfs_remove(phba->debug_hdwqinfo); /* hdwqinfo */ - phba->debug_hdwqinfo = NULL; - +#ifdef LPFC_HDWQ_LOCK_STAT + debugfs_remove(phba->debug_lockstat); /* lockstat */ + phba->debug_lockstat = NULL; +#endif debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */ phba->debug_dumpHBASlim = NULL; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index cf256a6dca42..1fbee6496f85 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -290,9 +290,6 @@ struct lpfc_idiag { /* multixripool output buffer size */ #define LPFC_DUMP_MULTIXRIPOOL_SIZE 8192 -/* hdwqinfo output buffer size */ -#define LPFC_HDWQINFO_SIZE 8192 - enum { DUMP_FCP, DUMP_NVME, diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index cd39845c909f..665852291a4f 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -211,9 +211,8 @@ struct lpfc_sli_intf { #define LPFC_DEF_IMAX 150000 #define LPFC_MIN_CPU_MAP 0 -#define LPFC_MAX_CPU_MAP 2 +#define LPFC_MAX_CPU_MAP 1 #define LPFC_HBA_CPU_MAP 1 -#define LPFC_DRIVER_CPU_MAP 2 /* Default */ /* PORT_CAPABILITIES constants. 
*/ #define LPFC_MAX_SUPPORTED_PAGES 8 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index d9db29817f6b..145c08f112a3 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -37,6 +37,7 @@ #include <linux/miscdevice.h> #include <linux/percpu.h> #include <linux/msi.h> +#include <linux/irq.h> #include <linux/bitops.h> #include <scsi/scsi.h> @@ -92,6 +93,8 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *); static void lpfc_sli4_disable_intr(struct lpfc_hba *); static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t); static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); +static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t); +static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int); static struct scsi_transport_template *lpfc_transport_template = NULL; static struct scsi_transport_template *lpfc_vport_transport_template = NULL; @@ -1367,13 +1370,13 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba) } /* Interrupts per sec per EQ */ - val = phba->cfg_fcp_imax / phba->cfg_hdw_queue; + val = phba->cfg_fcp_imax / phba->cfg_irq_chann; tick_cqe = val / CONFIG_HZ; /* Per tick per EQ */ /* Assume 1 CQE/ISR, calc max CQEs allowed for time duration */ max_cqe = time_elapsed * tick_cqe; - for (i = 0; i < phba->cfg_hdw_queue; i++) { + for (i = 0; i < phba->cfg_irq_chann; i++) { /* Fast-path EQ */ qp = phba->sli4_hba.hdwq[i].hba_eq; if (!qp) @@ -1397,7 +1400,7 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba) if (val) { /* First, interrupts per sec per EQ */ val = phba->cfg_fcp_imax / - phba->cfg_hdw_queue; + phba->cfg_irq_chann; /* us delay between each interrupt */ val = LPFC_SEC_TO_USEC / val; @@ -4335,8 +4338,13 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) shost->max_lun = vport->cfg_max_luns; shost->this_id = -1; shost->max_cmd_len = 16; + if (phba->sli_rev == LPFC_SLI_REV4) { - shost->nr_hw_queues = phba->cfg_hdw_queue; + if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) + shost->nr_hw_queues = phba->cfg_hdw_queue; + else + shost->nr_hw_queues = phba->sli4_hba.num_present_cpu; + shost->dma_boundary = phba->sli4_hba.pc_sli4_params.sge_supp_len-1; shost->sg_tablesize = phba->cfg_scsi_seg_cnt; @@ -6819,7 +6827,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_remove_rpi_hdrs; } - phba->sli4_hba.hba_eq_hdl = kcalloc(phba->cfg_hdw_queue, + phba->sli4_hba.hba_eq_hdl = kcalloc(phba->cfg_irq_chann, sizeof(struct lpfc_hba_eq_hdl), GFP_KERNEL); if (!phba->sli4_hba.hba_eq_hdl) { @@ -8257,7 +8265,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) struct lpfc_rsrc_desc_fcfcoe *desc; char *pdesc_0; uint16_t forced_link_speed; - uint32_t if_type; + uint32_t if_type, qmin; int length, i, rc = 0, rc2; pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -8362,40 +8370,44 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) phba->sli4_hba.max_cfg_param.max_rq); /* - * Calculate NVME queue resources based on how - * many WQ/CQs are available. + * Calculate queue resources based on how + * many WQ/CQ/EQs are available. 
*/ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - length = phba->sli4_hba.max_cfg_param.max_wq; - if (phba->sli4_hba.max_cfg_param.max_cq < - phba->sli4_hba.max_cfg_param.max_wq) - length = phba->sli4_hba.max_cfg_param.max_cq; + qmin = phba->sli4_hba.max_cfg_param.max_wq; + if (phba->sli4_hba.max_cfg_param.max_cq < qmin) + qmin = phba->sli4_hba.max_cfg_param.max_cq; + if (phba->sli4_hba.max_cfg_param.max_eq < qmin) + qmin = phba->sli4_hba.max_cfg_param.max_eq; + /* + * Whats left after this can go toward NVME / FCP. + * The minus 4 accounts for ELS, NVME LS, MBOX + * plus one extra. When configured for + * NVMET, FCP io channel WQs are not created. + */ + qmin -= 4; - /* - * Whats left after this can go toward NVME. - * The minus 6 accounts for ELS, NVME LS, MBOX - * plus a couple extra. When configured for - * NVMET, FCP io channel WQs are not created. - */ - length -= 6; - - /* Take off FCP queues */ - if (!phba->nvmet_support) - length -= phba->cfg_hdw_queue; - - /* Check to see if there is enough for NVME */ - if (phba->cfg_hdw_queue > length) { - lpfc_printf_log( - phba, KERN_ERR, LOG_SLI, - "2005 Reducing NVME IO channel to %d: " - "WQ %d CQ %d CommonIO %d\n", - length, + /* If NVME is configured, double the number of CQ/WQs needed */ + if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) && + !phba->nvmet_support) + qmin /= 2; + + /* Check to see if there is enough for NVME */ + if ((phba->cfg_irq_chann > qmin) || + (phba->cfg_hdw_queue > qmin)) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "2005 Reducing Queues: " + "WQ %d CQ %d EQ %d: min %d: " + "IRQ %d HDWQ %d\n", phba->sli4_hba.max_cfg_param.max_wq, phba->sli4_hba.max_cfg_param.max_cq, + phba->sli4_hba.max_cfg_param.max_eq, + qmin, phba->cfg_irq_chann, phba->cfg_hdw_queue); - phba->cfg_hdw_queue = length; - } + if (phba->cfg_irq_chann > qmin) + phba->cfg_irq_chann = qmin; + if (phba->cfg_hdw_queue > qmin) + phba->cfg_hdw_queue = qmin; } } @@ -8612,25 +8624,17 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) * device parameters */ - if (phba->cfg_hdw_queue > phba->sli4_hba.max_cfg_param.max_eq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2575 Reducing IO channels to match number of " - "available EQs: from %d to %d\n", - phba->cfg_hdw_queue, - phba->sli4_hba.max_cfg_param.max_eq); - phba->cfg_hdw_queue = phba->sli4_hba.max_cfg_param.max_eq; - } - if (phba->nvmet_support) { - if (phba->cfg_hdw_queue < phba->cfg_nvmet_mrq) - phba->cfg_nvmet_mrq = phba->cfg_hdw_queue; + if (phba->cfg_irq_chann < phba->cfg_nvmet_mrq) + phba->cfg_nvmet_mrq = phba->cfg_irq_chann; } if (phba->cfg_nvmet_mrq > LPFC_NVMET_MRQ_MAX) phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_MAX; lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2574 IO channels: hdwQ %d MRQ: %d\n", - phba->cfg_hdw_queue, phba->cfg_nvmet_mrq); + "2574 IO channels: hdwQ %d IRQ %d MRQ: %d\n", + phba->cfg_hdw_queue, phba->cfg_irq_chann, + phba->cfg_nvmet_mrq); /* Get EQ depth from module parameter, fake the default for now */ phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B; @@ -8658,6 +8662,7 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx) } qdesc->qe_valid = 1; qdesc->hdwq = wqidx; + qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ); phba->sli4_hba.hdwq[wqidx].nvme_cq = qdesc; qdesc = lpfc_sli4_queue_alloc(phba, LPFC_EXPANDED_PAGE_SIZE, @@ -8669,6 +8674,7 @@ lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx) return 1; } qdesc->hdwq = wqidx; + qdesc->chann = wqidx; phba->sli4_hba.hdwq[wqidx].nvme_wq = qdesc; list_add_tail(&qdesc->wq_list, 
&phba->sli4_hba.lpfc_wq_list); return 0; @@ -8698,6 +8704,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) } qdesc->qe_valid = 1; qdesc->hdwq = wqidx; + qdesc->chann = lpfc_find_cpu_handle(phba, wqidx, LPFC_FIND_BY_HDWQ); phba->sli4_hba.hdwq[wqidx].fcp_cq = qdesc; /* Create Fast Path FCP WQs */ @@ -8720,6 +8727,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) return 1; } qdesc->hdwq = wqidx; + qdesc->chann = wqidx; phba->sli4_hba.hdwq[wqidx].fcp_wq = qdesc; list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); return 0; @@ -8743,7 +8751,7 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - int idx; + int idx, eqidx; struct lpfc_sli4_hdw_queue *qp; /* @@ -8829,7 +8837,18 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) /* Create HBA Event Queues (EQs) */ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { - /* Create EQs */ + /* + * If there are more Hardware Queues than available + * CQs, multiple Hardware Queues may share a common EQ. + */ + if (idx >= phba->cfg_irq_chann) { + /* Share an existing EQ */ + eqidx = lpfc_find_eq_handle(phba, idx); + phba->sli4_hba.hdwq[idx].hba_eq = + phba->sli4_hba.hdwq[eqidx].hba_eq; + continue; + } + /* Create an EQ */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.eq_esize, phba->sli4_hba.eq_ecount); @@ -8840,20 +8859,27 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) } qdesc->qe_valid = 1; qdesc->hdwq = idx; + + /* Save the CPU this EQ is affinitised to */ + eqidx = lpfc_find_eq_handle(phba, idx); + qdesc->chann = lpfc_find_cpu_handle(phba, eqidx, + LPFC_FIND_BY_EQ); phba->sli4_hba.hdwq[idx].hba_eq = qdesc; } /* Allocate SCSI SLI4 CQ/WQs */ - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { if (lpfc_alloc_fcp_wq_cq(phba, idx)) goto out_error; + } /* Allocate NVME SLI4 CQ/WQs */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { if (lpfc_alloc_nvme_wq_cq(phba, idx)) goto out_error; + } if (phba->nvmet_support) { for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { @@ -8871,6 +8897,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) } qdesc->qe_valid = 1; qdesc->hdwq = idx; + qdesc->chann = idx; phba->sli4_hba.nvmet_cqset[idx] = qdesc; } } @@ -8902,6 +8929,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } qdesc->qe_valid = 1; + qdesc->chann = 0; phba->sli4_hba.els_cq = qdesc; @@ -8919,6 +8947,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "0505 Failed allocate slow-path MQ\n"); goto out_error; } + qdesc->chann = 0; phba->sli4_hba.mbx_wq = qdesc; /* @@ -8934,6 +8963,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "0504 Failed allocate slow-path ELS WQ\n"); goto out_error; } + qdesc->chann = 0; phba->sli4_hba.els_wq = qdesc; list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); @@ -8947,6 +8977,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "6079 Failed allocate NVME LS CQ\n"); goto out_error; } + qdesc->chann = 0; qdesc->qe_valid = 1; phba->sli4_hba.nvmels_cq = qdesc; @@ -8959,6 +8990,7 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "6080 Failed allocate NVME LS WQ\n"); goto out_error; } + qdesc->chann = 0; phba->sli4_hba.nvmels_wq = qdesc; list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); } @@ -9085,17 +9117,21 @@ lpfc_sli4_release_queues(struct lpfc_queue ***qs, int max) } static inline void -lpfc_sli4_release_hdwq(struct lpfc_sli4_hdw_queue *hdwq, int max) 
+lpfc_sli4_release_hdwq(struct lpfc_hba *phba) { + struct lpfc_sli4_hdw_queue *hdwq; uint32_t idx; - for (idx = 0; idx < max; idx++) { - lpfc_sli4_queue_free(hdwq[idx].hba_eq); + hdwq = phba->sli4_hba.hdwq; + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { + if (idx < phba->cfg_irq_chann) + lpfc_sli4_queue_free(hdwq[idx].hba_eq); + hdwq[idx].hba_eq = NULL; + lpfc_sli4_queue_free(hdwq[idx].fcp_cq); lpfc_sli4_queue_free(hdwq[idx].nvme_cq); lpfc_sli4_queue_free(hdwq[idx].fcp_wq); lpfc_sli4_queue_free(hdwq[idx].nvme_wq); - hdwq[idx].hba_eq = NULL; hdwq[idx].fcp_cq = NULL; hdwq[idx].nvme_cq = NULL; hdwq[idx].fcp_wq = NULL; @@ -9120,8 +9156,7 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) { /* Release HBA eqs */ if (phba->sli4_hba.hdwq) - lpfc_sli4_release_hdwq(phba->sli4_hba.hdwq, - phba->cfg_hdw_queue); + lpfc_sli4_release_hdwq(phba); if (phba->nvmet_support) { lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_cqset, @@ -9202,7 +9237,6 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq, qidx, (uint32_t)rc); return rc; } - cq->chann = qidx; if (qtype != LPFC_MBOX) { /* Setup cq_map for fast lookup */ @@ -9222,7 +9256,6 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq, /* no need to tear down cq - caller will do so */ return rc; } - wq->chann = qidx; /* Bind this CQ/WQ to the NVME ring */ pring = wq->pring; @@ -9252,6 +9285,38 @@ lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq, } /** + * lpfc_setup_cq_lookup - Setup the CQ lookup table + * @phba: pointer to lpfc hba data structure. + * + * This routine will populate the cq_lookup table by all + * available CQ queue_id's. + **/ +void +lpfc_setup_cq_lookup(struct lpfc_hba *phba) +{ + struct lpfc_queue *eq, *childq; + struct lpfc_sli4_hdw_queue *qp; + int qidx; + + qp = phba->sli4_hba.hdwq; + memset(phba->sli4_hba.cq_lookup, 0, + (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1))); + for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { + eq = qp[qidx].hba_eq; + if (!eq) + continue; + list_for_each_entry(childq, &eq->child_list, list) { + if (childq->queue_id > phba->sli4_hba.cq_max) + continue; + if ((childq->subtype == LPFC_FCP) || + (childq->subtype == LPFC_NVME)) + phba->sli4_hba.cq_lookup[childq->queue_id] = + childq; + } + } +} + +/** * lpfc_sli4_queue_setup - Set up all the SLI4 queues * @phba: pointer to lpfc hba data structure. 
* @@ -9331,7 +9396,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) rc = -ENOMEM; goto out_error; } - for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { + for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { if (!qp[qidx].hba_eq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0522 Fast-path EQ (%d) not " @@ -9578,11 +9643,23 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) phba->sli4_hba.dat_rq->queue_id, phba->sli4_hba.els_cq->queue_id); - for (qidx = 0; qidx < phba->cfg_hdw_queue; + for (qidx = 0; qidx < phba->cfg_irq_chann; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT) lpfc_modify_hba_eq_delay(phba, qidx, LPFC_MAX_EQ_DELAY_EQID_CNT, phba->cfg_fcp_imax); + if (phba->sli4_hba.cq_max) { + kfree(phba->sli4_hba.cq_lookup); + phba->sli4_hba.cq_lookup = kcalloc((phba->sli4_hba.cq_max + 1), + sizeof(struct lpfc_queue *), GFP_KERNEL); + if (!phba->sli4_hba.cq_lookup) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0549 Failed setup of CQ Lookup table: " + "size 0x%x\n", phba->sli4_hba.cq_max); + goto out_destroy; + } + lpfc_setup_cq_lookup(phba); + } return 0; out_destroy: @@ -9664,9 +9741,14 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba) lpfc_wq_destroy(phba, qp->nvme_wq); lpfc_cq_destroy(phba, qp->fcp_cq); lpfc_cq_destroy(phba, qp->nvme_cq); - lpfc_eq_destroy(phba, qp->hba_eq); + if (qidx < phba->cfg_irq_chann) + lpfc_eq_destroy(phba, qp->hba_eq); } } + + kfree(phba->sli4_hba.cq_lookup); + phba->sli4_hba.cq_lookup = NULL; + phba->sli4_hba.cq_max = 0; } /** @@ -10446,22 +10528,198 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba) } /** + * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified EQ + * @phba: pointer to lpfc hba data structure. + * @id: EQ vector index or Hardware Queue index + * @match: LPFC_FIND_BY_EQ = match by EQ + * LPFC_FIND_BY_HDWQ = match by Hardware Queue + */ +static uint16_t +lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match) +{ + struct lpfc_vector_map_info *cpup; + int cpu; + + /* Find the desired phys_id for the specified EQ */ + cpup = phba->sli4_hba.cpu_map; + for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { + if ((match == LPFC_FIND_BY_EQ) && + (cpup->irq != LPFC_VECTOR_MAP_EMPTY) && + (cpup->eq == id)) + return cpu; + if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id)) + return cpu; + cpup++; + } + return 0; +} + +/** + * lpfc_find_eq_handle - Find the EQ that corresponds to the specified + * Hardware Queue + * @phba: pointer to lpfc hba data structure. + * @hdwq: Hardware Queue index + */ +static uint16_t +lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq) +{ + struct lpfc_vector_map_info *cpup; + int cpu; + + /* Find the desired phys_id for the specified EQ */ + cpup = phba->sli4_hba.cpu_map; + for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { + if (cpup->hdwq == hdwq) + return cpup->eq; + cpup++; + } + return 0; +} + +/** + * lpfc_find_phys_id_eq - Find the next EQ that corresponds to the specified + * Physical Id. + * @phba: pointer to lpfc hba data structure. 
+ * @eqidx: EQ index + * @phys_id: CPU package physical id + */ +static uint16_t +lpfc_find_phys_id_eq(struct lpfc_hba *phba, uint16_t eqidx, uint16_t phys_id) +{ + struct lpfc_vector_map_info *cpup; + int cpu, desired_phys_id; + + desired_phys_id = LPFC_VECTOR_MAP_EMPTY; + + /* Find the desired phys_id for the specified EQ */ + cpup = phba->sli4_hba.cpu_map; + for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { + if ((cpup->irq != LPFC_VECTOR_MAP_EMPTY) && + (cpup->eq == eqidx)) { + desired_phys_id = cpup->phys_id; + break; + } + cpup++; + } + if (phys_id == desired_phys_id) + return eqidx; + + /* Find a EQ thats on the specified phys_id */ + cpup = phba->sli4_hba.cpu_map; + for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { + if ((cpup->irq != LPFC_VECTOR_MAP_EMPTY) && + (cpup->phys_id == phys_id)) + return cpup->eq; + cpup++; + } + return 0; +} + +/** + * lpfc_find_cpu_map - Find next available CPU map entry that matches the + * phys_id and core_id. + * @phba: pointer to lpfc hba data structure. + * @phys_id: CPU package physical id + * @core_id: CPU core id + * @hdwqidx: Hardware Queue index + * @eqidx: EQ index + * @isr_avail: Should an IRQ be associated with this entry + */ +static struct lpfc_vector_map_info * +lpfc_find_cpu_map(struct lpfc_hba *phba, uint16_t phys_id, uint16_t core_id, + uint16_t hdwqidx, uint16_t eqidx, int isr_avail) +{ + struct lpfc_vector_map_info *cpup; + int cpu; + + cpup = phba->sli4_hba.cpu_map; + for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { + /* Does the cpup match the one we are looking for */ + if ((cpup->phys_id == phys_id) && + (cpup->core_id == core_id)) { + /* If it has been already assigned, then skip it */ + if (cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) { + cpup++; + continue; + } + /* Ensure we are on the same phys_id as the first one */ + if (!isr_avail) + cpup->eq = lpfc_find_phys_id_eq(phba, eqidx, + phys_id); + else + cpup->eq = eqidx; + + cpup->hdwq = hdwqidx; + if (isr_avail) { + cpup->irq = + pci_irq_vector(phba->pcidev, eqidx); + + /* Now affinitize to the selected CPU */ + irq_set_affinity_hint(cpup->irq, + get_cpu_mask(cpu)); + irq_set_status_flags(cpup->irq, + IRQ_NO_BALANCING); + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3330 Set Affinity: CPU %d " + "EQ %d irq %d (HDWQ %x)\n", + cpu, cpup->eq, + cpup->irq, cpup->hdwq); + } + return cpup; + } + cpup++; + } + return 0; +} + +#ifdef CONFIG_X86 +/** + * lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded + * @phba: pointer to lpfc hba data structure. + * @cpu: CPU map index + * @phys_id: CPU package physical id + * @core_id: CPU core id + */ +static int +lpfc_find_hyper(struct lpfc_hba *phba, int cpu, + uint16_t phys_id, uint16_t core_id) +{ + struct lpfc_vector_map_info *cpup; + int idx; + + cpup = phba->sli4_hba.cpu_map; + for (idx = 0; idx < phba->sli4_hba.num_present_cpu; idx++) { + /* Does the cpup match the one we are looking for */ + if ((cpup->phys_id == phys_id) && + (cpup->core_id == core_id) && + (cpu != idx)) { + return 1; + } + cpup++; + } + return 0; +} +#endif + +/** * lpfc_cpu_affinity_check - Check vector CPU affinity mappings * @phba: pointer to lpfc hba data structure. + * @vectors: number of msix vectors allocated. * * The routine will figure out the CPU affinity assignment for every - * MSI-X vector allocated for the HBA. The hba_eq_hdl will be updated - * with a pointer to the CPU mask that defines ALL the CPUs this vector - * can be associated with. 
If the vector can be unquely associated with - * a single CPU, that CPU will be recorded in hba_eq_hdl[index].cpu. + * MSI-X vector allocated for the HBA. * In addition, the CPU to IO channel mapping will be calculated * and the phba->sli4_hba.cpu_map array will reflect this. */ static void -lpfc_cpu_affinity_check(struct lpfc_hba *phba) +lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) { + int i, j, idx, phys_id; + int max_phys_id, min_phys_id; + int max_core_id, min_core_id; struct lpfc_vector_map_info *cpup; - int cpu, idx; + int cpu, eqidx, hdwqidx, isr_avail; #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; #endif @@ -10471,6 +10729,12 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba) (sizeof(struct lpfc_vector_map_info) * phba->sli4_hba.num_present_cpu)); + max_phys_id = 0; + min_phys_id = 0xffff; + max_core_id = 0; + min_core_id = 0xffff; + phys_id = 0; + /* Update CPU map with physical id and core id of each CPU */ cpup = phba->sli4_hba.cpu_map; for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { @@ -10478,34 +10742,91 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba) cpuinfo = &cpu_data(cpu); cpup->phys_id = cpuinfo->phys_proc_id; cpup->core_id = cpuinfo->cpu_core_id; + cpup->hyper = lpfc_find_hyper(phba, cpu, + cpup->phys_id, cpup->core_id); #else /* No distinction between CPUs for other platforms */ cpup->phys_id = 0; - cpup->core_id = 0; + cpup->core_id = cpu; + cpup->hyper = 0; #endif + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3328 CPU physid %d coreid %d\n", cpup->phys_id, cpup->core_id); + + if (cpup->phys_id > max_phys_id) + max_phys_id = cpup->phys_id; + if (cpup->phys_id < min_phys_id) + min_phys_id = cpup->phys_id; + + if (cpup->core_id > max_core_id) + max_core_id = cpup->core_id; + if (cpup->core_id < min_core_id) + min_core_id = cpup->core_id; + cpup++; } - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { - cpup = &phba->sli4_hba.cpu_map[idx]; - cpup->irq = pci_irq_vector(phba->pcidev, idx); + /* + * If the number of IRQ vectors == number of CPUs, + * mapping is pretty simple: 1 to 1. + * This is the desired path if NVME is enabled. + */ + if (vectors == phba->sli4_hba.num_present_cpu) { + cpup = phba->sli4_hba.cpu_map; + for (idx = 0; idx < vectors; idx++) { + cpup->eq = idx; + cpup->hdwq = idx; + cpup->irq = pci_irq_vector(phba->pcidev, idx); + + /* Now affinitize to the selected CPU */ + irq_set_affinity_hint( + pci_irq_vector(phba->pcidev, idx), + get_cpu_mask(idx)); + irq_set_status_flags(cpup->irq, IRQ_NO_BALANCING); - /* For now assume vector N maps to CPU N */ - irq_set_affinity_hint(cpup->irq, get_cpu_mask(idx)); - cpup->hdwq = idx; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3336 Set Affinity: CPU %d " + "EQ %d irq %d\n", + idx, cpup->eq, + pci_irq_vector(phba->pcidev, idx)); + cpup++; + } + return; + } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3336 Set Affinity: CPU %d " - "hdwq %d irq %d\n", - cpu, cpup->hdwq, cpup->irq); + idx = 0; + isr_avail = 1; + eqidx = 0; + hdwqidx = 0; + + /* Mapping is more complicated for this case. Hardware Queues are + * assigned in a "ping pong" fashion, ping pong-ing between the + * available phys_id's. 
+ */ + while (idx < phba->sli4_hba.num_present_cpu) { + for (i = min_core_id; i <= max_core_id; i++) { + for (j = min_phys_id; j <= max_phys_id; j++) { + cpup = lpfc_find_cpu_map(phba, j, i, hdwqidx, + eqidx, isr_avail); + if (!cpup) + continue; + idx++; + hdwqidx++; + if (hdwqidx >= phba->cfg_hdw_queue) + hdwqidx = 0; + eqidx++; + if (eqidx >= phba->cfg_irq_chann) { + isr_avail = 0; + eqidx = 0; + } + } + } } return; } - /** * lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device * @phba: pointer to lpfc hba data structure. @@ -10524,7 +10845,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) char *name; /* Set up MSI-X multi-message vectors */ - vectors = phba->cfg_hdw_queue; + vectors = phba->cfg_irq_chann; rc = pci_alloc_irq_vectors(phba->pcidev, (phba->nvmet_support) ? 1 : 2, @@ -10545,7 +10866,6 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) phba->sli4_hba.hba_eq_hdl[index].idx = index; phba->sli4_hba.hba_eq_hdl[index].phba = phba; - atomic_set(&phba->sli4_hba.hba_eq_hdl[index].hba_eq_in_use, 1); rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_hba_intr_handler, 0, name, @@ -10558,17 +10878,16 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) } } - if (vectors != phba->cfg_hdw_queue) { + if (vectors != phba->cfg_irq_chann) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3238 Reducing IO channels to match number of " "MSI-X vectors, requested %d got %d\n", - phba->cfg_hdw_queue, vectors); - if (phba->cfg_hdw_queue > vectors) - phba->cfg_hdw_queue = vectors; + phba->cfg_irq_chann, vectors); + if (phba->cfg_irq_chann > vectors) + phba->cfg_irq_chann = vectors; if (phba->cfg_nvmet_mrq > vectors) phba->cfg_nvmet_mrq = vectors; } - lpfc_cpu_affinity_check(phba); return rc; @@ -10623,7 +10942,7 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba) return rc; } - for (index = 0; index < phba->cfg_hdw_queue; index++) { + for (index = 0; index < phba->cfg_irq_chann; index++) { phba->sli4_hba.hba_eq_hdl[index].idx = index; phba->sli4_hba.hba_eq_hdl[index].phba = phba; } @@ -10688,11 +11007,10 @@ lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) phba->intr_type = INTx; intr_mode = 0; - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { eqhdl = &phba->sli4_hba.hba_eq_hdl[idx]; eqhdl->idx = idx; eqhdl->phba = phba; - atomic_set(&eqhdl->hba_eq_in_use, 1); } } } @@ -10716,7 +11034,7 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba) int index; /* Free up MSI-X multi-message vectors */ - for (index = 0; index < phba->cfg_hdw_queue; index++) { + for (index = 0; index < phba->cfg_irq_chann; index++) { irq_set_affinity_hint( pci_irq_vector(phba->pcidev, index), NULL); @@ -12092,12 +12410,13 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) } /* Default to single EQ for non-MSI-X */ if (phba->intr_type != MSIX) { - phba->cfg_hdw_queue = 1; + phba->cfg_irq_chann = 1; if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { if (phba->nvmet_support) phba->cfg_nvmet_mrq = 1; } } + lpfc_cpu_affinity_check(phba, phba->cfg_irq_chann); /* Create SCSI host to the physical port */ error = lpfc_create_shost(phba); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index c9aacd56a449..9480257c5143 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -239,7 +239,7 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport, if (qidx) { str = "IO "; /* IO queue */ qhandle->index = ((qidx - 1) % - vport->phba->cfg_hdw_queue); + 
lpfc_nvme_template.max_hw_queues); } else { str = "ADM"; /* Admin queue */ qhandle->index = qidx; @@ -1546,14 +1546,12 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, } } + /* Lookup Hardware Queue index based on fcp_io_sched module parameter */ if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) { idx = lpfc_queue_info->index; } else { cpu = smp_processor_id(); - if (cpu < phba->cfg_hdw_queue) - idx = cpu; - else - idx = cpu % phba->cfg_hdw_queue; + idx = phba->sli4_hba.cpu_map[cpu].hdwq; } lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp, idx, expedite); @@ -2060,7 +2058,13 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) * allocate + 3, one for cmd, one for rsp and one for this alignment */ lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; - lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue; + + /* Advertise how many hw queues we support based on fcp_io_sched */ + if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) + lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue; + else + lpfc_nvme_template.max_hw_queues = + phba->sli4_hba.num_present_cpu; /* localport is allocated from the stack, but the registration * call allocates heap memory as well as the private area. @@ -2554,6 +2558,8 @@ lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba) * WQEs have been removed from the txcmplqs. */ for (i = 0; i < phba->cfg_hdw_queue; i++) { + if (!phba->sli4_hba.hdwq[i].nvme_wq) + continue; pring = phba->sli4_hba.hdwq[i].nvme_wq->pring; if (!pring) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 7b22cc995d7f..a827520789f1 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -692,10 +692,7 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, tag = blk_mq_unique_tag(cmnd->request); idx = blk_mq_unique_tag_to_hwq(tag); } else { - if (cpu < phba->cfg_hdw_queue) - idx = cpu; - else - idx = cpu % phba->cfg_hdw_queue; + idx = phba->sli4_hba.cpu_map[cpu].hdwq; } lpfc_cmd = lpfc_get_io_buf(phba, ndlp, idx, @@ -3650,6 +3647,9 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, struct Scsi_Host *shost; int idx; uint32_t logit = LOG_FCP; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + int cpu; +#endif /* Sanity check on return of outstanding command */ cmd = lpfc_cmd->pCmd; @@ -3660,6 +3660,13 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, if (phba->sli4_hba.hdwq) phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) { + cpu = smp_processor_id(); + if (cpu < LPFC_CHECK_CPU_CNT) + phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++; + } +#endif shost = cmd->device->host; lpfc_cmd->result = (pIocbOut->iocb.un.ulpWord[4] & IOERR_PARAM_MASK); @@ -4336,6 +4343,9 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) struct lpfc_io_buf *lpfc_cmd; struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); int err, idx; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + int cpu; +#endif rdata = lpfc_rport_data_from_scsi_device(cmnd->device); @@ -4450,6 +4460,16 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp); +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) { + cpu = smp_processor_id(); + if (cpu < LPFC_CHECK_CPU_CNT) { + struct lpfc_sli4_hdw_queue *hdwq = + &phba->sli4_hba.hdwq[lpfc_cmd->hdwq_no]; + hdwq->cpucheck_xmt_io[cpu]++; + } + } +#endif err = 
lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB); if (err) { diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index c0f0adccdea7..0cc81321643d 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -5586,7 +5586,7 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) LPFC_QUEUE_REARM); } - for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) + for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) sli4_hba->sli4_eq_release(qp[qidx].hba_eq, LPFC_QUEUE_REARM); } @@ -7878,7 +7878,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba) /* Find the eq associated with the mcq */ if (sli4_hba->hdwq) - for (eqidx = 0; eqidx < phba->cfg_hdw_queue; eqidx++) + for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) if (sli4_hba->hdwq[eqidx].hba_eq->queue_id == sli4_hba->mbx_cq->assoc_qid) { fpeq = sli4_hba->hdwq[eqidx].hba_eq; @@ -10058,12 +10058,9 @@ int lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, struct lpfc_iocbq *piocb, uint32_t flag) { - struct lpfc_hba_eq_hdl *hba_eq_hdl; struct lpfc_sli_ring *pring; - struct lpfc_queue *fpeq; - struct lpfc_eqe *eqe; unsigned long iflags; - int rc, idx; + int rc; if (phba->sli_rev == LPFC_SLI_REV4) { pring = lpfc_sli4_calc_ring(phba, piocb); @@ -10073,34 +10070,6 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, spin_lock_irqsave(&pring->ring_lock, iflags); rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag); spin_unlock_irqrestore(&pring->ring_lock, iflags); - - if (lpfc_fcp_look_ahead && (piocb->iocb_flag & LPFC_IO_FCP)) { - idx = piocb->hba_wqidx; - hba_eq_hdl = &phba->sli4_hba.hba_eq_hdl[idx]; - - if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) { - - /* Get associated EQ with this index */ - fpeq = phba->sli4_hba.hdwq[idx].hba_eq; - - /* Turn off interrupts from this EQ */ - phba->sli4_hba.sli4_eq_clr_intr(fpeq); - - /* - * Process all the events on FCP EQ - */ - while ((eqe = lpfc_sli4_eq_get(fpeq))) { - lpfc_sli4_hba_handle_eqe(phba, - eqe, idx); - fpeq->EQ_processed++; - } - - /* Always clear and re-arm the EQ */ - phba->sli4_hba.sli4_eq_release(fpeq, - LPFC_QUEUE_REARM); - } - atomic_inc(&hba_eq_hdl->hba_eq_in_use); - } } else { /* For now, SLI2/3 will still use hbalock */ spin_lock_irqsave(&phba->hbalock, iflags); @@ -13651,7 +13620,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, /* Save EQ associated with this CQ */ cq->assoc_qp = speq; - if (!queue_work(phba->wq, &cq->spwork)) + if (!queue_work_on(cq->chann, phba->wq, &cq->spwork)) lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0390 Cannot schedule soft IRQ " "for CQ eqcqid=%d, cqid=%d on CPU %d\n", @@ -14057,18 +14026,11 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, /* Get the reference to the corresponding CQ */ cqid = bf_get_le32(lpfc_eqe_resource_id, eqe); - /* First check for NVME/SCSI completion */ - if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) && - (cqid == phba->sli4_hba.hdwq[qidx].nvme_cq_map)) { - /* Process NVME / NVMET command completion */ - cq = phba->sli4_hba.hdwq[qidx].nvme_cq; - goto process_cq; - } - - if (cqid == phba->sli4_hba.hdwq[qidx].fcp_cq_map) { - /* Process FCP command completion */ - cq = phba->sli4_hba.hdwq[qidx].fcp_cq; - goto process_cq; + /* Use the fast lookup method first */ + if (cqid <= phba->sli4_hba.cq_max) { + cq = phba->sli4_hba.cq_lookup[cqid]; + if (cq) + goto work_cq; } /* Next check for NVMET completion */ @@ -14103,9 +14065,7 @@ process_cq: return; } - /* Save EQ 
@@ -14057,18 +14026,11 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
 	/* Get the reference to the corresponding CQ */
 	cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
 
-	/* First check for NVME/SCSI completion */
-	if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
-	    (cqid == phba->sli4_hba.hdwq[qidx].nvme_cq_map)) {
-		/* Process NVME / NVMET command completion */
-		cq = phba->sli4_hba.hdwq[qidx].nvme_cq;
-		goto process_cq;
-	}
-
-	if (cqid == phba->sli4_hba.hdwq[qidx].fcp_cq_map) {
-		/* Process FCP command completion */
-		cq = phba->sli4_hba.hdwq[qidx].fcp_cq;
-		goto process_cq;
+	/* Use the fast lookup method first */
+	if (cqid <= phba->sli4_hba.cq_max) {
+		cq = phba->sli4_hba.cq_lookup[cqid];
+		if (cq)
+			goto work_cq;
 	}
 
 	/* Next check for NVMET completion */
@@ -14103,9 +14065,7 @@ process_cq:
 		return;
 	}
 
-	/* Save EQ associated with this CQ */
-	cq->assoc_qp = phba->sli4_hba.hdwq[qidx].hba_eq;
-
+work_cq:
 	if (!queue_work_on(cq->chann, phba->wq, &cq->irqwork))
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"0363 Cannot schedule soft IRQ "
@@ -14233,15 +14193,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 	if (unlikely(!fpeq))
 		return IRQ_NONE;
 
-	if (lpfc_fcp_look_ahead) {
-		if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use))
-			phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-		else {
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-			return IRQ_NONE;
-		}
-	}
-
 	/* Check device state for handling interrupt */
 	if (unlikely(lpfc_intr_state_check(phba))) {
 		/* Check again for link_state with lock held */
@@ -14250,8 +14201,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 			/* Flush, clear interrupt, and rearm the EQ */
 			lpfc_sli4_eq_flush(phba, fpeq);
 			spin_unlock_irqrestore(&phba->hbalock, iflag);
-			if (lpfc_fcp_look_ahead)
-				atomic_inc(&hba_eq_hdl->hba_eq_in_use);
 			return IRQ_NONE;
 		}
 
@@ -14274,12 +14223,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 
 	if (unlikely(ecount == 0)) {
 		fpeq->EQ_no_entry++;
-
-		if (lpfc_fcp_look_ahead) {
-			atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-			return IRQ_NONE;
-		}
-
 		if (phba->intr_type == MSIX)
 			/* MSI-X treated interrupt served as no EQ share INT */
 			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
@@ -14289,9 +14232,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
 		return IRQ_NONE;
 	}
 
-	if (lpfc_fcp_look_ahead)
-		atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-
 	return IRQ_HANDLED;
 } /* lpfc_sli4_fp_intr_handler */
 
@@ -14329,7 +14269,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id)
 	/*
 	 * Invoke fast-path host attention interrupt handling as appropriate.
 	 */
-	for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+	for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
 		hba_irq_rc = lpfc_sli4_hba_intr_handler(irq,
 					&phba->sli4_hba.hba_eq_hdl[qidx]);
 		if (hba_irq_rc == IRQ_HANDLED)
@@ -14516,7 +14456,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 	union lpfc_sli4_cfg_shdr *shdr;
 	uint16_t dmult;
 
-	if (startq >= phba->cfg_hdw_queue)
+	if (startq >= phba->cfg_irq_chann)
 		return 0;
 
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -14530,7 +14470,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 	eq_delay = &mbox->u.mqe.un.eq_delay;
 
 	/* Calculate delay multiper from maximum interrupt per second */
-	result = imax / phba->cfg_hdw_queue;
+	result = imax / phba->cfg_irq_chann;
 	if (result > LPFC_DMULT_CONST || result == 0)
 		dmult = 0;
 	else
@@ -14539,7 +14479,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 		dmult = LPFC_DMULT_MAX;
 
 	cnt = 0;
-	for (qidx = startq; qidx < phba->cfg_hdw_queue; qidx++) {
+	for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
 		eq = phba->sli4_hba.hdwq[qidx].hba_eq;
 		if (!eq)
 			continue;
@@ -14557,7 +14497,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
 	val = phba->cfg_fcp_imax;
 	if (val) {
 		/* First, interrupts per sec per EQ */
-		val = phba->cfg_fcp_imax / phba->cfg_hdw_queue;
+		val = phba->cfg_fcp_imax / phba->cfg_irq_chann;
 
 		/* us delay between each interrupt */
 		val = LPFC_SEC_TO_USEC / val;
@@ -14852,10 +14792,13 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
 	cq->subtype = subtype;
 	cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
 	cq->assoc_qid = eq->queue_id;
+	cq->assoc_qp = eq;
 	cq->host_index = 0;
 	cq->hba_index = 0;
 	cq->entry_repost = LPFC_CQ_REPOST;
+	if (cq->queue_id > phba->sli4_hba.cq_max)
+		phba->sli4_hba.cq_max = cq->queue_id;
 
 out:
 	mempool_free(mbox, phba->mbox_mem_pool);
 	return status;
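The lpfc_sli4_hba_handle_eqe() hunk above replaces the per-type cq_map comparisons with a bounds-checked index into sli4_hba.cq_lookup[], and lpfc_cq_create() now records the largest CQ id seen in cq_max. The standalone sketch below shows the same id-to-pointer table idea; demo_cq and demo_build_cq_lookup are hypothetical names, and the real table holds struct lpfc_queue pointers.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical completion-queue descriptor; the driver's struct
 * lpfc_queue carries far more state.
 */
struct demo_cq {
	unsigned int queue_id;	/* firmware-assigned CQ id */
};

/* Build an O(1) id -> queue table sized by the largest id seen (the
 * driver tracks that as sli4_hba.cq_max at CQ creation time).
 */
static struct demo_cq **demo_build_cq_lookup(struct demo_cq *cqs, int ncq,
					     unsigned int *cq_max)
{
	struct demo_cq **tbl;
	unsigned int max = 0;
	int i;

	for (i = 0; i < ncq; i++)
		if (cqs[i].queue_id > max)
			max = cqs[i].queue_id;

	tbl = calloc(max + 1, sizeof(*tbl));
	if (!tbl)
		return NULL;
	for (i = 0; i < ncq; i++)
		tbl[cqs[i].queue_id] = &cqs[i];

	*cq_max = max;
	return tbl;
}

int main(void)
{
	struct demo_cq cqs[] = { { 5 }, { 9 }, { 12 } };
	unsigned int cq_max, cqid = 9;
	struct demo_cq **cq_lookup = demo_build_cq_lookup(cqs, 3, &cq_max);

	/* Fast path: bounds check then direct index, no per-type compares */
	if (cq_lookup && cqid <= cq_max && cq_lookup[cqid])
		printf("cqid %u -> queue %p\n", cqid, (void *)cq_lookup[cqid]);

	free(cq_lookup);
	return 0;
}

Trading one pointer per possible CQ id for an O(1) lookup is attractive here because the lookup sits directly in the interrupt handling path.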
@@ -15061,6 +15004,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 		cq->type = type;
 		cq->subtype = subtype;
 		cq->assoc_qid = eq->queue_id;
+		cq->assoc_qp = eq;
 		cq->host_index = 0;
 		cq->hba_index = 0;
 		cq->entry_repost = LPFC_CQ_REPOST;
@@ -15101,6 +15045,8 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
 	for (idx = 0; idx < numcq; idx++) {
 		cq = cqp[idx];
 		cq->queue_id = rc + idx;
+		if (cq->queue_id > phba->sli4_hba.cq_max)
+			phba->sli4_hba.cq_max = cq->queue_id;
 	}
 
 out:
@@ -19664,7 +19610,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
 	/* NVME_LS and NVME_LS ABTS requests. */
 	if (pwqe->iocb_flag & LPFC_IO_NVME_LS) {
 		pring = phba->sli4_hba.nvmels_wq->pring;
-		spin_lock_irqsave(&pring->ring_lock, iflags);
+		lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+					  qp, wq_access);
 		sglq = __lpfc_sli_get_els_sglq(phba, pwqe);
 		if (!sglq) {
 			spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19697,7 +19644,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
 		bf_set(wqe_cqid, &wqe->generic.wqe_com,
 		       qp->nvme_cq_map);
 
-		spin_lock_irqsave(&pring->ring_lock, iflags);
+		lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+					  qp, wq_access);
 		ret = lpfc_sli4_wq_put(wq, wqe);
 		if (ret) {
 			spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19724,7 +19672,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
 			pwqe->sli4_xritag);
 		bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
 
-		spin_lock_irqsave(&pring->ring_lock, iflags);
+		lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+					  qp, wq_access);
 		ret = lpfc_sli4_wq_put(wq, wqe);
 		if (ret) {
 			spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19872,18 +19821,20 @@ void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid)
 {
 	struct lpfc_pbl_pool *pbl_pool;
 	struct lpfc_pvt_pool *pvt_pool;
+	struct lpfc_sli4_hdw_queue *qp;
 	struct lpfc_io_buf *lpfc_ncmd;
 	struct lpfc_io_buf *lpfc_ncmd_next;
 	unsigned long iflag;
 	struct list_head tmp_list;
 	u32 tmp_count;
 
-	pbl_pool = &phba->sli4_hba.hdwq[hwqid].p_multixri_pool->pbl_pool;
-	pvt_pool = &phba->sli4_hba.hdwq[hwqid].p_multixri_pool->pvt_pool;
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	pbl_pool = &qp->p_multixri_pool->pbl_pool;
+	pvt_pool = &qp->p_multixri_pool->pvt_pool;
 	tmp_count = 0;
 
-	spin_lock_irqsave(&pbl_pool->lock, iflag);
-	spin_lock(&pvt_pool->lock);
+	lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag, qp, mv_to_pub_pool);
+	lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_from_pvt_pool);
 
 	if (pvt_pool->count > pvt_pool->low_watermark) {
 		/* Step 1: move (all - low_watermark) from pvt_pool
@@ -19936,7 +19887,8 @@ void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid)
  *   false - if the specified pbl_pool is empty or locked by someone else
 **/
 static bool
-_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_pbl_pool *pbl_pool,
+_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
+			  struct lpfc_pbl_pool *pbl_pool,
 			  struct lpfc_pvt_pool *pvt_pool, u32 count)
 {
 	struct lpfc_io_buf *lpfc_ncmd;
@@ -19948,7 +19900,7 @@ _lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_pbl_pool *pbl_pool,
 	if (ret) {
 		if (pbl_pool->count) {
 			/* Move a batch of XRIs from public to private pool */
-			spin_lock(&pvt_pool->lock);
+			lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_to_pvt_pool);
 			list_for_each_entry_safe(lpfc_ncmd,
 						 lpfc_ncmd_next,
 						 &pbl_pool->list,
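lpfc_move_xri_pvt_to_pbl() above returns everything above a hardware queue's private-pool low watermark to the shared public pool. The sketch below reduces that balancing step to plain singly linked lists; demo_pool, demo_buf and move_pvt_to_pbl are hypothetical names, and the real driver does this with list_head splices under the per-pool spinlocks shown in the hunk.

#include <stdio.h>

/* Hypothetical buffer and pool types standing in for the driver's
 * lpfc_pvt_pool/lpfc_pbl_pool lists.
 */
struct demo_buf {
	struct demo_buf *next;
};

struct demo_pool {
	struct demo_buf *head;
	unsigned int count;
	unsigned int low_watermark;
};

static void pool_push(struct demo_pool *p, struct demo_buf *b)
{
	b->next = p->head;
	p->head = b;
	p->count++;
}

static struct demo_buf *pool_pop(struct demo_pool *p)
{
	struct demo_buf *b = p->head;

	if (b) {
		p->head = b->next;
		p->count--;
	}
	return b;
}

/* Move everything above the private pool's low watermark back to the
 * shared public pool, mirroring the "Step 1" comment in the hunk above.
 */
static void move_pvt_to_pbl(struct demo_pool *pvt, struct demo_pool *pbl)
{
	while (pvt->count > pvt->low_watermark)
		pool_push(pbl, pool_pop(pvt));
}

int main(void)
{
	struct demo_pool pvt = { .low_watermark = 2 }, pbl = { 0 };
	struct demo_buf bufs[5];

	for (int i = 0; i < 5; i++)
		pool_push(&pvt, &bufs[i]);

	move_pvt_to_pbl(&pvt, &pbl);
	printf("pvt=%u pbl=%u\n", pvt.count, pbl.count);	/* pvt=2 pbl=3 */
	return 0;
}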
@@ -19990,16 +19942,18 @@ void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 count)
 	struct lpfc_multixri_pool *next_multixri_pool;
 	struct lpfc_pvt_pool *pvt_pool;
 	struct lpfc_pbl_pool *pbl_pool;
+	struct lpfc_sli4_hdw_queue *qp;
 	u32 next_hwqid;
 	u32 hwq_count;
 	int ret;
 
-	multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+	qp = &phba->sli4_hba.hdwq[hwqid];
+	multixri_pool = qp->p_multixri_pool;
 	pvt_pool = &multixri_pool->pvt_pool;
 	pbl_pool = &multixri_pool->pbl_pool;
 
 	/* Check if local pbl_pool is available */
-	ret = _lpfc_move_xri_pbl_to_pvt(phba, pbl_pool, pvt_pool, count);
+	ret = _lpfc_move_xri_pbl_to_pvt(phba, qp, pbl_pool, pvt_pool, count);
 	if (ret) {
 #ifdef LPFC_MXP_STAT
 		multixri_pool->local_pbl_hit_count++;
@@ -20022,7 +19976,7 @@ void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 count)
 
 		/* Check if the public free xri pool is available */
 		ret = _lpfc_move_xri_pbl_to_pvt(
-			phba, pbl_pool, pvt_pool, count);
+			phba, qp, pbl_pool, pvt_pool, count);
 
 		/* Exit while-loop if success or all hwqid are checked */
 	} while (!ret && next_hwqid != multixri_pool->rrb_next_hwqid);
@@ -20138,20 +20092,23 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
 		if ((pvt_pool->count < pvt_pool->low_watermark) ||
 		    (xri_owned < xri_limit &&
 		     pvt_pool->count < pvt_pool->high_watermark)) {
-			spin_lock_irqsave(&pvt_pool->lock, iflag);
+			lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag,
+						  qp, free_pvt_pool);
 			list_add_tail(&lpfc_ncmd->list,
 				      &pvt_pool->list);
 			pvt_pool->count++;
 			spin_unlock_irqrestore(&pvt_pool->lock, iflag);
 		} else {
-			spin_lock_irqsave(&pbl_pool->lock, iflag);
+			lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag,
+						  qp, free_pub_pool);
 			list_add_tail(&lpfc_ncmd->list,
 				      &pbl_pool->list);
 			pbl_pool->count++;
 			spin_unlock_irqrestore(&pbl_pool->lock, iflag);
 		}
 	} else {
-		spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag);
+		lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag,
+					  qp, free_xri);
 		list_add_tail(&lpfc_ncmd->list,
 			      &qp->lpfc_io_buf_list_put);
 		qp->put_io_bufs++;
@@ -20174,6 +20131,7 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
 **/
 static struct lpfc_io_buf *
 lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
+				  struct lpfc_sli4_hdw_queue *qp,
 				  struct lpfc_pvt_pool *pvt_pool,
 				  struct lpfc_nodelist *ndlp)
 {
@@ -20181,7 +20139,7 @@ lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
 	struct lpfc_io_buf *lpfc_ncmd_next;
 	unsigned long iflag;
 
-	spin_lock_irqsave(&pvt_pool->lock, iflag);
+	lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag, qp, alloc_pvt_pool);
 	list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
 				 &pvt_pool->list, list) {
 		if (lpfc_test_rrq_active(
@@ -20276,7 +20234,7 @@ lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba,
 		lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
 
 	/* Get one XRI from private free xri pool */
-	lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, pvt_pool, ndlp);
+	lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, qp, pvt_pool, ndlp);
 
 	if (lpfc_ncmd) {
 		lpfc_ncmd->hdwq = qp;
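The lpfc_release_io_buf() hunk above decides where a freed buffer goes using the private pool's low/high watermarks and the queue's XRI fair-share limit. The sketch below isolates just that decision; struct xri_state and pick_release_target are hypothetical, with field names chosen to mirror the condition in the hunk.

#include <stdio.h>

/* Hypothetical snapshot of one hardware queue's XRI accounting. */
struct xri_state {
	unsigned int pvt_count;		/* buffers in the private pool    */
	unsigned int low_watermark;	/* keep at least this many local  */
	unsigned int high_watermark;	/* never grow local beyond this   */
	unsigned int xri_owned;		/* XRIs this queue currently holds */
	unsigned int xri_limit;		/* fair-share limit for the queue  */
};

enum release_target { TO_PVT_POOL, TO_PBL_POOL };

/* Mirror of the condition above: keep the buffer locally if the private
 * pool is short, or if the queue is under its fair share and below the
 * high watermark; otherwise give it back to the shared public pool.
 */
static enum release_target pick_release_target(const struct xri_state *s)
{
	if (s->pvt_count < s->low_watermark ||
	    (s->xri_owned < s->xri_limit &&
	     s->pvt_count < s->high_watermark))
		return TO_PVT_POOL;
	return TO_PBL_POOL;
}

int main(void)
{
	struct xri_state s = {
		.pvt_count = 12, .low_watermark = 4, .high_watermark = 16,
		.xri_owned = 20, .xri_limit = 32,
	};

	printf("%s\n", pick_release_target(&s) == TO_PVT_POOL ?
	       "keep in private pool" : "return to public pool");
	return 0;
}

With the sample numbers the queue is under its fair-share limit and below the high watermark, so the buffer stays in the private pool.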
@@ -20349,11 +20307,13 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
 		lpfc_cmd = lpfc_get_io_buf_from_multixri_pools(
 			phba, ndlp, hwqid, expedite);
 	else {
-		spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag);
+		lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag,
+					  qp, alloc_xri_get);
 		if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
 			lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
 		if (!lpfc_cmd) {
-			spin_lock(&qp->io_buf_list_put_lock);
+			lpfc_qp_spin_lock(&qp->io_buf_list_put_lock,
+					  qp, alloc_xri_put);
 			list_splice(&qp->lpfc_io_buf_list_put,
 				    &qp->lpfc_io_buf_list_get);
 			qp->get_io_bufs += qp->put_io_bufs;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index f5e58cd4c6ac..c381f2cb4909 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -41,7 +41,7 @@
 
 /* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */
 #define LPFC_HBA_HDWQ_MIN	0
-#define LPFC_HBA_HDWQ_MAX	64
+#define LPFC_HBA_HDWQ_MAX	128
 #define LPFC_HBA_HDWQ_DEF	0
 
 /* Common buffer size to accomidate SCSI and NVME IO buffers */
@@ -166,16 +166,19 @@ struct lpfc_queue {
 	uint32_t assoc_qid;     /* Queue ID associated with, for CQ/WQ/MQ */
 	uint32_t host_index;	/* The host's index for putting or getting */
 	uint32_t hba_index;	/* The last known hba index for get or put */
+	uint32_t q_mode;
 
 	struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
 	struct lpfc_rqb *rqbp;	/* ptr to RQ buffers */
 
-	uint32_t q_mode;
 	uint16_t page_count;	/* Number of pages allocated for this queue */
 	uint16_t page_size;	/* size of page allocated for this queue */
 #define LPFC_EXPANDED_PAGE_SIZE	16384
 #define	LPFC_DEFAULT_PAGE_SIZE	4096
-	uint16_t chann;		/* IO channel this queue is associated with */
+	uint16_t chann;		/* Hardware Queue association WQ/CQ */
+				/* CPU affinity for EQ */
+#define LPFC_FIND_BY_EQ		0
+#define LPFC_FIND_BY_HDWQ	1
 	uint8_t db_format;
 #define LPFC_DB_RING_FORMAT	0x01
 #define LPFC_DB_LIST_FORMAT	0x02
@@ -431,11 +434,6 @@ struct lpfc_hba_eq_hdl {
 	uint32_t idx;
 	char handler_name[LPFC_SLI4_HANDLER_NAME_SZ];
 	struct lpfc_hba *phba;
-	atomic_t hba_eq_in_use;
-	struct cpumask *cpumask;
-	/* CPU affinitsed to or 0xffffffff if multiple */
-	uint32_t cpu;
-#define LPFC_MULTI_CPU_AFFINITY 0xffffffff
 };
 
 /*BB Credit recovery value*/
@@ -529,7 +527,9 @@ struct lpfc_vector_map_info {
 	uint16_t	phys_id;
 	uint16_t	core_id;
 	uint16_t	irq;
+	uint16_t	eq;
 	uint16_t	hdwq;
+	uint16_t	hyper;
 };
 #define LPFC_VECTOR_MAP_EMPTY	0xffff
 
@@ -593,6 +593,21 @@ struct lpfc_fc4_ctrl_stat {
 	u32 io_cmpls;
 };
 
+#ifdef LPFC_HDWQ_LOCK_STAT
+struct lpfc_lock_stat {
+	uint32_t alloc_xri_get;
+	uint32_t alloc_xri_put;
+	uint32_t free_xri;
+	uint32_t wq_access;
+	uint32_t alloc_pvt_pool;
+	uint32_t mv_from_pvt_pool;
+	uint32_t mv_to_pub_pool;
+	uint32_t mv_to_pvt_pool;
+	uint32_t free_pub_pool;
+	uint32_t free_pvt_pool;
+};
+#endif
+
 /* SLI4 HBA data structure entries */
 struct lpfc_sli4_hdw_queue {
 	/* Pointers to the constructed SLI4 queues */
@@ -626,6 +641,9 @@ struct lpfc_sli4_hdw_queue {
 	/* FC-4 Stats counters */
 	struct lpfc_fc4_ctrl_stat nvme_cstat;
 	struct lpfc_fc4_ctrl_stat scsi_cstat;
+#ifdef LPFC_HDWQ_LOCK_STAT
+	struct lpfc_lock_stat lock_conflict;
+#endif
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 #define LPFC_CHECK_CPU_CNT    128
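The new lpfc_lock_stat/lock_conflict counters above are driven by the trylock wrappers added in the next hunk, which spin on spin_trylock*() and bump the per-queue counter only on the first failed attempt of each acquisition. The userspace analogue below uses pthread spinlocks purely for illustration; demo_queue and demo_qp_spin_lock are hypothetical names.

#include <pthread.h>
#include <stdio.h>

/* Hypothetical userspace analogue of the driver's trylock-based
 * contention counters.
 */
struct demo_queue {
	pthread_spinlock_t lock;
	unsigned long lock_conflict;	/* like qp->lock_conflict.<lstat> */
};

#define demo_qp_spin_lock(qp)						\
	do {								\
		int only_once = 1;					\
		while (pthread_spin_trylock(&(qp)->lock) != 0) {	\
			if (only_once) {				\
				only_once = 0;				\
				(qp)->lock_conflict++;			\
			}						\
		}							\
	} while (0)

int main(void)
{
	struct demo_queue q = { .lock_conflict = 0 };

	pthread_spin_init(&q.lock, PTHREAD_PROCESS_PRIVATE);
	demo_qp_spin_lock(&q);		/* uncontended: counter stays 0 */
	pthread_spin_unlock(&q.lock);
	printf("conflicts: %lu\n", q.lock_conflict);
	pthread_spin_destroy(&q.lock);
	return 0;
}

Counting only the first miss keeps the statistic a contention-event count rather than a busy-wait iteration count, which makes the numbers comparable across hardware queues.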
@@ -635,6 +653,34 @@ struct lpfc_sli4_hdw_queue {
 #endif
 };
 
+#ifdef LPFC_HDWQ_LOCK_STAT
+/* compile time trylock stats */
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+	{ \
+	int only_once = 1; \
+	while (spin_trylock_irqsave(lock, flag) == 0) { \
+		if (only_once) { \
+			only_once = 0; \
+			qp->lock_conflict.lstat++; \
+		} \
+	} \
+	}
+#define lpfc_qp_spin_lock(lock, qp, lstat) \
+	{ \
+	int only_once = 1; \
+	while (spin_trylock(lock) == 0) { \
+		if (only_once) { \
+			only_once = 0; \
+			qp->lock_conflict.lstat++; \
+		} \
+	} \
+	}
+#else
+#define lpfc_qp_spin_lock_irqsave(lock, flag, qp, lstat) \
+	spin_lock_irqsave(lock, flag)
+#define lpfc_qp_spin_lock(lock, qp, lstat) spin_lock(lock)
+#endif
+
 struct lpfc_sli4_hba {
 	void __iomem *conf_regs_memmap_p; /* Kernel memory mapped address for
 					   * config space registers
@@ -764,6 +810,8 @@ struct lpfc_sli4_hba {
 	uint16_t nvmet_xri_cnt;
 	uint16_t nvmet_io_wait_cnt;
 	uint16_t nvmet_io_wait_total;
+	uint16_t cq_max;
+	struct lpfc_queue **cq_lookup;
 	struct list_head lpfc_els_sgl_list;
 	struct list_head lpfc_abts_els_sgl_list;
 	spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */