scsi: lpfc: Fix driver handling of nvme resources during unload

During driver unload, the driver may crash due to NULL pointers. The NULL pointers were due to the driver not protecting itself sufficiently during some of the teardown paths. Additionally, the driver was not waiting for and cleanup up nvme io resources. As such, the driver wasn't making the callbacks to the transport, stalling the transports association teardown. This patch waits for io clean up before tearding down and adds checks for possible NULL pointers. Cc: <stable@vger.kernel.org> # 4.12+ Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <james.smart@broadcom.com> Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
author: James Smart <jsmart2021@gmail.com> 2017-11-21 03:00:42 +0300
committer: Martin K. Petersen <martin.petersen@oracle.com> 2017-12-05 04:32:55 +0300
commit: c3725bdcdf28f5e2f3a78b69e9dd010f49284a09 (patch)
tree: db9c5510bed66f620cadef80a72864de7ecf3123 /drivers/scsi/lpfc/lpfc_nvme.c
parent: 3386f4bdd243ad5a9094d390297602543abe9902 (diff)
download: linux-c3725bdcdf28f5e2f3a78b69e9dd010f49284a09.tar.xz
1 files changed, 85 insertions, 11 deletions
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 49f241d30c17..1f02cf7e9d00 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -88,6 +88,9 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport,
 	struct lpfc_nvme_qhandle *qhandle;
 	char *str;
 
+	if (!pnvme_lport->private)
+		return -ENOMEM;
+
 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
 	vport = lport->vport;
 	qhandle = kzalloc(sizeof(struct lpfc_nvme_qhandle), GFP_KERNEL);
@@ -140,6 +143,9 @@ lpfc_nvme_delete_queue(struct nvme_fc_local_port *pnvme_lport,
 	struct lpfc_nvme_lport *lport;
 	struct lpfc_vport *vport;
 
+	if (!pnvme_lport->private)
+		return;
+
 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
 	vport = lport->vport;
 
@@ -1265,13 +1271,29 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	struct lpfc_nvme_buf *lpfc_ncmd;
 	struct lpfc_nvme_rport *rport;
 	struct lpfc_nvme_qhandle *lpfc_queue_info;
-	struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private;
+	struct lpfc_nvme_fcpreq_priv *freqpriv;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 	uint64_t start = 0;
 #endif
 
+	/* Validate pointers. LLDD fault handling with transport does
+	 * have timing races.
+	 */
 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
+	if (unlikely(!lport)) {
+		ret = -EINVAL;
+		goto out_fail;
+	}
+
 	vport = lport->vport;
+
+	if (unlikely(!hw_queue_handle)) {
+		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
+				 "6129 Fail Abort, NULL hw_queue_handle\n");
+		ret = -EINVAL;
+		goto out_fail;
+	}
+
 	phba = vport->phba;
 
 	if (vport->load_flag & FC_UNLOADING) {
@@ -1284,13 +1306,9 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 		goto out_fail;
 	}
 
-	/* Validate pointers. */
-	if (!pnvme_lport || !pnvme_rport || !freqpriv) {
-		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR | LOG_NODE,
-				 "6117 No Send:IO submit ptrs NULL, lport %p, "
-				 "rport %p fcreq_priv %p\n",
-				 pnvme_lport, pnvme_rport, freqpriv);
-		ret = -ENODEV;
+	freqpriv = pnvme_fcreq->private;
+	if (unlikely(!freqpriv)) {
+		ret = -EINVAL;
 		goto out_fail;
 	}
 
@@ -1497,20 +1515,34 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 	struct lpfc_nvme_lport *lport;
 	struct lpfc_vport *vport;
 	struct lpfc_hba *phba;
-	struct lpfc_nvme_rport *rport;
 	struct lpfc_nvme_buf *lpfc_nbuf;
 	struct lpfc_iocbq *abts_buf;
 	struct lpfc_iocbq *nvmereq_wqe;
-	struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private;
+	struct lpfc_nvme_fcpreq_priv *freqpriv;
 	union lpfc_wqe *abts_wqe;
 	unsigned long flags;
 	int ret_val;
 
+	/* Validate pointers. LLDD fault handling with transport does
+	 * have timing races.
+	 */
 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
-	rport = (struct lpfc_nvme_rport *)pnvme_rport->private;
+	if (unlikely(!lport))
+		return;
+
 	vport = lport->vport;
+
+	if (unlikely(!hw_queue_handle)) {
+		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
+				 "6129 Fail Abort, HW Queue Handle NULL.\n");
+		return;
+	}
+
 	phba = vport->phba;
+	freqpriv = pnvme_fcreq->private;
 
+	if (unlikely(!freqpriv))
+		return;
 	if (vport->load_flag & FC_UNLOADING)
 		return;
 
@@ -2677,3 +2709,45 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
 			"6312 XRI Aborted xri x%x not found\n", xri);
 
 }
+
+/**
+ * lpfc_nvme_wait_for_io_drain - Wait for all NVME wqes to complete
+ * @phba: Pointer to HBA context object.
+ *
+ * This function flushes all wqes in the nvme rings and frees all resources
+ * in the txcmplq. This function does not issue abort wqes for the IO
+ * commands in txcmplq, they will just be returned with
+ * IOERR_SLI_DOWN. This function is invoked with EEH when device's PCI
+ * slot has been permanently disabled.
+ **/
+void
+lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
+{
+	struct lpfc_sli_ring  *pring;
+	u32 i, wait_cnt = 0;
+
+	if (phba->sli_rev < LPFC_SLI_REV4)
+		return;
+
+	/* Cycle through all NVME rings and make sure all outstanding
+	 * WQEs have been removed from the txcmplqs.
+	 */
+	for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
+		pring = phba->sli4_hba.nvme_wq[i]->pring;
+
+		/* Retrieve everything on the txcmplq */
+		while (!list_empty(&pring->txcmplq)) {
+			msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1);
+			wait_cnt++;
+
+			/* The sleep is 10mS.  Every ten seconds,
+			 * dump a message.  Something is wrong.
+			 */
+			if ((wait_cnt % 1000) == 0) {
+				lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
+						"6178 NVME IO not empty, "
+						"cnt %d\n", wait_cnt);
+			}
+		}
+	}
+}
author	James Smart <jsmart2021@gmail.com>	2017-11-21 03:00:42 +0300
committer	Martin K. Petersen <martin.petersen@oracle.com>	2017-12-05 04:32:55 +0300
commit	c3725bdcdf28f5e2f3a78b69e9dd010f49284a09 (patch)
tree	db9c5510bed66f620cadef80a72864de7ecf3123 /drivers/scsi/lpfc/lpfc_nvme.c
parent	3386f4bdd243ad5a9094d390297602543abe9902 (diff)
download	linux-c3725bdcdf28f5e2f3a78b69e9dd010f49284a09.tar.xz