diff options
author | Suganath Prabu S <suganath-prabu.subramani@broadcom.com> | 2021-05-18 08:16:25 +0300 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2021-06-01 05:48:20 +0300 |
commit | a0815c45c89f544861eae55d85ccee6b1b1451e8 (patch) | |
tree | a757b8631a87eabc583d13fccc3c499430f60ac9 /drivers/scsi/mpt3sas | |
parent | 19a622c39a9d497d3c06ffe9068ee4c7bbd2bdcc (diff) | |
download | linux-a0815c45c89f544861eae55d85ccee6b1b1451e8.tar.xz |
scsi: mpt3sas: Handle firmware faults during second half of IOC init
If a firmware fault occurs while scanning the devices during IOC
initialization then the driver issues the hard reset operation to recover
the IOC. However, the driver is not issuing a Port enable request
messageĀ as part of hard reset operation during IOC initialization. Due to
this, the driver will not receive get any device discovery-related events
and hence devices will not be accessible.
Teach the driver to gracefully handle firmware faults while scanning for
target devices during IOC initialization. Make the driver issue a port
enable request message as part of hard reset operation. This permits
receiving device discovery-related events from the firmware after the hard
reset operation completes.
Link: https://lore.kernel.org/r/20210518051625.1596742-4-suganath-prabu.subramani@broadcom.com
Signed-off-by: Suganath Prabu S <suganath-prabu.subramani@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/mpt3sas')
-rw-r--r-- | drivers/scsi/mpt3sas/mpt3sas_base.c | 8 | ||||
-rw-r--r-- | drivers/scsi/mpt3sas/mpt3sas_base.h | 1 | ||||
-rw-r--r-- | drivers/scsi/mpt3sas/mpt3sas_scsih.c | 152 |
3 files changed, 145 insertions, 16 deletions
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 4500d53f09f7..bc4ed3ed4b9a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7205,7 +7205,7 @@ mpt3sas_port_enable_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, if (ioc_status != MPI2_IOCSTATUS_SUCCESS) ioc->port_enable_failed = 1; - if (ioc->is_driver_loading) { + if (ioc->port_enable_cmds.status & MPT3_CMD_COMPLETE_ASYNC) { if (ioc_status == MPI2_IOCSTATUS_SUCCESS) { mpt3sas_port_enable_complete(ioc); return 1; @@ -7214,6 +7214,7 @@ mpt3sas_port_enable_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, ioc->start_scan = 0; return 1; } + ioc->port_enable_cmds.status &= ~MPT3_CMD_COMPLETE_ASYNC; } complete(&ioc->port_enable_cmds.done); return 1; @@ -7308,6 +7309,7 @@ mpt3sas_port_enable(struct MPT3SAS_ADAPTER *ioc) } ioc->drv_internal_flags |= MPT_DRV_INTERNAL_FIRST_PE_ISSUED; ioc->port_enable_cmds.status = MPT3_CMD_PENDING; + ioc->port_enable_cmds.status |= MPT3_CMD_COMPLETE_ASYNC; mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); ioc->port_enable_cmds.smid = smid; memset(mpi_request, 0, sizeof(Mpi2PortEnableRequest_t)); @@ -7856,7 +7858,7 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc) if (r) return r; - if (ioc->is_driver_loading) { + if (!ioc->shost_recovery) { if (ioc->is_warpdrive && ioc->manu_pg10.OEMIdentifier == 0x80) { @@ -8276,8 +8278,6 @@ _base_clear_outstanding_mpt_commands(struct MPT3SAS_ADAPTER *ioc) ioc->start_scan_failed = MPI2_IOCSTATUS_INTERNAL_ERROR; ioc->start_scan = 0; - ioc->port_enable_cmds.status = - MPT3_CMD_NOT_USED; } else { complete(&ioc->port_enable_cmds.done); } diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index c7b001618fc0..d4834c8ee9c0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -500,6 +500,7 @@ struct MPT3SAS_DEVICE { #define MPT3_CMD_PENDING 0x0002 /* pending */ #define MPT3_CMD_REPLY_VALID 0x0004 /* reply is valid */ #define MPT3_CMD_RESET 0x0008 /* host reset dropped the command */ +#define MPT3_CMD_COMPLETE_ASYNC 0x0010 /* tells whether cmd completes in same thread or not */ /** * struct _internal_cmd - internal commands struct diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 79e34b5090b1..d70ae57d897f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -78,6 +78,7 @@ static void _scsih_pcie_device_remove_from_sml(struct MPT3SAS_ADAPTER *ioc, static void _scsih_pcie_check_device(struct MPT3SAS_ADAPTER *ioc, u16 handle); static u8 _scsih_check_for_pending_tm(struct MPT3SAS_ADAPTER *ioc, u16 smid); +static void _scsih_complete_devices_scanning(struct MPT3SAS_ADAPTER *ioc); /* global parameters */ LIST_HEAD(mpt3sas_ioc_list); @@ -3631,8 +3632,6 @@ _scsih_error_recovery_delete_devices(struct MPT3SAS_ADAPTER *ioc) { struct fw_event_work *fw_event; - if (ioc->is_driver_loading) - return; fw_event = alloc_fw_event_work(0); if (!fw_event) return; @@ -3693,6 +3692,14 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc) if ((list_empty(&ioc->fw_event_list) && !ioc->current_event) || !ioc->firmware_event_thread) return; + /* + * Set current running event as ignore, so that + * current running event will exit quickly. + * As diag reset has occurred it is of no use + * to process remaining stale event data entries. + */ + if (ioc->shost_recovery && ioc->current_event) + ioc->current_event->ignore = 1; ioc->fw_events_cleanup = 1; while ((fw_event = dequeue_next_fw_event(ioc)) || @@ -3720,6 +3727,19 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc) } /* + * Driver has to clear ioc->start_scan flag when + * it is cleaning up MPT3SAS_PORT_ENABLE_COMPLETE, + * otherwise scsi_scan_host() API waits for the + * 5 minute timer to expire. If we exit from + * scsi_scan_host() early then we can issue the + * new port enable request as part of current diag reset. + */ + if (fw_event->event == MPT3SAS_PORT_ENABLE_COMPLETE) { + ioc->port_enable_cmds.status |= MPT3_CMD_RESET; + ioc->start_scan = 0; + } + + /* * Wait on the fw_event to complete. If this returns 1, then * the event was never executed, and we need a put for the * reference the work had on the fw_event. @@ -10140,6 +10160,17 @@ _scsih_remove_unresponding_devices(struct MPT3SAS_ADAPTER *ioc) * owner for the reference the list had on any object we prune. */ spin_lock_irqsave(&ioc->sas_device_lock, flags); + + /* + * Clean up the sas_device_init_list list as + * driver goes for fresh scan as part of diag reset. + */ + list_for_each_entry_safe(sas_device, sas_device_next, + &ioc->sas_device_init_list, list) { + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } + list_for_each_entry_safe(sas_device, sas_device_next, &ioc->sas_device_list, list) { if (!sas_device->responding) @@ -10161,6 +10192,16 @@ _scsih_remove_unresponding_devices(struct MPT3SAS_ADAPTER *ioc) ioc_info(ioc, "Removing unresponding devices: pcie end-devices\n"); INIT_LIST_HEAD(&head); spin_lock_irqsave(&ioc->pcie_device_lock, flags); + /* + * Clean up the pcie_device_init_list list as + * driver goes for fresh scan as part of diag reset. + */ + list_for_each_entry_safe(pcie_device, pcie_device_next, + &ioc->pcie_device_init_list, list) { + list_del_init(&pcie_device->list); + pcie_device_put(pcie_device); + } + list_for_each_entry_safe(pcie_device, pcie_device_next, &ioc->pcie_device_list, list) { if (!pcie_device->responding) @@ -10563,8 +10604,7 @@ void mpt3sas_scsih_reset_done_handler(struct MPT3SAS_ADAPTER *ioc) { dtmprintk(ioc, ioc_info(ioc, "%s: MPT3_IOC_DONE_RESET\n", __func__)); - if ((!ioc->is_driver_loading) && !(disable_discovery > 0 && - !ioc->sas_hba.num_phys)) { + if (!(disable_discovery > 0 && !ioc->sas_hba.num_phys)) { if (ioc->multipath_on_hba) { _scsih_sas_port_refresh(ioc); _scsih_update_vphys_after_reset(ioc); @@ -10619,6 +10659,18 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event) _scsih_del_dirty_vphy(ioc); _scsih_del_dirty_port_entries(ioc); _scsih_scan_for_devices_after_reset(ioc); + /* + * If diag reset has occurred during the driver load + * then driver has to complete the driver load operation + * by executing the following items: + *- Register the devices from sas_device_init_list to SML + *- clear is_driver_loading flag, + *- start the watchdog thread. + * In happy driver load path, above things are taken care of when + * driver executes scsih_scan_finished(). + */ + if (ioc->is_driver_loading) + _scsih_complete_devices_scanning(ioc); _scsih_set_nvme_max_shutdown_latency(ioc); break; case MPT3SAS_PORT_ENABLE_COMPLETE: @@ -10764,11 +10816,23 @@ mpt3sas_scsih_event_callback(struct MPT3SAS_ADAPTER *ioc, u8 msix_index, _scsih_check_topo_delete_events(ioc, (Mpi2EventDataSasTopologyChangeList_t *) mpi_reply->EventData); + /* + * No need to add the topology change list + * event to fw event work queue when + * diag reset is going on. Since during diag + * reset driver scan the devices by reading + * sas device page0's not by processing the + * events. + */ + if (ioc->shost_recovery) + return 1; break; case MPI2_EVENT_PCIE_TOPOLOGY_CHANGE_LIST: _scsih_check_pcie_topo_remove_events(ioc, (Mpi26EventDataPCIeTopologyChangeList_t *) mpi_reply->EventData); + if (ioc->shost_recovery) + return 1; break; case MPI2_EVENT_IR_CONFIGURATION_CHANGE_LIST: _scsih_check_ir_config_unhide_events(ioc, @@ -11284,13 +11348,27 @@ _scsih_probe_boot_devices(struct MPT3SAS_ADAPTER *ioc) if (channel == RAID_CHANNEL) { raid_device = device; + /* + * If this boot vd is already registered with SML then + * no need to register it again as part of device scanning + * after diag reset during driver load operation. + */ + if (raid_device->starget) + return; rc = scsi_add_device(ioc->shost, RAID_CHANNEL, raid_device->id, 0); if (rc) _scsih_raid_device_remove(ioc, raid_device); } else if (channel == PCIE_CHANNEL) { - spin_lock_irqsave(&ioc->pcie_device_lock, flags); pcie_device = device; + /* + * If this boot NVMe device is already registered with SML then + * no need to register it again as part of device scanning + * after diag reset during driver load operation. + */ + if (pcie_device->starget) + return; + spin_lock_irqsave(&ioc->pcie_device_lock, flags); tid = pcie_device->id; list_move_tail(&pcie_device->list, &ioc->pcie_device_list); spin_unlock_irqrestore(&ioc->pcie_device_lock, flags); @@ -11298,8 +11376,15 @@ _scsih_probe_boot_devices(struct MPT3SAS_ADAPTER *ioc) if (rc) _scsih_pcie_device_remove(ioc, pcie_device); } else { - spin_lock_irqsave(&ioc->sas_device_lock, flags); sas_device = device; + /* + * If this boot sas/sata device is already registered with SML + * then no need to register it again as part of device scanning + * after diag reset during driver load operation. + */ + if (sas_device->starget) + return; + spin_lock_irqsave(&ioc->sas_device_lock, flags); handle = sas_device->handle; sas_address_parent = sas_device->sas_address_parent; sas_address = sas_device->sas_address; @@ -11598,6 +11683,25 @@ scsih_scan_start(struct Scsi_Host *shost) } /** + * _scsih_complete_devices_scanning - add the devices to sml and + * complete ioc initialization. + * @ioc: per adapter object + * + * Return nothing. + */ +static void _scsih_complete_devices_scanning(struct MPT3SAS_ADAPTER *ioc) +{ + + if (ioc->wait_for_discovery_to_complete) { + ioc->wait_for_discovery_to_complete = 0; + _scsih_probe_devices(ioc); + } + + mpt3sas_base_start_watchdog(ioc); + ioc->is_driver_loading = 0; +} + +/** * scsih_scan_finished - scsi lld callback for .scan_finished * @shost: SCSI host pointer * @time: elapsed time of the scan in jiffies @@ -11610,6 +11714,8 @@ static int scsih_scan_finished(struct Scsi_Host *shost, unsigned long time) { struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); + u32 ioc_state; + int issue_hard_reset = 0; if (disable_discovery > 0) { ioc->is_driver_loading = 0; @@ -11624,9 +11730,30 @@ scsih_scan_finished(struct Scsi_Host *shost, unsigned long time) return 1; } - if (ioc->start_scan) + if (ioc->start_scan) { + ioc_state = mpt3sas_base_get_iocstate(ioc, 0); + if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { + mpt3sas_print_fault_code(ioc, ioc_state & + MPI2_DOORBELL_DATA_MASK); + issue_hard_reset = 1; + goto out; + } else if ((ioc_state & MPI2_IOC_STATE_MASK) == + MPI2_IOC_STATE_COREDUMP) { + mpt3sas_base_coredump_info(ioc, ioc_state & + MPI2_DOORBELL_DATA_MASK); + mpt3sas_base_wait_for_coredump_completion(ioc, __func__); + issue_hard_reset = 1; + goto out; + } return 0; + } + if (ioc->port_enable_cmds.status & MPT3_CMD_RESET) { + ioc_info(ioc, + "port enable: aborted due to diag reset\n"); + ioc->port_enable_cmds.status = MPT3_CMD_NOT_USED; + goto out; + } if (ioc->start_scan_failed) { ioc_info(ioc, "port enable: FAILED with (ioc_status=0x%08x)\n", ioc->start_scan_failed); @@ -11638,13 +11765,14 @@ scsih_scan_finished(struct Scsi_Host *shost, unsigned long time) ioc_info(ioc, "port enable: SUCCESS\n"); ioc->port_enable_cmds.status = MPT3_CMD_NOT_USED; + _scsih_complete_devices_scanning(ioc); - if (ioc->wait_for_discovery_to_complete) { - ioc->wait_for_discovery_to_complete = 0; - _scsih_probe_devices(ioc); +out: + if (issue_hard_reset) { + ioc->port_enable_cmds.status = MPT3_CMD_NOT_USED; + if (mpt3sas_base_hard_reset_handler(ioc, SOFT_RESET)) + ioc->is_driver_loading = 0; } - mpt3sas_base_start_watchdog(ioc); - ioc->is_driver_loading = 0; return 1; } |