diff options
| author | Ranjan Kumar <ranjan.kumar@broadcom.com> | 2026-01-16 09:07:17 +0300 |
|---|---|---|
| committer | Martin K. Petersen <martin.petersen@oracle.com> | 2026-01-24 06:16:16 +0300 |
| commit | ec54b348f274fdd2bd32bbe74de6d62ae1a10a18 (patch) | |
| tree | 54ac03f687adb13c0f5f5c4f83f491b168f7ef91 | |
| parent | d0654335d90053f0573db293b60a93d855748f83 (diff) | |
| download | linux-ec54b348f274fdd2bd32bbe74de6d62ae1a10a18.tar.xz | |
scsi: mpi3mr: Record and report controller firmware faults
Capture and retain firmware fault codes and extended fault information
whenever the controller enters a fault state.
Maintain a persistent firmware fault counter, expose it via sysfs, and
generate uevents to aid userspace diagnostics and failure analysis.
Co-developed-by: Salomon Dushimirimana <salomondush@google.com>
Signed-off-by: Salomon Dushimirimana <salomondush@google.com>
Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Link: https://patch.msgid.link/20260116060719.32937-7-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
| -rw-r--r-- | drivers/scsi/mpi3mr/mpi3mr.h | 8 | ||||
| -rw-r--r-- | drivers/scsi/mpi3mr/mpi3mr_app.c | 24 | ||||
| -rw-r--r-- | drivers/scsi/mpi3mr/mpi3mr_fw.c | 103 |
3 files changed, 135 insertions, 0 deletions
diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 590c017acf25..58db60e13c13 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -1137,6 +1137,10 @@ struct scmd_priv { * @default_qcount: Total Default queues * @active_poll_qcount: Currently active poll queue count * @requested_poll_qcount: User requested poll queue count + * @fault_during_init: Indicates a firmware fault occurred during initialization + * @saved_fault_code: Firmware fault code captured at the time of failure + * @saved_fault_info: Additional firmware-provided fault information + * @fwfault_counter: Count of firmware faults detected by the driver * @bsg_dev: BSG device structure * @bsg_queue: Request queue for BSG device * @stop_bsgs: Stop BSG request flag @@ -1340,6 +1344,10 @@ struct mpi3mr_ioc { u16 default_qcount; u16 active_poll_qcount; u16 requested_poll_qcount; + u8 fault_during_init; + u32 saved_fault_code; + u32 saved_fault_info[3]; + u64 fwfault_counter; struct device bsg_dev; struct request_queue *bsg_queue; diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 37cca0573ddc..1353a8ff9c85 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -3255,6 +3255,29 @@ adp_state_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RO(adp_state); +/** + * fwfault_count_show() - SysFS callback to show firmware fault count + * @dev: class device + * @attr: Device attribute + * @buf: Buffer to copy data into + * + * Displays the total number of firmware faults detected by the driver + * since the controller was initialized. + * + * Return: Number of bytes written to @buf + */ + +static ssize_t +fwfault_count_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct mpi3mr_ioc *mrioc = shost_priv(shost); + + return snprintf(buf, PAGE_SIZE, "%llu\n", mrioc->fwfault_counter); +} +static DEVICE_ATTR_RO(fwfault_count); + static struct attribute *mpi3mr_host_attrs[] = { &dev_attr_version_fw.attr, &dev_attr_fw_queue_depth.attr, @@ -3263,6 +3286,7 @@ static struct attribute *mpi3mr_host_attrs[] = { &dev_attr_reply_qfull_count.attr, &dev_attr_logging_level.attr, &dev_attr_adp_state.attr, + &dev_attr_fwfault_count.attr, NULL, }; diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 178738850541..0d7515e7144b 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -1109,6 +1109,31 @@ void mpi3mr_print_fault_info(struct mpi3mr_ioc *mrioc) } /** + * mpi3mr_save_fault_info - Save fault information + * @mrioc: Adapter instance reference + * + * Save the controller fault information if there is a + * controller fault. + * + * Return: Nothing. + */ +static void mpi3mr_save_fault_info(struct mpi3mr_ioc *mrioc) +{ + u32 ioc_status, i; + + ioc_status = readl(&mrioc->sysif_regs->ioc_status); + + if (ioc_status & MPI3_SYSIF_IOC_STATUS_FAULT) { + mrioc->saved_fault_code = readl(&mrioc->sysif_regs->fault) & + MPI3_SYSIF_FAULT_CODE_MASK; + for (i = 0; i < 3; i++) { + mrioc->saved_fault_info[i] = + readl(&mrioc->sysif_regs->fault_info[i]); + } + } +} + +/** * mpi3mr_get_iocstate - Get IOC State * @mrioc: Adapter instance reference * @@ -1250,6 +1275,60 @@ out_failed: } /** + * mpi3mr_fault_uevent_emit - Emit uevent for any controller + * fault + * @mrioc: Pointer to the mpi3mr_ioc structure for the controller instance + * + * This function is invoked when the controller undergoes any + * type of fault. + */ + +static void mpi3mr_fault_uevent_emit(struct mpi3mr_ioc *mrioc) +{ + struct kobj_uevent_env *env; + int ret; + + env = kzalloc(sizeof(*env), GFP_KERNEL); + if (!env) + return; + + ret = add_uevent_var(env, "DRIVER=%s", mrioc->driver_name); + if (ret) + goto out_free; + + ret = add_uevent_var(env, "IOC_ID=%u", mrioc->id); + if (ret) + goto out_free; + + ret = add_uevent_var(env, "FAULT_CODE=0x%08x", + mrioc->saved_fault_code); + if (ret) + goto out_free; + + ret = add_uevent_var(env, "FAULT_INFO0=0x%08x", + mrioc->saved_fault_info[0]); + if (ret) + goto out_free; + + ret = add_uevent_var(env, "FAULT_INFO1=0x%08x", + mrioc->saved_fault_info[1]); + if (ret) + goto out_free; + + ret = add_uevent_var(env, "FAULT_INFO2=0x%08x", + mrioc->saved_fault_info[2]); + if (ret) + goto out_free; + + kobject_uevent_env(&mrioc->shost->shost_gendev.kobj, + KOBJ_CHANGE, env->envp); + +out_free: + kfree(env); + +} + +/** * mpi3mr_clear_reset_history - clear reset history * @mrioc: Adapter instance reference * @@ -1480,6 +1559,10 @@ retry_bring_ioc_ready: if (ioc_state == MRIOC_STATE_FAULT) { timeout = MPI3_SYSIF_DIAG_SAVE_TIMEOUT * 10; mpi3mr_print_fault_info(mrioc); + mpi3mr_save_fault_info(mrioc); + mrioc->fault_during_init = 1; + mrioc->fwfault_counter++; + do { host_diagnostic = readl(&mrioc->sysif_regs->host_diagnostic); @@ -2577,6 +2660,9 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code) mpi3mr_set_trigger_data_in_all_hdb(mrioc, MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0); mpi3mr_print_fault_info(mrioc); + mpi3mr_save_fault_info(mrioc); + mrioc->fault_during_init = 1; + mrioc->fwfault_counter++; return; } @@ -2594,6 +2680,10 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code) break; msleep(100); } while (--timeout); + + mpi3mr_save_fault_info(mrioc); + mrioc->fault_during_init = 1; + mrioc->fwfault_counter++; } /** @@ -2770,6 +2860,11 @@ static void mpi3mr_watchdog_work(struct work_struct *work) union mpi3mr_trigger_data trigger_data; u16 reset_reason = MPI3MR_RESET_FROM_FAULT_WATCH; + if (mrioc->fault_during_init) { + mpi3mr_fault_uevent_emit(mrioc); + mrioc->fault_during_init = 0; + } + if (mrioc->reset_in_progress || mrioc->pci_err_recovery) return; @@ -2842,6 +2937,10 @@ static void mpi3mr_watchdog_work(struct work_struct *work) goto schedule_work; } + mpi3mr_save_fault_info(mrioc); + mpi3mr_fault_uevent_emit(mrioc); + mrioc->fwfault_counter++; + switch (trigger_data.fault) { case MPI3_SYSIF_FAULT_CODE_COMPLETE_RESET_NEEDED: case MPI3_SYSIF_FAULT_CODE_POWER_CYCLE_REQUIRED: @@ -5478,6 +5577,10 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc, break; msleep(100); } while (--timeout); + + mpi3mr_save_fault_info(mrioc); + mpi3mr_fault_uevent_emit(mrioc); + mrioc->fwfault_counter++; mpi3mr_set_trigger_data_in_all_hdb(mrioc, MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0); } |
