diff options
author | Arun Easi <aeasi@marvell.com> | 2020-03-31 13:40:13 +0300 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2020-04-24 19:15:47 +0300 |
commit | cbb01c2f2f630f1497f703c51ff21538ae2d86b8 (patch) | |
tree | 532a18535dc03b59d781bc247a364051b62cd903 /drivers/scsi/qla2xxx/qla_tmpl.c | |
parent | e304142c308979fff404a38118878679ca58a2ba (diff) | |
download | linux-cbb01c2f2f630f1497f703c51ff21538ae2d86b8.tar.xz |
scsi: qla2xxx: Fix MPI failure AEN (8200) handling
Today, upon an MPI failure AEN, on top of collecting an MPI dump, a regular
firmware dump is also taken and then chip reset. This is disruptive to IOs
and not required. Make the firmware dump collection, followed by chip
reset, optional (not done by default).
Firmware dump buffer and MPI dump buffer are independent of each
other with this change and each can have dump that was taken at two
different times for two different issues. The MPI dump is saved in a
separate buffer and is retrieved differently from firmware dump.
To collect full dump on MPI failure AEN, a module parameter is
introduced:
ql2xfulldump_on_mpifail (default: 0)
Link: https://lore.kernel.org/r/20200331104015.24868-2-njavali@marvell.com
Reported-by: kbuild test robot <lkp@intel.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Arun Easi <aeasi@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/qla2xxx/qla_tmpl.c')
-rw-r--r-- | drivers/scsi/qla2xxx/qla_tmpl.c | 121 |
1 files changed, 99 insertions, 22 deletions
diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 6aeb1c3fb7a8..342363862434 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -12,6 +12,33 @@ #define IOBASE(vha) IOBAR(ISPREG(vha)) #define INVALID_ENTRY ((struct qla27xx_fwdt_entry *)0xffffffffffffffffUL) +/* hardware_lock assumed held. */ +static void +qla27xx_write_remote_reg(struct scsi_qla_host *vha, + u32 addr, u32 data) +{ + char *reg = (char *)ISPREG(vha); + + ql_dbg(ql_dbg_misc, vha, 0xd300, + "%s: addr/data = %xh/%xh\n", __func__, addr, data); + + WRT_REG_DWORD(reg + IOBASE(vha), 0x40); + WRT_REG_DWORD(reg + 0xc4, data); + WRT_REG_DWORD(reg + 0xc0, addr); +} + +void +qla27xx_reset_mpi(scsi_qla_host_t *vha) +{ + ql_dbg(ql_dbg_misc + ql_dbg_verbose, vha, 0xd301, + "Entered %s.\n", __func__); + + qla27xx_write_remote_reg(vha, 0x104050, 0x40004); + qla27xx_write_remote_reg(vha, 0x10405c, 0x4); + + vha->hw->stat.num_mpi_reset++; +} + static inline void qla27xx_insert16(uint16_t value, void *buf, ulong *len) { @@ -998,6 +1025,62 @@ qla27xx_fwdt_template_valid(void *p) } void +qla27xx_mpi_fwdump(scsi_qla_host_t *vha, int hardware_locked) +{ + ulong flags = 0; + bool need_mpi_reset = 1; + +#ifndef __CHECKER__ + if (!hardware_locked) + spin_lock_irqsave(&vha->hw->hardware_lock, flags); +#endif + if (!vha->hw->mpi_fw_dump) { + ql_log(ql_log_warn, vha, 0x02f3, "-> mpi_fwdump no buffer\n"); + } else if (vha->hw->mpi_fw_dumped) { + ql_log(ql_log_warn, vha, 0x02f4, + "-> MPI firmware already dumped (%p) -- ignoring request\n", + vha->hw->mpi_fw_dump); + } else { + struct fwdt *fwdt = &vha->hw->fwdt[1]; + ulong len; + void *buf = vha->hw->mpi_fw_dump; + + ql_log(ql_log_warn, vha, 0x02f5, "-> fwdt1 running...\n"); + if (!fwdt->template) { + ql_log(ql_log_warn, vha, 0x02f6, + "-> fwdt1 no template\n"); + goto bailout; + } + len = qla27xx_execute_fwdt_template(vha, fwdt->template, buf); + if (len == 0) { + goto bailout; + } else if (len != fwdt->dump_size) { + ql_log(ql_log_warn, vha, 0x02f7, + "-> fwdt1 fwdump residual=%+ld\n", + fwdt->dump_size - len); + } else { + need_mpi_reset = 0; + } + + vha->hw->mpi_fw_dump_len = len; + vha->hw->mpi_fw_dumped = 1; + + ql_log(ql_log_warn, vha, 0x02f8, + "-> MPI firmware dump saved to buffer (%lu/%p)\n", + vha->host_no, vha->hw->mpi_fw_dump); + qla2x00_post_uevent_work(vha, QLA_UEVENT_CODE_FW_DUMP); + } + +bailout: + if (need_mpi_reset) + qla27xx_reset_mpi(vha); +#ifndef __CHECKER__ + if (!hardware_locked) + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); +#endif +} + +void qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) { ulong flags = 0; @@ -1015,30 +1098,25 @@ qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) vha->hw->fw_dump); } else { struct fwdt *fwdt = vha->hw->fwdt; - uint j; ulong len; void *buf = vha->hw->fw_dump; - uint count = vha->hw->fw_dump_mpi ? 2 : 1; - - for (j = 0; j < count; j++, fwdt++, buf += len) { - ql_log(ql_log_warn, vha, 0xd011, - "-> fwdt%u running...\n", j); - if (!fwdt->template) { - ql_log(ql_log_warn, vha, 0xd012, - "-> fwdt%u no template\n", j); - break; - } - len = qla27xx_execute_fwdt_template(vha, - fwdt->template, buf); - if (len == 0) { - goto bailout; - } else if (len != fwdt->dump_size) { - ql_log(ql_log_warn, vha, 0xd013, - "-> fwdt%u fwdump residual=%+ld\n", - j, fwdt->dump_size - len); - } + + ql_log(ql_log_warn, vha, 0xd011, "-> fwdt0 running...\n"); + if (!fwdt->template) { + ql_log(ql_log_warn, vha, 0xd012, + "-> fwdt0 no template\n"); + goto bailout; } - vha->hw->fw_dump_len = buf - (void *)vha->hw->fw_dump; + len = qla27xx_execute_fwdt_template(vha, fwdt->template, buf); + if (len == 0) { + goto bailout; + } else if (len != fwdt->dump_size) { + ql_log(ql_log_warn, vha, 0xd013, + "-> fwdt0 fwdump residual=%+ld\n", + fwdt->dump_size - len); + } + + vha->hw->fw_dump_len = len; vha->hw->fw_dumped = 1; ql_log(ql_log_warn, vha, 0xd015, @@ -1048,7 +1126,6 @@ qla27xx_fwdump(scsi_qla_host_t *vha, int hardware_locked) } bailout: - vha->hw->fw_dump_mpi = 0; #ifndef __CHECKER__ if (!hardware_locked) spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); |