From 845a0e40afb77bebdbda353b44ebf48784aa51f4 Mon Sep 17 00:00:00 2001 From: "nagalakshmi.nandigama@lsi.com" Date: Thu, 1 Dec 2011 07:42:04 +0530 Subject: [SCSI] mpt2sas: Better handling DEAD IOC (PCI-E LInk down) error condition Detection of Dead IOC has been done in fault_reset_work thread. If IOC Doorbell is 0xFFFFFFFF, it will be detected as non-operation/DEAD IOC. When a DEAD IOC is detected, the code is modified to remove that IOC and all its attached devices from OS. The PCI layer API pci_remove_bus_device() is called to remove the dead IOC. Signed-off-by: Nagalakshmi Nandigama Signed-off-by: James Bottomley --- drivers/scsi/mpt2sas/mpt2sas_base.c | 59 +++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'drivers/scsi/mpt2sas/mpt2sas_base.c') diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index beda04a8404b..c041cc70a25f 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "mpt2sas_base.h" @@ -120,9 +121,33 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp) ioc->fwfault_debug = mpt2sas_fwfault_debug; return 0; } + module_param_call(mpt2sas_fwfault_debug, _scsih_set_fwfault_debug, param_get_int, &mpt2sas_fwfault_debug, 0644); +/** + * mpt2sas_remove_dead_ioc_func - kthread context to remove dead ioc + * @arg: input argument, used to derive ioc + * + * Return 0 if controller is removed from pci subsystem. + * Return -1 for other case. + */ +static int mpt2sas_remove_dead_ioc_func(void *arg) +{ + struct MPT2SAS_ADAPTER *ioc = (struct MPT2SAS_ADAPTER *)arg; + struct pci_dev *pdev; + + if ((ioc == NULL)) + return -1; + + pdev = ioc->pdev; + if ((pdev == NULL)) + return -1; + pci_remove_bus_device(pdev); + return 0; +} + + /** * _base_fault_reset_work - workq handling ioc fault conditions * @work: input argument, used to derive ioc @@ -138,6 +163,7 @@ _base_fault_reset_work(struct work_struct *work) unsigned long flags; u32 doorbell; int rc; + struct task_struct *p; spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); if (ioc->shost_recovery) @@ -145,6 +171,39 @@ _base_fault_reset_work(struct work_struct *work) spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); doorbell = mpt2sas_base_get_iocstate(ioc, 0); + if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_MASK) { + printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n", + ioc->name, __func__); + + /* + * Call _scsih_flush_pending_cmds callback so that we flush all + * pending commands back to OS. This call is required to aovid + * deadlock at block layer. Dead IOC will fail to do diag reset, + * and this call is safe since dead ioc will never return any + * command back from HW. + */ + ioc->schedule_dead_ioc_flush_running_cmds(ioc); + /* + * Set remove_host flag early since kernel thread will + * take some time to execute. + */ + ioc->remove_host = 1; + /*Remove the Dead Host */ + p = kthread_run(mpt2sas_remove_dead_ioc_func, ioc, + "mpt2sas_dead_ioc_%d", ioc->id); + if (IS_ERR(p)) { + printk(MPT2SAS_ERR_FMT + "%s: Running mpt2sas_dead_ioc thread failed !!!!\n", + ioc->name, __func__); + } else { + printk(MPT2SAS_ERR_FMT + "%s: Running mpt2sas_dead_ioc thread success !!!!\n", + ioc->name, __func__); + } + + return; /* don't rearm timer */ + } + if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, FORCE_BIG_HAMMER); -- cgit v1.2.3