summaryrefslogtreecommitdiff
path: root/drivers/misc/genwqe/card_base.c
diff options
context:
space:
mode:
authorKleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>2014-06-04 17:57:51 +0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2014-07-10 01:14:27 +0400
commitfb145456fa4f4311f90703aeee058bab3b274bf8 (patch)
tree980ae85a3c35b8b574dddeaf96bf84d41dbb83d3 /drivers/misc/genwqe/card_base.c
parentc1f732ad767e37bd1d41043cbdefc0874b4d05e5 (diff)
downloadlinux-fb145456fa4f4311f90703aeee058bab3b274bf8.tar.xz
GenWQE: Add support for EEH error recovery
This patch implements the callbacks and functions necessary to have EEH recovery support. It adds a config option to enable or disable explicit calls to trigger platform specific mechanisms on error recovery paths. This option is enabled by default only on PPC64 systems and can be overritten via debugfs. If this option is enabled, on the error recovery path the driver will call pci_channel_offline() to check for error condition and issue non-raw MMIO reads to trigger early EEH detection in case of hardware failures. This is necessary since the driver MMIO helper funtions use raw accessors. Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com> Acked-by: Frank Haverkamp <haver@linux.vnet.ibm.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/misc/genwqe/card_base.c')
-rw-r--r--drivers/misc/genwqe/card_base.c79
1 files changed, 66 insertions, 13 deletions
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index e6cc3e1e7326..87ebaba9b133 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -140,6 +140,12 @@ static struct genwqe_dev *genwqe_dev_alloc(void)
cd->class_genwqe = class_genwqe;
cd->debugfs_genwqe = debugfs_genwqe;
+ /*
+ * This comes from kernel config option and can be overritten via
+ * debugfs.
+ */
+ cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY;
+
init_waitqueue_head(&cd->queue_waitq);
spin_lock_init(&cd->file_lock);
@@ -943,6 +949,19 @@ static int genwqe_health_thread(void *data)
return 0;
fatal_error:
+ if (cd->use_platform_recovery) {
+ /*
+ * Since we use raw accessors, EEH errors won't be detected
+ * by the platform until we do a non-raw MMIO or config space
+ * read
+ */
+ readq(cd->mmio + IO_SLC_CFGREG_GFIR);
+
+ /* We do nothing if the card is going over PCI recovery */
+ if (pci_channel_offline(pci_dev))
+ return -EIO;
+ }
+
dev_err(&pci_dev->dev,
"[%s] card unusable. Please trigger unbind!\n", __func__);
@@ -1048,6 +1067,9 @@ static int genwqe_pci_setup(struct genwqe_dev *cd)
pci_set_master(pci_dev);
pci_enable_pcie_error_reporting(pci_dev);
+ /* EEH recovery requires PCIe fundamental reset */
+ pci_dev->needs_freset = 1;
+
/* request complete BAR-0 space (length = 0) */
cd->mmio_len = pci_resource_len(pci_dev, 0);
cd->mmio = pci_iomap(pci_dev, 0, 0);
@@ -1186,23 +1208,40 @@ static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state);
- if (pci_dev == NULL)
- return PCI_ERS_RESULT_NEED_RESET;
-
cd = dev_get_drvdata(&pci_dev->dev);
if (cd == NULL)
- return PCI_ERS_RESULT_NEED_RESET;
+ return PCI_ERS_RESULT_DISCONNECT;
- switch (state) {
- case pci_channel_io_normal:
- return PCI_ERS_RESULT_CAN_RECOVER;
- case pci_channel_io_frozen:
- return PCI_ERS_RESULT_NEED_RESET;
- case pci_channel_io_perm_failure:
+ /* Stop the card */
+ genwqe_health_check_stop(cd);
+ genwqe_stop(cd);
+
+ /*
+ * On permanent failure, the PCI code will call device remove
+ * after the return of this function.
+ * genwqe_stop() can be called twice.
+ */
+ if (state == pci_channel_io_perm_failure) {
return PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ genwqe_pci_remove(cd);
+ return PCI_ERS_RESULT_NEED_RESET;
}
+}
+
+static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev)
+{
+ int rc;
+ struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
- return PCI_ERS_RESULT_NEED_RESET;
+ rc = genwqe_pci_setup(cd);
+ if (!rc) {
+ return PCI_ERS_RESULT_RECOVERED;
+ } else {
+ dev_err(&pci_dev->dev,
+ "err: problems with PCI setup (err=%d)\n", rc);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
}
static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
@@ -1210,8 +1249,22 @@ static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
return PCI_ERS_RESULT_NONE;
}
-static void genwqe_err_resume(struct pci_dev *dev)
+static void genwqe_err_resume(struct pci_dev *pci_dev)
{
+ int rc;
+ struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+ rc = genwqe_start(cd);
+ if (!rc) {
+ rc = genwqe_health_check_start(cd);
+ if (rc)
+ dev_err(&pci_dev->dev,
+ "err: cannot start health checking! (err=%d)\n",
+ rc);
+ } else {
+ dev_err(&pci_dev->dev,
+ "err: cannot start card services! (err=%d)\n", rc);
+ }
}
static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs)
@@ -1234,7 +1287,7 @@ static struct pci_error_handlers genwqe_err_handler = {
.error_detected = genwqe_err_error_detected,
.mmio_enabled = genwqe_err_result_none,
.link_reset = genwqe_err_result_none,
- .slot_reset = genwqe_err_result_none,
+ .slot_reset = genwqe_err_slot_reset,
.resume = genwqe_err_resume,
};