summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel/eeh_driver.c
diff options
context:
space:
mode:
authorGavin Shan <gwshan@linux.vnet.ibm.com>2014-09-30 06:39:07 +0400
committerMichael Ellerman <mpe@ellerman.id.au>2014-09-30 11:15:18 +0400
commit5cfb20b96f624e9852c4f3f1c4397e81ca28d5aa (patch)
treeacf784a0da023e6ef83e60514e3ee3fa1fe30a53 /arch/powerpc/kernel/eeh_driver.c
parent93e8b36d7bf5c54f1c52d8b78e34f88e52a3dfa2 (diff)
downloadlinux-5cfb20b96f624e9852c4f3f1c4397e81ca28d5aa.tar.xz
powerpc/eeh: Emulate EEH recovery for VFIO devices
When enabling EEH functionality on passed-through devices (PE) with VFIO, the devices in the PE would be removed permanently from the guest side. In that case, the PE remains in frozen state. When returning the PE to the host, or restarting the guest again, we had a mechanism to unfreeze the PE by clearing the PESTA/B frozen bits. However, that's not enough for some adapters, such as those shown in the following "lspci" output. Those adapters require a hot reset on the parent bus to bring their firmware back to a workable state. Otherwise, those adapters won't be operative, and the host (for the returning case) or the guest will invariably fail to load the drivers for those adapters. 0000:01:00.0 Ethernet controller: Emulex Corporation OneConnect \ 10Gb NIC (be3) (rev 02) 0000:01:00.0 0200: 19a2:0710 (rev 02) 0001:03:00.0 Ethernet controller: Emulex Corporation OneConnect \ NIC (Lancer) (rev 10) 0001:03:00.0 0200: 10df:e220 (rev 10) The patch adds a mechanism to emulate EEH recovery (for hot reset on the parent PCI bus) at 3 gates to fix the issue: opening or releasing one adapter of the PE, and enabling EEH functionality on one adapter of the PE. Reported-by: Murilo Fossa Vicentini <muvic@br.ibm.com> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel/eeh_driver.c')
-rw-r--r--arch/powerpc/kernel/eeh_driver.c90
1 files changed, 86 insertions, 4 deletions
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 948e6f99089f..3fd514f8e4b2 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -180,6 +180,22 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
return false;
}
+/*
+ * Per-device callback for eeh_pe_dev_traverse(): calls pci_save_state()
+ * on the pci_dev behind @data, if there is one. @userdata is unused.
+ * Always returns NULL.
+ */
+static void *eeh_dev_save_state(void *data, void *userdata)
+{
+	struct eeh_dev *edev = data;
+	struct pci_dev *pdev = NULL;
+
+	if (edev)
+		pdev = eeh_dev_to_pci_dev(edev);
+
+	/* Nothing to save when the EEH device has no pci_dev behind it */
+	if (pdev)
+		pci_save_state(pdev);
+
+	return NULL;
+}
+
/**
* eeh_report_error - Report pci error to each device driver
* @data: eeh device
@@ -303,6 +319,22 @@ static void *eeh_report_reset(void *data, void *userdata)
return NULL;
}
+/*
+ * Per-device callback for eeh_pe_dev_traverse(): calls pci_restore_state()
+ * on the pci_dev behind @data, if there is one. @userdata is unused.
+ * Always returns NULL.
+ */
+static void *eeh_dev_restore_state(void *data, void *userdata)
+{
+	struct eeh_dev *edev = data;
+	struct pci_dev *pdev = edev ? eeh_dev_to_pci_dev(edev) : NULL;
+
+	/* Skip EEH devices that have no pci_dev behind them */
+	if (pdev)
+		pci_restore_state(pdev);
+
+	return NULL;
+}
+
/**
* eeh_report_resume - Tell device to resume normal operations
* @data: eeh device
@@ -450,10 +482,11 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
/*
 * PE traversal callback: calls eeh_unfreeze_pe() on @data, retrying up to
 * three times while it keeps returning non-zero.
 *
 * @flag, when non-NULL, points at a bool whose VALUE is forwarded as the
 * second argument of eeh_unfreeze_pe(). Passing the pointer itself would
 * always evaluate as true, so it is dereferenced here. A NULL @flag is
 * treated as false.
 */
static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
{
struct eeh_pe *pe = (struct eeh_pe *)data;
+ bool clear_sw_state = flag ? *(bool *)flag : false;
int i, rc = 1;
for (i = 0; rc && i < 3; i++)
- rc = eeh_unfreeze_pe(pe, false);
+ rc = eeh_unfreeze_pe(pe, clear_sw_state);
/* Stop immediately on any errors */
if (rc) {
@@ -465,17 +498,66 @@ static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
return NULL;
}
/*
 * Runs __eeh_clear_pe_frozen_state() over @pe through eeh_pe_traverse(),
 * handing it the address of @clear_sw_state. If the traversal returns NULL,
 * the EEH_PE_ISOLATED state is cleared on @pe and 0 is returned; otherwise
 * -EIO is returned.
 */
-static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
+ bool clear_sw_state)
{
void *rc;
- rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL);
+ rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
/* A NULL traversal result is treated as success */
if (!rc)
eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
return rc ? -EIO : 0;
}
+/**
+ * eeh_pe_reset_and_recover - Reset a PE and walk its devices through recovery
+ * @pe: EEH PE
+ *
+ * Unless @pe is already marked EEH_PE_RECOVERING, save the state of its
+ * devices, report the error, reset and unfreeze the PE, then report the
+ * reset, restore the saved state and resume the devices.
+ *
+ * Return: 0 on success or when a recovery is already in progress, otherwise
+ * the non-zero value returned by eeh_reset_pe() or
+ * eeh_clear_pe_frozen_state().
+ */
+int eeh_pe_reset_and_recover(struct eeh_pe *pe)
+{
+	/*
+	 * The report callbacks are handed &result; seed it so they can never
+	 * observe an indeterminate value.
+	 */
+	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+	int ret;
+
+	/* Bail if the PE is being recovered */
+	if (pe->state & EEH_PE_RECOVERING)
+		return 0;
+
+	/* Put the PE into recovery mode */
+	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
+	/* Save states */
+	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
+
+	/* Report error */
+	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
+
+	/* Issue reset */
+	eeh_pe_state_mark(pe, EEH_PE_RESET);
+	ret = eeh_reset_pe(pe);
+	if (ret) {
+		eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET);
+		return ret;
+	}
+	eeh_pe_state_clear(pe, EEH_PE_RESET);
+
+	/* Unfreeze the PE */
+	ret = eeh_clear_pe_frozen_state(pe, true);
+	if (ret) {
+		eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+		return ret;
+	}
+
+	/* Notify completion of reset */
+	eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
+
+	/* Restore device state */
+	eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
+
+	/* Resume */
+	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
+
+	/* Clear recovery mode */
+	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+
+	return 0;
+}
+
/**
* eeh_reset_device - Perform actual reset of a pci slot
* @pe: EEH PE
@@ -534,7 +616,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
eeh_pe_state_clear(pe, EEH_PE_RESET);
/* Clear frozen state */
- rc = eeh_clear_pe_frozen_state(pe);
+ rc = eeh_clear_pe_frozen_state(pe, false);
if (rc)
return rc;