summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/vfio.txt87
-rw-r--r--drivers/vfio/Makefile1
-rw-r--r--drivers/vfio/pci/vfio_pci.c18
-rw-r--r--drivers/vfio/vfio_iommu_spapr_tce.c17
-rw-r--r--drivers/vfio/vfio_spapr_eeh.c87
-rw-r--r--include/linux/vfio.h23
-rw-r--r--include/uapi/linux/vfio.h34
7 files changed, 259 insertions, 8 deletions
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index b9ca02370d46..96978eced341 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides
an excellent performance which has limitations such as inability to do
locked pages accounting in real time.
-So 3 additional ioctls have been added:
+4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
+subtree that can be treated as a unit for the purposes of partitioning and
+error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
+function of a multi-function IOA, or multiple IOAs (possibly including switch
+and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors
+and recover from them via EEH RTAS services, which works on the basis of
+additional ioctl commands.
+
+So 4 additional ioctls have been added:
VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start
of the DMA window on the PCI bus.
@@ -316,9 +324,12 @@ So 3 additional ioctls have been added:
VFIO_IOMMU_DISABLE - disables the container.
+ VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery.
The code flow from the example above should be slightly changed:
+ struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
+
.....
/* Add the group to the container */
ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
@@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed:
dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
/* Check here is .iova/.size are within DMA window from spapr_iommu_info */
-
ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
- .....
+
+ /* Get a file descriptor for the device */
+ device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
+
+ ....
+
+ /* Gratuitous device reset and go... */
+ ioctl(device, VFIO_DEVICE_RESET);
+
+ /* Make sure EEH is supported */
+ ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
+
+ /* Enable the EEH functionality on the device */
+ pe_op.op = VFIO_EEH_PE_ENABLE;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* You're suggested to create additional data struct to represent
+ * PE, and put child devices belonging to same IOMMU group to the
+ * PE instance for later reference.
+ */
+
+ /* Check the PE's state and make sure it's in functional state */
+ pe_op.op = VFIO_EEH_PE_GET_STATE;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* Save device state using pci_save_state().
+ * EEH should be enabled on the specified device.
+ */
+
+ ....
+
+ /* When 0xFF's returned from reading PCI config space or IO BARs
+ * of the PCI device. Check the PE's state to see if that has been
+ * frozen.
+ */
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* Waiting for pending PCI transactions to be completed and don't
+ * produce any more PCI traffic from/to the affected PE until
+ * recovery is finished.
+ */
+
+ /* Enable IO for the affected PE and collect logs. Usually, the
+ * standard part of PCI config space, AER registers are dumped
+ * as logs for further analysis.
+ */
+ pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /*
+ * Issue PE reset: hot or fundamental reset. Usually, hot reset
+ * is enough. However, the firmware of some PCI adapters would
+ * require fundamental reset.
+ */
+ pe_op.op = VFIO_EEH_PE_RESET_HOT;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+ pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* Configure the PCI bridges for the affected PE */
+ pe_op.op = VFIO_EEH_PE_CONFIGURE;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* Restored state we saved at initialization time. pci_restore_state()
+ * is good enough as an example.
+ */
+
+ /* Hopefully, error is recovered successfully. Now, you can resume to
+ * start PCI traffic to/from the affected PE.
+ */
+
+ ....
-------------------------------------------------------------------------------
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 72bfabc8629e..50e30bc75e85 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,4 +1,5 @@
obj-$(CONFIG_VFIO) += vfio.o
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
+obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 010e0f8b8e4f..e2ee80f36e3e 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data)
{
struct vfio_pci_device *vdev = device_data;
- if (atomic_dec_and_test(&vdev->refcnt))
+ if (atomic_dec_and_test(&vdev->refcnt)) {
+ vfio_spapr_pci_eeh_release(vdev->pdev);
vfio_pci_disable(vdev);
+ }
module_put(THIS_MODULE);
}
@@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data)
static int vfio_pci_open(void *device_data)
{
struct vfio_pci_device *vdev = device_data;
+ int ret;
if (!try_module_get(THIS_MODULE))
return -ENODEV;
if (atomic_inc_return(&vdev->refcnt) == 1) {
- int ret = vfio_pci_enable(vdev);
+ ret = vfio_pci_enable(vdev);
+ if (ret)
+ goto error;
+
+ ret = vfio_spapr_pci_eeh_open(vdev->pdev);
if (ret) {
- module_put(THIS_MODULE);
- return ret;
+ vfio_pci_disable(vdev);
+ goto error;
}
}
return 0;
+error:
+ module_put(THIS_MODULE);
+ return ret;
}
static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index a84788ba662c..730b4ef3e0cc 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data,
switch (cmd) {
case VFIO_CHECK_EXTENSION:
- return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0;
+ switch (arg) {
+ case VFIO_SPAPR_TCE_IOMMU:
+ ret = 1;
+ break;
+ default:
+ ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
+ break;
+ }
+
+ return (ret < 0) ? 0 : ret;
case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
struct vfio_iommu_spapr_tce_info info;
@@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data,
tce_iommu_disable(container);
mutex_unlock(&container->lock);
return 0;
+ case VFIO_EEH_PE_OP:
+ if (!container->tbl || !container->tbl->it_group)
+ return -ENODEV;
+
+ return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
+ cmd, arg);
}
return -ENOTTY;
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
new file mode 100644
index 000000000000..f834b4ce1431
--- /dev/null
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -0,0 +1,87 @@
+/*
+ * EEH functionality support for VFIO devices. The feature is only
+ * available on sPAPR compatible platforms.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <asm/eeh.h>
+
+/* We might build address mapping here for "fast" path later */
+int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+ return eeh_dev_open(pdev);
+}
+
+void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+ eeh_dev_release(pdev);
+}
+
+long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+ unsigned int cmd, unsigned long arg)
+{
+ struct eeh_pe *pe;
+ struct vfio_eeh_pe_op op;
+ unsigned long minsz;
+ long ret = -EINVAL;
+
+ switch (cmd) {
+ case VFIO_CHECK_EXTENSION:
+ if (arg == VFIO_EEH)
+ ret = eeh_enabled() ? 1 : 0;
+ else
+ ret = 0;
+ break;
+ case VFIO_EEH_PE_OP:
+ pe = eeh_iommu_group_to_pe(group);
+ if (!pe)
+ return -ENODEV;
+
+ minsz = offsetofend(struct vfio_eeh_pe_op, op);
+ if (copy_from_user(&op, (void __user *)arg, minsz))
+ return -EFAULT;
+ if (op.argsz < minsz || op.flags)
+ return -EINVAL;
+
+ switch (op.op) {
+ case VFIO_EEH_PE_DISABLE:
+ ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE);
+ break;
+ case VFIO_EEH_PE_ENABLE:
+ ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE);
+ break;
+ case VFIO_EEH_PE_UNFREEZE_IO:
+ ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
+ break;
+ case VFIO_EEH_PE_UNFREEZE_DMA:
+ ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
+ break;
+ case VFIO_EEH_PE_GET_STATE:
+ ret = eeh_pe_get_state(pe);
+ break;
+ case VFIO_EEH_PE_RESET_DEACTIVATE:
+ ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+ break;
+ case VFIO_EEH_PE_RESET_HOT:
+ ret = eeh_pe_reset(pe, EEH_RESET_HOT);
+ break;
+ case VFIO_EEH_PE_RESET_FUNDAMENTAL:
+ ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL);
+ break;
+ case VFIO_EEH_PE_CONFIGURE:
+ ret = eeh_pe_configure(pe);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ }
+
+ return ret;
+}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 8ec980b5e3af..25a0fbd4b998 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
extern long vfio_external_check_extension(struct vfio_group *group,
unsigned long arg);
+#ifdef CONFIG_EEH
+extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
+extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
+extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+ unsigned int cmd,
+ unsigned long arg);
+#else
+static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+ return 0;
+}
+
+static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+}
+
+static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ return -ENOTTY;
+}
+#endif /* CONFIG_EEH */
#endif /* VFIO_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index cb9023d4f063..6612974c64bf 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -30,6 +30,9 @@
*/
#define VFIO_DMA_CC_IOMMU 4
+/* Check if EEH is supported */
+#define VFIO_EEH 5
+
/*
* The IOCTL interface is designed for extensibility by embedding the
* structure length (argsz) and flags into structures passed between
@@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info {
#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+/*
+ * EEH PE operation struct provides ways to:
+ * - enable/disable EEH functionality;
+ * - unfreeze IO/DMA for frozen PE;
+ * - read PE state;
+ * - reset PE;
+ * - configure PE.
+ */
+struct vfio_eeh_pe_op {
+ __u32 argsz;
+ __u32 flags;
+ __u32 op;
+};
+
+#define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */
+#define VFIO_EEH_PE_ENABLE 1 /* Enable EEH functionality */
+#define VFIO_EEH_PE_UNFREEZE_IO 2 /* Enable IO for frozen PE */
+#define VFIO_EEH_PE_UNFREEZE_DMA 3 /* Enable DMA for frozen PE */
+#define VFIO_EEH_PE_GET_STATE 4 /* PE state retrieval */
+#define VFIO_EEH_PE_STATE_NORMAL 0 /* PE in functional state */
+#define VFIO_EEH_PE_STATE_RESET 1 /* PE reset in progress */
+#define VFIO_EEH_PE_STATE_STOPPED 2 /* Stopped DMA and IO */
+#define VFIO_EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */
+#define VFIO_EEH_PE_STATE_UNAVAIL 5 /* State unavailable */
+#define VFIO_EEH_PE_RESET_DEACTIVATE 5 /* Deassert PE reset */
+#define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */
+#define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */
+#define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */
+
+#define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21)
+
/* ***************************************************************** */
#endif /* _UAPIVFIO_H */