Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc

Pull powerpc updates from Ben Herrenschmidt: "This is the powerpc changes for the 3.11 merge window. In addition to the usual bug fixes and small updates, the main highlights are: - Support for transparent huge pages by Aneesh Kumar for 64-bit server processors. This allows the use of 16M pages as transparent huge pages on kernels compiled with a 64K base page size. - Base VFIO support for KVM on power by Alexey Kardashevskiy - Wiring up of our nvram to the pstore infrastructure, including putting compressed oopses in there by Aruna Balakrishnaiah - Move, rework and improve our "EEH" (basically PCI error handling and recovery) infrastructure. It is no longer specific to pseries but is now usable by the new "powernv" platform as well (no hypervisor) by Gavin Shan. - I fixed some bugs in our math-emu instruction decoding and made it usable to emulate some optional FP instructions on processors with hard FP that lack them (such as fsqrt on Freescale embedded processors). - Support for Power8 "Event Based Branch" facility by Michael Ellerman. This facility allows what is basically "userspace interrupts" for performance monitor events. - A bunch of Transactional Memory vs. Signals bug fixes and HW breakpoint/watchpoint fixes by Michael Neuling. And more ... I appologize in advance if I've failed to highlight something that somebody deemed worth it." * 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (156 commits) pstore: Add hsize argument in write_buf call of pstore_ftrace_call powerpc/fsl: add MPIC timer wakeup support powerpc/mpic: create mpic subsystem object powerpc/mpic: add global timer support powerpc/mpic: add irq_set_wake support powerpc/85xx: enable coreint for all the 64bit boards powerpc/8xx: Erroneous double irq_eoi() on CPM IRQ in MPC8xx powerpc/fsl: Enable CONFIG_E1000E in mpc85xx_smp_defconfig powerpc/mpic: Add get_version API both for internal and external use powerpc: Handle both new style and old style reserve maps powerpc/hw_brk: Fix off by one error when validating DAWR region end powerpc/pseries: Support compression of oops text via pstore powerpc/pseries: Re-organise the oops compression code pstore: Pass header size in the pstore write callback powerpc/powernv: Fix iommu initialization again powerpc/pseries: Inform the hypervisor we are using EBB regs powerpc/perf: Add power8 EBB support powerpc/perf: Core EBB support for 64-bit book3s powerpc/perf: Drop MMCRA from thread_struct powerpc/perf: Don't enable if we have zero events ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-07-04 21:29:23 +0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-07-04 21:29:23 +0400
commit: 65b97fb7303050fc826e518cf67fc283da23314f (patch)
tree: 595e7f04d65d95a39d65bd2dcf2385b3b6ea7969 /arch/powerpc/platforms/powernv
parent: ddcf6600b133697adbafd96e080818bdc0dfd028 (diff)
parent: 1d8b368ab4aacfc3f864655baad4d31a3028ec1a (diff)
download: linux-65b97fb7303050fc826e518cf67fc283da23314f.tar.xz
11 files changed, 1578 insertions, 45 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index bcc3cb48a44e..7fe595152478 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -3,3 +3,4 @@ obj-y			+= opal-rtc.o opal-nvram.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
+obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
new file mode 100644
index 000000000000..0cd1c4a71755
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -0,0 +1,916 @@
+/*
+ * The file intends to implement the functions needed by EEH, which is
+ * built on IODA compliant chip. Actually, lots of functions related
+ * to EEH would be built based on the OPAL APIs.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/tce.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/* Debugging option */
+#ifdef IODA_EEH_DBG_ON
+#define IODA_EEH_DBG(args...)	pr_info(args)
+#else
+#define IODA_EEH_DBG(args...)
+#endif
+
+static char *hub_diag = NULL;
+static int ioda_eeh_nb_init = 0;
+
+static int ioda_eeh_event(struct notifier_block *nb,
+			  unsigned long events, void *change)
+{
+	uint64_t changed_evts = (uint64_t)change;
+
+	/* We simply send special EEH event */
+	if ((changed_evts & OPAL_EVENT_PCI_ERROR) &&
+	    (events & OPAL_EVENT_PCI_ERROR))
+		eeh_send_failure_event(NULL);
+
+	return 0;
+}
+
+static struct notifier_block ioda_eeh_nb = {
+	.notifier_call	= ioda_eeh_event,
+	.next		= NULL,
+	.priority	= 0
+};
+
+#ifdef CONFIG_DEBUG_FS
+static int ioda_eeh_dbgfs_set(void *data, u64 val)
+{
+	struct pci_controller *hose = data;
+	struct pnv_phb *phb = hose->private_data;
+
+	out_be64(phb->regs + 0xD10, val);
+	return 0;
+}
+
+static int ioda_eeh_dbgfs_get(void *data, u64 *val)
+{
+	struct pci_controller *hose = data;
+	struct pnv_phb *phb = hose->private_data;
+
+	*val = in_be64(phb->regs + 0xD10);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_dbgfs_ops, ioda_eeh_dbgfs_get,
+			ioda_eeh_dbgfs_set, "0x%llx\n");
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * ioda_eeh_post_init - Chip dependent post initialization
+ * @hose: PCI controller
+ *
+ * The function will be called after eeh PEs and devices
+ * have been built. That means the EEH is ready to supply
+ * service with I/O cache.
+ */
+static int ioda_eeh_post_init(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	int ret;
+
+	/* Register OPAL event notifier */
+	if (!ioda_eeh_nb_init) {
+		ret = opal_notifier_register(&ioda_eeh_nb);
+		if (ret) {
+			pr_err("%s: Can't register OPAL event notifier (%d)\n",
+			       __func__, ret);
+			return ret;
+		}
+
+		ioda_eeh_nb_init = 1;
+	}
+
+	/* FIXME: Enable it for PHB3 later */
+	if (phb->type == PNV_PHB_IODA1) {
+		if (!hub_diag) {
+			hub_diag = (char *)__get_free_page(GFP_KERNEL |
+							   __GFP_ZERO);
+			if (!hub_diag) {
+				pr_err("%s: Out of memory !\n",
+				       __func__);
+				return -ENOMEM;
+			}
+		}
+
+#ifdef CONFIG_DEBUG_FS
+		if (phb->dbgfs)
+			debugfs_create_file("err_injct", 0600,
+					    phb->dbgfs, hose,
+					    &ioda_eeh_dbgfs_ops);
+#endif
+
+		phb->eeh_state |= PNV_EEH_STATE_ENABLED;
+	}
+
+	return 0;
+}
+
+/**
+ * ioda_eeh_set_option - Set EEH operation or I/O setting
+ * @pe: EEH PE
+ * @option: options
+ *
+ * Enable or disable EEH option for the indicated PE. The
+ * function also can be used to enable I/O or DMA for the
+ * PE.
+ */
+static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
+{
+	s64 ret;
+	u32 pe_no;
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+
+	/* Check on PE number */
+	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
+		pr_err("%s: PE address %x out of range [0, %x] "
+		       "on PHB#%x\n",
+			__func__, pe->addr, phb->ioda.total_pe,
+			hose->global_number);
+		return -EINVAL;
+	}
+
+	pe_no = pe->addr;
+	switch (option) {
+	case EEH_OPT_DISABLE:
+		ret = -EEXIST;
+		break;
+	case EEH_OPT_ENABLE:
+		ret = 0;
+		break;
+	case EEH_OPT_THAW_MMIO:
+		ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+				OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO);
+		if (ret) {
+			pr_warning("%s: Failed to enable MMIO for "
+				   "PHB#%x-PE#%x, err=%lld\n",
+				__func__, hose->global_number, pe_no, ret);
+			return -EIO;
+		}
+
+		break;
+	case EEH_OPT_THAW_DMA:
+		ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+				OPAL_EEH_ACTION_CLEAR_FREEZE_DMA);
+		if (ret) {
+			pr_warning("%s: Failed to enable DMA for "
+				   "PHB#%x-PE#%x, err=%lld\n",
+				__func__, hose->global_number, pe_no, ret);
+			return -EIO;
+		}
+
+		break;
+	default:
+		pr_warning("%s: Invalid option %d\n", __func__, option);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+/**
+ * ioda_eeh_get_state - Retrieve the state of PE
+ * @pe: EEH PE
+ *
+ * The PE's state should be retrieved from the PEEV, PEST
+ * IODA tables. Since the OPAL has exported the function
+ * to do it, it'd better to use that.
+ */
+static int ioda_eeh_get_state(struct eeh_pe *pe)
+{
+	s64 ret = 0;
+	u8 fstate;
+	u16 pcierr;
+	u32 pe_no;
+	int result;
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+
+	/*
+	 * Sanity check on PE address. The PHB PE address should
+	 * be zero.
+	 */
+	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
+		pr_err("%s: PE address %x out of range [0, %x] "
+		       "on PHB#%x\n",
+		       __func__, pe->addr, phb->ioda.total_pe,
+		       hose->global_number);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	/* Retrieve PE status through OPAL */
+	pe_no = pe->addr;
+	ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+			&fstate, &pcierr, NULL);
+	if (ret) {
+		pr_err("%s: Failed to get EEH status on "
+		       "PHB#%x-PE#%x\n, err=%lld\n",
+		       __func__, hose->global_number, pe_no, ret);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	/* Check PHB status */
+	if (pe->type & EEH_PE_PHB) {
+		result = 0;
+		result &= ~EEH_STATE_RESET_ACTIVE;
+
+		if (pcierr != OPAL_EEH_PHB_ERROR) {
+			result |= EEH_STATE_MMIO_ACTIVE;
+			result |= EEH_STATE_DMA_ACTIVE;
+			result |= EEH_STATE_MMIO_ENABLED;
+			result |= EEH_STATE_DMA_ENABLED;
+		}
+
+		return result;
+	}
+
+	/* Parse result out */
+	result = 0;
+	switch (fstate) {
+	case OPAL_EEH_STOPPED_NOT_FROZEN:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_MMIO_ACTIVE;
+		result |= EEH_STATE_DMA_ACTIVE;
+		result |= EEH_STATE_MMIO_ENABLED;
+		result |= EEH_STATE_DMA_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_MMIO_FREEZE:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_DMA_ACTIVE;
+		result |= EEH_STATE_DMA_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_DMA_FREEZE:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_MMIO_ACTIVE;
+		result |= EEH_STATE_MMIO_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		break;
+	case OPAL_EEH_STOPPED_RESET:
+		result |= EEH_STATE_RESET_ACTIVE;
+		break;
+	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
+		result |= EEH_STATE_UNAVAILABLE;
+		break;
+	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
+		result |= EEH_STATE_NOT_SUPPORT;
+		break;
+	default:
+		pr_warning("%s: Unexpected EEH status 0x%x "
+			   "on PHB#%x-PE#%x\n",
+			   __func__, fstate, hose->global_number, pe_no);
+	}
+
+	return result;
+}
+
+static int ioda_eeh_pe_clear(struct eeh_pe *pe)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	u32 pe_no;
+	u8 fstate;
+	u16 pcierr;
+	s64 ret;
+
+	pe_no = pe->addr;
+	hose = pe->phb;
+	phb = pe->phb->private_data;
+
+	/* Clear the EEH error on the PE */
+	ret = opal_pci_eeh_freeze_clear(phb->opal_id,
+			pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+	if (ret) {
+		pr_err("%s: Failed to clear EEH error for "
+		       "PHB#%x-PE#%x, err=%lld\n",
+		       __func__, hose->global_number, pe_no, ret);
+		return -EIO;
+	}
+
+	/*
+	 * Read the PE state back and verify that the frozen
+	 * state has been removed.
+	 */
+	ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+			&fstate, &pcierr, NULL);
+	if (ret) {
+		pr_err("%s: Failed to get EEH status on "
+		       "PHB#%x-PE#%x\n, err=%lld\n",
+		       __func__, hose->global_number, pe_no, ret);
+		return -EIO;
+	}
+
+	if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) {
+		pr_err("%s: Frozen state not cleared on "
+		       "PHB#%x-PE#%x, sts=%x\n",
+		       __func__, hose->global_number, pe_no, fstate);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
+{
+	s64 rc = OPAL_HARDWARE;
+
+	while (1) {
+		rc = opal_pci_poll(phb->opal_id);
+		if (rc <= 0)
+			break;
+
+		msleep(rc);
+	}
+
+	return rc;
+}
+
+static int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
+{
+	struct pnv_phb *phb = hose->private_data;
+	s64 rc = OPAL_HARDWARE;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/* Issue PHB complete reset request */
+	if (option == EEH_RESET_FUNDAMENTAL ||
+	    option == EEH_RESET_HOT)
+		rc = opal_pci_reset(phb->opal_id,
+				OPAL_PHB_COMPLETE,
+				OPAL_ASSERT_RESET);
+	else if (option == EEH_RESET_DEACTIVATE)
+		rc = opal_pci_reset(phb->opal_id,
+				OPAL_PHB_COMPLETE,
+				OPAL_DEASSERT_RESET);
+	if (rc < 0)
+		goto out;
+
+	/*
+	 * Poll state of the PHB until the request is done
+	 * successfully.
+	 */
+	rc = ioda_eeh_phb_poll(phb);
+out:
+	if (rc != OPAL_SUCCESS)
+		return -EIO;
+
+	return 0;
+}
+
+static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
+{
+	struct pnv_phb *phb = hose->private_data;
+	s64 rc = OPAL_SUCCESS;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/*
+	 * During the reset deassert time, we needn't care
+	 * the reset scope because the firmware does nothing
+	 * for fundamental or hot reset during deassert phase.
+	 */
+	if (option == EEH_RESET_FUNDAMENTAL)
+		rc = opal_pci_reset(phb->opal_id,
+				OPAL_PCI_FUNDAMENTAL_RESET,
+				OPAL_ASSERT_RESET);
+	else if (option == EEH_RESET_HOT)
+		rc = opal_pci_reset(phb->opal_id,
+				OPAL_PCI_HOT_RESET,
+				OPAL_ASSERT_RESET);
+	else if (option == EEH_RESET_DEACTIVATE)
+		rc = opal_pci_reset(phb->opal_id,
+				OPAL_PCI_HOT_RESET,
+				OPAL_DEASSERT_RESET);
+	if (rc < 0)
+		goto out;
+
+	/* Poll state of the PHB until the request is done */
+	rc = ioda_eeh_phb_poll(phb);
+out:
+	if (rc != OPAL_SUCCESS)
+		return -EIO;
+
+	return 0;
+}
+
+static int ioda_eeh_bridge_reset(struct pci_controller *hose,
+		struct pci_dev *dev, int option)
+{
+	u16 ctrl;
+
+	pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n",
+		 __func__, hose->global_number, dev->bus->number,
+		 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option);
+
+	switch (option) {
+	case EEH_RESET_FUNDAMENTAL:
+	case EEH_RESET_HOT:
+		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+		pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+		break;
+	case EEH_RESET_DEACTIVATE:
+		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+		pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * ioda_eeh_reset - Reset the indicated PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Do reset on the indicated PE. For PCI bus sensitive PE,
+ * we need to reset the parent p2p bridge. The PHB has to
+ * be reinitialized if the p2p bridge is root bridge. For
+ * PCI device sensitive PE, we will try to reset the device
+ * through FLR. For now, we don't have OPAL APIs to do HARD
+ * reset yet, so all reset would be SOFT (HOT) reset.
+ */
+static int ioda_eeh_reset(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *hose = pe->phb;
+	struct eeh_dev *edev;
+	struct pci_dev *dev;
+	int ret;
+
+	/*
+	 * Anyway, we have to clear the problematic state for the
+	 * corresponding PE. However, we needn't do it if the PE
+	 * is PHB associated. That means the PHB is having fatal
+	 * errors and it needs reset. Further more, the AIB interface
+	 * isn't reliable any more.
+	 */
+	if (!(pe->type & EEH_PE_PHB) &&
+	    (option == EEH_RESET_HOT ||
+	    option == EEH_RESET_FUNDAMENTAL)) {
+		ret = ioda_eeh_pe_clear(pe);
+		if (ret)
+			return -EIO;
+	}
+
+	/*
+	 * The rules applied to reset, either fundamental or hot reset:
+	 *
+	 * We always reset the direct upstream bridge of the PE. If the
+	 * direct upstream bridge isn't root bridge, we always take hot
+	 * reset no matter what option (fundamental or hot) is. Otherwise,
+	 * we should do the reset according to the required option.
+	 */
+	if (pe->type & EEH_PE_PHB) {
+		ret = ioda_eeh_phb_reset(hose, option);
+	} else {
+		if (pe->type & EEH_PE_DEVICE) {
+			/*
+			 * If it's device PE, we didn't refer to the parent
+			 * PCI bus yet. So we have to figure it out indirectly.
+			 */
+			edev = list_first_entry(&pe->edevs,
+					struct eeh_dev, list);
+			dev = eeh_dev_to_pci_dev(edev);
+			dev = dev->bus->self;
+		} else {
+			/*
+			 * If it's bus PE, the parent PCI bus is already there
+			 * and just pick it up.
+			 */
+			dev = pe->bus->self;
+		}
+
+		/*
+		 * Do reset based on the fact that the direct upstream bridge
+		 * is root bridge (port) or not.
+		 */
+		if (dev->bus->number == 0)
+			ret = ioda_eeh_root_reset(hose, option);
+		else
+			ret = ioda_eeh_bridge_reset(hose, dev, option);
+	}
+
+	return ret;
+}
+
+/**
+ * ioda_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: Severity level of the log
+ * @drv_log: buffer to store the log
+ * @len: space of the log buffer
+ *
+ * The function is used to retrieve error log from P7IOC.
+ */
+static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
+			    char *drv_log, unsigned long len)
+{
+	s64 ret;
+	unsigned long flags;
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+
+	spin_lock_irqsave(&phb->lock, flags);
+
+	ret = opal_pci_get_phb_diag_data2(phb->opal_id,
+			phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+	if (ret) {
+		spin_unlock_irqrestore(&phb->lock, flags);
+		pr_warning("%s: Failed to get log for PHB#%x-PE#%x\n",
+			   __func__, hose->global_number, pe->addr);
+		return -EIO;
+	}
+
+	/*
+	 * FIXME: We probably need log the error in somewhere.
+	 * Lets make it up in future.
+	 */
+	/* pr_info("%s", phb->diag.blob); */
+
+	spin_unlock_irqrestore(&phb->lock, flags);
+
+	return 0;
+}
+
+/**
+ * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
+ * @pe: EEH PE
+ *
+ * For particular PE, it might have included PCI bridges. In order
+ * to make the PE work properly, those PCI bridges should be configured
+ * correctly. However, we need do nothing on P7IOC since the reset
+ * function will do everything that should be covered by the function.
+ */
+static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
+{
+	return 0;
+}
+
+static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
+{
+	/* GEM */
+	pr_info("  GEM XFIR:        %016llx\n", data->gemXfir);
+	pr_info("  GEM RFIR:        %016llx\n", data->gemRfir);
+	pr_info("  GEM RIRQFIR:     %016llx\n", data->gemRirqfir);
+	pr_info("  GEM Mask:        %016llx\n", data->gemMask);
+	pr_info("  GEM RWOF:        %016llx\n", data->gemRwof);
+
+	/* LEM */
+	pr_info("  LEM FIR:         %016llx\n", data->lemFir);
+	pr_info("  LEM Error Mask:  %016llx\n", data->lemErrMask);
+	pr_info("  LEM Action 0:    %016llx\n", data->lemAction0);
+	pr_info("  LEM Action 1:    %016llx\n", data->lemAction1);
+	pr_info("  LEM WOF:         %016llx\n", data->lemWof);
+}
+
+static void ioda_eeh_hub_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct OpalIoP7IOCErrorData *data;
+	long rc;
+
+	data = (struct OpalIoP7IOCErrorData *)ioda_eeh_hub_diag;
+	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE);
+	if (rc != OPAL_SUCCESS) {
+		pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n",
+			   __func__, phb->hub_id, rc);
+		return;
+	}
+
+	switch (data->type) {
+	case OPAL_P7IOC_DIAG_TYPE_RGC:
+		pr_info("P7IOC diag-data for RGC\n\n");
+		ioda_eeh_hub_diag_common(data);
+		pr_info("  RGC Status:      %016llx\n", data->rgc.rgcStatus);
+		pr_info("  RGC LDCP:        %016llx\n", data->rgc.rgcLdcp);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_BI:
+		pr_info("P7IOC diag-data for BI %s\n\n",
+			data->bi.biDownbound ? "Downbound" : "Upbound");
+		ioda_eeh_hub_diag_common(data);
+		pr_info("  BI LDCP 0:       %016llx\n", data->bi.biLdcp0);
+		pr_info("  BI LDCP 1:       %016llx\n", data->bi.biLdcp1);
+		pr_info("  BI LDCP 2:       %016llx\n", data->bi.biLdcp2);
+		pr_info("  BI Fence Status: %016llx\n", data->bi.biFenceStatus);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_CI:
+		pr_info("P7IOC diag-data for CI Port %d\\nn",
+			data->ci.ciPort);
+		ioda_eeh_hub_diag_common(data);
+		pr_info("  CI Port Status:  %016llx\n", data->ci.ciPortStatus);
+		pr_info("  CI Port LDCP:    %016llx\n", data->ci.ciPortLdcp);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_MISC:
+		pr_info("P7IOC diag-data for MISC\n\n");
+		ioda_eeh_hub_diag_common(data);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_I2C:
+		pr_info("P7IOC diag-data for I2C\n\n");
+		ioda_eeh_hub_diag_common(data);
+		break;
+	default:
+		pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n",
+			   __func__, phb->hub_id, data->type);
+	}
+}
+
+static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose,
+				    struct OpalIoPhbErrorCommon *common)
+{
+	struct OpalIoP7IOCPhbErrorData *data;
+	int i;
+
+	data = (struct OpalIoP7IOCPhbErrorData *)common;
+
+	pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n",
+		hose->global_number, common->version);
+
+	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
+
+	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
+	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
+	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
+
+	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
+	pr_info("  slotStatus:           %08x\n", data->slotStatus);
+	pr_info("  linkStatus:           %08x\n", data->linkStatus);
+	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
+	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
+
+	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
+	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
+	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
+	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
+	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
+	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
+	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
+	pr_info("  sourceId:             %08x\n", data->sourceId);
+
+	pr_info("  errorClass:           %016llx\n", data->errorClass);
+	pr_info("  correlator:           %016llx\n", data->correlator);
+	pr_info("  p7iocPlssr:           %016llx\n", data->p7iocPlssr);
+	pr_info("  p7iocCsr:             %016llx\n", data->p7iocCsr);
+	pr_info("  lemFir:               %016llx\n", data->lemFir);
+	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
+	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
+	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
+	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
+	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
+	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
+	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
+	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
+	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
+	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
+	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
+	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
+	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
+	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
+	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
+	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
+	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
+	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+
+	for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
+		if ((data->pestA[i] >> 63) == 0 &&
+		    (data->pestB[i] >> 63) == 0)
+			continue;
+
+		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
+		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+	}
+}
+
+static void ioda_eeh_phb_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct OpalIoPhbErrorCommon *common;
+	long rc;
+
+	common = (struct OpalIoPhbErrorCommon *)phb->diag.blob;
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE);
+	if (rc != OPAL_SUCCESS) {
+		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
+			    __func__, hose->global_number, rc);
+		return;
+	}
+
+	switch (common->ioType) {
+	case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
+		ioda_eeh_p7ioc_phb_diag(hose, common);
+		break;
+	default:
+		pr_warning("%s: Unrecognized I/O chip %d\n",
+			   __func__, common->ioType);
+	}
+}
+
+static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
+			       struct eeh_pe **pe)
+{
+	struct eeh_pe *phb_pe;
+
+	phb_pe = eeh_phb_pe_get(hose);
+	if (!phb_pe) {
+		pr_warning("%s Can't find PE for PHB#%d\n",
+			   __func__, hose->global_number);
+		return -EEXIST;
+	}
+
+	*pe = phb_pe;
+	return 0;
+}
+
+static int ioda_eeh_get_pe(struct pci_controller *hose,
+			   u16 pe_no, struct eeh_pe **pe)
+{
+	struct eeh_pe *phb_pe, *dev_pe;
+	struct eeh_dev dev;
+
+	/* Find the PHB PE */
+	if (ioda_eeh_get_phb_pe(hose, &phb_pe))
+		return -EEXIST;
+
+	/* Find the PE according to PE# */
+	memset(&dev, 0, sizeof(struct eeh_dev));
+	dev.phb = hose;
+	dev.pe_config_addr = pe_no;
+	dev_pe = eeh_pe_get(&dev);
+	if (!dev_pe) {
+		pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n",
+			   __func__, hose->global_number, pe_no);
+		return -EEXIST;
+	}
+
+	*pe = dev_pe;
+	return 0;
+}
+
+/**
+ * ioda_eeh_next_error - Retrieve next error for EEH core to handle
+ * @pe: The affected PE
+ *
+ * The function is expected to be called by EEH core while it gets
+ * special EEH event (without binding PE). The function calls to
+ * OPAL APIs for next error to handle. The informational error is
+ * handled internally by platform. However, the dead IOC, dead PHB,
+ * fenced PHB and frozen PE should be handled by EEH core eventually.
+ */
+static int ioda_eeh_next_error(struct eeh_pe **pe)
+{
+	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb;
+	u64 frozen_pe_no;
+	u16 err_type, severity;
+	long rc;
+	int ret = 1;
+
+	/*
+	 * While running here, it's safe to purge the event queue.
+	 * And we should keep the cached OPAL notifier event sychronized
+	 * between the kernel and firmware.
+	 */
+	eeh_remove_event(NULL);
+	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		/*
+		 * If the subordinate PCI buses of the PHB has been
+		 * removed, we needn't take care of it any more.
+		 */
+		phb = hose->private_data;
+		if (phb->eeh_state & PNV_EEH_STATE_REMOVED)
+			continue;
+
+		rc = opal_pci_next_error(phb->opal_id,
+				&frozen_pe_no, &err_type, &severity);
+
+		/* If OPAL API returns error, we needn't proceed */
+		if (rc != OPAL_SUCCESS) {
+			IODA_EEH_DBG("%s: Invalid return value on "
+				     "PHB#%x (0x%lx) from opal_pci_next_error",
+				     __func__, hose->global_number, rc);
+			continue;
+		}
+
+		/* If the PHB doesn't have error, stop processing */
+		if (err_type == OPAL_EEH_NO_ERROR ||
+		    severity == OPAL_EEH_SEV_NO_ERROR) {
+			IODA_EEH_DBG("%s: No error found on PHB#%x\n",
+				     __func__, hose->global_number);
+			continue;
+		}
+
+		/*
+		 * Processing the error. We're expecting the error with
+		 * highest priority reported upon multiple errors on the
+		 * specific PHB.
+		 */
+		IODA_EEH_DBG("%s: Error (%d, %d, %d) on PHB#%x\n",
+			err_type, severity, pe_no, hose->global_number);
+		switch (err_type) {
+		case OPAL_EEH_IOC_ERROR:
+			if (severity == OPAL_EEH_SEV_IOC_DEAD) {
+				list_for_each_entry_safe(hose, tmp,
+						&hose_list, list_node) {
+					phb = hose->private_data;
+					phb->eeh_state |= PNV_EEH_STATE_REMOVED;
+				}
+
+				pr_err("EEH: dead IOC detected\n");
+				ret = 4;
+				goto out;
+			} else if (severity == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: IOC informative error "
+					"detected\n");
+				ioda_eeh_hub_diag(hose);
+			}
+
+			break;
+		case OPAL_EEH_PHB_ERROR:
+			if (severity == OPAL_EEH_SEV_PHB_DEAD) {
+				if (ioda_eeh_get_phb_pe(hose, pe))
+					break;
+
+				pr_err("EEH: dead PHB#%x detected\n",
+					hose->global_number);
+				phb->eeh_state |= PNV_EEH_STATE_REMOVED;
+				ret = 3;
+				goto out;
+			} else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
+				if (ioda_eeh_get_phb_pe(hose, pe))
+					break;
+
+				pr_err("EEH: fenced PHB#%x detected\n",
+					hose->global_number);
+				ret = 2;
+				goto out;
+			} else if (severity == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: PHB#%x informative error "
+					"detected\n",
+					hose->global_number);
+				ioda_eeh_phb_diag(hose);
+			}
+
+			break;
+		case OPAL_EEH_PE_ERROR:
+			if (ioda_eeh_get_pe(hose, frozen_pe_no, pe))
+				break;
+
+			pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
+				(*pe)->addr, (*pe)->phb->global_number);
+			ret = 1;
+			goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
+struct pnv_eeh_ops ioda_eeh_ops = {
+	.post_init		= ioda_eeh_post_init,
+	.set_option		= ioda_eeh_set_option,
+	.get_state		= ioda_eeh_get_state,
+	.reset			= ioda_eeh_reset,
+	.get_log		= ioda_eeh_get_log,
+	.configure_bridge	= ioda_eeh_configure_bridge,
+	.next_error		= ioda_eeh_next_error
+};
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
new file mode 100644
index 000000000000..969cce73055a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -0,0 +1,379 @@
+/*
+ * The file intends to implement the platform dependent EEH operations on
+ * powernv platform. Actually, the powernv was created in order to fully
+ * hypervisor support.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/firmware.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/ppc-pci.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/**
+ * powernv_eeh_init - EEH platform dependent initialization
+ *
+ * EEH platform dependent initialization on powernv
+ */
+static int powernv_eeh_init(void)
+{
+	/* We require OPALv3 */
+	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+		pr_warning("%s: OPALv3 is required !\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Set EEH probe mode */
+	eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
+
+	return 0;
+}
+
+/**
+ * powernv_eeh_post_init - EEH platform dependent post initialization
+ *
+ * EEH platform dependent post initialization on powernv. When
+ * the function is called, the EEH PEs and devices should have
+ * been built. If the I/O cache staff has been built, EEH is
+ * ready to supply service.
+ */
+static int powernv_eeh_post_init(void)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	int ret = 0;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		if (phb->eeh_ops && phb->eeh_ops->post_init) {
+			ret = phb->eeh_ops->post_init(hose);
+			if (ret)
+				break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_dev_probe - Do probe on PCI device
+ * @dev: PCI device
+ * @flag: unused
+ *
+ * When EEH module is installed during system boot, all PCI devices
+ * are checked one by one to see if it supports EEH. The function
+ * is introduced for the purpose. By default, EEH has been enabled
+ * on all PCI devices. That's to say, we only need do necessary
+ * initialization on the corresponding eeh device and create PE
+ * accordingly.
+ *
+ * It's notable that's unsafe to retrieve the EEH device through
+ * the corresponding PCI device. During the PCI device hotplug, which
+ * was possiblly triggered by EEH core, the binding between EEH device
+ * and the PCI device isn't built yet.
+ */
+static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+
+	/*
+	 * When probing the root bridge, which doesn't have any
+	 * subordinate PCI devices. We don't have OF node for
+	 * the root bridge. So it's not reasonable to continue
+	 * the probing.
+	 */
+	if (!dn || !edev)
+		return 0;
+
+	/* Skip for PCI-ISA bridge */
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
+		return 0;
+
+	/* Initialize eeh device */
+	edev->class_code	= dev->class;
+	edev->mode		= 0;
+	edev->config_addr	= ((dev->bus->number << 8) | dev->devfn);
+	edev->pe_config_addr	= phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
+
+	/* Create PE */
+	eeh_add_to_parent_pe(edev);
+
+	/*
+	 * Enable EEH explicitly so that we will do EEH check
+	 * while accessing I/O stuff
+	 *
+	 * FIXME: Enable that for PHB3 later
+	 */
+	if (phb->type == PNV_PHB_IODA1)
+		eeh_subsystem_enabled = 1;
+
+	/* Save memory bars */
+	eeh_save_bars(edev);
+
+	return 0;
+}
+
+/**
+ * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @pe: EEH PE
+ * @option: operation to be issued
+ *
+ * The function is used to control the EEH functionality globally.
+ * Currently, following options are support according to PAPR:
+ * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
+ */
+static int powernv_eeh_set_option(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = -EEXIST;
+
+	/*
+	 * What we need do is pass it down for hardware
+	 * implementation to handle it.
+	 */
+	if (phb->eeh_ops && phb->eeh_ops->set_option)
+		ret = phb->eeh_ops->set_option(pe, option);
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_get_pe_addr - Retrieve PE address
+ * @pe: EEH PE
+ *
+ * Retrieve the PE address according to the given tranditional
+ * PCI BDF (Bus/Device/Function) address.
+ */
+static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
+{
+	return pe->addr;
+}
+
+/**
+ * powernv_eeh_get_state - Retrieve PE state
+ * @pe: EEH PE
+ * @delay: delay while PE state is temporarily unavailable
+ *
+ * Retrieve the state of the specified PE. For IODA-compitable
+ * platform, it should be retrieved from IODA table. Therefore,
+ * we prefer passing down to hardware implementation to handle
+ * it.
+ */
+static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = EEH_STATE_NOT_SUPPORT;
+
+	if (phb->eeh_ops && phb->eeh_ops->get_state) {
+		ret = phb->eeh_ops->get_state(pe);
+
+		/*
+		 * If the PE state is temporarily unavailable,
+		 * to inform the EEH core delay for default
+		 * period (1 second)
+		 */
+		if (delay) {
+			*delay = 0;
+			if (ret & EEH_STATE_UNAVAILABLE)
+				*delay = 1000;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_reset - Reset the specified PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Reset the specified PE
+ */
+static int powernv_eeh_reset(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = -EEXIST;
+
+	if (phb->eeh_ops && phb->eeh_ops->reset)
+		ret = phb->eeh_ops->reset(pe, option);
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_wait_state - Wait for PE state
+ * @pe: EEH PE
+ * @max_wait: maximal period in microsecond
+ *
+ * Wait for the state of associated PE. It might take some time
+ * to retrieve the PE's state.
+ */
+static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
+{
+	int ret;
+	int mwait;
+
+	while (1) {
+		ret = powernv_eeh_get_state(pe, &mwait);
+
+		/*
+		 * If the PE's state is temporarily unavailable,
+		 * we have to wait for the specified time. Otherwise,
+		 * the PE's state will be returned immediately.
+		 */
+		if (ret != EEH_STATE_UNAVAILABLE)
+			return ret;
+
+		max_wait -= mwait;
+		if (max_wait <= 0) {
+			pr_warning("%s: Timeout getting PE#%x's state (%d)\n",
+				   __func__, pe->addr, max_wait);
+			return EEH_STATE_NOT_SUPPORT;
+		}
+
+		msleep(mwait);
+	}
+
+	return EEH_STATE_NOT_SUPPORT;
+}
+
+/**
+ * powernv_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ * @drv_log: driver log to be combined with retrieved error log
+ * @len: length of driver log
+ *
+ * Retrieve the temporary or permanent error from the PE.
+ */
+static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
+			char *drv_log, unsigned long len)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = -EEXIST;
+
+	if (phb->eeh_ops && phb->eeh_ops->get_log)
+		ret = phb->eeh_ops->get_log(pe, severity, drv_log, len);
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @pe: EEH PE
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the mulfunctional PE would be recovered
+ * again.
+ */
+static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int ret = 0;
+
+	if (phb->eeh_ops && phb->eeh_ops->configure_bridge)
+		ret = phb->eeh_ops->configure_bridge(pe);
+
+	return ret;
+}
+
+/**
+ * powernv_eeh_next_error - Retrieve next EEH error to handle
+ * @pe: Affected PE
+ *
+ * Using OPAL API, to retrieve next EEH error for EEH core to handle
+ */
+static int powernv_eeh_next_error(struct eeh_pe **pe)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb = NULL;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+		break;
+	}
+
+	if (phb && phb->eeh_ops->next_error)
+		return phb->eeh_ops->next_error(pe);
+
+	return -EEXIST;
+}
+
+static struct eeh_ops powernv_eeh_ops = {
+	.name                   = "powernv",
+	.init                   = powernv_eeh_init,
+	.post_init              = powernv_eeh_post_init,
+	.of_probe               = NULL,
+	.dev_probe              = powernv_eeh_dev_probe,
+	.set_option             = powernv_eeh_set_option,
+	.get_pe_addr            = powernv_eeh_get_pe_addr,
+	.get_state              = powernv_eeh_get_state,
+	.reset                  = powernv_eeh_reset,
+	.wait_state             = powernv_eeh_wait_state,
+	.get_log                = powernv_eeh_get_log,
+	.configure_bridge       = powernv_eeh_configure_bridge,
+	.read_config            = pnv_pci_cfg_read,
+	.write_config           = pnv_pci_cfg_write,
+	.next_error		= powernv_eeh_next_error
+};
+
+/**
+ * eeh_powernv_init - Register platform dependent EEH operations
+ *
+ * EEH initialization on powernv platform. This function should be
+ * called before any EEH related functions.
+ */
+static int __init eeh_powernv_init(void)
+{
+	int ret = -EINVAL;
+
+	if (!machine_is(powernv))
+		return ret;
+
+	ret = eeh_ops_register(&powernv_eeh_ops);
+	if (!ret)
+		pr_info("EEH: PowerNV platform initialized\n");
+	else
+		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
+
+	return ret;
+}
+
+early_initcall(eeh_powernv_init);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6fabe92eafb6..e88863ffb135 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -107,4 +107,7 @@ OPAL_CALL(opal_pci_mask_pe_error,		OPAL_PCI_MASK_PE_ERROR);
 OPAL_CALL(opal_set_slot_led_status,		OPAL_SET_SLOT_LED_STATUS);
 OPAL_CALL(opal_get_epow_status,			OPAL_GET_EPOW_STATUS);
 OPAL_CALL(opal_set_system_attention_led,	OPAL_SET_SYSTEM_ATTENTION_LED);
+OPAL_CALL(opal_pci_next_error,			OPAL_PCI_NEXT_ERROR);
+OPAL_CALL(opal_pci_poll,			OPAL_PCI_POLL);
 OPAL_CALL(opal_pci_msi_eoi,			OPAL_PCI_MSI_EOI);
+OPAL_CALL(opal_pci_get_phb_diag_data2,		OPAL_PCI_GET_PHB_DIAG_DATA2);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 628c564ceadb..106301fd2fa5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -15,6 +15,7 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/interrupt.h>
+#include <linux/notifier.h>
 #include <linux/slab.h>
 #include <asm/opal.h>
 #include <asm/firmware.h>
@@ -31,6 +32,10 @@ static DEFINE_SPINLOCK(opal_write_lock);
 extern u64 opal_mc_secondary_handler[];
 static unsigned int *opal_irqs;
 static unsigned int opal_irq_count;
+static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
+static DEFINE_SPINLOCK(opal_notifier_lock);
+static uint64_t last_notified_mask = 0x0ul;
+static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
 
 int __init early_init_dt_scan_opal(unsigned long node,
 				   const char *uname, int depth, void *data)
@@ -95,6 +100,68 @@ static int __init opal_register_exception_handlers(void)
 
 early_initcall(opal_register_exception_handlers);
 
+int opal_notifier_register(struct notifier_block *nb)
+{
+	if (!nb) {
+		pr_warning("%s: Invalid argument (%p)\n",
+			   __func__, nb);
+		return -EINVAL;
+	}
+
+	atomic_notifier_chain_register(&opal_notifier_head, nb);
+	return 0;
+}
+
+static void opal_do_notifier(uint64_t events)
+{
+	unsigned long flags;
+	uint64_t changed_mask;
+
+	if (atomic_read(&opal_notifier_hold))
+		return;
+
+	spin_lock_irqsave(&opal_notifier_lock, flags);
+	changed_mask = last_notified_mask ^ events;
+	last_notified_mask = events;
+	spin_unlock_irqrestore(&opal_notifier_lock, flags);
+
+	/*
+	 * We feed with the event bits and changed bits for
+	 * enough information to the callback.
+	 */
+	atomic_notifier_call_chain(&opal_notifier_head,
+				   events, (void *)changed_mask);
+}
+
+void opal_notifier_update_evt(uint64_t evt_mask,
+			      uint64_t evt_val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&opal_notifier_lock, flags);
+	last_notified_mask &= ~evt_mask;
+	last_notified_mask |= evt_val;
+	spin_unlock_irqrestore(&opal_notifier_lock, flags);
+}
+
+void opal_notifier_enable(void)
+{
+	int64_t rc;
+	uint64_t evt = 0;
+
+	atomic_set(&opal_notifier_hold, 0);
+
+	/* Process pending events */
+	rc = opal_poll_events(&evt);
+	if (rc == OPAL_SUCCESS && evt)
+		opal_do_notifier(evt);
+}
+
+void opal_notifier_disable(void)
+{
+	atomic_set(&opal_notifier_hold, 1);
+}
+
 int opal_get_chars(uint32_t vtermno, char *buf, int count)
 {
 	s64 len, rc;
@@ -297,7 +364,7 @@ static irqreturn_t opal_interrupt(int irq, void *data)
 
 	opal_handle_interrupt(virq_to_hw(irq), &events);
 
-	/* XXX TODO: Do something with the events */
+	opal_do_notifier(events);
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9c9d15e4cdf2..49b57b9f835d 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/string.h>
 #include <linux/init.h>
@@ -32,6 +33,7 @@
 #include <asm/iommu.h>
 #include <asm/tce.h>
 #include <asm/xics.h>
+#include <asm/debug.h>
 
 #include "powernv.h"
 #include "pci.h"
@@ -441,6 +443,17 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 	set_iommu_table_base(&pdev->dev, &pe->tce32_table);
 }
 
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		set_iommu_table_base(&dev->dev, &pe->tce32_table);
+		if (dev->subordinate)
+			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+	}
+}
+
 static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
 					 u64 *startp, u64 *endp)
 {
@@ -595,6 +608,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 			       TCE_PCI_SWINV_PAIR;
 	}
 	iommu_init_table(tbl, phb->hose->node);
+	iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
+
+	if (pe->pdev)
+		set_iommu_table_base(&pe->pdev->dev, tbl);
+	else
+		pnv_ioda_setup_bus_dma(pe, pe->pbus);
 
 	return;
  fail:
@@ -667,6 +686,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	}
 	iommu_init_table(tbl, phb->hose->node);
 
+	if (pe->pdev)
+		set_iommu_table_base(&pe->pdev->dev, tbl);
+	else
+		pnv_ioda_setup_bus_dma(pe, pe->pbus);
+
 	return;
 fail:
 	if (pe->tce32_seg >= 0)
@@ -968,11 +992,38 @@ static void pnv_pci_ioda_setup_DMA(void)
 	}
 }
 
+static void pnv_pci_ioda_create_dbgfs(void)
+{
+#ifdef CONFIG_DEBUG_FS
+	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb;
+	char name[16];
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		sprintf(name, "PCI%04x", hose->global_number);
+		phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
+		if (!phb->dbgfs)
+			pr_warning("%s: Error on creating debugfs on PHB#%x\n",
+				__func__, hose->global_number);
+	}
+#endif /* CONFIG_DEBUG_FS */
+}
+
 static void pnv_pci_ioda_fixup(void)
 {
 	pnv_pci_ioda_setup_PEs();
 	pnv_pci_ioda_setup_seg();
 	pnv_pci_ioda_setup_DMA();
+
+	pnv_pci_ioda_create_dbgfs();
+
+#ifdef CONFIG_EEH
+	eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
+	eeh_addr_cache_build();
+	eeh_init();
+#endif
 }
 
 /*
@@ -1049,7 +1100,8 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
 		       OPAL_ASSERT_RESET);
 }
 
-void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
+void __init pnv_pci_init_ioda_phb(struct device_node *np,
+				  u64 hub_id, int ioda_type)
 {
 	struct pci_controller *hose;
 	static int primary = 1;
@@ -1087,6 +1139,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 	hose->first_busno = 0;
 	hose->last_busno = 0xff;
 	hose->private_data = phb;
+	phb->hub_id = hub_id;
 	phb->opal_id = phb_id;
 	phb->type = ioda_type;
 
@@ -1172,6 +1225,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 		phb->ioda.io_size, phb->ioda.io_segsize);
 
 	phb->hose->ops = &pnv_pci_ops;
+#ifdef CONFIG_EEH
+	phb->eeh_ops = &ioda_eeh_ops;
+#endif
 
 	/* Setup RID -> PE mapping function */
 	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
@@ -1212,7 +1268,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 
 void pnv_pci_init_ioda2_phb(struct device_node *np)
 {
-	pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2);
+	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }
 
 void __init pnv_pci_init_ioda_hub(struct device_node *np)
@@ -1235,6 +1291,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
 	for_each_child_of_node(np, phbn) {
 		/* Look for IODA1 PHBs */
 		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
-			pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1);
+			pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
 	}
 }
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 92b37a0186c9..b68db6325c1b 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -86,13 +86,16 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
 static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
 					 struct pci_dev *pdev)
 {
-	if (phb->p5ioc2.iommu_table.it_map == NULL)
+	if (phb->p5ioc2.iommu_table.it_map == NULL) {
 		iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
+		iommu_register_group(&phb->p5ioc2.iommu_table,
+				pci_domain_nr(phb->hose->bus), phb->opal_id);
+	}
 
 	set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table);
 }
 
-static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
+static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
 					   void *tce_mem, u64 tce_size)
 {
 	struct pnv_phb *phb;
@@ -133,6 +136,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
 	phb->hose->first_busno = 0;
 	phb->hose->last_busno = 0xff;
 	phb->hose->private_data = phb;
+	phb->hub_id = hub_id;
 	phb->opal_id = phb_id;
 	phb->type = PNV_PHB_P5IOC2;
 	phb->model = PNV_PHB_MODEL_P5IOC2;
@@ -226,7 +230,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
 	for_each_child_of_node(np, phbn) {
 		if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
 		    of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
-			pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb);
+			pnv_pci_init_p5ioc2_phb(phbn, hub_id,
+					tce_mem, tce_per_phb);
 			tce_mem += tce_per_phb;
 		}
 	}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 277343cc6a3d..a28d3b5e6393 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -20,6 +20,7 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/msi.h>
+#include <linux/iommu.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -32,6 +33,8 @@
 #include <asm/iommu.h>
 #include <asm/tce.h>
 #include <asm/firmware.h>
+#include <asm/eeh_event.h>
+#include <asm/eeh.h>
 
 #include "powernv.h"
 #include "pci.h"
@@ -202,7 +205,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
 
 	spin_lock_irqsave(&phb->lock, flags);
 
-	rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
+					 PNV_PCI_DIAG_BUF_SIZE);
 	has_diag = (rc == OPAL_SUCCESS);
 
 	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
@@ -227,43 +231,50 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
 	spin_unlock_irqrestore(&phb->lock, flags);
 }
 
-static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus,
-				     u32 bdfn)
+static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
+				     struct device_node *dn)
 {
 	s64	rc;
 	u8	fstate;
 	u16	pcierr;
 	u32	pe_no;
 
-	/* Get PE# if we support IODA */
-	pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0;
+	/*
+	 * Get the PE#. During the PCI probe stage, we might not
+	 * setup that yet. So all ER errors should be mapped to
+	 * PE#0
+	 */
+	pe_no = PCI_DN(dn)->pe_number;
+	if (pe_no == IODA_INVALID_PE)
+		pe_no = 0;
 
 	/* Read freeze status */
 	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
 					NULL);
 	if (rc) {
-		pr_warning("PCI %d: Failed to read EEH status for PE#%d,"
-			   " err %lld\n", phb->hose->global_number, pe_no, rc);
+		pr_warning("%s: Can't read EEH status (PE#%d) for "
+			   "%s, err %lld\n",
+			   __func__, pe_no, dn->full_name, rc);
 		return;
 	}
-	cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n",
-		bdfn, pe_no, fstate);
+	cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
+		(PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
+		pe_no, fstate);
 	if (fstate != 0)
 		pnv_pci_handle_eeh_config(phb, pe_no);
 }
 
-static int pnv_pci_read_config(struct pci_bus *bus,
-			       unsigned int devfn,
-			       int where, int size, u32 *val)
+int pnv_pci_cfg_read(struct device_node *dn,
+		     int where, int size, u32 *val)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
-	u32 bdfn = (((uint64_t)bus->number) << 8) | devfn;
+	struct pci_dn *pdn = PCI_DN(dn);
+	struct pnv_phb *phb = pdn->phb->private_data;
+	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
+#ifdef CONFIG_EEH
+	struct eeh_pe *phb_pe = NULL;
+#endif
 	s64 rc;
 
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
 	switch (size) {
 	case 1: {
 		u8 v8;
@@ -287,28 +298,43 @@ static int pnv_pci_read_config(struct pci_bus *bus,
 	default:
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
-	cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n",
-		bus->number, devfn, where, size, *val);
-
-	/* Check if the PHB got frozen due to an error (no response) */
-	pnv_pci_config_check_eeh(phb, bus, bdfn);
+	cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+		__func__, pdn->busno, pdn->devfn, where, size, *val);
+
+	/*
+	 * Check if the specified PE has been put into frozen
+	 * state. On the other hand, we needn't do that while
+	 * the PHB has been put into frozen state because of
+	 * PHB-fatal errors.
+	 */
+#ifdef CONFIG_EEH
+	phb_pe = eeh_phb_pe_get(pdn->phb);
+	if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED))
+		return PCIBIOS_SUCCESSFUL;
+
+	if (phb->eeh_state & PNV_EEH_STATE_ENABLED) {
+		if (*val == EEH_IO_ERROR_VALUE(size) &&
+		    eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	} else {
+		pnv_pci_config_check_eeh(phb, dn);
+	}
+#else
+	pnv_pci_config_check_eeh(phb, dn);
+#endif
 
 	return PCIBIOS_SUCCESSFUL;
 }
 
-static int pnv_pci_write_config(struct pci_bus *bus,
-				unsigned int devfn,
-				int where, int size, u32 val)
+int pnv_pci_cfg_write(struct device_node *dn,
+		      int where, int size, u32 val)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
-	u32 bdfn = (((uint64_t)bus->number) << 8) | devfn;
-
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
+	struct pci_dn *pdn = PCI_DN(dn);
+	struct pnv_phb *phb = pdn->phb->private_data;
+	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
 
-	cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n",
-		bus->number, devfn, where, size, val);
+	cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+		pdn->busno, pdn->devfn, where, size, val);
 	switch (size) {
 	case 1:
 		opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
@@ -322,14 +348,54 @@ static int pnv_pci_write_config(struct pci_bus *bus,
 	default:
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
+
 	/* Check if the PHB got frozen due to an error (no response) */
-	pnv_pci_config_check_eeh(phb, bus, bdfn);
+#ifdef CONFIG_EEH
+	if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED))
+		pnv_pci_config_check_eeh(phb, dn);
+#else
+	pnv_pci_config_check_eeh(phb, dn);
+#endif
 
 	return PCIBIOS_SUCCESSFUL;
 }
 
+static int pnv_pci_read_config(struct pci_bus *bus,
+			       unsigned int devfn,
+			       int where, int size, u32 *val)
+{
+	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
+	struct pci_dn *pdn;
+
+	for (dn = busdn->child; dn; dn = dn->sibling) {
+		pdn = PCI_DN(dn);
+		if (pdn && pdn->devfn == devfn)
+			return pnv_pci_cfg_read(dn, where, size, val);
+	}
+
+	*val = 0xFFFFFFFF;
+	return PCIBIOS_DEVICE_NOT_FOUND;
+
+}
+
+static int pnv_pci_write_config(struct pci_bus *bus,
+				unsigned int devfn,
+				int where, int size, u32 val)
+{
+	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
+	struct pci_dn *pdn;
+
+	for (dn = busdn->child; dn; dn = dn->sibling) {
+		pdn = PCI_DN(dn);
+		if (pdn && pdn->devfn == devfn)
+			return pnv_pci_cfg_write(dn, where, size, val);
+	}
+
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
 struct pci_ops pnv_pci_ops = {
-	.read = pnv_pci_read_config,
+	.read  = pnv_pci_read_config,
 	.write = pnv_pci_write_config,
 };
 
@@ -412,6 +478,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
 	pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
 				  be32_to_cpup(sizep), 0);
 	iommu_init_table(tbl, hose->node);
+	iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
 
 	/* Deal with SW invalidated TCEs when needed (BML way) */
 	swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 25d76c4df50b..d633c64e05a1 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -66,15 +66,43 @@ struct pnv_ioda_pe {
 	struct list_head	list;
 };
 
+/* IOC dependent EEH operations */
+#ifdef CONFIG_EEH
+struct pnv_eeh_ops {
+	int (*post_init)(struct pci_controller *hose);
+	int (*set_option)(struct eeh_pe *pe, int option);
+	int (*get_state)(struct eeh_pe *pe);
+	int (*reset)(struct eeh_pe *pe, int option);
+	int (*get_log)(struct eeh_pe *pe, int severity,
+		       char *drv_log, unsigned long len);
+	int (*configure_bridge)(struct eeh_pe *pe);
+	int (*next_error)(struct eeh_pe **pe);
+};
+
+#define PNV_EEH_STATE_ENABLED	(1 << 0)	/* EEH enabled	*/
+#define PNV_EEH_STATE_REMOVED	(1 << 1)	/* PHB removed	*/
+
+#endif /* CONFIG_EEH */
+
 struct pnv_phb {
 	struct pci_controller	*hose;
 	enum pnv_phb_type	type;
 	enum pnv_phb_model	model;
+	u64			hub_id;
 	u64			opal_id;
 	void __iomem		*regs;
 	int			initialized;
 	spinlock_t		lock;
 
+#ifdef CONFIG_EEH
+	struct pnv_eeh_ops	*eeh_ops;
+	int			eeh_state;
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry		*dbgfs;
+#endif
+
 #ifdef CONFIG_PCI_MSI
 	unsigned int		msi_base;
 	unsigned int		msi32_support;
@@ -150,7 +178,14 @@ struct pnv_phb {
 };
 
 extern struct pci_ops pnv_pci_ops;
+#ifdef CONFIG_EEH
+extern struct pnv_eeh_ops ioda_eeh_ops;
+#endif
 
+int pnv_pci_cfg_read(struct device_node *dn,
+		     int where, int size, u32 *val);
+int pnv_pci_cfg_write(struct device_node *dn,
+		      int where, int size, u32 val);
 extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 				      void *tce_mem, u64 tce_size,
 				      u64 dma_offset);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d4459bfc92f7..84438af96c05 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -93,6 +93,8 @@ static void  __noreturn pnv_restart(char *cmd)
 {
 	long rc = OPAL_BUSY;
 
+	opal_notifier_disable();
+
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_cec_reboot();
 		if (rc == OPAL_BUSY_EVENT)
@@ -108,6 +110,8 @@ static void __noreturn pnv_power_off(void)
 {
 	long rc = OPAL_BUSY;
 
+	opal_notifier_disable();
+
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_cec_power_down(0);
 		if (rc == OPAL_BUSY_EVENT)
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 88c9459c3e07..89e3857af4e0 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -40,7 +40,7 @@
 #define DBG(fmt...)
 #endif
 
-static void __cpuinit pnv_smp_setup_cpu(int cpu)
+static void pnv_smp_setup_cpu(int cpu)
 {
 	if (cpu != boot_cpuid)
 		xics_setup_cpu();
@@ -51,7 +51,7 @@ static int pnv_smp_cpu_bootable(unsigned int nr)
 	/* Special case - we inhibit secondary thread startup
 	 * during boot if the user requests it.
 	 */
-	if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
+	if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
 		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
 			return 0;
 		if (smt_enabled_at_boot
author	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-04 21:29:23 +0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-04 21:29:23 +0400
commit	65b97fb7303050fc826e518cf67fc283da23314f (patch)
tree	595e7f04d65d95a39d65bd2dcf2385b3b6ea7969 /arch/powerpc/platforms/powernv
parent	ddcf6600b133697adbafd96e080818bdc0dfd028 (diff)
parent	1d8b368ab4aacfc3f864655baad4d31a3028ec1a (diff)
download	linux-65b97fb7303050fc826e518cf67fc283da23314f.tar.xz