From 3ec8b78fcc5aa7745026d8d85a4e9ab52c922765 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Fri, 31 Jan 2014 00:30:45 +0530
Subject: powerpc/pseries: Disable relocation on exception while going down
 during crash.

Disable relocation on exception while going down even in kdump case. This
is because we are about clear htab mappings while kexec-ing into kdump
kernel and we may run into issues if we still have AIL ON.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/setup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8e639d7cbda7..972df0ffd4dc 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -430,8 +430,7 @@ static void pSeries_machine_kexec(struct kimage *image)
 {
 	long rc;
 
-	if (firmware_has_feature(FW_FEATURE_SET_MODE) &&
-	    (image->type != KEXEC_TYPE_CRASH)) {
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		rc = pSeries_disable_reloc_on_exc();
 		if (rc != H_SUCCESS)
 			pr_warning("Warning: Failed to disable relocation on "
-- 
cgit v1.2.3


From 8d4887ee30695d6b092e96029ae974505c43bfb6 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 10 Dec 2013 14:02:49 +1100
Subject: powerpc/pseries: Select ARCH_RANDOM on pseries

We have a driver for the ARCH_RANDOM hook in rng.c, so we should select
ARCH_RANDOM on pseries.

Without this the build breaks if you turn ARCH_RANDOM off.

This hasn't broken the build because pseries_defconfig doesn't specify a
value for PPC_POWERNV, which is default y, and selects ARCH_RANDOM.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 37300f6ee244..80b1d57c306a 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -20,6 +20,7 @@ config PPC_PSERIES
 	select PPC_DOORBELL
 	select HAVE_CONTEXT_TRACKING
 	select HOTPLUG_CPU if SMP
+	select ARCH_RANDOM
 	default y
 
 config PPC_SPLPAR
-- 
cgit v1.2.3


From cd15b048445d0a54f7147c35a86c5a16ef231554 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 11 Feb 2014 11:32:38 +1100
Subject: powerpc/powernv: Add iommu DMA bypass support for IODA2

This patch adds the support for to create a direct iommu "bypass"
window on IODA2 bridges (such as Power8) allowing to bypass iommu
page translation completely for 64-bit DMA capable devices, thus
significantly improving DMA performances.

Additionally, this adds a hook to the struct iommu_table so that
the IOMMU API / VFIO can disable the bypass when external ownership
is requested, since in that case, the device will be used by an
environment such as userspace or a KVM guest which must not be
allowed to bypass translations.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/dma-mapping.h    |  1 +
 arch/powerpc/include/asm/iommu.h          |  1 +
 arch/powerpc/kernel/dma.c                 | 10 ++--
 arch/powerpc/kernel/iommu.c               | 12 +++++
 arch/powerpc/platforms/powernv/pci-ioda.c | 84 +++++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/pci.c      | 10 ++++
 arch/powerpc/platforms/powernv/pci.h      |  6 ++-
 arch/powerpc/platforms/powernv/powernv.h  |  8 +++
 arch/powerpc/platforms/powernv/setup.c    |  9 ++++
 9 files changed, 137 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index e27e9ad6818e..150866b2a3fe 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -134,6 +134,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
 }
 
 extern int dma_set_mask(struct device *dev, u64 dma_mask);
+extern int __dma_set_mask(struct device *dev, u64 dma_mask);
 
 #define dma_alloc_coherent(d,s,h,f)	dma_alloc_attrs(d,s,h,f,NULL)
 
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index f7a8036579b5..42632c7a2a4e 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -77,6 +77,7 @@ struct iommu_table {
 #ifdef CONFIG_IOMMU_API
 	struct iommu_group *it_group;
 #endif
+	void (*set_bypass)(struct iommu_table *tbl, bool enable);
 };
 
 /* Pure 2^n version of get_order */
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 8032b97ccdcb..ee78f6e49d64 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -191,12 +191,10 @@ EXPORT_SYMBOL(dma_direct_ops);
 
 #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
 
-int dma_set_mask(struct device *dev, u64 dma_mask)
+int __dma_set_mask(struct device *dev, u64 dma_mask)
 {
 	struct dma_map_ops *dma_ops = get_dma_ops(dev);
 
-	if (ppc_md.dma_set_mask)
-		return ppc_md.dma_set_mask(dev, dma_mask);
 	if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
 		return dma_ops->set_dma_mask(dev, dma_mask);
 	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
@@ -204,6 +202,12 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
 	*dev->dma_mask = dma_mask;
 	return 0;
 }
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (ppc_md.dma_set_mask)
+		return ppc_md.dma_set_mask(dev, dma_mask);
+	return __dma_set_mask(dev, dma_mask);
+}
 EXPORT_SYMBOL(dma_set_mask);
 
 u64 dma_get_required_mask(struct device *dev)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index d773dd440a45..88e3ec6e1d96 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1088,6 +1088,14 @@ int iommu_take_ownership(struct iommu_table *tbl)
 	memset(tbl->it_map, 0xff, sz);
 	iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
 
+	/*
+	 * Disable iommu bypass, otherwise the user can DMA to all of
+	 * our physical memory via the bypass window instead of just
+	 * the pages that has been explicitly mapped into the iommu
+	 */
+	if (tbl->set_bypass)
+		tbl->set_bypass(tbl, false);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(iommu_take_ownership);
@@ -1102,6 +1110,10 @@ void iommu_release_ownership(struct iommu_table *tbl)
 	/* Restore bit#0 set by iommu_init_table() */
 	if (tbl->it_offset == 0)
 		set_bit(0, tbl->it_map);
+
+	/* The kernel owns the device now, we can restore the iommu bypass */
+	if (tbl->set_bypass)
+		tbl->set_bypass(tbl, true);
 }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7d6dcc6d5fa9..3b2b4fb3585b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -21,6 +21,7 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/msi.h>
+#include <linux/memblock.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -460,9 +461,39 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 		return;
 
 	pe = &phb->ioda.pe_array[pdn->pe_number];
+	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
 	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
 }
 
+static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
+				     struct pci_dev *pdev, u64 dma_mask)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pnv_ioda_pe *pe;
+	uint64_t top;
+	bool bypass = false;
+
+	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+		return -ENODEV;;
+
+	pe = &phb->ioda.pe_array[pdn->pe_number];
+	if (pe->tce_bypass_enabled) {
+		top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+		bypass = (dma_mask >= top);
+	}
+
+	if (bypass) {
+		dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
+		set_dma_ops(&pdev->dev, &dma_direct_ops);
+		set_dma_offset(&pdev->dev, pe->tce_bypass_base);
+	} else {
+		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
+		set_dma_ops(&pdev->dev, &dma_iommu_ops);
+		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+	}
+	return 0;
+}
+
 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
 {
 	struct pci_dev *dev;
@@ -657,6 +688,56 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
 }
 
+static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+{
+	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
+					      tce32_table);
+	uint16_t window_id = (pe->pe_number << 1 ) + 1;
+	int64_t rc;
+
+	pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
+	if (enable) {
+		phys_addr_t top = memblock_end_of_DRAM();
+
+		top = roundup_pow_of_two(top);
+		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+						     pe->pe_number,
+						     window_id,
+						     pe->tce_bypass_base,
+						     top);
+	} else {
+		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+						     pe->pe_number,
+						     window_id,
+						     pe->tce_bypass_base,
+						     0);
+
+		/*
+		 * We might want to reset the DMA ops of all devices on
+		 * this PE. However in theory, that shouldn't be necessary
+		 * as this is used for VFIO/KVM pass-through and the device
+		 * hasn't yet been returned to its kernel driver
+		 */
+	}
+	if (rc)
+		pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
+	else
+		pe->tce_bypass_enabled = enable;
+}
+
+static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
+					  struct pnv_ioda_pe *pe)
+{
+	/* TVE #1 is selected by PCI address bit 59 */
+	pe->tce_bypass_base = 1ull << 59;
+
+	/* Install set_bypass callback for VFIO */
+	pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
+
+	/* Enable bypass by default */
+	pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
+}
+
 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe)
 {
@@ -727,6 +808,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	else
 		pnv_ioda_setup_bus_dma(pe, pe->pbus);
 
+	/* Also create a bypass window */
+	pnv_pci_ioda2_setup_bypass_pe(phb, pe);
 	return;
 fail:
 	if (pe->tce32_seg >= 0)
@@ -1286,6 +1369,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 
 	/* Setup TCEs */
 	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
+	phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
 
 	/* Setup shutdown function for kexec */
 	phb->shutdown = pnv_pci_ioda_shutdown;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b555ebc57ef5..95633d79ef5d 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -634,6 +634,16 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 		pnv_pci_dma_fallback_setup(hose, pdev);
 }
 
+int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+
+	if (phb && phb->dma_set_mask)
+		return phb->dma_set_mask(phb, pdev, dma_mask);
+	return __dma_set_mask(&pdev->dev, dma_mask);
+}
+
 void pnv_pci_shutdown(void)
 {
 	struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 13f1942a9a5f..cde169442775 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -54,7 +54,9 @@ struct pnv_ioda_pe {
 	struct iommu_table	tce32_table;
 	phys_addr_t		tce_inval_reg_phys;
 
-	/* XXX TODO: Add support for additional 64-bit iommus */
+	/* 64-bit TCE bypass region */
+	bool			tce_bypass_enabled;
+	uint64_t		tce_bypass_base;
 
 	/* MSIs. MVE index is identical for for 32 and 64 bit MSI
 	 * and -1 if not supported. (It's actually identical to the
@@ -113,6 +115,8 @@ struct pnv_phb {
 			 unsigned int hwirq, unsigned int virq,
 			 unsigned int is_64, struct msi_msg *msg);
 	void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
+	int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
+			    u64 dma_mask);
 	void (*fixup_phb)(struct pci_controller *hose);
 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
 	void (*shutdown)(struct pnv_phb *phb);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index de6819be1f95..0051e108ef0f 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -7,12 +7,20 @@ extern void pnv_smp_init(void);
 static inline void pnv_smp_init(void) { }
 #endif
 
+struct pci_dev;
+
 #ifdef CONFIG_PCI
 extern void pnv_pci_init(void);
 extern void pnv_pci_shutdown(void);
+extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask);
 #else
 static inline void pnv_pci_init(void) { }
 static inline void pnv_pci_shutdown(void) { }
+
+static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+	return -ENODEV;
+}
 #endif
 
 extern void pnv_lpc_init(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 21166f65c97c..110f4fbd319f 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -27,6 +27,7 @@
 #include <linux/interrupt.h>
 #include <linux/bug.h>
 #include <linux/cpuidle.h>
+#include <linux/pci.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
@@ -141,6 +142,13 @@ static void pnv_progress(char *s, unsigned short hex)
 {
 }
 
+static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (dev_is_pci(dev))
+		return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
+	return __dma_set_mask(dev, dma_mask);
+}
+
 static void pnv_shutdown(void)
 {
 	/* Let the PCI code clear up IODA tables */
@@ -238,6 +246,7 @@ define_machine(powernv) {
 	.machine_shutdown	= pnv_shutdown,
 	.power_save             = powernv_idle,
 	.calibrate_decr		= generic_calibrate_decr,
+	.dma_set_mask		= pnv_dma_set_mask,
 #ifdef CONFIG_KEXEC
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
 #endif
-- 
cgit v1.2.3


From b020cc6c03a37c3526fcb1dff274f649257949e0 Mon Sep 17 00:00:00 2001
From: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
Date: Fri, 17 Jan 2014 11:56:51 -0200
Subject: powerpc/pseries: Fix regression on PCI link speed

Commit 5091f0c (powerpc/pseries: Fix PCIE link speed endian issue)
introduced a regression on the PCI link speed detection using the
device-tree property. The ibm,pcie-link-speed-stats property is composed
of two 32-bit integers, the first one being the maxinum link speed and
the second the current link speed. The changes introduced by the
aforementioned commit are considering just the first integer.

Fix this issue by changing how the property is accessed, using the
helper functions to properly access the array of values. The explicit
byte swapping is not needed anymore here, since it's done by the helper
functions.

Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/pci.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 70670a2d9cf2..a6f7a1460e2f 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -113,7 +113,8 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
 	struct device_node *dn, *pdn;
 	struct pci_bus *bus;
-	const __be32 *pcie_link_speed_stats;
+	u32 pcie_link_speed_stats[2];
+	int rc;
 
 	bus = bridge->bus;
 
@@ -122,20 +123,21 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 		return 0;
 
 	for (pdn = dn; pdn != NULL; pdn = of_get_next_parent(pdn)) {
-		pcie_link_speed_stats = of_get_property(pdn,
-			"ibm,pcie-link-speed-stats", NULL);
-		if (pcie_link_speed_stats)
+		rc = of_property_read_u32_array(pdn,
+				"ibm,pcie-link-speed-stats",
+				&pcie_link_speed_stats[0], 2);
+		if (!rc)
 			break;
 	}
 
 	of_node_put(pdn);
 
-	if (!pcie_link_speed_stats) {
+	if (rc) {
 		pr_err("no ibm,pcie-link-speed-stats property\n");
 		return 0;
 	}
 
-	switch (be32_to_cpup(pcie_link_speed_stats)) {
+	switch (pcie_link_speed_stats[0]) {
 	case 0x01:
 		bus->max_bus_speed = PCIE_SPEED_2_5GT;
 		break;
@@ -147,7 +149,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 		break;
 	}
 
-	switch (be32_to_cpup(pcie_link_speed_stats)) {
+	switch (pcie_link_speed_stats[1]) {
 	case 0x01:
 		bus->cur_bus_speed = PCIE_SPEED_2_5GT;
 		break;
-- 
cgit v1.2.3


From 49d9684a54d21930372b7fb0d3d7b5617f621706 Mon Sep 17 00:00:00 2001
From: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
Date: Fri, 17 Jan 2014 11:56:52 -0200
Subject: powerpc/pseries: Add Gen3 definitions for PCIE link speed

Rev3 of the PCI Express Base Specification defines a Supported Link
Speeds Vector where the bit definitions within this field are:

Bit 0 - 2.5 GT/s
Bit 1 - 5.0 GT/s
Bit 2 - 8.0 GT/s

This vector definition is used by the platform firmware to export the
maximum and current link speeds of the PCI bus via the
"ibm,pcie-link-speed-stats" device-tree property.

This patch updates pseries_root_bridge_prepare() to detect Gen3
speed buses (defined by 0x04).

Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/pci.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index a6f7a1460e2f..c413ec158ff5 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -144,6 +144,9 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 	case 0x02:
 		bus->max_bus_speed = PCIE_SPEED_5_0GT;
 		break;
+	case 0x04:
+		bus->max_bus_speed = PCIE_SPEED_8_0GT;
+		break;
 	default:
 		bus->max_bus_speed = PCI_SPEED_UNKNOWN;
 		break;
@@ -156,6 +159,9 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 	case 0x02:
 		bus->cur_bus_speed = PCIE_SPEED_5_0GT;
 		break;
+	case 0x04:
+		bus->cur_bus_speed = PCIE_SPEED_8_0GT;
+		break;
 	default:
 		bus->cur_bus_speed = PCI_SPEED_UNKNOWN;
 		break;
-- 
cgit v1.2.3


From 5b2e198e50f6ba57081586b853163ea1bb95f1a8 Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 12 Feb 2014 15:24:54 +0800
Subject: powerpc/powernv: Rework EEH reset

When doing reset in order to recover the affected PE, we issue
hot reset on PE primary bus if it's not root bus. Otherwise, we
issue hot or fundamental reset on root port or PHB accordingly.
For the later case, we didn't cover the situation where PE only
includes root port and it potentially causes kernel crash upon
EEH error to the PE.

The patch reworks the logic of EEH reset to improve the code
readability and also avoid the kernel crash.

Cc: stable@vger.kernel.org
Reported-by: Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 29 ++++-------------------------
 1 file changed, 4 insertions(+), 25 deletions(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index e1e71618b70c..fcb79cffdb66 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -489,8 +489,7 @@ static int ioda_eeh_bridge_reset(struct pci_controller *hose,
 static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 {
 	struct pci_controller *hose = pe->phb;
-	struct eeh_dev *edev;
-	struct pci_dev *dev;
+	struct pci_bus *bus;
 	int ret;
 
 	/*
@@ -519,31 +518,11 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 	if (pe->type & EEH_PE_PHB) {
 		ret = ioda_eeh_phb_reset(hose, option);
 	} else {
-		if (pe->type & EEH_PE_DEVICE) {
-			/*
-			 * If it's device PE, we didn't refer to the parent
-			 * PCI bus yet. So we have to figure it out indirectly.
-			 */
-			edev = list_first_entry(&pe->edevs,
-					struct eeh_dev, list);
-			dev = eeh_dev_to_pci_dev(edev);
-			dev = dev->bus->self;
-		} else {
-			/*
-			 * If it's bus PE, the parent PCI bus is already there
-			 * and just pick it up.
-			 */
-			dev = pe->bus->self;
-		}
-
-		/*
-		 * Do reset based on the fact that the direct upstream bridge
-		 * is root bridge (port) or not.
-		 */
-		if (dev->bus->number == 0)
+		bus = eeh_pe_bus_get(pe);
+		if (pci_is_root_bus(bus))
 			ret = ioda_eeh_root_reset(hose, option);
 		else
-			ret = ioda_eeh_bridge_reset(hose, dev, option);
+			ret = ioda_eeh_bridge_reset(hose, bus->self, option);
 	}
 
 	return ret;
-- 
cgit v1.2.3


From 2ec5a0adf60c23bb6b0a95d3b96a8c1ff1e1aa5a Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 12 Feb 2014 15:24:55 +0800
Subject: powerpc/eeh: Cleanup on eeh_subsystem_enabled

The patch cleans up variable eeh_subsystem_enabled so that we needn't
refer the variable directly from external. Instead, we will use
function eeh_enabled() and eeh_set_enable() to operate the variable.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/eeh.h               | 21 +++++++++++++++++++--
 arch/powerpc/kernel/eeh.c                    | 12 ++++++------
 arch/powerpc/platforms/powernv/eeh-powernv.c |  2 +-
 arch/powerpc/platforms/pseries/eeh_pseries.c |  2 +-
 4 files changed, 27 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9e39ceb1d19f..d4dd41fb951b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -172,10 +172,20 @@ struct eeh_ops {
 };
 
 extern struct eeh_ops *eeh_ops;
-extern int eeh_subsystem_enabled;
+extern bool eeh_subsystem_enabled;
 extern raw_spinlock_t confirm_error_lock;
 extern int eeh_probe_mode;
 
+static inline bool eeh_enabled(void)
+{
+	return eeh_subsystem_enabled;
+}
+
+static inline void eeh_set_enable(bool mode)
+{
+	eeh_subsystem_enabled = mode;
+}
+
 #define EEH_PROBE_MODE_DEV	(1<<0)	/* From PCI device	*/
 #define EEH_PROBE_MODE_DEVTREE	(1<<1)	/* From device tree	*/
 
@@ -246,7 +256,7 @@ void eeh_remove_device(struct pci_dev *);
  * If this macro yields TRUE, the caller relays to eeh_check_failure()
  * which does further tests out of line.
  */
-#define EEH_POSSIBLE_ERROR(val, type)	((val) == (type)~0 && eeh_subsystem_enabled)
+#define EEH_POSSIBLE_ERROR(val, type)	((val) == (type)~0 && eeh_enabled())
 
 /*
  * Reads from a device which has been isolated by EEH will return
@@ -257,6 +267,13 @@ void eeh_remove_device(struct pci_dev *);
 
 #else /* !CONFIG_EEH */
 
+static inline bool eeh_enabled(void)
+{
+        return false;
+}
+
+static inline void eeh_set_enable(bool mode) { }
+
 static inline int eeh_init(void)
 {
 	return 0;
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 148db72a8c43..f22f7b6f6b01 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -89,7 +89,7 @@
 /* Platform dependent EEH operations */
 struct eeh_ops *eeh_ops = NULL;
 
-int eeh_subsystem_enabled;
+bool eeh_subsystem_enabled = false;
 EXPORT_SYMBOL(eeh_subsystem_enabled);
 
 /*
@@ -364,7 +364,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 
 	eeh_stats.total_mmio_ffs++;
 
-	if (!eeh_subsystem_enabled)
+	if (!eeh_enabled())
 		return 0;
 
 	if (!edev) {
@@ -822,7 +822,7 @@ int eeh_init(void)
 			return ret;
 	}
 
-	if (eeh_subsystem_enabled)
+	if (eeh_enabled())
 		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
 	else
 		pr_warning("EEH: No capable adapters found\n");
@@ -897,7 +897,7 @@ void eeh_add_device_late(struct pci_dev *dev)
 	struct device_node *dn;
 	struct eeh_dev *edev;
 
-	if (!dev || !eeh_subsystem_enabled)
+	if (!dev || !eeh_enabled())
 		return;
 
 	pr_debug("EEH: Adding device %s\n", pci_name(dev));
@@ -1005,7 +1005,7 @@ void eeh_remove_device(struct pci_dev *dev)
 {
 	struct eeh_dev *edev;
 
-	if (!dev || !eeh_subsystem_enabled)
+	if (!dev || !eeh_enabled())
 		return;
 	edev = pci_dev_to_eeh_dev(dev);
 
@@ -1045,7 +1045,7 @@ void eeh_remove_device(struct pci_dev *dev)
 
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
-	if (0 == eeh_subsystem_enabled) {
+	if (!eeh_enabled()) {
 		seq_printf(m, "EEH Subsystem is globally disabled\n");
 		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
 	} else {
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index a79fddc5e74e..a59788e83b8b 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -145,7 +145,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	 * Enable EEH explicitly so that we will do EEH check
 	 * while accessing I/O stuff
 	 */
-	eeh_subsystem_enabled = 1;
+	eeh_set_enable(true);
 
 	/* Save memory bars */
 	eeh_save_bars(edev);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 9ef3cc8ebc11..8a8f0472d98f 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -265,7 +265,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 			enable = 1;
 
 		if (enable) {
-			eeh_subsystem_enabled = 1;
+			eeh_set_enable(true);
 			eeh_add_to_parent_pe(edev);
 
 			pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n",
-- 
cgit v1.2.3


From 66f9af83e56bfa12964d251df9d60fb571579913 Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 12 Feb 2014 15:24:56 +0800
Subject: powerpc/eeh: Disable EEH on reboot

We possiblly detect EEH errors during reboot, particularly in kexec
path, but it's impossible for device drivers and EEH core to handle
or recover them properly.

The patch registers one reboot notifier for EEH and disable EEH
subsystem during reboot. That means the EEH errors is going to be
cleared by hardware reset or second kernel during early stage of
PCI probe.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/eeh.c                 | 20 ++++++++++++++++++++
 arch/powerpc/platforms/powernv/eeh-ioda.c |  3 ++-
 2 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f22f7b6f6b01..e7b76a6bf150 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 #include <linux/proc_fs.h>
 #include <linux/rbtree.h>
+#include <linux/reboot.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
 #include <linux/export.h>
@@ -747,6 +748,17 @@ int __exit eeh_ops_unregister(const char *name)
 	return -EEXIST;
 }
 
+static int eeh_reboot_notifier(struct notifier_block *nb,
+			       unsigned long action, void *unused)
+{
+	eeh_set_enable(false);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_reboot_nb = {
+	.notifier_call = eeh_reboot_notifier,
+};
+
 /**
  * eeh_init - EEH initialization
  *
@@ -778,6 +790,14 @@ int eeh_init(void)
 	if (machine_is(powernv) && cnt++ <= 0)
 		return ret;
 
+	/* Register reboot notifier */
+	ret = register_reboot_notifier(&eeh_reboot_nb);
+	if (ret) {
+		pr_warn("%s: Failed to register notifier (%d)\n",
+			__func__, ret);
+		return ret;
+	}
+
 	/* call platform initialization function */
 	if (!eeh_ops) {
 		pr_warning("%s: Platform EEH operation not found\n",
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index fcb79cffdb66..f51474336460 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -44,7 +44,8 @@ static int ioda_eeh_event(struct notifier_block *nb,
 
 	/* We simply send special EEH event */
 	if ((changed_evts & OPAL_EVENT_PCI_ERROR) &&
-	    (events & OPAL_EVENT_PCI_ERROR))
+	    (events & OPAL_EVENT_PCI_ERROR) &&
+	    eeh_enabled())
 		eeh_send_failure_event(NULL);
 
 	return 0;
-- 
cgit v1.2.3


From 41dd03a94c7d408d2ef32530545097f7d1befe5c Mon Sep 17 00:00:00 2001
From: Tony Breeds <tony@bakeyournoodle.com>
Date: Thu, 20 Feb 2014 21:13:52 +1100
Subject: powerpc/le: Ensure that the 'stop-self' RTAS token is handled
 correctly

Currently we're storing a host endian RTAS token in
rtas_stop_self_args.token.  We then pass that directly to rtas.  This is
fine on big endian however on little endian the token is not what we
expect.

This will typically result in hitting:
	panic("Alas, I survived.\n");

To fix this we always use the stop-self token in host order and always
convert it to be32 before passing this to rtas.

Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 82789e79e539..0ea99e3d4815 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -35,12 +35,7 @@
 #include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
-static struct rtas_args rtas_stop_self_args = {
-	.token = RTAS_UNKNOWN_SERVICE,
-	.nargs = 0,
-	.nret = 1,
-	.rets = &rtas_stop_self_args.args[0],
-};
+static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
 
 static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
 							CPU_STATE_OFFLINE;
@@ -93,15 +88,20 @@ void set_default_offline_state(int cpu)
 
 static void rtas_stop_self(void)
 {
-	struct rtas_args *args = &rtas_stop_self_args;
+	struct rtas_args args = {
+		.token = cpu_to_be32(rtas_stop_self_token),
+		.nargs = 0,
+		.nret = 1,
+		.rets = &args.args[0],
+	};
 
 	local_irq_disable();
 
-	BUG_ON(args->token == RTAS_UNKNOWN_SERVICE);
+	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
 
 	printk("cpu %u (hwid %u) Ready to die...\n",
 	       smp_processor_id(), hard_smp_processor_id());
-	enter_rtas(__pa(args));
+	enter_rtas(__pa(&args));
 
 	panic("Alas, I survived.\n");
 }
@@ -392,10 +392,10 @@ static int __init pseries_cpu_hotplug_init(void)
 		}
 	}
 
-	rtas_stop_self_args.token = rtas_token("stop-self");
+	rtas_stop_self_token = rtas_token("stop-self");
 	qcss_tok = rtas_token("query-cpu-stopped-state");
 
-	if (rtas_stop_self_args.token == RTAS_UNKNOWN_SERVICE ||
+	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
 			qcss_tok == RTAS_UNKNOWN_SERVICE) {
 		printk(KERN_INFO "CPU Hotplug not supported by firmware "
 				"- disabling.\n");
-- 
cgit v1.2.3


From 947166043732b69878123bf31f51933ad0316080 Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Tue, 25 Feb 2014 15:28:37 +0800
Subject: powerpc/powernv: Dump PHB diag-data immediately

The PHB diag-data is important to help locating the root cause for
EEH errors such as frozen PE or fenced PHB. However, the EEH core
enables IO path by clearing part of HW registers before collecting
this data causing it to be corrupted.

This patch fixes this by dumping the PHB diag-data immediately when
frozen/fenced state on PE or PHB is detected for the first time in
eeh_ops::get_state() or next_error() backend.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
CC: <stable@vger.kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powernv/eeh-ioda.c | 96 ++++++++++++++-----------------
 1 file changed, 43 insertions(+), 53 deletions(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index f51474336460..253fefe3d1a0 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -114,6 +114,7 @@ DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
 			ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
 #endif /* CONFIG_DEBUG_FS */
 
+
 /**
  * ioda_eeh_post_init - Chip dependent post initialization
  * @hose: PCI controller
@@ -221,6 +222,22 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
 	return ret;
 }
 
+static void ioda_eeh_phb_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	long rc;
+
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
+					 PNV_PCI_DIAG_BUF_SIZE);
+	if (rc != OPAL_SUCCESS) {
+		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
+			    __func__, hose->global_number, rc);
+		return;
+	}
+
+	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
+}
+
 /**
  * ioda_eeh_get_state - Retrieve the state of PE
  * @pe: EEH PE
@@ -272,6 +289,9 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
 			result |= EEH_STATE_DMA_ACTIVE;
 			result |= EEH_STATE_MMIO_ENABLED;
 			result |= EEH_STATE_DMA_ENABLED;
+		} else if (!(pe->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
 		}
 
 		return result;
@@ -315,6 +335,15 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
 			   __func__, fstate, hose->global_number, pe_no);
 	}
 
+	/* Dump PHB diag-data for frozen PE */
+	if (result != EEH_STATE_NOT_SUPPORT &&
+	    (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
+	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
+	    !(pe->state & EEH_PE_ISOLATED)) {
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		ioda_eeh_phb_diag(hose);
+	}
+
 	return result;
 }
 
@@ -529,42 +558,6 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 	return ret;
 }
 
-/**
- * ioda_eeh_get_log - Retrieve error log
- * @pe: EEH PE
- * @severity: Severity level of the log
- * @drv_log: buffer to store the log
- * @len: space of the log buffer
- *
- * The function is used to retrieve error log from P7IOC.
- */
-static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
-			    char *drv_log, unsigned long len)
-{
-	s64 ret;
-	unsigned long flags;
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-
-	spin_lock_irqsave(&phb->lock, flags);
-
-	ret = opal_pci_get_phb_diag_data2(phb->opal_id,
-			phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
-	if (ret) {
-		spin_unlock_irqrestore(&phb->lock, flags);
-		pr_warning("%s: Can't get log for PHB#%x-PE#%x (%lld)\n",
-			   __func__, hose->global_number, pe->addr, ret);
-		return -EIO;
-	}
-
-	/* The PHB diag-data is always indicative */
-	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
-
-	spin_unlock_irqrestore(&phb->lock, flags);
-
-	return 0;
-}
-
 /**
  * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
  * @pe: EEH PE
@@ -646,22 +639,6 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
 	}
 }
 
-static void ioda_eeh_phb_diag(struct pci_controller *hose)
-{
-	struct pnv_phb *phb = hose->private_data;
-	long rc;
-
-	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
-					 PNV_PCI_DIAG_BUF_SIZE);
-	if (rc != OPAL_SUCCESS) {
-		pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
-			    __func__, hose->global_number, rc);
-		return;
-	}
-
-	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
-}
-
 static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
 			       struct eeh_pe **pe)
 {
@@ -834,6 +811,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 				__func__, err_type);
 		}
 
+		/*
+		 * EEH core will try recover from fenced PHB or
+		 * frozen PE. In the time for frozen PE, EEH core
+		 * enable IO path for that before collecting logs,
+		 * but it ruins the site. So we have to dump the
+		 * log in advance here.
+		 */
+		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
+		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
+		    !((*pe)->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
+		}
+
 		/*
 		 * If we have no errors on the specific PHB or only
 		 * informative error there, we continue poking it.
@@ -852,7 +843,6 @@ struct pnv_eeh_ops ioda_eeh_ops = {
 	.set_option		= ioda_eeh_set_option,
 	.get_state		= ioda_eeh_get_state,
 	.reset			= ioda_eeh_reset,
-	.get_log		= ioda_eeh_get_log,
 	.configure_bridge	= ioda_eeh_configure_bridge,
 	.next_error		= ioda_eeh_next_error
 };
-- 
cgit v1.2.3


From af87d2fe95444d107e0c0cf0ba7e20e6716a7bfd Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Tue, 25 Feb 2014 15:28:38 +0800
Subject: powerpc/powernv: Refactor PHB diag-data dump

As Ben suggested, the patch prints PHB diag-data with multiple
fields in one line and omits the line if the fields of that
line are all zero.

With the patch applied, the PHB3 diag-data dump looks like:

PHB3 PHB#3 Diag-data (Version: 1)

  brdgCtl:     00000002
  RootSts:     0000000f 00400000 b0830008 00100147 00002000
  nFir:        0000000000000000 0030006e00000000 0000000000000000
  PhbSts:      0000001c00000000 0000000000000000
  Lem:         0000000000100000 42498e327f502eae 0000000000000000
  InAErr:      8000000000000000 8000000000000000 0402030000000000 0000000000000000
  PE[  8] A/B: 8480002b00000000 8000000000000000

[ The current diag data is so big that it overflows the printk
  buffer pretty quickly in cases when we get a handful of errors
  at once which can happen. --BenH
]

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
CC: <stable@vger.kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powernv/pci.c | 220 ++++++++++++++++++++---------------
 1 file changed, 125 insertions(+), 95 deletions(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 95633d79ef5d..8518817dcdfd 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -134,57 +134,72 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
 	pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
 		hose->global_number, common->version);
 
-	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
-
-	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
-	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
-	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
-
-	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
-	pr_info("  slotStatus:           %08x\n", data->slotStatus);
-	pr_info("  linkStatus:           %08x\n", data->linkStatus);
-	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
-	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
-
-	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
-	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
-	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
-	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
-	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
-	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
-	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
-	pr_info("  sourceId:             %08x\n", data->sourceId);
-	pr_info("  errorClass:           %016llx\n", data->errorClass);
-	pr_info("  correlator:           %016llx\n", data->correlator);
-	pr_info("  p7iocPlssr:           %016llx\n", data->p7iocPlssr);
-	pr_info("  p7iocCsr:             %016llx\n", data->p7iocCsr);
-	pr_info("  lemFir:               %016llx\n", data->lemFir);
-	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
-	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
-	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
-	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
-	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
-	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
-	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
-	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
-	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
-	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
-	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
-	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
-	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
-	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
-	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
-	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
-	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
-	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+	if (data->brdgCtl)
+		pr_info("  brdgCtl:     %08x\n",
+			data->brdgCtl);
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("  UtlSts:      %08x %08x %08x\n",
+			data->portStatusReg, data->rootCmplxStatus,
+			data->busAgentStatus);
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
+			data->deviceStatus, data->slotStatus,
+			data->linkStatus, data->devCmdStatus,
+			data->devSecStatus);
+	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("  RootErrSts:  %08x %08x %08x\n",
+			data->rootErrorStatus, data->uncorrErrorStatus,
+			data->corrErrorStatus);
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
+			data->tlpHdr1, data->tlpHdr2,
+			data->tlpHdr3, data->tlpHdr4);
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
+			data->sourceId, data->errorClass,
+			data->correlator);
+	if (data->p7iocPlssr || data->p7iocCsr)
+		pr_info("  PhbSts:      %016llx %016llx\n",
+			data->p7iocPlssr, data->p7iocCsr);
+	if (data->lemFir || data->lemErrorMask ||
+	    data->lemWOF)
+		pr_info("  Lem:         %016llx %016llx %016llx\n",
+			data->lemFir, data->lemErrorMask,
+			data->lemWOF);
+	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
+	    data->phbErrorLog0   || data->phbErrorLog1)
+		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
+			data->phbErrorStatus, data->phbFirstErrorStatus,
+			data->phbErrorLog0, data->phbErrorLog1);
+	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
+	    data->mmioErrorLog0   || data->mmioErrorLog1)
+		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
+			data->mmioErrorStatus, data->mmioFirstErrorStatus,
+			data->mmioErrorLog0, data->mmioErrorLog1);
+	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
+	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
+		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
+			data->dma0ErrorLog0, data->dma0ErrorLog1);
+	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
+	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
+		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
+			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
 	for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
 		    (data->pestB[i] >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
-		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
+			i, data->pestA[i], data->pestB[i]);
 	}
 }
 
@@ -197,62 +212,77 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
 	data = (struct OpalIoPhb3ErrorData*)common;
 	pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
 		hose->global_number, common->version);
-
-	pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
-
-	pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
-	pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
-	pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
-
-	pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
-	pr_info("  slotStatus:           %08x\n", data->slotStatus);
-	pr_info("  linkStatus:           %08x\n", data->linkStatus);
-	pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
-	pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
-
-	pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
-	pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
-	pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
-	pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
-	pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
-	pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
-	pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
-	pr_info("  sourceId:             %08x\n", data->sourceId);
-	pr_info("  errorClass:           %016llx\n", data->errorClass);
-	pr_info("  correlator:           %016llx\n", data->correlator);
-
-	pr_info("  nFir:                 %016llx\n", data->nFir);
-	pr_info("  nFirMask:             %016llx\n", data->nFirMask);
-	pr_info("  nFirWOF:              %016llx\n", data->nFirWOF);
-	pr_info("  PhbPlssr:             %016llx\n", data->phbPlssr);
-	pr_info("  PhbCsr:               %016llx\n", data->phbCsr);
-	pr_info("  lemFir:               %016llx\n", data->lemFir);
-	pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
-	pr_info("  lemWOF:               %016llx\n", data->lemWOF);
-	pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
-	pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
-	pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
-	pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
-	pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
-	pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
-	pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
-	pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
-	pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
-	pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
-	pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
-	pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
-	pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
-	pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
-	pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
-	pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+	if (data->brdgCtl)
+		pr_info("  brdgCtl:     %08x\n",
+			data->brdgCtl);
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("  UtlSts:      %08x %08x %08x\n",
+			data->portStatusReg, data->rootCmplxStatus,
+			data->busAgentStatus);
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("  RootSts:     %08x %08x %08x %08x %08x\n",
+			data->deviceStatus, data->slotStatus,
+			data->linkStatus, data->devCmdStatus,
+			data->devSecStatus);
+	if (data->rootErrorStatus || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("  RootErrSts:  %08x %08x %08x\n",
+			data->rootErrorStatus, data->uncorrErrorStatus,
+			data->corrErrorStatus);
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("  RootErrLog:  %08x %08x %08x %08x\n",
+			data->tlpHdr1, data->tlpHdr2,
+			data->tlpHdr3, data->tlpHdr4);
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("  RootErrLog1: %08x %016llx %016llx\n",
+			data->sourceId, data->errorClass,
+			data->correlator);
+	if (data->nFir || data->nFirMask ||
+	    data->nFirWOF)
+		pr_info("  nFir:        %016llx %016llx %016llx\n",
+			data->nFir, data->nFirMask,
+			data->nFirWOF);
+	if (data->phbPlssr || data->phbCsr)
+		pr_info("  PhbSts:      %016llx %016llx\n",
+			data->phbPlssr, data->phbCsr);
+	if (data->lemFir || data->lemErrorMask ||
+	    data->lemWOF)
+		pr_info("  Lem:         %016llx %016llx %016llx\n",
+			data->lemFir, data->lemErrorMask,
+			data->lemWOF);
+	if (data->phbErrorStatus || data->phbFirstErrorStatus ||
+	    data->phbErrorLog0   || data->phbErrorLog1)
+		pr_info("  PhbErr:      %016llx %016llx %016llx %016llx\n",
+			data->phbErrorStatus, data->phbFirstErrorStatus,
+			data->phbErrorLog0, data->phbErrorLog1);
+	if (data->mmioErrorStatus || data->mmioFirstErrorStatus ||
+	    data->mmioErrorLog0   || data->mmioErrorLog1)
+		pr_info("  OutErr:      %016llx %016llx %016llx %016llx\n",
+			data->mmioErrorStatus, data->mmioFirstErrorStatus,
+			data->mmioErrorLog0, data->mmioErrorLog1);
+	if (data->dma0ErrorStatus || data->dma0FirstErrorStatus ||
+	    data->dma0ErrorLog0   || data->dma0ErrorLog1)
+		pr_info("  InAErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
+			data->dma0ErrorLog0, data->dma0ErrorLog1);
+	if (data->dma1ErrorStatus || data->dma1FirstErrorStatus ||
+	    data->dma1ErrorLog0   || data->dma1ErrorLog1)
+		pr_info("  InBErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
+			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
 	for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
 		    (data->pestB[i] >> 63) == 0)
 			continue;
 
-		pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
-		pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+		pr_info("  PE[%3d] A/B: %016llx %016llx\n",
+			i, data->pestA[i], data->pestB[i]);
 	}
 }
 
-- 
cgit v1.2.3


From e0cf957614976896111e676e5134ac98ee227d3d Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 28 Feb 2014 16:20:38 +1100
Subject: powerpc/powernv: Fix indirect XSCOM unmangling

We need to unmangle the full address, not just the register
number, and we also need to support the real indirect bit
being set for in-kernel uses.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: <stable@vger.kernel.org> [v3.13]
---
 arch/powerpc/platforms/powernv/opal-xscom.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/platforms')

diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 4fbf276ac99e..4cd2ea6c0dbe 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -71,11 +71,11 @@ static int opal_xscom_err_xlate(int64_t rc)
 	}
 }
 
-static u64 opal_scom_unmangle(u64 reg)
+static u64 opal_scom_unmangle(u64 addr)
 {
 	/*
 	 * XSCOM indirect addresses have the top bit set. Additionally
-	 * the reset of the top 3 nibbles is always 0.
+	 * the rest of the top 3 nibbles is always 0.
 	 *
 	 * Because the debugfs interface uses signed offsets and shifts
 	 * the address left by 3, we basically cannot use the top 4 bits
@@ -86,10 +86,13 @@ static u64 opal_scom_unmangle(u64 reg)
 	 * conversion here. To leave room for further xscom address
 	 * expansion, we only clear out the top byte
 	 *
+	 * For in-kernel use, we also support the real indirect bit, so
+	 * we test for any of the top 5 bits
+	 *
 	 */
-	if (reg & (1ull << 59))
-		reg = (reg & ~(0xffull << 56)) | (1ull << 63);
-	return reg;
+	if (addr & (0x1full << 59))
+		addr = (addr & ~(0xffull << 56)) | (1ull << 63);
+	return addr;
 }
 
 static int opal_scom_read(scom_map_t map, u64 reg, u64 *value)
@@ -98,8 +101,8 @@ static int opal_scom_read(scom_map_t map, u64 reg, u64 *value)
 	int64_t rc;
 	__be64 v;
 
-	reg = opal_scom_unmangle(reg);
-	rc = opal_xscom_read(m->chip, m->addr + reg, (__be64 *)__pa(&v));
+	reg = opal_scom_unmangle(m->addr + reg);
+	rc = opal_xscom_read(m->chip, reg, (__be64 *)__pa(&v));
 	*value = be64_to_cpu(v);
 	return opal_xscom_err_xlate(rc);
 }
@@ -109,8 +112,8 @@ static int opal_scom_write(scom_map_t map, u64 reg, u64 value)
 	struct opal_scom_map *m = map;
 	int64_t rc;
 
-	reg = opal_scom_unmangle(reg);
-	rc = opal_xscom_write(m->chip, m->addr + reg, value);
+	reg = opal_scom_unmangle(m->addr + reg);
+	rc = opal_xscom_write(m->chip, reg, value);
 	return opal_xscom_err_xlate(rc);
 }
 
-- 
cgit v1.2.3