summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/Kconfig9
-rw-r--r--arch/powerpc/boot/.gitignore1
-rw-r--r--arch/powerpc/boot/dts/kilauea.dts2
-rw-r--r--arch/powerpc/boot/dts/virtex440-ml507.dts2
-rw-r--r--arch/powerpc/include/asm/bitops.h5
-rw-r--r--arch/powerpc/include/asm/cache.h14
-rw-r--r--arch/powerpc/include/asm/code-patching.h7
-rw-r--r--arch/powerpc/include/asm/cputable.h12
-rw-r--r--arch/powerpc/include/asm/exception-64s.h21
-rw-r--r--arch/powerpc/include/asm/hardirq.h3
-rw-r--r--arch/powerpc/include/asm/io.h16
-rw-r--r--arch/powerpc/include/asm/iommu.h54
-rw-r--r--arch/powerpc/include/asm/lppaca.h2
-rw-r--r--arch/powerpc/include/asm/mce.h198
-rw-r--r--arch/powerpc/include/asm/opal.h98
-rw-r--r--arch/powerpc/include/asm/paca.h9
-rw-r--r--arch/powerpc/include/asm/pgtable.h66
-rw-r--r--arch/powerpc/include/asm/pte-hash64.h8
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S38
-rw-r--r--arch/powerpc/kernel/cputable.c16
-rw-r--r--arch/powerpc/kernel/dma-iommu.c4
-rw-r--r--arch/powerpc/kernel/eeh.c2
-rw-r--r--arch/powerpc/kernel/eeh_driver.c2
-rw-r--r--arch/powerpc/kernel/entry_64.S5
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S242
-rw-r--r--arch/powerpc/kernel/idle_power7.S1
-rw-r--r--arch/powerpc/kernel/iommu.c136
-rw-r--r--arch/powerpc/kernel/irq.c12
-rw-r--r--arch/powerpc/kernel/mce.c345
-rw-r--r--arch/powerpc/kernel/mce_power.c284
-rw-r--r--arch/powerpc/kernel/misc_32.S4
-rw-r--r--arch/powerpc/kernel/misc_64.S6
-rw-r--r--arch/powerpc/kernel/paca.c32
-rw-r--r--arch/powerpc/kernel/setup_64.c16
-rw-r--r--arch/powerpc/kernel/smp.c9
-rw-r--r--arch/powerpc/kernel/sysfs.c72
-rw-r--r--arch/powerpc/kernel/time.c3
-rw-r--r--arch/powerpc/kernel/traps.c15
-rw-r--r--arch/powerpc/kernel/vio.c31
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c50
-rw-r--r--arch/powerpc/lib/code-patching.c15
-rw-r--r--arch/powerpc/mm/hash_low_64.S15
-rw-r--r--arch/powerpc/mm/hash_utils_64.c7
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c6
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c4
-rw-r--r--arch/powerpc/mm/pgtable.c2
-rw-r--r--arch/powerpc/mm/pgtable_64.c2
-rw-r--r--arch/powerpc/mm/tlb_nohash.c19
-rw-r--r--arch/powerpc/platforms/83xx/Kconfig1
-rw-r--r--arch/powerpc/platforms/8xx/Kconfig1
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype1
-rw-r--r--arch/powerpc/platforms/cell/beat_htab.c4
-rw-r--r--arch/powerpc/platforms/cell/iommu.c14
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c5
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig5
-rw-r--r--arch/powerpc/platforms/powernv/Makefile1
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c154
-rw-r--r--arch/powerpc/platforms/powernv/opal-flash.c31
-rw-r--r--arch/powerpc/platforms/powernv/opal-memory-errors.c146
-rw-r--r--arch/powerpc/platforms/powernv/opal-rtc.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c251
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c8
-rw-r--r--arch/powerpc/platforms/powernv/pci-p5ioc2.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci.c230
-rw-r--r--arch/powerpc/platforms/powernv/pci.h3
-rw-r--r--arch/powerpc/platforms/ps3/spu.c2
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig2
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c18
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c5
-rw-r--r--arch/powerpc/platforms/pseries/setup.c4
-rw-r--r--arch/powerpc/platforms/wsp/wsp_pci.c11
-rw-r--r--arch/powerpc/sysdev/Kconfig2
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c2
-rw-r--r--arch/powerpc/xmon/xmon.c4
-rw-r--r--drivers/macintosh/windfarm_lm75_sensor.c2
-rw-r--r--drivers/macintosh/windfarm_max6690_sensor.c2
-rw-r--r--drivers/net/ethernet/ibm/ibmveth.c12
-rw-r--r--drivers/vfio/vfio_iommu_spapr_tce.c28
-rw-r--r--include/linux/mm.h2
-rw-r--r--mm/mempolicy.c5
83 files changed, 2206 insertions, 687 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b44b52c0a8f0..f0a893142cee 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -209,9 +209,6 @@ config DEFAULT_UIMAGE
Used to allow a board to specify it wants a uImage built by default
default n
-config REDBOOT
- bool
-
config ARCH_HIBERNATION_POSSIBLE
bool
default y
@@ -379,6 +376,12 @@ config ARCH_HAS_WALK_MEMORY
config ARCH_ENABLE_MEMORY_HOTREMOVE
def_bool y
+config PPC64_SUPPORTS_MEMORY_FAILURE
+ bool "Add support for memory hwpoison"
+ depends on PPC_BOOK3S_64
+ default "y" if PPC_POWERNV
+ select ARCH_SUPPORTS_MEMORY_FAILURE
+
config KEXEC
bool "kexec system call"
depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index 554734ff302e..d61c03525777 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -16,6 +16,7 @@ mktree
uImage
cuImage.*
dtbImage.*
+*.dtb
treeImage.*
zImage
zImage.initrd
diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts
index 1613d6e4049e..5ba7f01e2a29 100644
--- a/arch/powerpc/boot/dts/kilauea.dts
+++ b/arch/powerpc/boot/dts/kilauea.dts
@@ -406,7 +406,7 @@
MSI: ppc4xx-msi@C10000000 {
compatible = "amcc,ppc4xx-msi", "ppc4xx-msi";
- reg = < 0x0 0xEF620000 0x100>;
+ reg = <0xEF620000 0x100>;
sdr-base = <0x4B0>;
msi-data = <0x00000000>;
msi-mask = <0x44440000>;
diff --git a/arch/powerpc/boot/dts/virtex440-ml507.dts b/arch/powerpc/boot/dts/virtex440-ml507.dts
index fc7073bc547e..391a4e299783 100644
--- a/arch/powerpc/boot/dts/virtex440-ml507.dts
+++ b/arch/powerpc/boot/dts/virtex440-ml507.dts
@@ -257,6 +257,8 @@
#size-cells = <1>;
compatible = "xlnx,compound";
ethernet@81c00000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
compatible = "xlnx,xps-ll-temac-1.01.b";
device_type = "network";
interrupt-parent = <&xps_intc_0>;
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 910194e9a1e2..a5e9a7d494d8 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -46,6 +46,11 @@
#include <asm/asm-compat.h>
#include <asm/synch.h>
+/* PPC bit number conversion */
+#define PPC_BITLSHIFT(be) (BITS_PER_LONG - 1 - (be))
+#define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit))
+#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+
/*
* clear_bit doesn't imply a memory barrier
*/
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 9e495c9a6a88..ed0afc1e44a4 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -41,8 +41,20 @@ struct ppc64_caches {
extern struct ppc64_caches ppc64_caches;
#endif /* __powerpc64__ && ! __ASSEMBLY__ */
-#if !defined(__ASSEMBLY__)
+#if defined(__ASSEMBLY__)
+/*
+ * For a snooping icache, we still need a dummy icbi to purge all the
+ * prefetched instructions from the ifetch buffers. We also need a sync
+ * before the icbi to order the the actual stores to memory that might
+ * have modified instructions with the icbi.
+ */
+#define PURGE_PREFETCHED_INS \
+ sync; \
+ icbi 0,r3; \
+ sync; \
+ isync
+#else
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
#ifdef CONFIG_6xx
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index a6f8c7a5cbb7..97e02f985df8 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -34,6 +34,13 @@ int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
unsigned long branch_target(const unsigned int *instr);
unsigned int translate_branch(const unsigned int *dest,
const unsigned int *src);
+#ifdef CONFIG_PPC_BOOK3E_64
+void __patch_exception(int exc, unsigned long addr);
+#define patch_exception(exc, name) do { \
+ extern unsigned int name; \
+ __patch_exception((exc), (unsigned long)&name); \
+} while (0)
+#endif
static inline unsigned long ppc_function_entry(void *func)
{
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 0d4939ba48e7..617cc767c076 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -90,6 +90,18 @@ struct cpu_spec {
* if the error is fatal, 1 if it was fully recovered and 0 to
* pass up (not CPU originated) */
int (*machine_check)(struct pt_regs *regs);
+
+ /*
+ * Processor specific early machine check handler which is
+ * called in real mode to handle SLB and TLB errors.
+ */
+ long (*machine_check_early)(struct pt_regs *regs);
+
+ /*
+ * Processor specific routine to flush tlbs.
+ */
+ void (*flush_tlb)(unsigned long inval_selector);
+
};
extern struct cpu_spec *cur_cpu_spec;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 243ce69ad685..66830618cc19 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -301,9 +301,12 @@ do_kvm_##n: \
beq 4f; /* if from kernel mode */ \
ACCOUNT_CPU_USER_ENTRY(r9, r10); \
SAVE_PPR(area, r9, r10); \
-4: std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+4: EXCEPTION_PROLOG_COMMON_2(area) \
+ EXCEPTION_PROLOG_COMMON_3(n) \
+ ACCOUNT_STOLEN_TIME
+
+/* Save original regs values from save area to stack frame. */
+#define EXCEPTION_PROLOG_COMMON_2(area) \
ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \
ld r10,area+EX_R10(r13); \
std r9,GPR9(r1); \
@@ -318,11 +321,16 @@ do_kvm_##n: \
ld r10,area+EX_CFAR(r13); \
std r10,ORIG_GPR3(r1); \
END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
+ GET_CTR(r10, area); \
+ std r10,_CTR(r1);
+
+#define EXCEPTION_PROLOG_COMMON_3(n) \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
mflr r9; /* Get LR, later save to stack */ \
ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
std r9,_LINK(r1); \
- GET_CTR(r10, area); \
- std r10,_CTR(r1); \
lbz r10,PACASOFTIRQEN(r13); \
mfspr r11,SPRN_XER; /* save XER in stackframe */ \
std r10,SOFTE(r1); \
@@ -332,8 +340,7 @@ do_kvm_##n: \
li r10,0; \
ld r11,exception_marker@toc(r2); \
std r10,RESULT(r1); /* clear regs->result */ \
- std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
- ACCOUNT_STOLEN_TIME
+ std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
/*
* Exception vectors.
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 3bdcfce2c42a..418fb654370d 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -6,7 +6,8 @@
typedef struct {
unsigned int __softirq_pending;
- unsigned int timer_irqs;
+ unsigned int timer_irqs_event;
+ unsigned int timer_irqs_others;
unsigned int pmu_irqs;
unsigned int mce_exceptions;
unsigned int spurious_irqs;
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 575fbf81fad0..97d3869991ca 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -191,8 +191,24 @@ DEF_MMIO_OUT_D(out_le32, 32, stw);
#endif /* __BIG_ENDIAN */
+/*
+ * Cache inhibitied accessors for use in real mode, you don't want to use these
+ * unless you know what you're doing.
+ *
+ * NB. These use the cpu byte ordering.
+ */
+DEF_MMIO_OUT_X(out_rm8, 8, stbcix);
+DEF_MMIO_OUT_X(out_rm16, 16, sthcix);
+DEF_MMIO_OUT_X(out_rm32, 32, stwcix);
+DEF_MMIO_IN_X(in_rm8, 8, lbzcix);
+DEF_MMIO_IN_X(in_rm16, 16, lhzcix);
+DEF_MMIO_IN_X(in_rm32, 32, lwzcix);
+
#ifdef __powerpc64__
+DEF_MMIO_OUT_X(out_rm64, 64, stdcix);
+DEF_MMIO_IN_X(in_rm64, 64, ldcix);
+
#ifdef __BIG_ENDIAN__
DEF_MMIO_OUT_D(out_be64, 64, std);
DEF_MMIO_IN_D(in_be64, 64, ld);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index c34656a8925e..f7a8036579b5 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -30,22 +30,19 @@
#include <asm/machdep.h>
#include <asm/types.h>
-#define IOMMU_PAGE_SHIFT 12
-#define IOMMU_PAGE_SIZE (ASM_CONST(1) << IOMMU_PAGE_SHIFT)
-#define IOMMU_PAGE_MASK (~((1 << IOMMU_PAGE_SHIFT) - 1))
-#define IOMMU_PAGE_ALIGN(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE)
+#define IOMMU_PAGE_SHIFT_4K 12
+#define IOMMU_PAGE_SIZE_4K (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K)
+#define IOMMU_PAGE_MASK_4K (~((1 << IOMMU_PAGE_SHIFT_4K) - 1))
+#define IOMMU_PAGE_ALIGN_4K(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE_4K)
+
+#define IOMMU_PAGE_SIZE(tblptr) (ASM_CONST(1) << (tblptr)->it_page_shift)
+#define IOMMU_PAGE_MASK(tblptr) (~((1 << (tblptr)->it_page_shift) - 1))
+#define IOMMU_PAGE_ALIGN(addr, tblptr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE(tblptr))
/* Boot time flags */
extern int iommu_is_off;
extern int iommu_force_on;
-/* Pure 2^n version of get_order */
-static __inline__ __attribute_const__ int get_iommu_order(unsigned long size)
-{
- return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT) + 1;
-}
-
-
/*
* IOMAP_MAX_ORDER defines the largest contiguous block
* of dma space we can get. IOMAP_MAX_ORDER = 13
@@ -76,11 +73,20 @@ struct iommu_table {
struct iommu_pool large_pool;
struct iommu_pool pools[IOMMU_NR_POOLS];
unsigned long *it_map; /* A simple allocation bitmap for now */
+ unsigned long it_page_shift;/* table iommu page size */
#ifdef CONFIG_IOMMU_API
struct iommu_group *it_group;
#endif
};
+/* Pure 2^n version of get_order */
+static inline __attribute_const__
+int get_iommu_order(unsigned long size, struct iommu_table *tbl)
+{
+ return __ilog2((size - 1) >> tbl->it_page_shift) + 1;
+}
+
+
struct scatterlist;
static inline void set_iommu_table_base(struct device *dev, void *base)
@@ -101,8 +107,34 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
*/
extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
int nid);
+#ifdef CONFIG_IOMMU_API
extern void iommu_register_group(struct iommu_table *tbl,
int pci_domain_number, unsigned long pe_num);
+extern int iommu_add_device(struct device *dev);
+extern void iommu_del_device(struct device *dev);
+#else
+static inline void iommu_register_group(struct iommu_table *tbl,
+ int pci_domain_number,
+ unsigned long pe_num)
+{
+}
+
+static inline int iommu_add_device(struct device *dev)
+{
+ return 0;
+}
+
+static inline void iommu_del_device(struct device *dev)
+{
+}
+#endif /* !CONFIG_IOMMU_API */
+
+static inline void set_iommu_table_base_and_group(struct device *dev,
+ void *base)
+{
+ set_iommu_table_base(dev, base);
+ iommu_add_device(dev);
+}
extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
struct scatterlist *sglist, int nelems,
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 844c28de7ec0..d0a2a2f99564 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -132,8 +132,6 @@ struct slb_shadow {
} save_area[SLB_NUM_BOLTED];
} ____cacheline_aligned;
-extern struct slb_shadow slb_shadow[];
-
/*
* Layout of entries in the hypervisor's dispatch trace log buffer.
*/
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
new file mode 100644
index 000000000000..a2b8c7b35fba
--- /dev/null
+++ b/arch/powerpc/include/asm/mce.h
@@ -0,0 +1,198 @@
+/*
+ * Machine check exception header file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_PPC64_MCE_H__
+#define __ASM_PPC64_MCE_H__
+
+#include <linux/bitops.h>
+
+/*
+ * Machine Check bits on power7 and power8
+ */
+#define P7_SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) /* P8 too */
+
+/* SRR1 bits for machine check (On Power7 and Power8) */
+#define P7_SRR1_MC_IFETCH(srr1) ((srr1) & PPC_BITMASK(43, 45)) /* P8 too */
+
+#define P7_SRR1_MC_IFETCH_UE (0x1 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_SLB_PARITY (0x2 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_SLB_MULTIHIT (0x3 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_SLB_BOTH (0x4 << PPC_BITLSHIFT(45))
+#define P7_SRR1_MC_IFETCH_TLB_MULTIHIT (0x5 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_UE_TLB_RELOAD (0x6 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL (0x7 << PPC_BITLSHIFT(45))
+
+/* SRR1 bits for machine check (On Power8) */
+#define P8_SRR1_MC_IFETCH_ERAT_MULTIHIT (0x4 << PPC_BITLSHIFT(45))
+
+/* DSISR bits for machine check (On Power7 and Power8) */
+#define P7_DSISR_MC_UE (PPC_BIT(48)) /* P8 too */
+#define P7_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) /* P8 too */
+#define P7_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) /* P8 too */
+#define P7_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) /* P8 too */
+#define P7_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) /* P8 too */
+#define P7_DSISR_MC_SLB_MULTIHIT (PPC_BIT(56)) /* P8 too */
+#define P7_DSISR_MC_SLB_MULTIHIT_PARITY (PPC_BIT(57)) /* P8 too */
+
+/*
+ * DSISR bits for machine check (Power8) in addition to above.
+ * Secondary DERAT Multihit
+ */
+#define P8_DSISR_MC_ERAT_MULTIHIT_SEC (PPC_BIT(54))
+
+/* SLB error bits */
+#define P7_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_ERAT_MULTIHIT | \
+ P7_DSISR_MC_SLB_PARITY_MFSLB | \
+ P7_DSISR_MC_SLB_MULTIHIT | \
+ P7_DSISR_MC_SLB_MULTIHIT_PARITY)
+
+#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \
+ P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+enum MCE_Version {
+ MCE_V1 = 1,
+};
+
+enum MCE_Severity {
+ MCE_SEV_NO_ERROR = 0,
+ MCE_SEV_WARNING = 1,
+ MCE_SEV_ERROR_SYNC = 2,
+ MCE_SEV_FATAL = 3,
+};
+
+enum MCE_Disposition {
+ MCE_DISPOSITION_RECOVERED = 0,
+ MCE_DISPOSITION_NOT_RECOVERED = 1,
+};
+
+enum MCE_Initiator {
+ MCE_INITIATOR_UNKNOWN = 0,
+ MCE_INITIATOR_CPU = 1,
+};
+
+enum MCE_ErrorType {
+ MCE_ERROR_TYPE_UNKNOWN = 0,
+ MCE_ERROR_TYPE_UE = 1,
+ MCE_ERROR_TYPE_SLB = 2,
+ MCE_ERROR_TYPE_ERAT = 3,
+ MCE_ERROR_TYPE_TLB = 4,
+};
+
+enum MCE_UeErrorType {
+ MCE_UE_ERROR_INDETERMINATE = 0,
+ MCE_UE_ERROR_IFETCH = 1,
+ MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+ MCE_UE_ERROR_LOAD_STORE = 3,
+ MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4,
+};
+
+enum MCE_SlbErrorType {
+ MCE_SLB_ERROR_INDETERMINATE = 0,
+ MCE_SLB_ERROR_PARITY = 1,
+ MCE_SLB_ERROR_MULTIHIT = 2,
+};
+
+enum MCE_EratErrorType {
+ MCE_ERAT_ERROR_INDETERMINATE = 0,
+ MCE_ERAT_ERROR_PARITY = 1,
+ MCE_ERAT_ERROR_MULTIHIT = 2,
+};
+
+enum MCE_TlbErrorType {
+ MCE_TLB_ERROR_INDETERMINATE = 0,
+ MCE_TLB_ERROR_PARITY = 1,
+ MCE_TLB_ERROR_MULTIHIT = 2,
+};
+
+struct machine_check_event {
+ enum MCE_Version version:8; /* 0x00 */
+ uint8_t in_use; /* 0x01 */
+ enum MCE_Severity severity:8; /* 0x02 */
+ enum MCE_Initiator initiator:8; /* 0x03 */
+ enum MCE_ErrorType error_type:8; /* 0x04 */
+ enum MCE_Disposition disposition:8; /* 0x05 */
+ uint8_t reserved_1[2]; /* 0x06 */
+ uint64_t gpr3; /* 0x08 */
+ uint64_t srr0; /* 0x10 */
+ uint64_t srr1; /* 0x18 */
+ union { /* 0x20 */
+ struct {
+ enum MCE_UeErrorType ue_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t physical_address_provided;
+ uint8_t reserved_1[5];
+ uint64_t effective_address;
+ uint64_t physical_address;
+ uint8_t reserved_2[8];
+ } ue_error;
+
+ struct {
+ enum MCE_SlbErrorType slb_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } slb_error;
+
+ struct {
+ enum MCE_EratErrorType erat_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } erat_error;
+
+ struct {
+ enum MCE_TlbErrorType tlb_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } tlb_error;
+ } u;
+};
+
+struct mce_error_info {
+ enum MCE_ErrorType error_type:8;
+ union {
+ enum MCE_UeErrorType ue_error_type:8;
+ enum MCE_SlbErrorType slb_error_type:8;
+ enum MCE_EratErrorType erat_error_type:8;
+ enum MCE_TlbErrorType tlb_error_type:8;
+ } u;
+ uint8_t reserved[2];
+};
+
+#define MAX_MC_EVT 100
+
+/* Release flags for get_mce_event() */
+#define MCE_EVENT_RELEASE true
+#define MCE_EVENT_DONTRELEASE false
+
+extern void save_mce_event(struct pt_regs *regs, long handled,
+ struct mce_error_info *mce_err, uint64_t addr);
+extern int get_mce_event(struct machine_check_event *mce, bool release);
+extern void release_mce_event(void);
+extern void machine_check_queue_event(void);
+extern void machine_check_process_queued_event(void);
+extern void machine_check_print_event_info(struct machine_check_event *evt);
+extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
+
+#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 7bdcf340016c..a4041e9ed550 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -33,6 +33,28 @@ struct opal_takeover_args {
u64 rd_loc; /* r11 */
};
+/*
+ * SG entry
+ *
+ * WARNING: The current implementation requires each entry
+ * to represent a block that is 4k aligned *and* each block
+ * size except the last one in the list to be as well.
+ */
+struct opal_sg_entry {
+ void *data;
+ long length;
+};
+
+/* sg list */
+struct opal_sg_list {
+ unsigned long num_entries;
+ struct opal_sg_list *next;
+ struct opal_sg_entry entry[];
+};
+
+/* We calculate number of sg entries based on PAGE_SIZE */
+#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
+
extern long opal_query_takeover(u64 *hal_size, u64 *hal_align);
extern long opal_do_takeover(struct opal_takeover_args *args);
@@ -132,6 +154,8 @@ extern int opal_enter_rtas(struct rtas_args *args,
#define OPAL_FLASH_VALIDATE 76
#define OPAL_FLASH_MANAGE 77
#define OPAL_FLASH_UPDATE 78
+#define OPAL_GET_MSG 85
+#define OPAL_CHECK_ASYNC_COMPLETION 86
#ifndef __ASSEMBLY__
@@ -211,7 +235,16 @@ enum OpalPendingState {
OPAL_EVENT_ERROR_LOG = 0x40,
OPAL_EVENT_EPOW = 0x80,
OPAL_EVENT_LED_STATUS = 0x100,
- OPAL_EVENT_PCI_ERROR = 0x200
+ OPAL_EVENT_PCI_ERROR = 0x200,
+ OPAL_EVENT_MSG_PENDING = 0x800,
+};
+
+enum OpalMessageType {
+ OPAL_MSG_ASYNC_COMP = 0,
+ OPAL_MSG_MEM_ERR,
+ OPAL_MSG_EPOW,
+ OPAL_MSG_SHUTDOWN,
+ OPAL_MSG_TYPE_MAX,
};
/* Machine check related definitions */
@@ -356,6 +389,12 @@ enum OpalLPCAddressType {
OPAL_LPC_FW = 2,
};
+struct opal_msg {
+ uint32_t msg_type;
+ uint32_t reserved;
+ uint64_t params[8];
+};
+
struct opal_machine_check_event {
enum OpalMCE_Version version:8; /* 0x00 */
uint8_t in_use; /* 0x01 */
@@ -404,6 +443,58 @@ struct opal_machine_check_event {
} u;
};
+/* FSP memory errors handling */
+enum OpalMemErr_Version {
+ OpalMemErr_V1 = 1,
+};
+
+enum OpalMemErrType {
+ OPAL_MEM_ERR_TYPE_RESILIENCE = 0,
+ OPAL_MEM_ERR_TYPE_DYN_DALLOC,
+ OPAL_MEM_ERR_TYPE_SCRUB,
+};
+
+/* Memory Reilience error type */
+enum OpalMemErr_ResilErrType {
+ OPAL_MEM_RESILIENCE_CE = 0,
+ OPAL_MEM_RESILIENCE_UE,
+ OPAL_MEM_RESILIENCE_UE_SCRUB,
+};
+
+/* Dynamic Memory Deallocation type */
+enum OpalMemErr_DynErrType {
+ OPAL_MEM_DYNAMIC_DEALLOC = 0,
+};
+
+/* OpalMemoryErrorData->flags */
+#define OPAL_MEM_CORRECTED_ERROR 0x0001
+#define OPAL_MEM_THRESHOLD_EXCEEDED 0x0002
+#define OPAL_MEM_ACK_REQUIRED 0x8000
+
+struct OpalMemoryErrorData {
+ enum OpalMemErr_Version version:8; /* 0x00 */
+ enum OpalMemErrType type:8; /* 0x01 */
+ uint16_t flags; /* 0x02 */
+ uint8_t reserved_1[4]; /* 0x04 */
+
+ union {
+ /* Memory Resilience corrected/uncorrected error info */
+ struct {
+ enum OpalMemErr_ResilErrType resil_err_type:8;
+ uint8_t reserved_1[7];
+ uint64_t physical_address_start;
+ uint64_t physical_address_end;
+ } resilience;
+ /* Dynamic memory deallocation error info */
+ struct {
+ enum OpalMemErr_DynErrType dyn_err_type:8;
+ uint8_t reserved_1[7];
+ uint64_t physical_address_start;
+ uint64_t physical_address_end;
+ } dyn_dealloc;
+ } u;
+};
+
enum {
OPAL_P7IOC_DIAG_TYPE_NONE = 0,
OPAL_P7IOC_DIAG_TYPE_RGC = 1,
@@ -731,6 +822,9 @@ int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result);
int64_t opal_manage_flash(uint8_t op);
int64_t opal_update_flash(uint64_t blk_list);
+int64_t opal_get_msg(uint64_t buffer, size_t size);
+int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token);
+
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data);
@@ -744,6 +838,8 @@ extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
int depth, void *data);
extern int opal_notifier_register(struct notifier_block *nb);
+extern int opal_message_notifier_register(enum OpalMessageType msg_type,
+ struct notifier_block *nb);
extern void opal_notifier_enable(void);
extern void opal_notifier_disable(void);
extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index b6ea9e068c13..c3523d1dda58 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,6 +152,15 @@ struct paca_struct {
*/
struct opal_machine_check_event *opal_mc_evt;
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Exclusive emergency stack pointer for machine check exception. */
+ void *mc_emergency_sp;
+ /*
+ * Flag to check whether we are in machine check early handler
+ * and already using emergency stack.
+ */
+ u16 in_mce;
+#endif
/* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 7d6eacf249cf..b999ca318985 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -3,6 +3,7 @@
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
+#include <linux/mmdebug.h>
#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
@@ -33,10 +34,73 @@ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
-static inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_PRESENT; }
static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+#ifdef CONFIG_NUMA_BALANCING
+
+static inline int pte_present(pte_t pte)
+{
+ return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA);
+}
+
+#define pte_numa pte_numa
+static inline int pte_numa(pte_t pte)
+{
+ return (pte_val(pte) &
+ (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+}
+
+#define pte_mknonnuma pte_mknonnuma
+static inline pte_t pte_mknonnuma(pte_t pte)
+{
+ pte_val(pte) &= ~_PAGE_NUMA;
+ pte_val(pte) |= _PAGE_PRESENT | _PAGE_ACCESSED;
+ return pte;
+}
+
+#define pte_mknuma pte_mknuma
+static inline pte_t pte_mknuma(pte_t pte)
+{
+ /*
+ * We should not set _PAGE_NUMA on non present ptes. Also clear the
+ * present bit so that hash_page will return 1 and we collect this
+ * as numa fault.
+ */
+ if (pte_present(pte)) {
+ pte_val(pte) |= _PAGE_NUMA;
+ pte_val(pte) &= ~_PAGE_PRESENT;
+ } else
+ VM_BUG_ON(1);
+ return pte;
+}
+
+#define pmd_numa pmd_numa
+static inline int pmd_numa(pmd_t pmd)
+{
+ return pte_numa(pmd_pte(pmd));
+}
+
+#define pmd_mknonnuma pmd_mknonnuma
+static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+{
+ return pte_pmd(pte_mknonnuma(pmd_pte(pmd)));
+}
+
+#define pmd_mknuma pmd_mknuma
+static inline pmd_t pmd_mknuma(pmd_t pmd)
+{
+ return pte_pmd(pte_mknuma(pmd_pte(pmd)));
+}
+
+# else
+
+static inline int pte_present(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_PRESENT;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
/* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*
diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h
index 0419eeb53274..2505d8eab15c 100644
--- a/arch/powerpc/include/asm/pte-hash64.h
+++ b/arch/powerpc/include/asm/pte-hash64.h
@@ -19,7 +19,7 @@
#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x0008
-#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
+/* We can derive Memory coherence from _PAGE_NO_CACHE */
#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
#define _PAGE_DIRTY 0x0080 /* C: page changed */
@@ -27,6 +27,12 @@
#define _PAGE_RW 0x0200 /* software: user write access allowed */
#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
+/*
+ * Used for tracking numa faults
+ */
+#define _PAGE_NUMA 0x00000010 /* Gather numa placement stats */
+
+
/* No separate kernel read-only */
#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 445cb6e39d5b..904d713366ff 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 2ea5cc033ec8..41a283956a29 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -232,6 +232,10 @@ int main(void)
DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
#endif /* CONFIG_PPC_STD_MMU_64 */
DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
+#ifdef CONFIG_PPC_BOOK3S_64
+ DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
+ DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
+#endif
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 18b5b9cf8e37..37d1bb002aa9 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -29,7 +29,7 @@ _GLOBAL(__setup_cpu_power7)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
bl __init_LPCR
- bl __init_TLB
+ bl __init_tlb_power7
mtlr r11
blr
@@ -42,7 +42,7 @@ _GLOBAL(__restore_cpu_power7)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
bl __init_LPCR
- bl __init_TLB
+ bl __init_tlb_power7
mtlr r11
blr
@@ -59,7 +59,7 @@ _GLOBAL(__setup_cpu_power8)
oris r3, r3, LPCR_AIL_3@h
bl __init_LPCR
bl __init_HFSCR
- bl __init_TLB
+ bl __init_tlb_power8
bl __init_PMU_HV
mtlr r11
blr
@@ -78,7 +78,7 @@ _GLOBAL(__restore_cpu_power8)
oris r3, r3, LPCR_AIL_3@h
bl __init_LPCR
bl __init_HFSCR
- bl __init_TLB
+ bl __init_tlb_power8
bl __init_PMU_HV
mtlr r11
blr
@@ -134,15 +134,31 @@ __init_HFSCR:
mtspr SPRN_HFSCR,r3
blr
-__init_TLB:
- /*
- * Clear the TLB using the "IS 3" form of tlbiel instruction
- * (invalidate by congruence class). P7 has 128 CCs, P8 has 512
- * so we just always do 512
- */
+/*
+ * Clear the TLB using the specified IS form of tlbiel instruction
+ * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
+ *
+ * r3 = IS field
+ */
+__init_tlb_power7:
+ li r3,0xc00 /* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power7)
+ li r6,128
+ mtctr r6
+ mr r7,r3 /* IS field */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1: blr
+
+__init_tlb_power8:
+ li r3,0xc00 /* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power8)
li r6,512
mtctr r6
- li r7,0xc00 /* IS field = 0b11 */
+ mr r7,r3 /* IS field */
ptesync
2: tlbiel r7
addi r7,r7,0x1000
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 597d954e5860..6c8dd5da4de5 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -71,6 +71,10 @@ extern void __restore_cpu_power7(void);
extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power8(void);
extern void __restore_cpu_a2(void);
+extern void __flush_tlb_power7(unsigned long inval_selector);
+extern void __flush_tlb_power8(unsigned long inval_selector);
+extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -440,6 +444,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
{ /* 2.07-compliant processor, i.e. Power8 "architected" mode */
@@ -456,6 +462,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_cpu_type = "ppc64/ibm-compat-v1",
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Power7 */
@@ -474,6 +482,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7",
},
{ /* Power7+ */
@@ -492,6 +502,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_POWER4,
.cpu_setup = __setup_cpu_power7,
.cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
.platform = "power7+",
},
{ /* Power8E */
@@ -510,6 +522,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Power8 */
@@ -528,6 +542,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.oprofile_type = PPC_OPROFILE_INVALID,
.cpu_setup = __setup_cpu_power8,
.cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
{ /* Cell Broadband Engine */
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index e4897523de41..54d0116256f7 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -83,10 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask)
return 0;
}
- if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT)) {
+ if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
- mask, tbl->it_offset << IOMMU_PAGE_SHIFT);
+ mask, tbl->it_offset << tbl->it_page_shift);
return 0;
} else
return 1;
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 4bd687d5e7aa..f4b7a227f183 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -84,7 +84,7 @@
#define EEH_MAX_FAILS 2100000
/* Time to wait for a PCI slot to report status, in milliseconds */
-#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
+#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 36bed5a12750..4ef59c33777f 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -468,7 +468,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
/* The longest amount of time to wait for a pci device
* to come back on line, in seconds.
*/
-#define MAX_WAIT_FOR_RECOVERY 150
+#define MAX_WAIT_FOR_RECOVERY 300
static void eeh_handle_normal_event(struct eeh_pe *pe)
{
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index bbfb0294b354..770d6d65c47b 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -184,6 +184,11 @@ syscall_exit:
bl .do_show_syscall_exit
ld r3,RESULT(r1)
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+ bl .machine_check_process_queued_event
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
CURRENT_THREAD_INFO(r12, r1)
ld r8,_MSR(r1)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9f905e40922e..38d507306a11 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -155,8 +155,30 @@ machine_check_pSeries_1:
*/
HMT_MEDIUM_PPR_DISCARD
SET_SCRATCH0(r13) /* save r13 */
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+ /* Running native on arch 2.06 or later, check if we are
+ * waking up from nap. We only handle no state loss and
+ * supervisor state loss. We do -not- handle hypervisor
+ * state loss at this time.
+ */
+ mfspr r13,SPRN_SRR1
+ rlwinm. r13,r13,47-31,30,31
+ beq 9f
+
+ /* waking up from powersave (nap) state */
+ cmpwi cr1,r13,2
+ /* Total loss of HV state is fatal. let's just stay stuck here */
+ bgt cr1,.
+9:
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif /* CONFIG_PPC_P7_NAP */
EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+ b machine_check_pSeries_early
+FTR_SECTION_ELSE
b machine_check_pSeries_0
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
. = 0x300
.globl data_access_pSeries
@@ -405,6 +427,64 @@ denorm_exception_hv:
.align 7
/* moved from 0x200 */
+machine_check_pSeries_early:
+BEGIN_FTR_SECTION
+ EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+ /*
+ * Register contents:
+ * R13 = PACA
+ * R9 = CR
+ * Original R9 to R13 is saved on PACA_EXMC
+ *
+ * Switch to mc_emergency stack and handle re-entrancy (though we
+ * currently don't test for overflow). Save MCE registers srr1,
+ * srr0, dar and dsisr and then set ME=1
+ *
+ * We use paca->in_mce to check whether this is the first entry or
+ * nested machine check. We increment paca->in_mce to track nested
+ * machine checks.
+ *
+ * If this is the first entry then set stack pointer to
+ * paca->mc_emergency_sp, otherwise r1 is already pointing to
+ * stack frame on mc_emergency stack.
+ *
+ * NOTE: We are here with MSR_ME=0 (off), which means we risk a
+ * checkstop if we get another machine check exception before we do
+ * rfid with MSR_ME=1.
+ */
+ mr r11,r1 /* Save r1 */
+ lhz r10,PACA_IN_MCE(r13)
+ cmpwi r10,0 /* Are we in nested machine check */
+ bne 0f /* Yes, we are. */
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ addi r10,r10,1 /* increment paca->in_mce */
+ sth r10,PACA_IN_MCE(r13)
+ std r11,GPR1(r1) /* Save r1 on the stack. */
+ std r11,0(r1) /* make stack chain pointer */
+ mfspr r11,SPRN_SRR0 /* Save SRR0 */
+ std r11,_NIP(r1)
+ mfspr r11,SPRN_SRR1 /* Save SRR1 */
+ std r11,_MSR(r1)
+ mfspr r11,SPRN_DAR /* Save DAR */
+ std r11,_DAR(r1)
+ mfspr r11,SPRN_DSISR /* Save DSISR */
+ std r11,_DSISR(r1)
+ std r9,_CCR(r1) /* Save CR in stackframe */
+ /* Save r9 through r13 from EXMC save area to stack frame. */
+ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+ mfmsr r11 /* get MSR value */
+ ori r11,r11,MSR_ME /* turn on ME bit */
+ ori r11,r11,MSR_RI /* turn on RI bit */
+ ld r12,PACAKBASE(r13) /* get high part of &label */
+ LOAD_HANDLER(r12, machine_check_handle_early)
+ mtspr SPRN_SRR0,r12
+ mtspr SPRN_SRR1,r11
+ rfid
+ b . /* prevent speculative execution */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
machine_check_pSeries:
.globl machine_check_fwnmi
machine_check_fwnmi:
@@ -688,30 +768,6 @@ kvmppc_skip_Hinterrupt:
STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
- /*
- * Machine check is different because we use a different
- * save area: PACA_EXMC instead of PACA_EXGEN.
- */
- .align 7
- .globl machine_check_common
-machine_check_common:
-
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
- FINISH_NAP
- DISABLE_INTS
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
- bl .save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .machine_check_exception
- b .ret_from_except
-
STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt)
@@ -1080,6 +1136,30 @@ unrecov_user_slb:
#endif /* __DISABLED__ */
+ /*
+ * Machine check is different because we use a different
+ * save area: PACA_EXMC instead of PACA_EXGEN.
+ */
+ .align 7
+ .globl machine_check_common
+machine_check_common:
+
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_DSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+ FINISH_NAP
+ DISABLE_INTS
+ ld r3,PACA_EXGEN+EX_DAR(r13)
+ lwz r4,PACA_EXGEN+EX_DSISR(r13)
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .machine_check_exception
+ b .ret_from_except
+
.align 7
.globl alignment_common
alignment_common:
@@ -1263,6 +1343,120 @@ _GLOBAL(opal_mc_secondary_handler)
#endif /* CONFIG_PPC_POWERNV */
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r0,MSR_RI; \
+ mfmsr r9; /* get MSR value */ \
+ andc r9,r9,r0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Move original SRR0 and SRR1 into the respective regs */ \
+ ld r9,_MSR(r1); \
+ mtspr SPRN_SRR1,r9; \
+ ld r3,_NIP(r1); \
+ mtspr SPRN_SRR0,r3; \
+ ld r9,_CTR(r1); \
+ mtctr r9; \
+ ld r9,_XER(r1); \
+ mtxer r9; \
+ ld r9,_LINK(r1); \
+ mtlr r9; \
+ REST_GPR(0, r1); \
+ REST_8GPRS(2, r1); \
+ REST_GPR(10, r1); \
+ ld r11,_CCR(r1); \
+ mtcr r11; \
+ /* Decrement paca->in_mce. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ REST_GPR(11, r1); \
+ REST_2GPRS(12, r1); \
+ /* restore original r1. */ \
+ ld r1,GPR1(r1)
+
+ /*
+ * Handle machine check early in real mode. We come here with
+ * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
+ */
+ .align 7
+ .globl machine_check_handle_early
+machine_check_handle_early:
+ std r0,GPR0(r1) /* Save r0 */
+ EXCEPTION_PROLOG_COMMON_3(0x200)
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .machine_check_early
+ ld r12,_MSR(r1)
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss or
+ * supervisor state loss
+ *
+ * Go back to nap again if (b) is true.
+ */
+ rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
+ beq 4f /* No, it wasn;t */
+ /* Thread was in power saving mode. Go back to nap again. */
+ cmpwi r11,2
+ bne 3f
+ /* Supervisor state loss */
+ li r0,1
+ stb r0,PACA_NAPSTATELOST(r13)
+3: bl .machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ GET_PACA(r13)
+ ld r1,PACAR1(r13)
+ b .power7_enter_nap_mode
+4:
+#endif
+ /*
+ * Check if we are coming from hypervisor userspace. If yes then we
+ * continue in host kernel in V mode to deliver the MC event.
+ */
+ rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
+ beq 5f
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne 9f /* continue in V mode if we are. */
+
+5:
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ /*
+ * We are coming from kernel context. Check if we are coming from
+ * guest. if yes, then we can continue. We will fall through
+ * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
+ */
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne 9f /* continue if we are. */
+#endif
+ /*
+ * At this point we are not sure about what context we come from.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
+ */
+ andi. r11,r12,MSR_RI
+ bne 2f
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+2:
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
+ */
+ bl .machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ rfid
+9:
+ /* Deliver the machine check to host kernel in V mode. */
+ MACHINE_CHECK_HANDLER_WINDUP
+ b machine_check_pSeries
+
/*
* r13 points to the PACA, r9 contains the saved CR,
* r12 contain the saved SRR1, SRR0 is still ready for return
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 847e40e62fce..3fdef0f0c67f 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -84,6 +84,7 @@ _GLOBAL(power7_nap)
std r9,_MSR(r1)
std r1,PACAR1(r13)
+_GLOBAL(power7_enter_nap_mode)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* Tell KVM we're napping */
li r4,KVM_HWTHREAD_IN_NAP
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 572bb5b95f35..f58d8135aab2 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -251,14 +251,13 @@ again:
if (dev)
boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- 1 << IOMMU_PAGE_SHIFT);
+ 1 << tbl->it_page_shift);
else
- boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT);
+ boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
/* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
- n = iommu_area_alloc(tbl->it_map, limit, start, npages,
- tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
- align_mask);
+ n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
+ boundary_size >> tbl->it_page_shift, align_mask);
if (n == -1) {
if (likely(pass == 0)) {
/* First try the pool from the start */
@@ -320,12 +319,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
return DMA_ERROR_CODE;
entry += tbl->it_offset; /* Offset into real TCE table */
- ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
+ ret = entry << tbl->it_page_shift; /* Set the return dma address */
/* Put the TCEs in the HW table */
build_fail = ppc_md.tce_build(tbl, entry, npages,
- (unsigned long)page & IOMMU_PAGE_MASK,
- direction, attrs);
+ (unsigned long)page &
+ IOMMU_PAGE_MASK(tbl), direction, attrs);
/* ppc_md.tce_build() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
@@ -352,7 +351,7 @@ static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
{
unsigned long entry, free_entry;
- entry = dma_addr >> IOMMU_PAGE_SHIFT;
+ entry = dma_addr >> tbl->it_page_shift;
free_entry = entry - tbl->it_offset;
if (((free_entry + npages) > tbl->it_size) ||
@@ -401,7 +400,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned long flags;
struct iommu_pool *pool;
- entry = dma_addr >> IOMMU_PAGE_SHIFT;
+ entry = dma_addr >> tbl->it_page_shift;
free_entry = entry - tbl->it_offset;
pool = get_pool(tbl, free_entry);
@@ -468,13 +467,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
}
/* Allocate iommu entries for that segment */
vaddr = (unsigned long) sg_virt(s);
- npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE);
+ npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl));
align = 0;
- if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE &&
+ if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE &&
(vaddr & ~PAGE_MASK) == 0)
- align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
+ align = PAGE_SHIFT - tbl->it_page_shift;
entry = iommu_range_alloc(dev, tbl, npages, &handle,
- mask >> IOMMU_PAGE_SHIFT, align);
+ mask >> tbl->it_page_shift, align);
DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
@@ -489,16 +488,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
/* Convert entry to a dma_addr_t */
entry += tbl->it_offset;
- dma_addr = entry << IOMMU_PAGE_SHIFT;
- dma_addr |= (s->offset & ~IOMMU_PAGE_MASK);
+ dma_addr = entry << tbl->it_page_shift;
+ dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));
DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
npages, entry, dma_addr);
/* Insert into HW table */
build_fail = ppc_md.tce_build(tbl, entry, npages,
- vaddr & IOMMU_PAGE_MASK,
- direction, attrs);
+ vaddr & IOMMU_PAGE_MASK(tbl),
+ direction, attrs);
if(unlikely(build_fail))
goto failure;
@@ -559,9 +558,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
if (s->dma_length != 0) {
unsigned long vaddr, npages;
- vaddr = s->dma_address & IOMMU_PAGE_MASK;
+ vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl);
npages = iommu_num_pages(s->dma_address, s->dma_length,
- IOMMU_PAGE_SIZE);
+ IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, vaddr, npages);
s->dma_address = DMA_ERROR_CODE;
s->dma_length = 0;
@@ -592,7 +591,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
if (sg->dma_length == 0)
break;
npages = iommu_num_pages(dma_handle, sg->dma_length,
- IOMMU_PAGE_SIZE);
+ IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, dma_handle, npages);
sg = sg_next(sg);
}
@@ -676,7 +675,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
set_bit(0, tbl->it_map);
/* We only split the IOMMU table if we have 1GB or more of space */
- if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024))
+ if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
tbl->nr_pools = IOMMU_NR_POOLS;
else
tbl->nr_pools = 1;
@@ -768,16 +767,16 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
vaddr = page_address(page) + offset;
uaddr = (unsigned long)vaddr;
- npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE);
+ npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
if (tbl) {
align = 0;
- if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE &&
+ if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
((unsigned long)vaddr & ~PAGE_MASK) == 0)
- align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
+ align = PAGE_SHIFT - tbl->it_page_shift;
dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
- mask >> IOMMU_PAGE_SHIFT, align,
+ mask >> tbl->it_page_shift, align,
attrs);
if (dma_handle == DMA_ERROR_CODE) {
if (printk_ratelimit()) {
@@ -786,7 +785,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
npages);
}
} else
- dma_handle |= (uaddr & ~IOMMU_PAGE_MASK);
+ dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl));
}
return dma_handle;
@@ -801,7 +800,8 @@ void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
BUG_ON(direction == DMA_NONE);
if (tbl) {
- npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE);
+ npages = iommu_num_pages(dma_handle, size,
+ IOMMU_PAGE_SIZE(tbl));
iommu_free(tbl, dma_handle, npages);
}
}
@@ -845,10 +845,10 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
- nio_pages = size >> IOMMU_PAGE_SHIFT;
- io_order = get_iommu_order(size);
+ nio_pages = size >> tbl->it_page_shift;
+ io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
- mask >> IOMMU_PAGE_SHIFT, io_order, NULL);
+ mask >> tbl->it_page_shift, io_order, NULL);
if (mapping == DMA_ERROR_CODE) {
free_pages((unsigned long)ret, order);
return NULL;
@@ -864,7 +864,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
unsigned int nio_pages;
size = PAGE_ALIGN(size);
- nio_pages = size >> IOMMU_PAGE_SHIFT;
+ nio_pages = size >> tbl->it_page_shift;
iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
free_pages((unsigned long)vaddr, get_order(size));
@@ -935,10 +935,10 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl,
if (tce_value)
return -EINVAL;
- if (ioba & ~IOMMU_PAGE_MASK)
+ if (ioba & ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;
- ioba >>= IOMMU_PAGE_SHIFT;
+ ioba >>= tbl->it_page_shift;
if (ioba < tbl->it_offset)
return -EINVAL;
@@ -955,13 +955,13 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
return -EINVAL;
- if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ))
+ if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
return -EINVAL;
- if (ioba & ~IOMMU_PAGE_MASK)
+ if (ioba & ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;
- ioba >>= IOMMU_PAGE_SHIFT;
+ ioba >>= tbl->it_page_shift;
if (ioba < tbl->it_offset)
return -EINVAL;
@@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
- __func__, hwaddr, entry << IOMMU_PAGE_SHIFT,
+ __func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl),
hwaddr, ret); */
return ret;
@@ -1049,14 +1049,14 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
{
int ret;
struct page *page = NULL;
- unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK;
+ unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
enum dma_data_direction direction = iommu_tce_direction(tce);
ret = get_user_pages_fast(tce & PAGE_MASK, 1,
direction != DMA_TO_DEVICE, &page);
if (unlikely(ret != 1)) {
/* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
- tce, entry << IOMMU_PAGE_SHIFT, ret); */
+ tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */
return -EFAULT;
}
hwaddr = (unsigned long) page_address(page) + offset;
@@ -1067,7 +1067,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
if (ret < 0)
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
- __func__, entry << IOMMU_PAGE_SHIFT, tce, ret);
+ __func__, entry << tbl->it_page_shift, tce, ret);
return ret;
}
@@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl)
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);
-static int iommu_add_device(struct device *dev)
+int iommu_add_device(struct device *dev)
{
struct iommu_table *tbl;
int ret = 0;
@@ -1127,6 +1127,12 @@ static int iommu_add_device(struct device *dev)
pr_debug("iommu_tce: adding %s to iommu group %d\n",
dev_name(dev), iommu_group_id(tbl->it_group));
+ if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
+ pr_err("iommu_tce: unsupported iommu page size.");
+ pr_err("%s has not been added\n", dev_name(dev));
+ return -EINVAL;
+ }
+
ret = iommu_group_add_device(tbl->it_group, dev);
if (ret < 0)
pr_err("iommu_tce: %s has not been added, ret=%d\n",
@@ -1134,52 +1140,12 @@ static int iommu_add_device(struct device *dev)
return ret;
}
+EXPORT_SYMBOL_GPL(iommu_add_device);
-static void iommu_del_device(struct device *dev)
+void iommu_del_device(struct device *dev)
{
iommu_group_remove_device(dev);
}
-
-static int iommu_bus_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
-
- switch (action) {
- case BUS_NOTIFY_ADD_DEVICE:
- return iommu_add_device(dev);
- case BUS_NOTIFY_DEL_DEVICE:
- iommu_del_device(dev);
- return 0;
- default:
- return 0;
- }
-}
-
-static struct notifier_block tce_iommu_bus_nb = {
- .notifier_call = iommu_bus_notifier,
-};
-
-static int __init tce_iommu_init(void)
-{
- struct pci_dev *pdev = NULL;
-
- BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
-
- for_each_pci_dev(pdev)
- iommu_add_device(&pdev->dev);
-
- bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
- return 0;
-}
-
-subsys_initcall_sync(tce_iommu_init);
-
-#else
-
-void iommu_register_group(struct iommu_table *tbl,
- int pci_domain_number, unsigned long pe_num)
-{
-}
+EXPORT_SYMBOL_GPL(iommu_del_device);
#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ba0165615215..9729b23bfb0a 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -354,8 +354,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%*s: ", prec, "LOC");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs);
- seq_printf(p, " Local timer interrupts\n");
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event);
+ seq_printf(p, " Local timer interrupts for timer event device\n");
+
+ seq_printf(p, "%*s: ", prec, "LOC");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others);
+ seq_printf(p, " Local timer interrupts for others\n");
seq_printf(p, "%*s: ", prec, "SPU");
for_each_online_cpu(j)
@@ -389,11 +394,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
*/
u64 arch_irq_stat_cpu(unsigned int cpu)
{
- u64 sum = per_cpu(irq_stat, cpu).timer_irqs;
+ u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event;
sum += per_cpu(irq_stat, cpu).pmu_irqs;
sum += per_cpu(irq_stat, cpu).mce_exceptions;
sum += per_cpu(irq_stat, cpu).spurious_irqs;
+ sum += per_cpu(irq_stat, cpu).timer_irqs_others;
#ifdef CONFIG_PPC_DOORBELL
sum += per_cpu(irq_stat, cpu).doorbell_irqs;
#endif
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 000000000000..c0c52ec1fca7
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,345 @@
+/*
+ * Machine check exception handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <asm/mce.h>
+
+static DEFINE_PER_CPU(int, mce_nest_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
+
+/* Queue for delayed MCE events. */
+static DEFINE_PER_CPU(int, mce_queue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+
+static void mce_set_error_info(struct machine_check_event *mce,
+ struct mce_error_info *mce_err)
+{
+ mce->error_type = mce_err->error_type;
+ switch (mce_err->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
+ break;
+ case MCE_ERROR_TYPE_UNKNOWN:
+ default:
+ break;
+ }
+}
+
+/*
+ * Decode and save high level MCE information into per cpu buffer which
+ * is an array of machine_check_event structure.
+ */
+void save_mce_event(struct pt_regs *regs, long handled,
+ struct mce_error_info *mce_err,
+ uint64_t addr)
+{
+ uint64_t srr1;
+ int index = __get_cpu_var(mce_nest_count)++;
+ struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
+
+ /*
+ * Return if we don't have enough space to log mce event.
+ * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
+ * the check below will stop buffer overrun.
+ */
+ if (index >= MAX_MC_EVT)
+ return;
+
+ /* Populate generic machine check info */
+ mce->version = MCE_V1;
+ mce->srr0 = regs->nip;
+ mce->srr1 = regs->msr;
+ mce->gpr3 = regs->gpr[3];
+ mce->in_use = 1;
+
+ mce->initiator = MCE_INITIATOR_CPU;
+ if (handled)
+ mce->disposition = MCE_DISPOSITION_RECOVERED;
+ else
+ mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
+ mce->severity = MCE_SEV_ERROR_SYNC;
+
+ srr1 = regs->msr;
+
+ /*
+ * Populate the mce error_type and type-specific error_type.
+ */
+ mce_set_error_info(mce, mce_err);
+
+ if (!addr)
+ return;
+
+ if (mce->error_type == MCE_ERROR_TYPE_TLB) {
+ mce->u.tlb_error.effective_address_provided = true;
+ mce->u.tlb_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
+ mce->u.slb_error.effective_address_provided = true;
+ mce->u.slb_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
+ mce->u.erat_error.effective_address_provided = true;
+ mce->u.erat_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
+ mce->u.ue_error.effective_address_provided = true;
+ mce->u.ue_error.effective_address = addr;
+ }
+ return;
+}
+
+/*
+ * get_mce_event:
+ * mce Pointer to machine_check_event structure to be filled.
+ * release Flag to indicate whether to free the event slot or not.
+ * 0 <= do not release the mce event. Caller will invoke
+ * release_mce_event() once event has been consumed.
+ * 1 <= release the slot.
+ *
+ * return 1 = success
+ * 0 = failure
+ *
+ * get_mce_event() will be called by platform specific machine check
+ * handle routine and in KVM.
+ * When we call get_mce_event(), we are still in interrupt context and
+ * preemption will not be scheduled until ret_from_expect() routine
+ * is called.
+ */
+int get_mce_event(struct machine_check_event *mce, bool release)
+{
+ int index = __get_cpu_var(mce_nest_count) - 1;
+ struct machine_check_event *mc_evt;
+ int ret = 0;
+
+ /* Sanity check */
+ if (index < 0)
+ return ret;
+
+ /* Check if we have MCE info to process. */
+ if (index < MAX_MC_EVT) {
+ mc_evt = &__get_cpu_var(mce_event[index]);
+ /* Copy the event structure and release the original */
+ if (mce)
+ *mce = *mc_evt;
+ if (release)
+ mc_evt->in_use = 0;
+ ret = 1;
+ }
+ /* Decrement the count to free the slot. */
+ if (release)
+ __get_cpu_var(mce_nest_count)--;
+
+ return ret;
+}
+
+void release_mce_event(void)
+{
+ get_mce_event(NULL, true);
+}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_queue_event(void)
+{
+ int index;
+ struct machine_check_event evt;
+
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return;
+
+ index = __get_cpu_var(mce_queue_count)++;
+ /* If queue is full, just return for now. */
+ if (index >= MAX_MC_EVT) {
+ __get_cpu_var(mce_queue_count)--;
+ return;
+ }
+ __get_cpu_var(mce_event_queue[index]) = evt;
+}
+
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+void machine_check_process_queued_event(void)
+{
+ int index;
+
+ preempt_disable();
+ /*
+ * For now just print it to console.
+ * TODO: log this error event to FSP or nvram.
+ */
+ while (__get_cpu_var(mce_queue_count) > 0) {
+ index = __get_cpu_var(mce_queue_count) - 1;
+ machine_check_print_event_info(
+ &__get_cpu_var(mce_event_queue[index]));
+ __get_cpu_var(mce_queue_count)--;
+ }
+ preempt_enable();
+}
+
+void machine_check_print_event_info(struct machine_check_event *evt)
+{
+ const char *level, *sevstr, *subtype;
+ static const char *mc_ue_types[] = {
+ "Indeterminate",
+ "Instruction fetch",
+ "Page table walk ifetch",
+ "Load/Store",
+ "Page table walk Load/Store",
+ };
+ static const char *mc_slb_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+ static const char *mc_erat_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+ static const char *mc_tlb_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+
+ /* Print things out */
+ if (evt->version != MCE_V1) {
+ pr_err("Machine Check Exception, Unknown event version %d !\n",
+ evt->version);
+ return;
+ }
+ switch (evt->severity) {
+ case MCE_SEV_NO_ERROR:
+ level = KERN_INFO;
+ sevstr = "Harmless";
+ break;
+ case MCE_SEV_WARNING:
+ level = KERN_WARNING;
+ sevstr = "";
+ break;
+ case MCE_SEV_ERROR_SYNC:
+ level = KERN_ERR;
+ sevstr = "Severe";
+ break;
+ case MCE_SEV_FATAL:
+ default:
+ level = KERN_ERR;
+ sevstr = "Fatal";
+ break;
+ }
+
+ printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+ evt->disposition == MCE_DISPOSITION_RECOVERED ?
+ "Recovered" : "[Not recovered");
+ printk("%s Initiator: %s\n", level,
+ evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
+ switch (evt->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ subtype = evt->u.ue_error.ue_error_type <
+ ARRAY_SIZE(mc_ue_types) ?
+ mc_ue_types[evt->u.ue_error.ue_error_type]
+ : "Unknown";
+ printk("%s Error type: UE [%s]\n", level, subtype);
+ if (evt->u.ue_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.ue_error.effective_address);
+ if (evt->u.ue_error.physical_address_provided)
+ printk("%s Physial address: %016llx\n",
+ level, evt->u.ue_error.physical_address);
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ subtype = evt->u.slb_error.slb_error_type <
+ ARRAY_SIZE(mc_slb_types) ?
+ mc_slb_types[evt->u.slb_error.slb_error_type]
+ : "Unknown";
+ printk("%s Error type: SLB [%s]\n", level, subtype);
+ if (evt->u.slb_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.slb_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ subtype = evt->u.erat_error.erat_error_type <
+ ARRAY_SIZE(mc_erat_types) ?
+ mc_erat_types[evt->u.erat_error.erat_error_type]
+ : "Unknown";
+ printk("%s Error type: ERAT [%s]\n", level, subtype);
+ if (evt->u.erat_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.erat_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ subtype = evt->u.tlb_error.tlb_error_type <
+ ARRAY_SIZE(mc_tlb_types) ?
+ mc_tlb_types[evt->u.tlb_error.tlb_error_type]
+ : "Unknown";
+ printk("%s Error type: TLB [%s]\n", level, subtype);
+ if (evt->u.tlb_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.tlb_error.effective_address);
+ break;
+ default:
+ case MCE_ERROR_TYPE_UNKNOWN:
+ printk("%s Error type: Unknown\n", level);
+ break;
+ }
+}
+
+uint64_t get_mce_fault_addr(struct machine_check_event *evt)
+{
+ switch (evt->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ if (evt->u.ue_error.effective_address_provided)
+ return evt->u.ue_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ if (evt->u.slb_error.effective_address_provided)
+ return evt->u.slb_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (evt->u.erat_error.effective_address_provided)
+ return evt->u.erat_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (evt->u.tlb_error.effective_address_provided)
+ return evt->u.tlb_error.effective_address;
+ break;
+ default:
+ case MCE_ERROR_TYPE_UNKNOWN:
+ break;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(get_mce_fault_addr);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
new file mode 100644
index 000000000000..27c93f41166f
--- /dev/null
+++ b/arch/powerpc/kernel/mce_power.c
@@ -0,0 +1,284 @@
+/*
+ * Machine check exception handling CPU-side for power7 and power8
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce_power: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <asm/mmu.h>
+#include <asm/mce.h>
+
+/* flush SLBs and reload */
+static void flush_and_reload_slb(void)
+{
+ struct slb_shadow *slb;
+ unsigned long i, n;
+
+ /* Invalidate all SLBs */
+ asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+ /*
+ * If machine check is hit when in guest or in transition, we will
+ * only flush the SLBs and continue.
+ */
+ if (get_paca()->kvm_hstate.in_guest)
+ return;
+#endif
+
+ /* For host kernel, reload the SLBs from shadow SLB buffer. */
+ slb = get_slb_shadow();
+ if (!slb)
+ return;
+
+ n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
+
+ /* Load up the SLB entries from shadow SLB */
+ for (i = 0; i < n; i++) {
+ unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
+ unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
+
+ rb = (rb & ~0xFFFul) | i;
+ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+ }
+}
+
+static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
+{
+ long handled = 1;
+
+ /*
+ * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
+ * reset the error bits whenever we handle them so that at the end
+ * we can check whether we handled all of them or not.
+ * */
+ if (dsisr & slb_error_bits) {
+ flush_and_reload_slb();
+ /* reset error bits */
+ dsisr &= ~(slb_error_bits);
+ }
+ if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+ /* reset error bits */
+ dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+ }
+ /* Any other errors we don't understand? */
+ if (dsisr & 0xffffffffUL)
+ handled = 0;
+
+ return handled;
+}
+
+static long mce_handle_derror_p7(uint64_t dsisr)
+{
+ return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
+}
+
+static long mce_handle_common_ierror(uint64_t srr1)
+{
+ long handled = 0;
+
+ switch (P7_SRR1_MC_IFETCH(srr1)) {
+ case 0:
+ break;
+ case P7_SRR1_MC_IFETCH_SLB_PARITY:
+ case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ /* flush and reload SLBs for SLB errors. */
+ flush_and_reload_slb();
+ handled = 1;
+ break;
+ case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+ handled = 1;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return handled;
+}
+
+static long mce_handle_ierror_p7(uint64_t srr1)
+{
+ long handled = 0;
+
+ handled = mce_handle_common_ierror(srr1);
+
+ if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+ flush_and_reload_slb();
+ handled = 1;
+ }
+ return handled;
+}
+
+static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ switch (P7_SRR1_MC_IFETCH(srr1)) {
+ case P7_SRR1_MC_IFETCH_SLB_PARITY:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ break;
+ case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ break;
+ case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ break;
+ case P7_SRR1_MC_IFETCH_UE:
+ case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+ break;
+ case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type =
+ MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ }
+}
+
+static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ mce_get_common_ierror(mce_err, srr1);
+ if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ }
+}
+
+static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+ if (dsisr & P7_DSISR_MC_UE) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+ } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type =
+ MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ }
+}
+
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
+{
+ uint64_t srr1, addr;
+ long handled = 1;
+ struct mce_error_info mce_error_info = { 0 };
+
+ srr1 = regs->msr;
+
+ /*
+ * Handle memory errors depending whether this was a load/store or
+ * ifetch exception. Also, populate the mce error_type and
+ * type-specific error_type from either SRR1 or DSISR, depending
+ * whether this was a load/store or ifetch exception
+ */
+ if (P7_SRR1_MC_LOADSTORE(srr1)) {
+ handled = mce_handle_derror_p7(regs->dsisr);
+ mce_get_derror_p7(&mce_error_info, regs->dsisr);
+ addr = regs->dar;
+ } else {
+ handled = mce_handle_ierror_p7(srr1);
+ mce_get_ierror_p7(&mce_error_info, srr1);
+ addr = regs->nip;
+ }
+
+ save_mce_event(regs, handled, &mce_error_info, addr);
+ return handled;
+}
+
+static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ mce_get_common_ierror(mce_err, srr1);
+ if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ }
+}
+
+static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+ mce_get_derror_p7(mce_err, dsisr);
+ if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ }
+}
+
+static long mce_handle_ierror_p8(uint64_t srr1)
+{
+ long handled = 0;
+
+ handled = mce_handle_common_ierror(srr1);
+
+ if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+ flush_and_reload_slb();
+ handled = 1;
+ }
+ return handled;
+}
+
+static long mce_handle_derror_p8(uint64_t dsisr)
+{
+ return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
+}
+
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
+{
+ uint64_t srr1, addr;
+ long handled = 1;
+ struct mce_error_info mce_error_info = { 0 };
+
+ srr1 = regs->msr;
+
+ if (P7_SRR1_MC_LOADSTORE(srr1)) {
+ handled = mce_handle_derror_p8(regs->dsisr);
+ mce_get_derror_p8(&mce_error_info, regs->dsisr);
+ addr = regs->dar;
+ } else {
+ handled = mce_handle_ierror_p8(srr1);
+ mce_get_ierror_p8(&mce_error_info, srr1);
+ addr = regs->nip;
+ }
+
+ save_mce_event(regs, handled, &mce_error_info, addr);
+ return handled;
+}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index e47d268727a4..879f09620f83 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -344,7 +344,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
*/
_KPROBE(flush_icache_range)
BEGIN_FTR_SECTION
- isync
+ PURGE_PREFETCHED_INS
blr /* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
li r5,L1_CACHE_BYTES-1
@@ -448,6 +448,7 @@ _GLOBAL(invalidate_dcache_range)
*/
_GLOBAL(__flush_dcache_icache)
BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
blr
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
@@ -489,6 +490,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
*/
_GLOBAL(__flush_dcache_icache_phys)
BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
blr /* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
mfmsr r10
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 64bf8db12b15..3d0249599d52 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -67,6 +67,7 @@ PPC64_CACHES:
_KPROBE(flush_icache_range)
BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
blr
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
/*
@@ -211,6 +212,11 @@ _GLOBAL(__flush_dcache_icache)
* Different systems have different cache line sizes
*/
+BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
+ blr
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+
/* Flush the dcache */
ld r7,PPC64_CACHES@toc(r2)
clrrdi r3,r3,PAGE_SHIFT /* Page align */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0620eaaaad45..623c356fe34f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -99,12 +99,28 @@ static inline void free_lppacas(void) { }
* 3 persistent SLBs are registered here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
*/
-struct slb_shadow slb_shadow[] __cacheline_aligned = {
- [0 ... (NR_CPUS-1)] = {
- .persistent = cpu_to_be32(SLB_NUM_BOLTED),
- .buffer_length = cpu_to_be32(sizeof(struct slb_shadow)),
- },
-};
+static struct slb_shadow *slb_shadow;
+
+static void __init allocate_slb_shadows(int nr_cpus, int limit)
+{
+ int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
+ slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
+ memset(slb_shadow, 0, size);
+}
+
+static struct slb_shadow * __init init_slb_shadow(int cpu)
+{
+ struct slb_shadow *s = &slb_shadow[cpu];
+
+ s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
+ s->buffer_length = cpu_to_be32(sizeof(*s));
+
+ return s;
+}
+
+#else /* CONFIG_PPC_STD_MMU_64 */
+
+static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
#endif /* CONFIG_PPC_STD_MMU_64 */
@@ -142,7 +158,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
#ifdef CONFIG_PPC_STD_MMU_64
- new_paca->slb_shadow_ptr = &slb_shadow[cpu];
+ new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
#endif /* CONFIG_PPC_STD_MMU_64 */
}
@@ -190,6 +206,8 @@ void __init allocate_pacas(void)
allocate_lppacas(nr_cpu_ids, limit);
+ allocate_slb_shadows(nr_cpu_ids, limit);
+
/* Can't use for_each_*_cpu, as they aren't functional yet */
for (cpu = 0; cpu < nr_cpu_ids; cpu++)
initialise_paca(&paca[cpu], cpu);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4085aaa9478f..2232aff66059 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -520,9 +520,6 @@ static void __init irqstack_early_init(void)
#ifdef CONFIG_PPC_BOOK3E
static void __init exc_lvl_early_init(void)
{
- extern unsigned int interrupt_base_book3e;
- extern unsigned int exc_debug_debug_book3e;
-
unsigned int i;
for_each_possible_cpu(i) {
@@ -535,8 +532,7 @@ static void __init exc_lvl_early_init(void)
}
if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
- patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1,
- (unsigned long)&exc_debug_debug_book3e, 0);
+ patch_exception(0x040, exc_debug_debug_book3e);
}
#else
#define exc_lvl_early_init()
@@ -544,7 +540,8 @@ static void __init exc_lvl_early_init(void)
/*
* Stack space used when we detect a bad kernel stack pointer, and
- * early in SMP boots before relocation is enabled.
+ * early in SMP boots before relocation is enabled. Exclusive emergency
+ * stack for machine checks.
*/
static void __init emergency_stack_init(void)
{
@@ -567,6 +564,13 @@ static void __init emergency_stack_init(void)
sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
sp += THREAD_SIZE;
paca[i].emergency_sp = __va(sp);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* emergency stack for machine check exception handling. */
+ sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+ sp += THREAD_SIZE;
+ paca[i].mc_emergency_sp = __va(sp);
+#endif
}
}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c1cf4a1522d9..ac2621af3154 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -369,13 +369,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
- if (smp_ops)
- if (smp_ops->probe)
- max_cpus = smp_ops->probe();
- else
- max_cpus = NR_CPUS;
- else
- max_cpus = 1;
+ if (smp_ops && smp_ops->probe)
+ smp_ops->probe();
}
void smp_prepare_boot_cpu(void)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index b4e667663d9b..cad777eb613a 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -108,14 +108,14 @@ void ppc_enable_pmcs(void)
}
EXPORT_SYMBOL(ppc_enable_pmcs);
-#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+#define __SYSFS_SPRSETUP(NAME, ADDRESS, EXTRA) \
static void read_##NAME(void *val) \
{ \
*(unsigned long *)val = mfspr(ADDRESS); \
} \
static void write_##NAME(void *val) \
{ \
- ppc_enable_pmcs(); \
+ EXTRA; \
mtspr(ADDRESS, *(unsigned long *)val); \
} \
static ssize_t show_##NAME(struct device *dev, \
@@ -140,6 +140,10 @@ static ssize_t __used \
return count; \
}
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP(NAME, ADDRESS, ppc_enable_pmcs())
+#define SYSFS_SPRSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP(NAME, ADDRESS, )
/* Let's define all possible registers, we'll only hook up the ones
* that are implemented on the current processor
@@ -175,10 +179,10 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
-SYSFS_PMCSETUP(purr, SPRN_PURR);
-SYSFS_PMCSETUP(spurr, SPRN_SPURR);
-SYSFS_PMCSETUP(dscr, SPRN_DSCR);
-SYSFS_PMCSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(purr, SPRN_PURR);
+SYSFS_SPRSETUP(spurr, SPRN_SPURR);
+SYSFS_SPRSETUP(dscr, SPRN_DSCR);
+SYSFS_SPRSETUP(pir, SPRN_PIR);
/*
Lets only enable read for phyp resources and
@@ -249,34 +253,34 @@ SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
#ifdef CONFIG_DEBUG_KERNEL
-SYSFS_PMCSETUP(hid0, SPRN_HID0);
-SYSFS_PMCSETUP(hid1, SPRN_HID1);
-SYSFS_PMCSETUP(hid4, SPRN_HID4);
-SYSFS_PMCSETUP(hid5, SPRN_HID5);
-SYSFS_PMCSETUP(ima0, SPRN_PA6T_IMA0);
-SYSFS_PMCSETUP(ima1, SPRN_PA6T_IMA1);
-SYSFS_PMCSETUP(ima2, SPRN_PA6T_IMA2);
-SYSFS_PMCSETUP(ima3, SPRN_PA6T_IMA3);
-SYSFS_PMCSETUP(ima4, SPRN_PA6T_IMA4);
-SYSFS_PMCSETUP(ima5, SPRN_PA6T_IMA5);
-SYSFS_PMCSETUP(ima6, SPRN_PA6T_IMA6);
-SYSFS_PMCSETUP(ima7, SPRN_PA6T_IMA7);
-SYSFS_PMCSETUP(ima8, SPRN_PA6T_IMA8);
-SYSFS_PMCSETUP(ima9, SPRN_PA6T_IMA9);
-SYSFS_PMCSETUP(imaat, SPRN_PA6T_IMAAT);
-SYSFS_PMCSETUP(btcr, SPRN_PA6T_BTCR);
-SYSFS_PMCSETUP(pccr, SPRN_PA6T_PCCR);
-SYSFS_PMCSETUP(rpccr, SPRN_PA6T_RPCCR);
-SYSFS_PMCSETUP(der, SPRN_PA6T_DER);
-SYSFS_PMCSETUP(mer, SPRN_PA6T_MER);
-SYSFS_PMCSETUP(ber, SPRN_PA6T_BER);
-SYSFS_PMCSETUP(ier, SPRN_PA6T_IER);
-SYSFS_PMCSETUP(sier, SPRN_PA6T_SIER);
-SYSFS_PMCSETUP(siar, SPRN_PA6T_SIAR);
-SYSFS_PMCSETUP(tsr0, SPRN_PA6T_TSR0);
-SYSFS_PMCSETUP(tsr1, SPRN_PA6T_TSR1);
-SYSFS_PMCSETUP(tsr2, SPRN_PA6T_TSR2);
-SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3);
+SYSFS_SPRSETUP(hid0, SPRN_HID0);
+SYSFS_SPRSETUP(hid1, SPRN_HID1);
+SYSFS_SPRSETUP(hid4, SPRN_HID4);
+SYSFS_SPRSETUP(hid5, SPRN_HID5);
+SYSFS_SPRSETUP(ima0, SPRN_PA6T_IMA0);
+SYSFS_SPRSETUP(ima1, SPRN_PA6T_IMA1);
+SYSFS_SPRSETUP(ima2, SPRN_PA6T_IMA2);
+SYSFS_SPRSETUP(ima3, SPRN_PA6T_IMA3);
+SYSFS_SPRSETUP(ima4, SPRN_PA6T_IMA4);
+SYSFS_SPRSETUP(ima5, SPRN_PA6T_IMA5);
+SYSFS_SPRSETUP(ima6, SPRN_PA6T_IMA6);
+SYSFS_SPRSETUP(ima7, SPRN_PA6T_IMA7);
+SYSFS_SPRSETUP(ima8, SPRN_PA6T_IMA8);
+SYSFS_SPRSETUP(ima9, SPRN_PA6T_IMA9);
+SYSFS_SPRSETUP(imaat, SPRN_PA6T_IMAAT);
+SYSFS_SPRSETUP(btcr, SPRN_PA6T_BTCR);
+SYSFS_SPRSETUP(pccr, SPRN_PA6T_PCCR);
+SYSFS_SPRSETUP(rpccr, SPRN_PA6T_RPCCR);
+SYSFS_SPRSETUP(der, SPRN_PA6T_DER);
+SYSFS_SPRSETUP(mer, SPRN_PA6T_MER);
+SYSFS_SPRSETUP(ber, SPRN_PA6T_BER);
+SYSFS_SPRSETUP(ier, SPRN_PA6T_IER);
+SYSFS_SPRSETUP(sier, SPRN_PA6T_SIER);
+SYSFS_SPRSETUP(siar, SPRN_PA6T_SIAR);
+SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0);
+SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
+SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
+SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
#endif /* CONFIG_DEBUG_KERNEL */
#endif /* HAS_PPC_PMC_PA6T */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b3b144121cc9..afb1b56ef4fa 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -510,7 +510,6 @@ void timer_interrupt(struct pt_regs * regs)
*/
may_hard_irq_enable();
- __get_cpu_var(irq_stat).timer_irqs++;
#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
@@ -532,10 +531,12 @@ void timer_interrupt(struct pt_regs * regs)
*next_tb = ~(u64)0;
if (evt->event_handler)
evt->event_handler(evt);
+ __get_cpu_var(irq_stat).timer_irqs_event++;
} else {
now = *next_tb - now;
if (now <= DECREMENTER_MAX)
set_dec((int)now);
+ __get_cpu_var(irq_stat).timer_irqs_others++;
}
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 907a472f9a9e..330841766b09 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -285,6 +285,21 @@ void system_reset_exception(struct pt_regs *regs)
/* What should we do here? We could issue a shutdown or hard reset. */
}
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contains srr0 and ssr1.
+ */
+long machine_check_early(struct pt_regs *regs)
+{
+ long handled = 0;
+
+ if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+ handled = cur_cpu_spec->machine_check_early(regs);
+ return handled;
+}
+
#endif
/*
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 76a64821f4a2..826d8bd9e522 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -518,16 +518,18 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
+ struct iommu_table *tbl;
dma_addr_t ret = DMA_ERROR_CODE;
- if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
+ tbl = get_iommu_table_base(dev);
+ if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
atomic_inc(&viodev->cmo.allocs_failed);
return ret;
}
ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
if (unlikely(dma_mapping_error(dev, ret))) {
- vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
atomic_inc(&viodev->cmo.allocs_failed);
}
@@ -540,10 +542,12 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
+ struct iommu_table *tbl;
+ tbl = get_iommu_table_base(dev);
dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
- vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
}
static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -551,12 +555,14 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
+ struct iommu_table *tbl;
struct scatterlist *sgl;
int ret, count = 0;
size_t alloc_size = 0;
+ tbl = get_iommu_table_base(dev);
for (sgl = sglist; count < nelems; count++, sgl++)
- alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
+ alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
if (vio_cmo_alloc(viodev, alloc_size)) {
atomic_inc(&viodev->cmo.allocs_failed);
@@ -572,7 +578,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
}
for (sgl = sglist, count = 0; count < ret; count++, sgl++)
- alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+ alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
if (alloc_size)
vio_cmo_dealloc(viodev, alloc_size);
@@ -585,12 +591,14 @@ static void vio_dma_iommu_unmap_sg(struct device *dev,
struct dma_attrs *attrs)
{
struct vio_dev *viodev = to_vio_dev(dev);
+ struct iommu_table *tbl;
struct scatterlist *sgl;
size_t alloc_size = 0;
int count = 0;
+ tbl = get_iommu_table_base(dev);
for (sgl = sglist; count < nelems; count++, sgl++)
- alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+ alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
@@ -706,11 +714,14 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev)
{
struct vio_cmo_dev_entry *dev_ent;
struct device *dev = &viodev->dev;
+ struct iommu_table *tbl;
struct vio_driver *viodrv = to_vio_driver(dev->driver);
unsigned long flags;
size_t size;
bool dma_capable = false;
+ tbl = get_iommu_table_base(dev);
+
/* A device requires entitlement if it has a DMA window property */
switch (viodev->family) {
case VDEVICE:
@@ -736,7 +747,8 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev)
return -EINVAL;
}
- viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
+ viodev->cmo.desired =
+ IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
viodev->cmo.desired = VIO_CMO_MIN_ENT;
size = VIO_CMO_MIN_ENT;
@@ -1176,9 +1188,10 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
&tbl->it_index, &offset, &size);
/* TCE table size - measured in tce entries */
- tbl->it_size = size >> IOMMU_PAGE_SHIFT;
+ tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+ tbl->it_size = size >> tbl->it_page_shift;
/* offset for VIO should always be 0 */
- tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
+ tbl->it_offset = offset >> tbl->it_page_shift;
tbl->it_busno = 0;
tbl->it_type = TCE_VB;
tbl->it_blocksize = 16;
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index a353c485808c..768a9f977c00 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -12,6 +12,7 @@
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <asm/opal.h>
+#include <asm/mce.h>
/* SRR1 bits for machine check on POWER7 */
#define SRR1_MC_LDSTERR (1ul << (63-42))
@@ -58,18 +59,6 @@ static void reload_slb(struct kvm_vcpu *vcpu)
}
}
-/* POWER7 TLB flush */
-static void flush_tlb_power7(struct kvm_vcpu *vcpu)
-{
- unsigned long i, rb;
-
- rb = TLBIEL_INVAL_SET_LPID;
- for (i = 0; i < POWER7_TLB_SETS; ++i) {
- asm volatile("tlbiel %0" : : "r" (rb));
- rb += 1 << TLBIEL_INVAL_SET_SHIFT;
- }
-}
-
/*
* On POWER7, see if we can handle a machine check that occurred inside
* the guest in real mode, without switching to the host partition.
@@ -79,9 +68,7 @@ static void flush_tlb_power7(struct kvm_vcpu *vcpu)
static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
{
unsigned long srr1 = vcpu->arch.shregs.msr;
-#ifdef CONFIG_PPC_POWERNV
- struct opal_machine_check_event *opal_evt;
-#endif
+ struct machine_check_event mce_evt;
long handled = 1;
if (srr1 & SRR1_MC_LDSTERR) {
@@ -96,7 +83,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
}
if (dsisr & DSISR_MC_TLB_MULTI) {
- flush_tlb_power7(vcpu);
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
dsisr &= ~DSISR_MC_TLB_MULTI;
}
/* Any other errors we don't understand? */
@@ -113,28 +101,38 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
reload_slb(vcpu);
break;
case SRR1_MC_IFETCH_TLBMULTI:
- flush_tlb_power7(vcpu);
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
break;
default:
handled = 0;
}
-#ifdef CONFIG_PPC_POWERNV
/*
- * See if OPAL has already handled the condition.
- * We assume that if the condition is recovered then OPAL
+ * See if we have already handled the condition in the linux host.
+ * We assume that if the condition is recovered then linux host
* will have generated an error log event that we will pick
* up and log later.
+ * Don't release mce event now. In case if condition is not
+ * recovered we do guest exit and go back to linux host machine
+ * check handler. Hence we need make sure that current mce event
+ * is available for linux host to consume.
*/
- opal_evt = local_paca->opal_mc_evt;
- if (opal_evt->version == OpalMCE_V1 &&
- (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
- opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
+ if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
+ goto out;
+
+ if (mce_evt.version == MCE_V1 &&
+ (mce_evt.severity == MCE_SEV_NO_ERROR ||
+ mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
handled = 1;
+out:
+ /*
+ * If we have handled the error, then release the mce event because
+ * we will be delivering machine check to guest.
+ */
if (handled)
- opal_evt->in_use = 0;
-#endif
+ release_mce_event();
return handled;
}
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 17e5b2364312..d5edbeb8eb82 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -159,6 +159,21 @@ unsigned int translate_branch(const unsigned int *dest, const unsigned int *src)
return 0;
}
+#ifdef CONFIG_PPC_BOOK3E_64
+void __patch_exception(int exc, unsigned long addr)
+{
+ extern unsigned int interrupt_base_book3e;
+ unsigned int *ibase = &interrupt_base_book3e;
+
+ /* Our exceptions vectors start with a NOP and -then- a branch
+ * to deal with single stepping from userspace which stops on
+ * the second instruction. Thus we need to patch the second
+ * instruction of the exception, not the first one
+ */
+
+ patch_branch(ibase + (exc / 4) + 1, addr, 0);
+}
+#endif
#ifdef CONFIG_CODE_PATCHING_SELFTEST
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d3cbda62857b..1136d26a95ae 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -148,7 +148,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r30,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
- ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */
+ /*
+ * Always add "C" bit for perf. Memory coherence is always enabled
+ */
+ ori r3,r3,HPTE_R_C | HPTE_R_M
/* We eventually do the icache sync here (maybe inline that
* code rather than call a C function...)
@@ -457,7 +460,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r3,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
- ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */
+ /*
+ * Always add "C" bit for perf. Memory coherence is always enabled
+ */
+ ori r3,r3,HPTE_R_C | HPTE_R_M
/* We eventually do the icache sync here (maybe inline that
* code rather than call a C function...)
@@ -795,7 +801,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r30,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
- ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */
+ /*
+ * Always add "C" bit for perf. Memory coherence is always enabled
+ */
+ ori r3,r3,HPTE_R_C | HPTE_R_M
/* We eventually do the icache sync here (maybe inline that
* code rather than call a C function...)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6176b3cdf579..de6881259aef 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -169,9 +169,10 @@ static unsigned long htab_convert_pte_flags(unsigned long pteflags)
if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) &&
(pteflags & _PAGE_DIRTY)))
rflags |= 1;
-
- /* Always add C */
- return rflags | HPTE_R_C;
+ /*
+ * Always add "C" bit for perf. Memory coherence is always enabled
+ */
+ return rflags | HPTE_R_C | HPTE_R_M;
}
int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 34de9e0cdc34..826893fcb3a7 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -127,7 +127,11 @@ repeat:
/* Add in WIMG bits */
rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
- _PAGE_COHERENT | _PAGE_GUARDED));
+ _PAGE_GUARDED));
+ /*
+ * enable the memory coherence always
+ */
+ rflags |= HPTE_R_M;
/* Insert into the hash table, primary slot */
slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 0b7fb6761015..a5bcf9301196 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -99,6 +99,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
/* Add in WIMG bits */
rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
_PAGE_COHERENT | _PAGE_GUARDED));
+ /*
+ * enable the memory coherence always
+ */
+ rflags |= HPTE_R_M;
slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
mmu_psize, ssize);
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 841e0d00863c..ad90429bbd8b 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -174,7 +174,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
pte_t pte)
{
#ifdef CONFIG_DEBUG_VM
- WARN_ON(pte_present(*ptep));
+ WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
#endif
/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 9d95786aa80f..02e8681fb865 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -687,7 +687,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_none(*pmdp));
+ WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
assert_spin_locked(&mm->page_table_lock);
WARN_ON(!pmd_trans_huge(pmd));
#endif
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 358d74303138..8805b7b87dc6 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -518,25 +518,6 @@ static void setup_page_sizes(void)
}
}
-static void __patch_exception(int exc, unsigned long addr)
-{
- extern unsigned int interrupt_base_book3e;
- unsigned int *ibase = &interrupt_base_book3e;
-
- /* Our exceptions vectors start with a NOP and -then- a branch
- * to deal with single stepping from userspace which stops on
- * the second instruction. Thus we need to patch the second
- * instruction of the exception, not the first one
- */
-
- patch_branch(ibase + (exc / 4) + 1, addr, 0);
-}
-
-#define patch_exception(exc, name) do { \
- extern unsigned int name; \
- __patch_exception((exc), (unsigned long)&name); \
-} while (0)
-
static void setup_mmu_htw(void)
{
/* Check if HW tablewalk is present, and if yes, enable it by:
diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig
index 670a033264c0..2bdc8c862c46 100644
--- a/arch/powerpc/platforms/83xx/Kconfig
+++ b/arch/powerpc/platforms/83xx/Kconfig
@@ -99,7 +99,6 @@ config SBC834x
config ASP834x
bool "Analogue & Micro ASP 834x"
select PPC_MPC834x
- select REDBOOT
help
This enables support for the Analogue & Micro ASP 83xx
board.
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index 8dec3c0911ad..bd6f1a1cf922 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -45,7 +45,6 @@ config PPC_EP88XC
config PPC_ADDER875
bool "Analogue & Micro Adder 875"
select CPM1
- select REDBOOT
help
This enables support for the Analogue & Micro Adder 875
board.
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index bca2465a9c34..434fda39bf8b 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -72,6 +72,7 @@ config PPC_BOOK3S_64
select PPC_HAVE_PMU_SUPPORT
select SYS_SUPPORTS_HUGETLBFS
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
+ select ARCH_SUPPORTS_NUMA_BALANCING
config PPC_BOOK3E_64
bool "Embedded processors"
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index c34ee4e60873..d4d245c0d787 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -111,7 +111,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
if (rflags & _PAGE_NO_CACHE)
- hpte_r &= ~_PAGE_COHERENT;
+ hpte_r &= ~HPTE_R_M;
raw_spin_lock(&beat_htab_lock);
lpar_rc = beat_read_mask(hpte_group);
@@ -337,7 +337,7 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
if (rflags & _PAGE_NO_CACHE)
- hpte_r &= ~_PAGE_COHERENT;
+ hpte_r &= ~HPTE_R_M;
/* insert into not-volted entry */
lpar_rc = beat_insert_htab_entry3(0, hpte_group, hpte_v, hpte_r,
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index b53560660b72..2b90ff8a93be 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -197,7 +197,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
- for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE)
+ for (i = 0; i < npages; i++, uaddr += tbl->it_page_shift)
io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);
mb();
@@ -430,7 +430,7 @@ static void cell_iommu_setup_hardware(struct cbe_iommu *iommu,
{
cell_iommu_setup_stab(iommu, base, size, 0, 0);
iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
- IOMMU_PAGE_SHIFT);
+ IOMMU_PAGE_SHIFT_4K);
cell_iommu_enable_hardware(iommu);
}
@@ -487,8 +487,10 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
window->table.it_blocksize = 16;
window->table.it_base = (unsigned long)iommu->ptab;
window->table.it_index = iommu->nid;
- window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT) + pte_offset;
- window->table.it_size = size >> IOMMU_PAGE_SHIFT;
+ window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K;
+ window->table.it_offset =
+ (offset >> window->table.it_page_shift) + pte_offset;
+ window->table.it_size = size >> window->table.it_page_shift;
iommu_init_table(&window->table, iommu->nid);
@@ -773,7 +775,7 @@ static void __init cell_iommu_init_one(struct device_node *np,
/* Setup the iommu_table */
cell_iommu_setup_window(iommu, np, base, size,
- offset >> IOMMU_PAGE_SHIFT);
+ offset >> IOMMU_PAGE_SHIFT_4K);
}
static void __init cell_disable_iommus(void)
@@ -1122,7 +1124,7 @@ static int __init cell_iommu_fixed_mapping_init(void)
cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
- IOMMU_PAGE_SHIFT);
+ IOMMU_PAGE_SHIFT_4K);
cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
fbase, fsize);
cell_iommu_enable_hardware(iommu);
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 7d2d036754b5..2e576f2ae442 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -138,8 +138,11 @@ static void iommu_table_iobmap_setup(void)
pr_debug(" -> %s\n", __func__);
iommu_table_iobmap.it_busno = 0;
iommu_table_iobmap.it_offset = 0;
+ iommu_table_iobmap.it_page_shift = IOBMAP_PAGE_SHIFT;
+
/* it_size is in number of entries */
- iommu_table_iobmap.it_size = 0x80000000 >> IOBMAP_PAGE_SHIFT;
+ iommu_table_iobmap.it_size =
+ 0x80000000 >> iommu_table_iobmap.it_page_shift;
/* Initialize the common IOMMU code */
iommu_table_iobmap.it_base = (unsigned long)iob_l2_base;
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 9fced3f6d2dc..895e8a20a3fc 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -13,11 +13,6 @@ config PPC_POWERNV
select ARCH_RANDOM
default y
-config POWERNV_MSI
- bool "Support PCI MSI on PowerNV platform"
- depends on PCI_MSI
- default y
-
config PPC_POWERNV_RTAS
depends on PPC_POWERNV
bool "Support for RTAS based PowerNV platforms such as BML"
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 873fa1370dc4..8d767fde5a6a 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
+obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index d7ddcee7feb8..9b1db254898a 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -578,11 +578,8 @@ static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
return -EIO;
}
- /*
- * FIXME: We probably need log the error in somewhere.
- * Lets make it up in future.
- */
- /* pr_info("%s", phb->diag.blob); */
+ /* The PHB diag-data is always indicative */
+ pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
spin_unlock_irqrestore(&phb->lock, flags);
@@ -670,143 +667,9 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
}
}
-static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose,
- struct OpalIoPhbErrorCommon *common)
-{
- struct OpalIoP7IOCPhbErrorData *data;
- int i;
-
- data = (struct OpalIoP7IOCPhbErrorData *)common;
-
- pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n",
- hose->global_number, common->version);
-
- pr_info(" brdgCtl: %08x\n", data->brdgCtl);
-
- pr_info(" portStatusReg: %08x\n", data->portStatusReg);
- pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus);
- pr_info(" busAgentStatus: %08x\n", data->busAgentStatus);
-
- pr_info(" deviceStatus: %08x\n", data->deviceStatus);
- pr_info(" slotStatus: %08x\n", data->slotStatus);
- pr_info(" linkStatus: %08x\n", data->linkStatus);
- pr_info(" devCmdStatus: %08x\n", data->devCmdStatus);
- pr_info(" devSecStatus: %08x\n", data->devSecStatus);
-
- pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus);
- pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus);
- pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus);
- pr_info(" tlpHdr1: %08x\n", data->tlpHdr1);
- pr_info(" tlpHdr2: %08x\n", data->tlpHdr2);
- pr_info(" tlpHdr3: %08x\n", data->tlpHdr3);
- pr_info(" tlpHdr4: %08x\n", data->tlpHdr4);
- pr_info(" sourceId: %08x\n", data->sourceId);
-
- pr_info(" errorClass: %016llx\n", data->errorClass);
- pr_info(" correlator: %016llx\n", data->correlator);
- pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr);
- pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr);
- pr_info(" lemFir: %016llx\n", data->lemFir);
- pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask);
- pr_info(" lemWOF: %016llx\n", data->lemWOF);
- pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus);
- pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus);
- pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0);
- pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1);
- pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus);
- pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
- pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0);
- pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1);
- pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus);
- pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
- pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0);
- pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1);
- pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus);
- pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
- pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0);
- pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1);
-
- for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
- if ((data->pestA[i] >> 63) == 0 &&
- (data->pestB[i] >> 63) == 0)
- continue;
-
- pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]);
- pr_info(" PESTB: %016llx\n", data->pestB[i]);
- }
-}
-
-static void ioda_eeh_phb3_phb_diag(struct pci_controller *hose,
- struct OpalIoPhbErrorCommon *common)
-{
- struct OpalIoPhb3ErrorData *data;
- int i;
-
- data = (struct OpalIoPhb3ErrorData*)common;
- pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n\n",
- hose->global_number, common->version);
-
- pr_info(" brdgCtl: %08x\n", data->brdgCtl);
-
- pr_info(" portStatusReg: %08x\n", data->portStatusReg);
- pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus);
- pr_info(" busAgentStatus: %08x\n", data->busAgentStatus);
-
- pr_info(" deviceStatus: %08x\n", data->deviceStatus);
- pr_info(" slotStatus: %08x\n", data->slotStatus);
- pr_info(" linkStatus: %08x\n", data->linkStatus);
- pr_info(" devCmdStatus: %08x\n", data->devCmdStatus);
- pr_info(" devSecStatus: %08x\n", data->devSecStatus);
-
- pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus);
- pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus);
- pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus);
- pr_info(" tlpHdr1: %08x\n", data->tlpHdr1);
- pr_info(" tlpHdr2: %08x\n", data->tlpHdr2);
- pr_info(" tlpHdr3: %08x\n", data->tlpHdr3);
- pr_info(" tlpHdr4: %08x\n", data->tlpHdr4);
- pr_info(" sourceId: %08x\n", data->sourceId);
- pr_info(" errorClass: %016llx\n", data->errorClass);
- pr_info(" correlator: %016llx\n", data->correlator);
- pr_info(" nFir: %016llx\n", data->nFir);
- pr_info(" nFirMask: %016llx\n", data->nFirMask);
- pr_info(" nFirWOF: %016llx\n", data->nFirWOF);
- pr_info(" PhbPlssr: %016llx\n", data->phbPlssr);
- pr_info(" PhbCsr: %016llx\n", data->phbCsr);
- pr_info(" lemFir: %016llx\n", data->lemFir);
- pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask);
- pr_info(" lemWOF: %016llx\n", data->lemWOF);
- pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus);
- pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus);
- pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0);
- pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1);
- pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus);
- pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
- pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0);
- pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1);
- pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus);
- pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
- pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0);
- pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1);
- pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus);
- pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
- pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0);
- pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1);
-
- for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
- if ((data->pestA[i] >> 63) == 0 &&
- (data->pestB[i] >> 63) == 0)
- continue;
-
- pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]);
- pr_info(" PESTB: %016llx\n", data->pestB[i]);
- }
-}
-
static void ioda_eeh_phb_diag(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
- struct OpalIoPhbErrorCommon *common;
long rc;
rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
@@ -817,18 +680,7 @@ static void ioda_eeh_phb_diag(struct pci_controller *hose)
return;
}
- common = (struct OpalIoPhbErrorCommon *)phb->diag.blob;
- switch (common->ioType) {
- case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
- ioda_eeh_p7ioc_phb_diag(hose, common);
- break;
- case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
- ioda_eeh_phb3_phb_diag(hose, common);
- break;
- default:
- pr_warning("%s: Unrecognized I/O chip %d\n",
- __func__, common->ioType);
- }
+ pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
}
static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 6ffa6b1ec5b7..4aeae4f36e1d 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -103,27 +103,6 @@ struct image_header_t {
uint32_t size;
};
-/* Scatter/gather entry */
-struct opal_sg_entry {
- void *data;
- long length;
-};
-
-/* We calculate number of entries based on PAGE_SIZE */
-#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
-
-/*
- * This struct is very similar but not identical to that
- * needed by the opal flash update. All we need to do for
- * opal is rewrite num_entries into a version/length and
- * translate the pointers to absolute.
- */
-struct opal_sg_list {
- unsigned long num_entries;
- struct opal_sg_list *next;
- struct opal_sg_entry entry[SG_ENTRIES_PER_NODE];
-};
-
struct validate_flash_t {
int status; /* Return status */
void *buf; /* Candiate image buffer */
@@ -333,7 +312,7 @@ static struct opal_sg_list *image_data_to_sglist(void)
addr = image_data.data;
size = image_data.size;
- sg1 = kzalloc((sizeof(struct opal_sg_list)), GFP_KERNEL);
+ sg1 = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!sg1)
return NULL;
@@ -351,8 +330,7 @@ static struct opal_sg_list *image_data_to_sglist(void)
sg1->num_entries++;
if (sg1->num_entries >= SG_ENTRIES_PER_NODE) {
- sg1->next = kzalloc((sizeof(struct opal_sg_list)),
- GFP_KERNEL);
+ sg1->next = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!sg1->next) {
pr_err("%s : Failed to allocate memory\n",
__func__);
@@ -402,7 +380,10 @@ static int opal_flash_update(int op)
else
sg->next = NULL;
- /* Make num_entries into the version/length field */
+ /*
+ * Convert num_entries to version/length format
+ * to satisfy OPAL.
+ */
sg->num_entries = (SG_LIST_VERSION << 56) |
(sg->num_entries * sizeof(struct opal_sg_entry) + 16);
}
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
new file mode 100644
index 000000000000..ec4132239cdf
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -0,0 +1,146 @@
+/*
+ * OPAL asynchronus Memory error handling support in PowreNV.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+#include <asm/cputable.h>
+
+static int opal_mem_err_nb_init;
+static LIST_HEAD(opal_memory_err_list);
+static DEFINE_SPINLOCK(opal_mem_err_lock);
+
+struct OpalMsgNode {
+ struct list_head list;
+ struct opal_msg msg;
+};
+
+static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt)
+{
+ uint64_t paddr_start, paddr_end;
+
+ pr_debug("%s: Retrived memory error event, type: 0x%x\n",
+ __func__, merr_evt->type);
+ switch (merr_evt->type) {
+ case OPAL_MEM_ERR_TYPE_RESILIENCE:
+ paddr_start = merr_evt->u.resilience.physical_address_start;
+ paddr_end = merr_evt->u.resilience.physical_address_end;
+ break;
+ case OPAL_MEM_ERR_TYPE_DYN_DALLOC:
+ paddr_start = merr_evt->u.dyn_dealloc.physical_address_start;
+ paddr_end = merr_evt->u.dyn_dealloc.physical_address_end;
+ break;
+ default:
+ return;
+ }
+
+ for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) {
+ memory_failure(paddr_start >> PAGE_SHIFT, 0, 0);
+ }
+}
+
+static void handle_memory_error(void)
+{
+ unsigned long flags;
+ struct OpalMemoryErrorData *merr_evt;
+ struct OpalMsgNode *msg_node;
+
+ spin_lock_irqsave(&opal_mem_err_lock, flags);
+ while (!list_empty(&opal_memory_err_list)) {
+ msg_node = list_entry(opal_memory_err_list.next,
+ struct OpalMsgNode, list);
+ list_del(&msg_node->list);
+ spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+
+ merr_evt = (struct OpalMemoryErrorData *)
+ &msg_node->msg.params[0];
+ handle_memory_error_event(merr_evt);
+ kfree(msg_node);
+ spin_lock_irqsave(&opal_mem_err_lock, flags);
+ }
+ spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+}
+
+static void mem_error_handler(struct work_struct *work)
+{
+ handle_memory_error();
+}
+
+static DECLARE_WORK(mem_error_work, mem_error_handler);
+
+/*
+ * opal_memory_err_event - notifier handler that queues up the opal message
+ * to be preocessed later.
+ */
+static int opal_memory_err_event(struct notifier_block *nb,
+ unsigned long msg_type, void *msg)
+{
+ unsigned long flags;
+ struct OpalMsgNode *msg_node;
+
+ if (msg_type != OPAL_MSG_MEM_ERR)
+ return 0;
+
+ msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+ if (!msg_node) {
+ pr_err("MEMORY_ERROR: out of memory, Opal message event not"
+ "handled\n");
+ return -ENOMEM;
+ }
+ memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+
+ spin_lock_irqsave(&opal_mem_err_lock, flags);
+ list_add(&msg_node->list, &opal_memory_err_list);
+ spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+
+ schedule_work(&mem_error_work);
+ return 0;
+}
+
+static struct notifier_block opal_mem_err_nb = {
+ .notifier_call = opal_memory_err_event,
+ .next = NULL,
+ .priority = 0,
+};
+
+static int __init opal_mem_err_init(void)
+{
+ int ret;
+
+ if (!opal_mem_err_nb_init) {
+ ret = opal_message_notifier_register(
+ OPAL_MSG_MEM_ERR, &opal_mem_err_nb);
+ if (ret) {
+ pr_err("%s: Can't register OPAL event notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+ opal_mem_err_nb_init = 1;
+ }
+ return 0;
+}
+subsys_initcall(opal_mem_err_init);
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index 7d07c7e80ec0..b1885db8fdf3 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -18,6 +18,7 @@
#include <asm/opal.h>
#include <asm/firmware.h>
+#include <asm/machdep.h>
static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
{
@@ -48,8 +49,11 @@ unsigned long __init opal_get_boot_time(void)
else
mdelay(10);
}
- if (rc != OPAL_SUCCESS)
+ if (rc != OPAL_SUCCESS) {
+ ppc_md.get_rtc_time = NULL;
+ ppc_md.set_rtc_time = NULL;
return 0;
+ }
y_m_d = be32_to_cpu(__y_m_d);
h_m_s_ms = be64_to_cpu(__h_m_s_ms);
opal_to_tm(y_m_d, h_m_s_ms, &tm);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index e7806504e976..719aa5c325c6 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -126,3 +126,5 @@ OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU);
OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
+OPAL_CALL(opal_get_msg, OPAL_GET_MSG);
+OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 1c798cd55372..7a184a0ff183 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -18,9 +18,11 @@
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <linux/kobject.h>
#include <asm/opal.h>
#include <asm/firmware.h>
+#include <asm/mce.h>
#include "powernv.h"
@@ -38,6 +40,7 @@ extern u64 opal_mc_secondary_handler[];
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
+static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static DEFINE_SPINLOCK(opal_notifier_lock);
static uint64_t last_notified_mask = 0x0ul;
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
@@ -88,14 +91,10 @@ static int __init opal_register_exception_handlers(void)
if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
return -ENODEV;
- /* Hookup some exception handlers. We use the fwnmi area at 0x7000
- * to provide the glue space to OPAL
+ /* Hookup some exception handlers except machine check. We use the
+ * fwnmi area at 0x7000 to provide the glue space to OPAL
*/
glue = 0x7000;
- opal_register_exception_handler(OPAL_MACHINE_CHECK_HANDLER,
- __pa(opal_mc_secondary_handler[0]),
- glue);
- glue += 128;
opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
0, glue);
glue += 128;
@@ -169,6 +168,95 @@ void opal_notifier_disable(void)
atomic_set(&opal_notifier_hold, 1);
}
+/*
+ * Opal message notifier based on message type. Allow subscribers to get
+ * notified for specific messgae type.
+ */
+int opal_message_notifier_register(enum OpalMessageType msg_type,
+ struct notifier_block *nb)
+{
+ if (!nb) {
+ pr_warning("%s: Invalid argument (%p)\n",
+ __func__, nb);
+ return -EINVAL;
+ }
+ if (msg_type > OPAL_MSG_TYPE_MAX) {
+ pr_warning("%s: Invalid message type argument (%d)\n",
+ __func__, msg_type);
+ return -EINVAL;
+ }
+ return atomic_notifier_chain_register(
+ &opal_msg_notifier_head[msg_type], nb);
+}
+
+static void opal_message_do_notify(uint32_t msg_type, void *msg)
+{
+ /* notify subscribers */
+ atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
+ msg_type, msg);
+}
+
+static void opal_handle_message(void)
+{
+ s64 ret;
+ /*
+ * TODO: pre-allocate a message buffer depending on opal-msg-size
+ * value in /proc/device-tree.
+ */
+ static struct opal_msg msg;
+
+ ret = opal_get_msg(__pa(&msg), sizeof(msg));
+ /* No opal message pending. */
+ if (ret == OPAL_RESOURCE)
+ return;
+
+ /* check for errors. */
+ if (ret) {
+ pr_warning("%s: Failed to retrive opal message, err=%lld\n",
+ __func__, ret);
+ return;
+ }
+
+ /* Sanity check */
+ if (msg.msg_type > OPAL_MSG_TYPE_MAX) {
+ pr_warning("%s: Unknown message type: %u\n",
+ __func__, msg.msg_type);
+ return;
+ }
+ opal_message_do_notify(msg.msg_type, (void *)&msg);
+}
+
+static int opal_message_notify(struct notifier_block *nb,
+ unsigned long events, void *change)
+{
+ if (events & OPAL_EVENT_MSG_PENDING)
+ opal_handle_message();
+ return 0;
+}
+
+static struct notifier_block opal_message_nb = {
+ .notifier_call = opal_message_notify,
+ .next = NULL,
+ .priority = 0,
+};
+
+static int __init opal_message_init(void)
+{
+ int ret, i;
+
+ for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
+ ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
+
+ ret = opal_notifier_register(&opal_message_nb);
+ if (ret) {
+ pr_err("%s: Can't register OPAL event notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+ return 0;
+}
+early_initcall(opal_message_init);
+
int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
s64 rc;
@@ -254,119 +342,62 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
return written;
}
+static int opal_recover_mce(struct pt_regs *regs,
+ struct machine_check_event *evt)
+{
+ int recovered = 0;
+ uint64_t ea = get_mce_fault_addr(evt);
+
+ if (!(regs->msr & MSR_RI)) {
+ /* If MSR_RI isn't set, we cannot recover */
+ recovered = 0;
+ } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+ /* Platform corrected itself */
+ recovered = 1;
+ } else if (ea && !is_kernel_addr(ea)) {
+ /*
+ * Faulting address is not in kernel text. We should be fine.
+ * We need to find which process uses this address.
+ * For now, kill the task if we have received exception when
+ * in userspace.
+ *
+ * TODO: Queue up this address for hwpoisioning later.
+ */
+ if (user_mode(regs) && !is_global_init(current)) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else
+ recovered = 0;
+ } else if (user_mode(regs) && !is_global_init(current) &&
+ evt->severity == MCE_SEV_ERROR_SYNC) {
+ /*
+ * If we have received a synchronous error when in userspace
+ * kill the task.
+ */
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ }
+ return recovered;
+}
+
int opal_machine_check(struct pt_regs *regs)
{
- struct opal_machine_check_event *opal_evt = get_paca()->opal_mc_evt;
- struct opal_machine_check_event evt;
- const char *level, *sevstr, *subtype;
- static const char *opal_mc_ue_types[] = {
- "Indeterminate",
- "Instruction fetch",
- "Page table walk ifetch",
- "Load/Store",
- "Page table walk Load/Store",
- };
- static const char *opal_mc_slb_types[] = {
- "Indeterminate",
- "Parity",
- "Multihit",
- };
- static const char *opal_mc_erat_types[] = {
- "Indeterminate",
- "Parity",
- "Multihit",
- };
- static const char *opal_mc_tlb_types[] = {
- "Indeterminate",
- "Parity",
- "Multihit",
- };
-
- /* Copy the event structure and release the original */
- evt = *opal_evt;
- opal_evt->in_use = 0;
+ struct machine_check_event evt;
+
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return 0;
/* Print things out */
- if (evt.version != OpalMCE_V1) {
+ if (evt.version != MCE_V1) {
pr_err("Machine Check Exception, Unknown event version %d !\n",
evt.version);
return 0;
}
- switch(evt.severity) {
- case OpalMCE_SEV_NO_ERROR:
- level = KERN_INFO;
- sevstr = "Harmless";
- break;
- case OpalMCE_SEV_WARNING:
- level = KERN_WARNING;
- sevstr = "";
- break;
- case OpalMCE_SEV_ERROR_SYNC:
- level = KERN_ERR;
- sevstr = "Severe";
- break;
- case OpalMCE_SEV_FATAL:
- default:
- level = KERN_ERR;
- sevstr = "Fatal";
- break;
- }
+ machine_check_print_event_info(&evt);
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- evt.disposition == OpalMCE_DISPOSITION_RECOVERED ?
- "Recovered" : "[Not recovered");
- printk("%s Initiator: %s\n", level,
- evt.initiator == OpalMCE_INITIATOR_CPU ? "CPU" : "Unknown");
- switch(evt.error_type) {
- case OpalMCE_ERROR_TYPE_UE:
- subtype = evt.u.ue_error.ue_error_type <
- ARRAY_SIZE(opal_mc_ue_types) ?
- opal_mc_ue_types[evt.u.ue_error.ue_error_type]
- : "Unknown";
- printk("%s Error type: UE [%s]\n", level, subtype);
- if (evt.u.ue_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt.u.ue_error.effective_address);
- if (evt.u.ue_error.physical_address_provided)
- printk("%s Physial address: %016llx\n",
- level, evt.u.ue_error.physical_address);
- break;
- case OpalMCE_ERROR_TYPE_SLB:
- subtype = evt.u.slb_error.slb_error_type <
- ARRAY_SIZE(opal_mc_slb_types) ?
- opal_mc_slb_types[evt.u.slb_error.slb_error_type]
- : "Unknown";
- printk("%s Error type: SLB [%s]\n", level, subtype);
- if (evt.u.slb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt.u.slb_error.effective_address);
- break;
- case OpalMCE_ERROR_TYPE_ERAT:
- subtype = evt.u.erat_error.erat_error_type <
- ARRAY_SIZE(opal_mc_erat_types) ?
- opal_mc_erat_types[evt.u.erat_error.erat_error_type]
- : "Unknown";
- printk("%s Error type: ERAT [%s]\n", level, subtype);
- if (evt.u.erat_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt.u.erat_error.effective_address);
- break;
- case OpalMCE_ERROR_TYPE_TLB:
- subtype = evt.u.tlb_error.tlb_error_type <
- ARRAY_SIZE(opal_mc_tlb_types) ?
- opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type]
- : "Unknown";
- printk("%s Error type: TLB [%s]\n", level, subtype);
- if (evt.u.tlb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt.u.tlb_error.effective_address);
- break;
- default:
- case OpalMCE_ERROR_TYPE_UNKNOWN:
- printk("%s Error type: Unknown\n", level);
- break;
- }
- return evt.severity == OpalMCE_SEV_FATAL ? 0 : 1;
+ if (opal_recover_mce(regs, &evt))
+ return 1;
+ return 0;
}
static irqreturn_t opal_interrupt(int irq, void *data)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2c6d173842b2..f0e68714696e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -460,7 +460,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
return;
pe = &phb->ioda.pe_array[pdn->pe_number];
- set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+ set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
}
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
@@ -468,7 +468,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
- set_iommu_table_base(&dev->dev, &pe->tce32_table);
+ set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
if (dev->subordinate)
pnv_ioda_setup_bus_dma(pe, dev->subordinate);
}
@@ -644,7 +644,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
if (pe->pdev)
- set_iommu_table_base(&pe->pdev->dev, tbl);
+ set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
else
pnv_ioda_setup_bus_dma(pe, pe->pbus);
@@ -723,7 +723,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
if (pe->pdev)
- set_iommu_table_base(&pe->pdev->dev, tbl);
+ set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
else
pnv_ioda_setup_bus_dma(pe, pe->pbus);
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index f8b4bd8afb2e..e3807d69393e 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -92,7 +92,7 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
pci_domain_nr(phb->hose->bus), phb->opal_id);
}
- set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table);
+ set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table);
}
static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 4eb33a9ed532..b555ebc57ef5 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -124,77 +124,157 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
}
#endif /* CONFIG_PCI_MSI */
-static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb)
+static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
+ struct OpalIoPhbErrorCommon *common)
{
- struct OpalIoP7IOCPhbErrorData *data = &phb->diag.p7ioc;
+ struct OpalIoP7IOCPhbErrorData *data;
int i;
- pr_info("PHB %d diagnostic data:\n", phb->hose->global_number);
-
- pr_info(" brdgCtl = 0x%08x\n", data->brdgCtl);
-
- pr_info(" portStatusReg = 0x%08x\n", data->portStatusReg);
- pr_info(" rootCmplxStatus = 0x%08x\n", data->rootCmplxStatus);
- pr_info(" busAgentStatus = 0x%08x\n", data->busAgentStatus);
-
- pr_info(" deviceStatus = 0x%08x\n", data->deviceStatus);
- pr_info(" slotStatus = 0x%08x\n", data->slotStatus);
- pr_info(" linkStatus = 0x%08x\n", data->linkStatus);
- pr_info(" devCmdStatus = 0x%08x\n", data->devCmdStatus);
- pr_info(" devSecStatus = 0x%08x\n", data->devSecStatus);
-
- pr_info(" rootErrorStatus = 0x%08x\n", data->rootErrorStatus);
- pr_info(" uncorrErrorStatus = 0x%08x\n", data->uncorrErrorStatus);
- pr_info(" corrErrorStatus = 0x%08x\n", data->corrErrorStatus);
- pr_info(" tlpHdr1 = 0x%08x\n", data->tlpHdr1);
- pr_info(" tlpHdr2 = 0x%08x\n", data->tlpHdr2);
- pr_info(" tlpHdr3 = 0x%08x\n", data->tlpHdr3);
- pr_info(" tlpHdr4 = 0x%08x\n", data->tlpHdr4);
- pr_info(" sourceId = 0x%08x\n", data->sourceId);
-
- pr_info(" errorClass = 0x%016llx\n", data->errorClass);
- pr_info(" correlator = 0x%016llx\n", data->correlator);
-
- pr_info(" p7iocPlssr = 0x%016llx\n", data->p7iocPlssr);
- pr_info(" p7iocCsr = 0x%016llx\n", data->p7iocCsr);
- pr_info(" lemFir = 0x%016llx\n", data->lemFir);
- pr_info(" lemErrorMask = 0x%016llx\n", data->lemErrorMask);
- pr_info(" lemWOF = 0x%016llx\n", data->lemWOF);
- pr_info(" phbErrorStatus = 0x%016llx\n", data->phbErrorStatus);
- pr_info(" phbFirstErrorStatus = 0x%016llx\n", data->phbFirstErrorStatus);
- pr_info(" phbErrorLog0 = 0x%016llx\n", data->phbErrorLog0);
- pr_info(" phbErrorLog1 = 0x%016llx\n", data->phbErrorLog1);
- pr_info(" mmioErrorStatus = 0x%016llx\n", data->mmioErrorStatus);
- pr_info(" mmioFirstErrorStatus = 0x%016llx\n", data->mmioFirstErrorStatus);
- pr_info(" mmioErrorLog0 = 0x%016llx\n", data->mmioErrorLog0);
- pr_info(" mmioErrorLog1 = 0x%016llx\n", data->mmioErrorLog1);
- pr_info(" dma0ErrorStatus = 0x%016llx\n", data->dma0ErrorStatus);
- pr_info(" dma0FirstErrorStatus = 0x%016llx\n", data->dma0FirstErrorStatus);
- pr_info(" dma0ErrorLog0 = 0x%016llx\n", data->dma0ErrorLog0);
- pr_info(" dma0ErrorLog1 = 0x%016llx\n", data->dma0ErrorLog1);
- pr_info(" dma1ErrorStatus = 0x%016llx\n", data->dma1ErrorStatus);
- pr_info(" dma1FirstErrorStatus = 0x%016llx\n", data->dma1FirstErrorStatus);
- pr_info(" dma1ErrorLog0 = 0x%016llx\n", data->dma1ErrorLog0);
- pr_info(" dma1ErrorLog1 = 0x%016llx\n", data->dma1ErrorLog1);
+ data = (struct OpalIoP7IOCPhbErrorData *)common;
+ pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n",
+ hose->global_number, common->version);
+
+ pr_info(" brdgCtl: %08x\n", data->brdgCtl);
+
+ pr_info(" portStatusReg: %08x\n", data->portStatusReg);
+ pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus);
+ pr_info(" busAgentStatus: %08x\n", data->busAgentStatus);
+
+ pr_info(" deviceStatus: %08x\n", data->deviceStatus);
+ pr_info(" slotStatus: %08x\n", data->slotStatus);
+ pr_info(" linkStatus: %08x\n", data->linkStatus);
+ pr_info(" devCmdStatus: %08x\n", data->devCmdStatus);
+ pr_info(" devSecStatus: %08x\n", data->devSecStatus);
+
+ pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus);
+ pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus);
+ pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus);
+ pr_info(" tlpHdr1: %08x\n", data->tlpHdr1);
+ pr_info(" tlpHdr2: %08x\n", data->tlpHdr2);
+ pr_info(" tlpHdr3: %08x\n", data->tlpHdr3);
+ pr_info(" tlpHdr4: %08x\n", data->tlpHdr4);
+ pr_info(" sourceId: %08x\n", data->sourceId);
+ pr_info(" errorClass: %016llx\n", data->errorClass);
+ pr_info(" correlator: %016llx\n", data->correlator);
+ pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr);
+ pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr);
+ pr_info(" lemFir: %016llx\n", data->lemFir);
+ pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask);
+ pr_info(" lemWOF: %016llx\n", data->lemWOF);
+ pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus);
+ pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus);
+ pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0);
+ pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1);
+ pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus);
+ pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
+ pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0);
+ pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1);
+ pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus);
+ pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
+ pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0);
+ pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1);
+ pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus);
+ pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
+ pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0);
+ pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1);
for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
if ((data->pestA[i] >> 63) == 0 &&
(data->pestB[i] >> 63) == 0)
continue;
- pr_info(" PE[%3d] PESTA = 0x%016llx\n", i, data->pestA[i]);
- pr_info(" PESTB = 0x%016llx\n", data->pestB[i]);
+
+ pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]);
+ pr_info(" PESTB: %016llx\n", data->pestB[i]);
+ }
+}
+
+static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
+ struct OpalIoPhbErrorCommon *common)
+{
+ struct OpalIoPhb3ErrorData *data;
+ int i;
+
+ data = (struct OpalIoPhb3ErrorData*)common;
+ pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n",
+ hose->global_number, common->version);
+
+ pr_info(" brdgCtl: %08x\n", data->brdgCtl);
+
+ pr_info(" portStatusReg: %08x\n", data->portStatusReg);
+ pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus);
+ pr_info(" busAgentStatus: %08x\n", data->busAgentStatus);
+
+ pr_info(" deviceStatus: %08x\n", data->deviceStatus);
+ pr_info(" slotStatus: %08x\n", data->slotStatus);
+ pr_info(" linkStatus: %08x\n", data->linkStatus);
+ pr_info(" devCmdStatus: %08x\n", data->devCmdStatus);
+ pr_info(" devSecStatus: %08x\n", data->devSecStatus);
+
+ pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus);
+ pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus);
+ pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus);
+ pr_info(" tlpHdr1: %08x\n", data->tlpHdr1);
+ pr_info(" tlpHdr2: %08x\n", data->tlpHdr2);
+ pr_info(" tlpHdr3: %08x\n", data->tlpHdr3);
+ pr_info(" tlpHdr4: %08x\n", data->tlpHdr4);
+ pr_info(" sourceId: %08x\n", data->sourceId);
+ pr_info(" errorClass: %016llx\n", data->errorClass);
+ pr_info(" correlator: %016llx\n", data->correlator);
+
+ pr_info(" nFir: %016llx\n", data->nFir);
+ pr_info(" nFirMask: %016llx\n", data->nFirMask);
+ pr_info(" nFirWOF: %016llx\n", data->nFirWOF);
+ pr_info(" PhbPlssr: %016llx\n", data->phbPlssr);
+ pr_info(" PhbCsr: %016llx\n", data->phbCsr);
+ pr_info(" lemFir: %016llx\n", data->lemFir);
+ pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask);
+ pr_info(" lemWOF: %016llx\n", data->lemWOF);
+ pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus);
+ pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus);
+ pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0);
+ pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1);
+ pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus);
+ pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
+ pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0);
+ pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1);
+ pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus);
+ pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
+ pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0);
+ pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1);
+ pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus);
+ pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
+ pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0);
+ pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1);
+
+ for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
+ if ((data->pestA[i] >> 63) == 0 &&
+ (data->pestB[i] >> 63) == 0)
+ continue;
+
+ pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]);
+ pr_info(" PESTB: %016llx\n", data->pestB[i]);
}
}
-static void pnv_pci_dump_phb_diag_data(struct pnv_phb *phb)
+void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+ unsigned char *log_buff)
{
- switch(phb->model) {
- case PNV_PHB_MODEL_P7IOC:
- pnv_pci_dump_p7ioc_diag_data(phb);
+ struct OpalIoPhbErrorCommon *common;
+
+ if (!hose || !log_buff)
+ return;
+
+ common = (struct OpalIoPhbErrorCommon *)log_buff;
+ switch (common->ioType) {
+ case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
+ pnv_pci_dump_p7ioc_diag_data(hose, common);
+ break;
+ case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
+ pnv_pci_dump_phb3_diag_data(hose, common);
break;
default:
- pr_warning("PCI %d: Can't decode this PHB diag data\n",
- phb->hose->global_number);
+ pr_warn("%s: Unrecognized ioType %d\n",
+ __func__, common->ioType);
}
}
@@ -222,7 +302,7 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
* with the normal errors generated when probing empty slots
*/
if (has_diag)
- pnv_pci_dump_phb_diag_data(phb);
+ pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
else
pr_warning("PCI %d: No diag data available\n",
phb->hose->global_number);
@@ -484,7 +564,8 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
{
tbl->it_blocksize = 16;
tbl->it_base = (unsigned long)tce_mem;
- tbl->it_offset = dma_offset >> IOMMU_PAGE_SHIFT;
+ tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+ tbl->it_offset = dma_offset >> tbl->it_page_shift;
tbl->it_index = 0;
tbl->it_size = tce_size >> 3;
tbl->it_busno = 0;
@@ -536,7 +617,7 @@ static void pnv_pci_dma_fallback_setup(struct pci_controller *hose,
pdn->iommu_table = pnv_pci_setup_bml_iommu(hose);
if (!pdn->iommu_table)
return;
- set_iommu_table_base(&pdev->dev, pdn->iommu_table);
+ set_iommu_table_base_and_group(&pdev->dev, pdn->iommu_table);
}
static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
@@ -657,3 +738,32 @@ void __init pnv_pci_init(void)
ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs;
#endif
}
+
+static int tce_iommu_bus_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ return iommu_add_device(dev);
+ case BUS_NOTIFY_DEL_DEVICE:
+ if (dev->iommu_group)
+ iommu_del_device(dev);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static struct notifier_block tce_iommu_bus_nb = {
+ .notifier_call = tce_iommu_bus_notifier,
+};
+
+static int __init tce_iommu_bus_notifier_init(void)
+{
+ bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
+ return 0;
+}
+
+subsys_initcall_sync(tce_iommu_bus_notifier_init);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 1ed8d5f40f5a..13f1942a9a5f 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -176,6 +176,7 @@ struct pnv_phb {
union {
unsigned char blob[PNV_PCI_DIAG_BUF_SIZE];
struct OpalIoP7IOCPhbErrorData p7ioc;
+ struct OpalIoPhb3ErrorData phb3;
struct OpalIoP7IOCErrorData hub_diag;
} diag;
@@ -186,6 +187,8 @@ extern struct pci_ops pnv_pci_ops;
extern struct pnv_eeh_ops ioda_eeh_ops;
#endif
+void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+ unsigned char *log_buff);
int pnv_pci_cfg_read(struct device_node *dn,
int where, int size, u32 *val);
int pnv_pci_cfg_write(struct device_node *dn,
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index e17fa1432d80..a0bca05e26b0 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -143,7 +143,7 @@ static void _dump_areas(unsigned int spe_id, unsigned long priv2,
pr_debug("%s:%d: shadow: %lxh\n", func, line, shadow);
}
-inline u64 ps3_get_spe_id(void *arg)
+u64 ps3_get_spe_id(void *arg)
{
return spu_pdata(arg)->spe_id;
}
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 62b4f8025de0..e66643250fee 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -34,7 +34,7 @@ config PPC_SPLPAR
config PSERIES_MSI
bool
- depends on PCI_MSI && EEH
+ depends on PCI_MSI && PPC_PSERIES && EEH
default y
config PSERIES_ENERGY
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index f253361552ae..e0299183ae54 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -486,9 +486,10 @@ static void iommu_table_setparms(struct pci_controller *phb,
memset((void *)tbl->it_base, 0, *sizep);
tbl->it_busno = phb->bus->number;
+ tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
/* Units of tce entries */
- tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT;
+ tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift;
/* Test if we are going over 2GB of DMA space */
if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
@@ -499,7 +500,7 @@ static void iommu_table_setparms(struct pci_controller *phb,
phb->dma_window_base_cur += phb->dma_window_size;
/* Set the tce table size - measured in entries */
- tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT;
+ tbl->it_size = phb->dma_window_size >> tbl->it_page_shift;
tbl->it_index = 0;
tbl->it_blocksize = 16;
@@ -537,11 +538,12 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb,
of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);
tbl->it_busno = phb->bus->number;
+ tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
tbl->it_base = 0;
tbl->it_blocksize = 16;
tbl->it_type = TCE_PCI;
- tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
- tbl->it_size = size >> IOMMU_PAGE_SHIFT;
+ tbl->it_offset = offset >> tbl->it_page_shift;
+ tbl->it_size = size >> tbl->it_page_shift;
}
static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
@@ -687,7 +689,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
iommu_table_setparms(phb, dn, tbl);
PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
iommu_register_group(tbl, pci_domain_nr(phb->bus), 0);
- set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
+ set_iommu_table_base_and_group(&dev->dev,
+ PCI_DN(dn)->iommu_table);
return;
}
@@ -699,7 +702,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
dn = dn->parent;
if (dn && PCI_DN(dn))
- set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
+ set_iommu_table_base_and_group(&dev->dev,
+ PCI_DN(dn)->iommu_table);
else
printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
pci_name(dev));
@@ -1193,7 +1197,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
pr_debug(" found DMA window, table: %p\n", pci->iommu_table);
}
- set_iommu_table_base(&dev->dev, pci->iommu_table);
+ set_iommu_table_base_and_group(&dev->dev, pci->iommu_table);
}
static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 4fca3def9db9..b02af9ef3ff6 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -92,7 +92,7 @@ void vpa_init(int cpu)
* PAPR says this feature is SLB-Buffer but firmware never
* reports that. All SPLPAR support SLB shadow buffer.
*/
- addr = __pa(&slb_shadow[cpu]);
+ addr = __pa(paca[cpu].slb_shadow_ptr);
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
ret = register_slb_shadow(hwcpu, addr);
if (ret)
@@ -153,7 +153,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Make pHyp happy */
if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
- hpte_r &= ~_PAGE_COHERENT;
+ hpte_r &= ~HPTE_R_M;
+
if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
flags |= H_COALESCE_CAND;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index c1f190858701..49cd16e6b450 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -72,7 +72,7 @@
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
-unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT);
+unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
EXPORT_SYMBOL(CMO_PageSize);
int fwnmi_active; /* TRUE if an FWNMI handler is present */
@@ -569,7 +569,7 @@ void pSeries_cmo_feature_init(void)
{
char *ptr, *key, *value, *end;
int call_status;
- int page_order = IOMMU_PAGE_SHIFT;
+ int page_order = IOMMU_PAGE_SHIFT_4K;
pr_debug(" -> fw_cmo_feature_init()\n");
spin_lock(&rtas_data_buf_lock);
diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c
index 62cb527493e7..9a15e5b39bb8 100644
--- a/arch/powerpc/platforms/wsp/wsp_pci.c
+++ b/arch/powerpc/platforms/wsp/wsp_pci.c
@@ -260,7 +260,7 @@ static int tce_build_wsp(struct iommu_table *tbl, long index, long npages,
*tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n",
- tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT);
+ tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT_4K);
uaddr += TCE_PAGE_SIZE;
index++;
@@ -381,8 +381,9 @@ static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb,
/* Init bits and pieces */
tbl->table.it_blocksize = 16;
- tbl->table.it_offset = addr >> IOMMU_PAGE_SHIFT;
- tbl->table.it_size = size >> IOMMU_PAGE_SHIFT;
+ tbl->table.it_page_shift = IOMMU_PAGE_SHIFT_4K;
+ tbl->table.it_offset = addr >> tbl->table.it_page_shift;
+ tbl->table.it_size = size >> tbl->table.it_page_shift;
/*
* It's already blank but we clear it anyway.
@@ -449,8 +450,8 @@ static void wsp_pci_dma_dev_setup(struct pci_dev *pdev)
if (table) {
pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n",
pci_name(pdev),
- table->table.it_offset << IOMMU_PAGE_SHIFT,
- (table->table.it_offset << IOMMU_PAGE_SHIFT)
+ table->table.it_offset << IOMMU_PAGE_SHIFT_4K,
+ (table->table.it_offset << IOMMU_PAGE_SHIFT_4K)
+ phb->dma32_region_size - 1);
archdata->dma_data.iommu_table_base = &table->table;
return;
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index 13ec968be4c7..7baa70d6dc01 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -19,7 +19,7 @@ config PPC_MSI_BITMAP
default y if MPIC
default y if FSL_PCI
default y if PPC4xx_MSI
- default y if POWERNV_MSI
+ default y if PPC_POWERNV
source "arch/powerpc/sysdev/xics/Kconfig"
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 4dfd61df8aba..7066e5262468 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -122,7 +122,7 @@ static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
* address width of the SoC such that we can address any internal
* SoC address from across PCI if needed
*/
- if ((dev->bus == &pci_bus_type) &&
+ if ((dev_is_pci(dev)) &&
dma_mask >= DMA_BIT_MASK(MAX_PHYS_ADDR_BITS)) {
set_dma_ops(dev, &dma_direct_ops);
set_dma_offset(dev, pci64_dma_offset);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index af9d3469fb99..a90731b3d44a 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2051,6 +2051,10 @@ static void dump_one_paca(int cpu)
DUMP(p, stab_addr, "lx");
#endif
DUMP(p, emergency_sp, "p");
+#ifdef CONFIG_PPC_BOOK3S_64
+ DUMP(p, mc_emergency_sp, "p");
+ DUMP(p, in_mce, "x");
+#endif
DUMP(p, data_offset, "lx");
DUMP(p, hw_cpu_id, "x");
DUMP(p, cpu_start, "x");
diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index 9ef32b3df91f..590214ba736c 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -133,7 +133,7 @@ static int wf_lm75_probe(struct i2c_client *client,
lm->inited = 0;
lm->ds1775 = ds1775;
lm->i2c = client;
- lm->sens.name = (char *)name; /* XXX fix constness in structure */
+ lm->sens.name = name;
lm->sens.ops = &wf_lm75_ops;
i2c_set_clientdata(client, lm);
diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c
index 945a25b2f31e..87e439b10318 100644
--- a/drivers/macintosh/windfarm_max6690_sensor.c
+++ b/drivers/macintosh/windfarm_max6690_sensor.c
@@ -95,7 +95,7 @@ static int wf_max6690_probe(struct i2c_client *client,
}
max->i2c = client;
- max->sens.name = (char *)name; /* XXX fix constness in structure */
+ max->sens.name = name;
max->sens.ops = &wf_max6690_ops;
i2c_set_clientdata(client, max);
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 952d795230a4..d04dbaba83dd 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1276,18 +1276,21 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
{
struct net_device *netdev = dev_get_drvdata(&vdev->dev);
struct ibmveth_adapter *adapter;
+ struct iommu_table *tbl;
unsigned long ret;
int i;
int rxqentries = 1;
+ tbl = get_iommu_table_base(&vdev->dev);
+
/* netdev inits at probe time along with the structures we need below*/
if (netdev == NULL)
- return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT);
+ return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl);
adapter = netdev_priv(netdev);
ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
- ret += IOMMU_PAGE_ALIGN(netdev->mtu);
+ ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);
for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
/* add the size of the active receive buffers */
@@ -1295,11 +1298,12 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
ret +=
adapter->rx_buff_pool[i].size *
IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
- buff_size);
+ buff_size, tbl);
rxqentries += adapter->rx_buff_pool[i].size;
}
/* add the size of the receive queue entries */
- ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry));
+ ret += IOMMU_PAGE_ALIGN(
+ rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl);
return ret;
}
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index bdae7a04af75..a84788ba662c 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -81,7 +81,7 @@ static int tce_iommu_enable(struct tce_container *container)
* enforcing the limit based on the max that the guest can map.
*/
down_write(&current->mm->mmap_sem);
- npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT;
+ npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
locked = current->mm->locked_vm + npages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
@@ -110,7 +110,7 @@ static void tce_iommu_disable(struct tce_container *container)
down_write(&current->mm->mmap_sem);
current->mm->locked_vm -= (container->tbl->it_size <<
- IOMMU_PAGE_SHIFT) >> PAGE_SHIFT;
+ IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
up_write(&current->mm->mmap_sem);
}
@@ -174,8 +174,8 @@ static long tce_iommu_ioctl(void *iommu_data,
if (info.argsz < minsz)
return -EINVAL;
- info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT;
- info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT;
+ info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K;
+ info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K;
info.flags = 0;
if (copy_to_user((void __user *)arg, &info, minsz))
@@ -205,8 +205,8 @@ static long tce_iommu_ioctl(void *iommu_data,
VFIO_DMA_MAP_FLAG_WRITE))
return -EINVAL;
- if ((param.size & ~IOMMU_PAGE_MASK) ||
- (param.vaddr & ~IOMMU_PAGE_MASK))
+ if ((param.size & ~IOMMU_PAGE_MASK_4K) ||
+ (param.vaddr & ~IOMMU_PAGE_MASK_4K))
return -EINVAL;
/* iova is checked by the IOMMU API */
@@ -220,17 +220,17 @@ static long tce_iommu_ioctl(void *iommu_data,
if (ret)
return ret;
- for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) {
+ for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) {
ret = iommu_put_tce_user_mode(tbl,
- (param.iova >> IOMMU_PAGE_SHIFT) + i,
+ (param.iova >> IOMMU_PAGE_SHIFT_4K) + i,
tce);
if (ret)
break;
- tce += IOMMU_PAGE_SIZE;
+ tce += IOMMU_PAGE_SIZE_4K;
}
if (ret)
iommu_clear_tces_and_put_pages(tbl,
- param.iova >> IOMMU_PAGE_SHIFT, i);
+ param.iova >> IOMMU_PAGE_SHIFT_4K, i);
iommu_flush_tce(tbl);
@@ -256,17 +256,17 @@ static long tce_iommu_ioctl(void *iommu_data,
if (param.flags)
return -EINVAL;
- if (param.size & ~IOMMU_PAGE_MASK)
+ if (param.size & ~IOMMU_PAGE_MASK_4K)
return -EINVAL;
ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
- param.size >> IOMMU_PAGE_SHIFT);
+ param.size >> IOMMU_PAGE_SHIFT_4K);
if (ret)
return ret;
ret = iommu_clear_tces_and_put_pages(tbl,
- param.iova >> IOMMU_PAGE_SHIFT,
- param.size >> IOMMU_PAGE_SHIFT);
+ param.iova >> IOMMU_PAGE_SHIFT_4K,
+ param.size >> IOMMU_PAGE_SHIFT_4K);
iommu_flush_tce(tbl);
return ret;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1cedd000cf29..a7b4e310bf42 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1842,7 +1842,7 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
}
#endif
-#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+#ifdef CONFIG_NUMA_BALANCING
unsigned long change_prot_numa(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
#endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index eca4a3129129..9f73b29d304d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -613,7 +613,7 @@ static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
return 0;
}
-#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+#ifdef CONFIG_NUMA_BALANCING
/*
* This is used to mark a range of virtual addresses to be inaccessible.
* These are later cleared by a NUMA hinting fault. Depending on these
@@ -627,7 +627,6 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
int nr_updated;
- BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
if (nr_updated)
@@ -641,7 +640,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
{
return 0;
}
-#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
+#endif /* CONFIG_NUMA_BALANCING */
/*
* Walk through page tables and collect pages to be migrated.