From b1576fec7f4dd4657694fefc97fda4cf28ec68e9 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 4 Feb 2014 16:04:35 +1100 Subject: powerpc: No need to use dot symbols when branching to a function binutils is smart enough to know that a branch to a function descriptor is actually a branch to the functions text address. Alan tells me that binutils has been doing this for 9 years. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/context_tracking.h | 4 ++-- arch/powerpc/include/asm/exception-64e.h | 6 +++--- arch/powerpc/include/asm/exception-64s.h | 2 +- arch/powerpc/include/asm/irqflags.h | 4 ++-- arch/powerpc/include/asm/ppc_asm.h | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/context_tracking.h b/arch/powerpc/include/asm/context_tracking.h index b6f5a33b8ee2..40014921ffff 100644 --- a/arch/powerpc/include/asm/context_tracking.h +++ b/arch/powerpc/include/asm/context_tracking.h @@ -2,9 +2,9 @@ #define _ASM_POWERPC_CONTEXT_TRACKING_H #ifdef CONFIG_CONTEXT_TRACKING -#define SCHEDULE_USER bl .schedule_user +#define SCHEDULE_USER bl schedule_user #else -#define SCHEDULE_USER bl .schedule +#define SCHEDULE_USER bl schedule #endif #endif diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index a563d9afd179..a8b52b61043f 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -174,10 +174,10 @@ exc_##label##_book3e: mtlr r16; #define TLB_MISS_STATS_D(name) \ addi r9,r13,MMSTAT_DSTATS+name; \ - bl .tlb_stat_inc; + bl tlb_stat_inc; #define TLB_MISS_STATS_I(name) \ addi r9,r13,MMSTAT_ISTATS+name; \ - bl .tlb_stat_inc; + bl tlb_stat_inc; #define TLB_MISS_STATS_X(name) \ ld r8,PACA_EXTLB+EX_TLB_ESR(r13); \ cmpdi cr2,r8,-1; \ @@ -185,7 +185,7 @@ exc_##label##_book3e: addi r9,r13,MMSTAT_DSTATS+name; \ b 62f; \ 61: addi r9,r13,MMSTAT_ISTATS+name; \ -62: bl .tlb_stat_inc; +62: bl tlb_stat_inc; #define TLB_MISS_STATS_SAVE_INFO \ std r14,EX_TLB_ESR(r12); /* save ESR */ #define TLB_MISS_STATS_SAVE_INFO_BOLTED \ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index aeaa56cd9b54..8f35cd7d59cc 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -517,7 +517,7 @@ label##_relon_hv: \ #define DISABLE_INTS RECONCILE_IRQ_STATE(r10,r11) #define ADD_NVGPRS \ - bl .save_nvgprs + bl save_nvgprs #define RUNLATCH_ON \ BEGIN_FTR_SECTION \ diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index f51a5580bfd0..f62c056e75bf 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -36,8 +36,8 @@ * have to call a C function so call a wrapper that saves all the * C-clobbered registers. */ -#define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_on) -#define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(.trace_hardirqs_off) +#define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on) +#define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off) /* * This is used by assembly code to soft-disable interrupts first and diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 6586a40a46ce..3128ba3ba7a0 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -57,7 +57,7 @@ BEGIN_FW_FTR_SECTION; \ LDX_BE r10,0,r10; /* get log write index */ \ cmpd cr1,r11,r10; \ beq+ cr1,33f; \ - bl .accumulate_stolen_time; \ + bl accumulate_stolen_time; \ ld r12,_MSR(r1); \ andi. r10,r12,MSR_PR; /* Restore cr0 (coming from user) */ \ 33: \ -- cgit v1.2.3 From a0e971ffb9d9dae3b9892fb548bd2497db758f60 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 4 Feb 2014 16:06:25 +1100 Subject: powerpc: Remove _INIT_GLOBAL(), _STATIC() and _INIT_STATIC() Now there are no users of _INIT_GLOBAL(), _STATIC() and _INIT_STATIC() we can remove them. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/ppc_asm.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 3128ba3ba7a0..35b23a6584cb 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -209,20 +209,6 @@ name: \ .type GLUE(.,name),@function; \ GLUE(.,name): -#define _INIT_GLOBAL(name) \ - __REF; \ - .align 2 ; \ - .globl name; \ - .globl GLUE(.,name); \ - .section ".opd","aw"; \ -name: \ - .quad GLUE(.,name); \ - .quad .TOC.@tocbase; \ - .quad 0; \ - .previous; \ - .type GLUE(.,name),@function; \ -GLUE(.,name): - #define _KPROBE(name) \ .section ".kprobes.text","a"; \ .align 2 ; \ @@ -237,30 +223,6 @@ name: \ .type GLUE(.,name),@function; \ GLUE(.,name): -#define _STATIC(name) \ - .section ".text"; \ - .align 2 ; \ - .section ".opd","aw"; \ -name: \ - .quad GLUE(.,name); \ - .quad .TOC.@tocbase; \ - .quad 0; \ - .previous; \ - .type GLUE(.,name),@function; \ -GLUE(.,name): - -#define _INIT_STATIC(name) \ - __REF; \ - .align 2 ; \ - .section ".opd","aw"; \ -name: \ - .quad GLUE(.,name); \ - .quad .TOC.@tocbase; \ - .quad 0; \ - .previous; \ - .type GLUE(.,name),@function; \ -GLUE(.,name): - #else /* 32-bit */ #define _ENTRY(n) \ -- cgit v1.2.3 From c1fb019477c27bfe309be282d178a08e56f05249 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 4 Feb 2014 16:07:01 +1100 Subject: powerpc: Create DOTSYM to wrap dot symbol usage There are a few places we have to use dot symbols with the current ABI - the syscall table and the kvm hcall table. Wrap both of these with a new macro called DOTSYM so it will be easy to transition away from dot symbols in a future ABI. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/ppc_asm.h | 2 ++ arch/powerpc/include/asm/systbl.h | 6 +++--- arch/powerpc/kernel/systbl.S | 12 ++++++------ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 28 ++++++++++++++-------------- 4 files changed, 25 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 35b23a6584cb..61992d8f99df 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -223,6 +223,8 @@ name: \ .type GLUE(.,name),@function; \ GLUE(.,name): +#define DOTSYM(a) GLUE(.,a) + #else /* 32-bit */ #define _ENTRY(n) \ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 3ddf70276706..ac062f504736 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -62,7 +62,7 @@ COMPAT_SYS_SPU(fcntl) SYSCALL(ni_syscall) SYSCALL_SPU(setpgid) SYSCALL(ni_syscall) -SYSX(sys_ni_syscall,sys_olduname, sys_olduname) +SYSX(sys_ni_syscall,sys_olduname,sys_olduname) SYSCALL_SPU(umask) SYSCALL_SPU(chroot) COMPAT_SYS(ustat) @@ -258,7 +258,7 @@ SYSCALL_SPU(tgkill) COMPAT_SYS_SPU(utimes) COMPAT_SYS_SPU(statfs64) COMPAT_SYS_SPU(fstatfs64) -SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64) +SYSX(sys_ni_syscall,ppc_fadvise64_64,ppc_fadvise64_64) PPC_SYS_SPU(rtas) OLDSYS(debug_setcontext) SYSCALL(ni_syscall) @@ -295,7 +295,7 @@ SYSCALL_SPU(mkdirat) SYSCALL_SPU(mknodat) SYSCALL_SPU(fchownat) COMPAT_SYS_SPU(futimesat) -SYSX_SPU(sys_newfstatat, sys_fstatat64, sys_fstatat64) +SYSX_SPU(sys_newfstatat,sys_fstatat64,sys_fstatat64) SYSCALL_SPU(unlinkat) SYSCALL_SPU(renameat) SYSCALL_SPU(linkat) diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 75822f97bfea..895c50ca943c 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -17,12 +17,12 @@ #include #ifdef CONFIG_PPC64 -#define SYSCALL(func) .llong .sys_##func,.sys_##func -#define COMPAT_SYS(func) .llong .sys_##func,.compat_sys_##func -#define PPC_SYS(func) .llong .ppc_##func,.ppc_##func -#define OLDSYS(func) .llong .sys_ni_syscall,.sys_ni_syscall -#define SYS32ONLY(func) .llong .sys_ni_syscall,.compat_sys_##func -#define SYSX(f, f3264, f32) .llong .f,.f3264 +#define SYSCALL(func) .llong DOTSYM(sys_##func),DOTSYM(sys_##func) +#define COMPAT_SYS(func) .llong DOTSYM(sys_##func),DOTSYM(compat_sys_##func) +#define PPC_SYS(func) .llong DOTSYM(ppc_##func),DOTSYM(ppc_##func) +#define OLDSYS(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall) +#define SYS32ONLY(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func) +#define SYSX(f, f3264, f32) .llong DOTSYM(f),DOTSYM(f3264) #else #define SYSCALL(func) .long sys_##func #define COMPAT_SYS(func) .long sys_##func diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 7cfabe3881d8..e9593f58a501 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1795,16 +1795,16 @@ hcall_real_fallback: .globl hcall_real_table hcall_real_table: .long 0 /* 0 - unused */ - .long .kvmppc_h_remove - hcall_real_table - .long .kvmppc_h_enter - hcall_real_table - .long .kvmppc_h_read - hcall_real_table + .long DOTSYM(kvmppc_h_remove) - hcall_real_table + .long DOTSYM(kvmppc_h_enter) - hcall_real_table + .long DOTSYM(kvmppc_h_read) - hcall_real_table .long 0 /* 0x10 - H_CLEAR_MOD */ .long 0 /* 0x14 - H_CLEAR_REF */ - .long .kvmppc_h_protect - hcall_real_table - .long .kvmppc_h_get_tce - hcall_real_table - .long .kvmppc_h_put_tce - hcall_real_table + .long DOTSYM(kvmppc_h_protect) - hcall_real_table + .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table + .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table .long 0 /* 0x24 - H_SET_SPRG0 */ - .long .kvmppc_h_set_dabr - hcall_real_table + .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table .long 0 /* 0x2c */ .long 0 /* 0x30 */ .long 0 /* 0x34 */ @@ -1820,11 +1820,11 @@ hcall_real_table: .long 0 /* 0x5c */ .long 0 /* 0x60 */ #ifdef CONFIG_KVM_XICS - .long .kvmppc_rm_h_eoi - hcall_real_table - .long .kvmppc_rm_h_cppr - hcall_real_table - .long .kvmppc_rm_h_ipi - hcall_real_table + .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table + .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table + .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table .long 0 /* 0x70 - H_IPOLL */ - .long .kvmppc_rm_h_xirr - hcall_real_table + .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table #else .long 0 /* 0x64 - H_EOI */ .long 0 /* 0x68 - H_CPPR */ @@ -1858,7 +1858,7 @@ hcall_real_table: .long 0 /* 0xd4 */ .long 0 /* 0xd8 */ .long 0 /* 0xdc */ - .long .kvmppc_h_cede - hcall_real_table + .long DOTSYM(kvmppc_h_cede) - hcall_real_table .long 0 /* 0xe4 */ .long 0 /* 0xe8 */ .long 0 /* 0xec */ @@ -1875,11 +1875,11 @@ hcall_real_table: .long 0 /* 0x118 */ .long 0 /* 0x11c */ .long 0 /* 0x120 */ - .long .kvmppc_h_bulk_remove - hcall_real_table + .long DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table .long 0 /* 0x128 */ .long 0 /* 0x12c */ .long 0 /* 0x130 */ - .long .kvmppc_h_set_xdabr - hcall_real_table + .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table hcall_real_table_end: ignore_hdec: -- cgit v1.2.3 From 7167af7cebedc7c2051184fef0e165aeb67d0b9d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 4 Feb 2014 16:07:20 +1100 Subject: powerpc: Remove function descriptors and dot symbols on new ABI ABIv2 doesn't have function descriptors or dot symbols. One new thing it does add is a function global and a local entry point, so add that to our _GLOBAL macro. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/ftrace.h | 2 ++ arch/powerpc/include/asm/linkage.h | 2 ++ arch/powerpc/include/asm/ppc_asm.h | 22 ++++++++++++++++++++++ 3 files changed, 26 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 169d039ed402..e3661872fbea 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -61,6 +61,7 @@ struct dyn_arch_ftrace { #endif #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) +#if !defined(_CALL_ELF) || _CALL_ELF != 2 #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { @@ -72,6 +73,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name */ return !strcmp(sym + 4, name + 3); } +#endif #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 && !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_FTRACE */ diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h index b36f650a13ff..e3ad5c72724a 100644 --- a/arch/powerpc/include/asm/linkage.h +++ b/arch/powerpc/include/asm/linkage.h @@ -2,6 +2,7 @@ #define _ASM_POWERPC_LINKAGE_H #ifdef CONFIG_PPC64 +#if !defined(_CALL_ELF) || _CALL_ELF != 2 #define cond_syscall(x) \ asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \ "\t.weak ." #x "\n\t.set ." #x ", .sys_ni_syscall\n") @@ -9,5 +10,6 @@ asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) #endif +#endif #endif /* _ASM_POWERPC_LINKAGE_H */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 61992d8f99df..5394d41a7140 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -192,6 +192,26 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define __STK_PARAM(i) (48 + ((i)-3)*8) #define STK_PARAM(i) __STK_PARAM(__REG_##i) +#if defined(_CALL_ELF) && _CALL_ELF == 2 + +#define _GLOBAL(name) \ + .section ".text"; \ + .align 2 ; \ + .type name,@function; \ + .globl name; \ +name: + +#define _KPROBE(name) \ + .section ".kprobes.text","a"; \ + .align 2 ; \ + .type name,@function; \ + .globl name; \ +name: + +#define DOTSYM(a) a + +#else + #define XGLUE(a,b) a##b #define GLUE(a,b) XGLUE(a,b) @@ -225,6 +245,8 @@ GLUE(.,name): #define DOTSYM(a) GLUE(.,a) +#endif + #else /* 32-bit */ #define _ENTRY(n) \ -- cgit v1.2.3 From c71b7eff426fa7d8fd33e0964a7f79a3b41faff9 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 4 Feb 2014 16:09:15 +1100 Subject: powerpc: Add ABIv2 support to ppc_function_entry Skip over the well known global entry point code for ABIv2. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/code-patching.h | 40 ++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 97e02f985df8..37991e154ef8 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -42,15 +42,47 @@ void __patch_exception(int exc, unsigned long addr); } while (0) #endif +#define OP_RT_RA_MASK 0xffff0000UL +#define LIS_R2 0x3c020000UL +#define ADDIS_R2_R12 0x3c4c0000UL +#define ADDI_R2_R2 0x38420000UL + static inline unsigned long ppc_function_entry(void *func) { -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 + u32 *insn = func; + + /* + * A PPC64 ABIv2 function may have a local and a global entry + * point. We need to use the local entry point when patching + * functions, so identify and step over the global entry point + * sequence. + * + * The global entry point sequence is always of the form: + * + * addis r2,r12,XXXX + * addi r2,r2,XXXX + * + * A linker optimisation may convert the addis to lis: + * + * lis r2,XXXX + * addi r2,r2,XXXX + */ + if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || + ((*insn & OP_RT_RA_MASK) == LIS_R2)) && + ((*(insn+1) & OP_RT_RA_MASK) == ADDI_R2_R2)) + return (unsigned long)(insn + 2); + else + return (unsigned long)func; +#else /* - * On PPC64 the function pointer actually points to the function's - * descriptor. The first entry in the descriptor is the address - * of the function text. + * On PPC64 ABIv1 the function pointer actually points to the + * function's descriptor. The first entry in the descriptor is the + * address of the function text. */ return ((func_descr_t *)func)->entry; +#endif #else return (unsigned long)func; #endif -- cgit v1.2.3 From b37c10d128a2fa3256d4e67c184177270eac4b86 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 4 Feb 2014 16:09:02 +1100 Subject: powerpc: Fix ABIv2 issues with stack offsets in assembly code Fix STK_PARAM and use it instead of hardcoding ABIv1 offsets. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/ppc_asm.h | 4 ++++ arch/powerpc/lib/copypage_power7.S | 8 ++++---- arch/powerpc/lib/copyuser_power7.S | 24 ++++++++++++------------ arch/powerpc/lib/memcpy_64.S | 8 ++++---- arch/powerpc/lib/memcpy_power7.S | 20 ++++++++++---------- 5 files changed, 34 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 5394d41a7140..3185d11b6691 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -189,7 +189,11 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define __STK_REG(i) (112 + ((i)-14)*8) #define STK_REG(i) __STK_REG(__REG_##i) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define __STK_PARAM(i) (32 + ((i)-3)*8) +#else #define __STK_PARAM(i) (48 + ((i)-3)*8) +#endif #define STK_PARAM(i) __STK_PARAM(__REG_##i) #if defined(_CALL_ELF) && _CALL_ELF == 2 diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index 0f1e2398f83c..affc6d308e13 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -56,15 +56,15 @@ _GLOBAL(copypage_power7) #ifdef CONFIG_ALTIVEC mflr r0 - std r3,48(r1) - std r4,56(r1) + std r3,STK_PARAM(R3)(r1) + std r4,STK_PARAM(R4)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) bl enter_vmx_copy cmpwi r3,0 ld r0,STACKFRAMESIZE+16(r1) - ld r3,STACKFRAMESIZE+48(r1) - ld r4,STACKFRAMESIZE+56(r1) + ld r3,STACKFRAMESIZE+STK_PARAM(R3)(r1) + ld r4,STACKFRAMESIZE+STK_PARAM(R4)(r1) mtlr r0 li r0,(PAGE_SIZE/128) diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 62f0540418b9..db0fcbcc1d60 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -85,9 +85,9 @@ .Lexit: addi r1,r1,STACKFRAMESIZE .Ldo_err1: - ld r3,48(r1) - ld r4,56(r1) - ld r5,64(r1) + ld r3,STK_PARAM(R3)(r1) + ld r4,STK_PARAM(R4)(r1) + ld r5,STK_PARAM(R5)(r1) b __copy_tofrom_user_base @@ -96,18 +96,18 @@ _GLOBAL(__copy_tofrom_user_power7) cmpldi r5,16 cmpldi cr1,r5,4096 - std r3,48(r1) - std r4,56(r1) - std r5,64(r1) + std r3,STK_PARAM(R3)(r1) + std r4,STK_PARAM(R4)(r1) + std r5,STK_PARAM(R5)(r1) blt .Lshort_copy bgt cr1,.Lvmx_copy #else cmpldi r5,16 - std r3,48(r1) - std r4,56(r1) - std r5,64(r1) + std r3,STK_PARAM(R3)(r1) + std r4,STK_PARAM(R4)(r1) + std r5,STK_PARAM(R5)(r1) blt .Lshort_copy #endif @@ -298,9 +298,9 @@ err1; stb r0,0(r3) bl enter_vmx_usercopy cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) - ld r3,STACKFRAMESIZE+48(r1) - ld r4,STACKFRAMESIZE+56(r1) - ld r5,STACKFRAMESIZE+64(r1) + ld r3,STACKFRAMESIZE+STK_PARAM(R3)(r1) + ld r4,STACKFRAMESIZE+STK_PARAM(R4)(r1) + ld r5,STACKFRAMESIZE+STK_PARAM(R5)(r1) mtlr r0 /* diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 72ad055168a3..01da956a52fb 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -12,7 +12,7 @@ .align 7 _GLOBAL(memcpy) BEGIN_FTR_SECTION - std r3,48(r1) /* save destination pointer for return value */ + std r3,STK_PARAM(R3)(r1) /* save destination pointer for return value */ FTR_SECTION_ELSE #ifndef SELFTEST b memcpy_power7 @@ -73,7 +73,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 2: bf cr7*4+3,3f lbz r9,8(r4) stb r9,0(r3) -3: ld r3,48(r1) /* return dest pointer */ +3: ld r3,STK_PARAM(R3)(r1) /* return dest pointer */ blr .Lsrc_unaligned: @@ -156,7 +156,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 2: bf cr7*4+3,3f rotldi r9,r9,8 stb r9,0(r3) -3: ld r3,48(r1) /* return dest pointer */ +3: ld r3,STK_PARAM(R3)(r1) /* return dest pointer */ blr .Ldst_unaligned: @@ -201,5 +201,5 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 3: bf cr7*4+3,4f lbz r0,0(r4) stb r0,0(r3) -4: ld r3,48(r1) /* return dest pointer */ +4: ld r3,STK_PARAM(R3)(r1) /* return dest pointer */ blr diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index bae3f214c2d9..87d8eeccd4b7 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -33,14 +33,14 @@ _GLOBAL(memcpy_power7) cmpldi r5,16 cmpldi cr1,r5,4096 - std r3,48(r1) + std r3,STK_PARAM(R1)(r1) blt .Lshort_copy bgt cr1,.Lvmx_copy #else cmpldi r5,16 - std r3,48(r1) + std r3,STK_PARAM(R1)(r1) blt .Lshort_copy #endif @@ -216,7 +216,7 @@ _GLOBAL(memcpy_power7) lbz r0,0(r4) stb r0,0(r3) -15: ld r3,48(r1) +15: ld r3,STK_PARAM(R3)(r1) blr .Lunwind_stack_nonvmx_copy: @@ -226,16 +226,16 @@ _GLOBAL(memcpy_power7) #ifdef CONFIG_ALTIVEC .Lvmx_copy: mflr r0 - std r4,56(r1) - std r5,64(r1) + std r4,STK_PARAM(R4)(r1) + std r5,STK_PARAM(R5)(r1) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) bl enter_vmx_copy cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) - ld r3,STACKFRAMESIZE+48(r1) - ld r4,STACKFRAMESIZE+56(r1) - ld r5,STACKFRAMESIZE+64(r1) + ld r3,STACKFRAMESIZE+STK_PARAM(R3)(r1) + ld r4,STACKFRAMESIZE+STK_PARAM(R4)(r1) + ld r5,STACKFRAMESIZE+STK_PARAM(R5)(r1) mtlr r0 /* @@ -447,7 +447,7 @@ _GLOBAL(memcpy_power7) stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - ld r3,48(r1) + ld r3,STK_PARAM(R3)(r1) b exit_vmx_copy /* tail call optimise */ .Lvmx_unaligned_copy: @@ -651,6 +651,6 @@ _GLOBAL(memcpy_power7) stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE - ld r3,48(r1) + ld r3,STK_PARAM(R3)(r1) b exit_vmx_copy /* tail call optimise */ #endif /* CONFiG_ALTIVEC */ -- cgit v1.2.3 From 6403105bfda4d6934b39aeb85ff818b185b42de8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 10 Mar 2014 10:52:17 +1100 Subject: powerpc/tm: Fix GOT save offset for ABIv2 The r2 TOC/GOT save offset is 40 on ABIv1 and 24 on ABIv2. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/ppc_asm.h | 2 ++ arch/powerpc/kernel/tm.S | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 3185d11b6691..2cc2511ff076 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -190,8 +190,10 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define STK_REG(i) __STK_REG(__REG_##i) #if defined(_CALL_ELF) && _CALL_ELF == 2 +#define STK_GOT 24 #define __STK_PARAM(i) (32 + ((i)-3)*8) #else +#define STK_GOT 40 #define __STK_PARAM(i) (48 + ((i)-3)*8) #endif #define STK_PARAM(i) __STK_PARAM(__REG_##i) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 27aad248c002..cf1027efca30 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -107,7 +107,7 @@ _GLOBAL(tm_reclaim) mflr r0 stw r6, 8(r1) std r0, 16(r1) - std r2, 40(r1) + std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ @@ -288,7 +288,7 @@ dont_backup_fp: ld r0, 16(r1) mtcr r4 mtlr r0 - ld r2, 40(r1) + ld r2, STK_GOT(r1) /* Load system default DSCR */ ld r4, DSCR_DEFAULT@toc(r2) @@ -311,7 +311,7 @@ _GLOBAL(__tm_recheckpoint) mflr r0 stw r5, 8(r1) std r0, 16(r1) - std r2, 40(r1) + std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. @@ -447,7 +447,7 @@ restore_gprs: ld r0, 16(r1) mtcr r4 mtlr r0 - ld r2, 40(r1) + ld r2, STK_GOT(r1) /* Load system default DSCR */ ld r4, DSCR_DEFAULT@toc(r2) -- cgit v1.2.3 From d51959d70ffc55d1c829e881a6121e6fbbfb29af Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 10 Mar 2014 12:51:58 +1100 Subject: powerpc/tracing: TRACE_WITH_FRAME_BUFFER creates invalid stack frames TRACE_WITH_FRAME_BUFFER creates 32 byte stack frames. On ppc64 ABIv1 this is too small and a callee could corrupt the stack by writing to the parameter save area (starting at offset 48). Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/irqflags.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index f62c056e75bf..e20eb95429a8 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -20,9 +20,9 @@ */ #define TRACE_WITH_FRAME_BUFFER(func) \ mflr r0; \ - stdu r1, -32(r1); \ + stdu r1, -STACK_FRAME_OVERHEAD(r1); \ std r0, 16(r1); \ - stdu r1, -32(r1); \ + stdu r1, -STACK_FRAME_OVERHEAD(r1); \ bl func; \ ld r1, 0(r1); \ ld r1, 0(r1); -- cgit v1.2.3 From 07de8377f7488f262f9694a1567ab93b4dda63bc Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 11 Mar 2014 12:15:27 +1100 Subject: powerpc: Fix ABIv2 issue with dereference_function_descriptor Don't try and dereference a function descriptor on ABIv2. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/sections.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index d0e784e0ff48..d1bb96d5a298 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -39,6 +39,7 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) (unsigned long)_stext < end; } +#if !defined(_CALL_ELF) || _CALL_ELF != 2 #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { @@ -49,6 +50,7 @@ static inline void *dereference_function_descriptor(void *ptr) ptr = p; return ptr; } +#endif #endif -- cgit v1.2.3 From 4edebbeae3085e71f75584b6582495459e2e6cb2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 18 Mar 2014 19:59:26 +1030 Subject: powerpc: Fix up TOC. for modules. The kernel resolved the '.TOC.' to a fake symbol, so we need to fix it up to point to our .toc section plus 0x8000. Signed-off-by: Rusty Russell --- arch/powerpc/include/asm/module.h | 1 + arch/powerpc/kernel/module_64.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 49fa55bfbac4..c9c7aaaf95f5 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -35,6 +35,7 @@ struct mod_arch_specific { #ifdef __powerpc64__ unsigned int stubs_section; /* Index of stubs section in module */ unsigned int toc_section; /* What section is the TOC? */ + bool toc_fixed; /* Have we fixed up .TOC.? */ #ifdef CONFIG_DYNAMIC_FTRACE unsigned long toc; unsigned long tramp; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index a8694d462079..f6544d7071d6 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -196,6 +196,24 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) } } +static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, + const char *strtab, + unsigned int symindex) +{ + unsigned int i, numsyms; + Elf64_Sym *syms; + + syms = (Elf64_Sym *)sechdrs[symindex].sh_addr; + numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym); + + for (i = 1; i < numsyms; i++) { + if (syms[i].st_shndx == SHN_UNDEF + && strcmp(strtab + syms[i].st_name, ".TOC.") == 0) + return &syms[i]; + } + return NULL; +} + int module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, char *secstrings, @@ -337,6 +355,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, DEBUGP("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); + + /* First time we're called, we can fix up .TOC. */ + if (!me->arch.toc_fixed) { + sym = find_dot_toc(sechdrs, strtab, symindex); + /* It's theoretically possible that a module doesn't want a + * .TOC. so don't fail it just for that. */ + if (sym) + sym->st_value = my_r2(sechdrs, me); + me->arch.toc_fixed = true; + } + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr -- cgit v1.2.3 From 169c7cee3131cdf5e2f2d2a6c722c7db0283bcd5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 3 Apr 2014 16:01:11 +1100 Subject: powerpc: Add _GLOBAL_TOC for ABIv2 assembly functions exported to modules If an assembly function that calls back into c code is exported to modules, we need to ensure r2 is setup correctly. There are only two places crazy enough to do it (two of which are my fault). Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/ppc_asm.h | 12 ++++++++++++ arch/powerpc/lib/copyuser_64.S | 2 +- arch/powerpc/lib/memcpy_64.S | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 2cc2511ff076..6400f1814fe8 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -207,6 +207,16 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) .globl name; \ name: +#define _GLOBAL_TOC(name) \ + .section ".text"; \ + .align 2 ; \ + .type name,@function; \ + .globl name; \ +name: \ +0: addis r2,r12,(.TOC.-0b)@ha; \ + addi r2,r2,(.TOC.-0b)@l; \ + .localentry name,.-name + #define _KPROBE(name) \ .section ".kprobes.text","a"; \ .align 2 ; \ @@ -235,6 +245,8 @@ name: \ .type GLUE(.,name),@function; \ GLUE(.,name): +#define _GLOBAL_TOC(name) _GLOBAL(name) + #define _KPROBE(name) \ .section ".kprobes.text","a"; \ .align 2 ; \ diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 596a285c0755..0860ee46013c 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -18,7 +18,7 @@ #endif .align 7 -_GLOBAL(__copy_tofrom_user) +_GLOBAL_TOC(__copy_tofrom_user) BEGIN_FTR_SECTION nop FTR_SECTION_ELSE diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 9d3960c16fde..bc9a2ca591c3 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -10,7 +10,7 @@ #include .align 7 -_GLOBAL(memcpy) +_GLOBAL_TOC(memcpy) BEGIN_FTR_SECTION std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */ FTR_SECTION_ELSE -- cgit v1.2.3 From 47f86b4e07afd4652ab0b092cbf493bf8b96559e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 3 Apr 2014 16:08:38 +1100 Subject: powerpc/kprobes: Fix ABIv2 issues with kprobe_lookup_name Use ppc_function_entry in places where we previously assumed function descriptors exist. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/kprobes.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 7b6feab6fd26..af15d4d8d604 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -30,6 +30,7 @@ #include #include #include +#include #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -56,9 +57,9 @@ typedef ppc_opcode_t kprobe_opcode_t; if ((colon = strchr(name, ':')) != NULL) { \ colon++; \ if (*colon != '\0' && *colon != '.') \ - addr = *(kprobe_opcode_t **)addr; \ + addr = (kprobe_opcode_t *)ppc_function_entry(addr); \ } else if (name[0] != '.') \ - addr = *(kprobe_opcode_t **)addr; \ + addr = (kprobe_opcode_t *)ppc_function_entry(addr); \ } else { \ char dot_name[KSYM_NAME_LEN]; \ dot_name[0] = '.'; \ -- cgit v1.2.3 From 83775b85668a85036973c71264a959236e7becbd Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 3 Apr 2014 20:00:43 +1100 Subject: powerpc/modules: Create is_module_trampoline() ftrace has way too much knowledge of our kernel module trampoline layout hidden inside it. Create is_module_trampoline() that can abstract this away inside the module loader code. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/module.h | 1 + arch/powerpc/kernel/module_64.c | 51 +++++++++++++++++++++++++++++++++++---- 2 files changed, 47 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index c9c7aaaf95f5..f2711f0eb873 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -78,6 +78,7 @@ struct mod_arch_specific { # endif /* MODULE */ #endif +bool is_module_trampoline(u32 *insns); struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 042360135260..4db5ecdc06e6 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,9 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) jump, actually, to reset r2 (TOC+0x8000). */ struct ppc64_stub_entry { - /* 28 byte jump instruction sequence (7 instructions) */ + /* 28 byte jump instruction sequence (7 instructions). We only + * need 6 instructions on ABIv2 but we always allocate 7 so + * so we don't have to modify the trampoline load instruction. */ u32 jump[7]; u32 unused; /* Data for the above code */ @@ -122,8 +125,8 @@ struct ppc64_stub_entry * end of the stub code, and patch the stub address (32-bits relative * to the TOC ptr, r2) into the stub. */ -static struct ppc64_stub_entry ppc64_stub = -{ .jump = { + +static u32 ppc64_stub_insns[] = { 0x3d620000, /* addis r11,r2, */ 0x396b0000, /* addi r11,r11, */ /* Save current r2 value in magic place on the stack. */ @@ -135,7 +138,45 @@ static struct ppc64_stub_entry ppc64_stub = #endif 0x7d8903a6, /* mtctr r12 */ 0x4e800420 /* bctr */ -} }; +}; + +#ifdef CONFIG_DYNAMIC_FTRACE + +static u32 ppc64_stub_mask[] = { + 0xffff0000, + 0xffff0000, + 0xffffffff, + 0xffffffff, +#if !defined(_CALL_ELF) || _CALL_ELF != 2 + 0xffffffff, +#endif + 0xffffffff, + 0xffffffff +}; + +bool is_module_trampoline(u32 *p) +{ + unsigned int i; + u32 insns[ARRAY_SIZE(ppc64_stub_insns)]; + + BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask)); + + if (probe_kernel_read(insns, p, sizeof(insns))) + return -EFAULT; + + for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { + u32 insna = insns[i]; + u32 insnb = ppc64_stub_insns[i]; + u32 mask = ppc64_stub_mask[i]; + + if ((insna & mask) != (insnb & mask)) + return false; + } + + return true; +} + +#endif /* Count how many different 24-bit relocations (different symbol, different addend) */ @@ -350,7 +391,7 @@ static inline int create_stub(Elf64_Shdr *sechdrs, { long reladdr; - *entry = ppc64_stub; + memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns)); /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); -- cgit v1.2.3 From dd9fa162505c07e1917c96a1a12ca117b1afe55a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 4 Apr 2014 15:58:42 +1100 Subject: powerpc/modules: Create module_trampoline_target() ftrace has way too much knowledge of our kernel module trampoline layout hidden inside it. Create module_trampoline_target() that gives the target address of a kernel module trampoline. Signed-off-by: Anton Blanchard --- arch/powerpc/include/asm/module.h | 2 ++ arch/powerpc/kernel/module_64.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index f2711f0eb873..dcfcad139bcc 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -79,6 +79,8 @@ struct mod_arch_specific { #endif bool is_module_trampoline(u32 *insns); +int module_trampoline_target(struct module *mod, u32 *trampoline, + unsigned long *target); struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 4db5ecdc06e6..ef349d077129 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -176,6 +176,35 @@ bool is_module_trampoline(u32 *p) return true; } +int module_trampoline_target(struct module *mod, u32 *trampoline, + unsigned long *target) +{ + u32 buf[2]; + u16 upper, lower; + long offset; + void *toc_entry; + + if (probe_kernel_read(buf, trampoline, sizeof(buf))) + return -EFAULT; + + upper = buf[0] & 0xffff; + lower = buf[1] & 0xffff; + + /* perform the addis/addi, both signed */ + offset = ((short)upper << 16) + (short)lower; + + /* + * Now get the address this trampoline jumps to. This + * is always 32 bytes into our trampoline stub. + */ + toc_entry = (void *)mod->arch.toc + offset + 32; + + if (probe_kernel_read(target, toc_entry, sizeof(*target))) + return -EFAULT; + + return 0; +} + #endif /* Count how many different 24-bit relocations (different symbol, -- cgit v1.2.3 From 9e0493756076692a8b6095fbee9c4f8dcbbe597a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 24 Apr 2014 18:00:07 +1000 Subject: powerpc/eeh: Remove EEH_PE_PHB_DEAD The PE state (for eeh_pe instance) EEH_PE_PHB_DEAD is duplicate to EEH_PE_ISOLATED. Originally, those PHBs (PHB PE) with EEH_PE_PHB_DEAD would be removed from the system. However, it's safe to replace that with EEH_PE_ISOLATED. The patch also clear EEH_PE_RECOVERING after fenced PHB has been handled, either failure or success. It makes the PHB PE state consistent with: PHB functions normally NONE PHB has been removed EEH_PE_ISOLATED PHB fenced, recovery in progress EEH_PE_ISOLATED | RECOVERING Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 1 - arch/powerpc/kernel/eeh.c | 10 ++-------- arch/powerpc/kernel/eeh_driver.c | 10 +++++----- 3 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d4dd41fb951b..a61b06f86d80 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -53,7 +53,6 @@ struct device_node; #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ -#define EEH_PE_PHB_DEAD (1 << 2) /* Dead PHB */ #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index e7b76a6bf150..f1676762f6de 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -232,7 +232,6 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; struct eeh_dev *edev, *tmp; - bool valid_cfg_log = true; /* * When the PHB is fenced or dead, it's pointless to collect @@ -240,12 +239,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) * 0xFF's. For ER, we still retrieve the data from the PCI * config space. */ - if (eeh_probe_mode_dev() && - (pe->type & EEH_PE_PHB) && - (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD))) - valid_cfg_log = false; - - if (valid_cfg_log) { + if (!(pe->type & EEH_PE_PHB)) { eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -309,7 +303,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* If the PHB has been in problematic state */ eeh_serialize_lock(&flags); - if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) { + if (phb_pe->state & EEH_PE_ISOLATED) { ret = 0; goto out; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index bb61ca58ca6d..1ddc046c69cf 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -682,8 +682,7 @@ static void eeh_handle_special_event(void) phb_pe = eeh_phb_pe_get(hose); if (!phb_pe) continue; - eeh_pe_state_mark(phb_pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); } eeh_serialize_unlock(flags); @@ -699,8 +698,7 @@ static void eeh_handle_special_event(void) eeh_remove_event(pe); if (rc == EEH_NEXT_ERR_DEAD_PHB) - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); else eeh_pe_state_mark(pe, EEH_PE_ISOLATED | EEH_PE_RECOVERING); @@ -724,12 +722,14 @@ static void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { eeh_handle_normal_event(pe); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } else { pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); if (!phb_pe || - !(phb_pe->state & EEH_PE_PHB_DEAD)) + !(phb_pe->state & EEH_PE_ISOLATED) || + (phb_pe->state & EEH_PE_RECOVERING)) continue; /* Notify all devices to be down */ -- cgit v1.2.3 From d0914f503f7ba2cd078b123983562be8951296d3 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 24 Apr 2014 18:00:12 +1000 Subject: powerpc/eeh: Block PCI-CFG access during PE reset We've observed multiple PE reset failures because of PCI-CFG access during that period. Potentially, some device drivers can't support EEH very well and they can't put the device to motionless state before PE reset. So those device drivers might produce PCI-CFG accesses during PE reset. Also, we could have PCI-CFG access from user space (e.g. "lspci"). Since access to frozen PE should return 0xFF's, we can block PCI-CFG access during the period of PE reset so that we won't get recrusive EEH errors. The patch adds flag EEH_PE_RESET, which is kept during PE reset. The PowerNV/pSeries PCI-CFG accessors reuse the flag to block PCI-CFG accordingly. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh_driver.c | 13 ++++- arch/powerpc/kernel/rtas_pci.c | 66 +++++++++++++++++------ arch/powerpc/platforms/powernv/pci.c | 100 ++++++++++++++++++++++------------- 4 files changed, 126 insertions(+), 54 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index a61b06f86d80..fa32d8dbf1cd 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -53,6 +53,7 @@ struct device_node; #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ +#define EEH_PE_RESET (1 << 2) /* PE reset in progress */ #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 1ddc046c69cf..6d91b51a5ddb 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -451,19 +451,28 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); } - /* Reset the pci controller. (Asserts RST#; resets config space). + /* + * Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system * up if the reset failed for some reason. + * + * During the reset, it's very dangerous to have uncontrolled PCI + * config accesses. So we prefer to block them. However, controlled + * PCI config accesses initiated from EEH itself are allowed. */ + eeh_pe_state_mark(pe, EEH_PE_RESET); rc = eeh_reset_pe(pe); - if (rc) + if (rc) { + eeh_pe_state_clear(pe, EEH_PE_RESET); return rc; + } pci_lock_rescan_remove(); /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); + eeh_pe_state_clear(pe, EEH_PE_RESET); /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 7d4c7172f38e..c168337aef9d 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -80,10 +80,6 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) if (ret) return PCIBIOS_DEVICE_NOT_FOUND; - if (returnval == EEH_IO_ERROR_VALUE(size) && - eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node))) - return PCIBIOS_DEVICE_NOT_FOUND; - return PCIBIOS_SUCCESSFUL; } @@ -92,18 +88,39 @@ static int rtas_pci_read_config(struct pci_bus *bus, int where, int size, u32 *val) { struct device_node *busdn, *dn; - - busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + bool found = false; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif + int ret; /* Search only direct children of the bus */ + *val = 0xFFFFFFFF; + busdn = pci_bus_to_OF_node(bus); for (dn = busdn->child; dn; dn = dn->sibling) { - struct pci_dn *pdn = PCI_DN(dn); + pdn = PCI_DN(dn); if (pdn && pdn->devfn == devfn - && of_device_is_available(dn)) - return rtas_read_config(pdn, where, size, val); + && of_device_is_available(dn)) { + found = true; + break; + } } - return PCIBIOS_DEVICE_NOT_FOUND; + if (!found) + return PCIBIOS_DEVICE_NOT_FOUND; +#ifdef CONFIG_EEH + edev = of_node_to_eeh_dev(dn); + if (edev && edev->pe && edev->pe->state & EEH_PE_RESET) + return PCIBIOS_DEVICE_NOT_FOUND; +#endif + + ret = rtas_read_config(pdn, where, size, val); + if (*val == EEH_IO_ERROR_VALUE(size) && + eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + return PCIBIOS_DEVICE_NOT_FOUND; + + return ret; } int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) @@ -136,17 +153,34 @@ static int rtas_pci_write_config(struct pci_bus *bus, int where, int size, u32 val) { struct device_node *busdn, *dn; - - busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + bool found = false; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif + int ret; /* Search only direct children of the bus */ + busdn = pci_bus_to_OF_node(bus); for (dn = busdn->child; dn; dn = dn->sibling) { - struct pci_dn *pdn = PCI_DN(dn); + pdn = PCI_DN(dn); if (pdn && pdn->devfn == devfn - && of_device_is_available(dn)) - return rtas_write_config(pdn, where, size, val); + && of_device_is_available(dn)) { + found = true; + break; + } } - return PCIBIOS_DEVICE_NOT_FOUND; + + if (!found) + return PCIBIOS_DEVICE_NOT_FOUND; +#ifdef CONFIG_EEH + edev = of_node_to_eeh_dev(dn); + if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET)) + return PCIBIOS_DEVICE_NOT_FOUND; +#endif + ret = rtas_write_config(pdn, where, size, val); + + return ret; } static struct pci_ops rtas_pci_ops = { diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 2de283d2076c..f98cf99c9f8c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -373,9 +373,6 @@ int pnv_pci_cfg_read(struct device_node *dn, struct pci_dn *pdn = PCI_DN(dn); struct pnv_phb *phb = pdn->phb->private_data; u32 bdfn = (pdn->busno << 8) | pdn->devfn; -#ifdef CONFIG_EEH - struct eeh_pe *phb_pe = NULL; -#endif s64 rc; switch (size) { @@ -401,31 +398,9 @@ int pnv_pci_cfg_read(struct device_node *dn, default: return PCIBIOS_FUNC_NOT_SUPPORTED; } + cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", __func__, pdn->busno, pdn->devfn, where, size, *val); - - /* - * Check if the specified PE has been put into frozen - * state. On the other hand, we needn't do that while - * the PHB has been put into frozen state because of - * PHB-fatal errors. - */ -#ifdef CONFIG_EEH - phb_pe = eeh_phb_pe_get(pdn->phb); - if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED)) - return PCIBIOS_SUCCESSFUL; - - if (phb->flags & PNV_PHB_FLAG_EEH) { - if (*val == EEH_IO_ERROR_VALUE(size) && - eeh_dev_check_failure(of_node_to_eeh_dev(dn))) - return PCIBIOS_DEVICE_NOT_FOUND; - } else { - pnv_pci_config_check_eeh(phb, dn); - } -#else - pnv_pci_config_check_eeh(phb, dn); -#endif - return PCIBIOS_SUCCESSFUL; } @@ -452,12 +427,35 @@ int pnv_pci_cfg_write(struct device_node *dn, return PCIBIOS_FUNC_NOT_SUPPORTED; } - /* Check if the PHB got frozen due to an error (no response) */ + return PCIBIOS_SUCCESSFUL; +} + +#if CONFIG_EEH +static bool pnv_pci_cfg_check(struct pci_controller *hose, + struct device_node *dn) +{ + struct eeh_dev *edev = NULL; + struct pnv_phb *phb = hose->private_data; + + /* EEH not enabled ? */ if (!(phb->flags & PNV_PHB_FLAG_EEH)) - pnv_pci_config_check_eeh(phb, dn); + return true; - return PCIBIOS_SUCCESSFUL; + /* PE reset ? */ + edev = of_node_to_eeh_dev(dn); + if (edev && edev->pe && + (edev->pe->state & EEH_PE_RESET)) + return false; + + return true; +} +#else +static inline pnv_pci_cfg_check(struct pci_controller *hose, + struct device_node *dn) +{ + return true; } +#endif /* CONFIG_EEH */ static int pnv_pci_read_config(struct pci_bus *bus, unsigned int devfn, @@ -465,16 +463,33 @@ static int pnv_pci_read_config(struct pci_bus *bus, { struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); struct pci_dn *pdn; + struct pnv_phb *phb; + bool found = false; + int ret; + *val = 0xFFFFFFFF; for (dn = busdn->child; dn; dn = dn->sibling) { pdn = PCI_DN(dn); - if (pdn && pdn->devfn == devfn) - return pnv_pci_cfg_read(dn, where, size, val); + if (pdn && pdn->devfn == devfn) { + phb = pdn->phb->private_data; + found = true; + break; + } } - *val = 0xFFFFFFFF; - return PCIBIOS_DEVICE_NOT_FOUND; + if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) + return PCIBIOS_DEVICE_NOT_FOUND; + ret = pnv_pci_cfg_read(dn, where, size, val); + if (phb->flags & PNV_PHB_FLAG_EEH) { + if (*val == EEH_IO_ERROR_VALUE(size) && + eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + return PCIBIOS_DEVICE_NOT_FOUND; + } else { + pnv_pci_config_check_eeh(phb, dn); + } + + return ret; } static int pnv_pci_write_config(struct pci_bus *bus, @@ -483,14 +498,27 @@ static int pnv_pci_write_config(struct pci_bus *bus, { struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); struct pci_dn *pdn; + struct pnv_phb *phb; + bool found = false; + int ret; for (dn = busdn->child; dn; dn = dn->sibling) { pdn = PCI_DN(dn); - if (pdn && pdn->devfn == devfn) - return pnv_pci_cfg_write(dn, where, size, val); + if (pdn && pdn->devfn == devfn) { + phb = pdn->phb->private_data; + found = true; + break; + } } - return PCIBIOS_DEVICE_NOT_FOUND; + if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + ret = pnv_pci_cfg_write(dn, where, size, val); + if (!(phb->flags & PNV_PHB_FLAG_EEH)) + pnv_pci_config_check_eeh(phb, dn); + + return ret; } struct pci_ops pnv_pci_ops = { -- cgit v1.2.3 From 2a18dfc6ee2ea00bba767f6968f1a107fdd8e687 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 24 Apr 2014 18:00:16 +1000 Subject: powerpc/eeh: Use cached capability for log dump When calling into eeh_gather_pci_data() on pSeries platform, we possiblly don't have pci_dev instance yet, but eeh_dev is always ready. So we use cached capability from eeh_dev instead of pci_dev for log dump there. In order to keep things unified, we also cache PCI capability positions to eeh_dev for PowerNV as well. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 4 ++- arch/powerpc/kernel/eeh.c | 39 ++++++++++++---------------- arch/powerpc/platforms/powernv/eeh-powernv.c | 4 +++ arch/powerpc/platforms/pseries/eeh_pseries.c | 32 +++++++++++++++++++++++ 4 files changed, 56 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index fa32d8dbf1cd..f0183e36f610 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -99,7 +99,9 @@ struct eeh_dev { int config_addr; /* Config address */ int pe_config_addr; /* PE config address */ u32 config_space[16]; /* Saved PCI config space */ - u8 pcie_cap; /* Saved PCIe capability */ + int pcix_cap; /* Saved PCIx capability */ + int pcie_cap; /* Saved PCIe capability */ + int aer_cap; /* Saved AER capability */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ struct pci_controller *phb; /* Associated PHB */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c6d8f7e6888a..69df8985fc8b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -145,7 +145,6 @@ static struct eeh_stats eeh_stats; static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) { struct device_node *dn = eeh_dev_to_of_node(edev); - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); u32 cfg; int cap, i; int n = 0; @@ -161,13 +160,8 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); - if (!dev) { - pr_warn("EEH: no PCI device for this of node\n"); - return n; - } - /* Gather bridge-specific registers */ - if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { + if (edev->mode & EEH_DEV_BRIDGE) { eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); pr_warn("EEH: Bridge secondary status: %04x\n", cfg); @@ -178,7 +172,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) } /* Dump out the PCI-X command and status regs */ - cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); + cap = edev->pcix_cap; if (cap) { eeh_ops->read_config(dn, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); @@ -189,28 +183,29 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) pr_warn("EEH: PCI-X status: %08x\n", cfg); } - /* If PCI-E capable, dump PCI-E cap 10, and the AER */ - if (pci_is_pcie(dev)) { + /* If PCI-E capable, dump PCI-E cap 10 */ + cap = edev->pcie_cap; + if (cap) { n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(dn, dev->pcie_cap+4*i, 4, &cfg); + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg); } + } - cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - if (cap) { - n += scnprintf(buf+n, len-n, "pci-e AER:\n"); - pr_warn("EEH: PCI-E AER capability register " - "set follows:\n"); - - for (i=0; i<14; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); - n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg); - } + /* If AER capable, dump it */ + cap = edev->aer_cap; + if (cap) { + n += scnprintf(buf+n, len-n, "pci-e AER:\n"); + pr_warn("EEH: PCI-E AER capability register set follows:\n"); + + for (i=0; i<14; i++) { + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); + n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); + pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg); } } diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index a59788e83b8b..56a206f32f77 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -126,6 +126,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) edev->mode &= 0xFFFFFF00; if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) edev->mode |= EEH_DEV_BRIDGE; + edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (pci_is_pcie(dev)) { edev->pcie_cap = pci_pcie_cap(dev); @@ -133,6 +134,9 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) edev->mode |= EEH_DEV_ROOT_PORT; else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) edev->mode |= EEH_DEV_DS_PORT; + + edev->aer_cap = pci_find_ext_capability(dev, + PCI_EXT_CAP_ID_ERR); } edev->config_addr = ((dev->bus->number << 8) | dev->devfn); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 8a8f0472d98f..9d58a53548f2 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -175,6 +175,36 @@ static int pseries_eeh_find_cap(struct device_node *dn, int cap) return 0; } +static int pseries_eeh_find_ecap(struct device_node *dn, int cap) +{ + struct pci_dn *pdn = PCI_DN(dn); + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + u32 header; + int pos = 256; + int ttl = (4096 - 256) / 8; + + if (!edev || !edev->pcie_cap) + return 0; + if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + return 0; + else if (!header) + return 0; + + while (ttl-- > 0) { + if (PCI_EXT_CAP_ID(header) == cap && pos) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < 256) + break; + + if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + break; + } + + return 0; +} + /** * pseries_eeh_of_probe - EEH probe on the given device * @dn: OF node @@ -220,7 +250,9 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) * or PCIe switch downstream port. */ edev->class_code = class_code; + edev->pcix_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_PCIX); edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP); + edev->aer_cap = pseries_eeh_find_ecap(dn, PCI_EXT_CAP_ID_ERR); edev->mode &= 0xFFFFFF00; if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; -- cgit v1.2.3 From 8a5ad35686fa81da7d8d07e9dd7041ac4a2ac0d7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 24 Apr 2014 18:00:17 +1000 Subject: powerpc/eeh: Cleanup EEH subsystem variables There're 2 EEH subsystem variables: eeh_subsystem_enabled and eeh_probe_mode. We needn't maintain 2 variables and we can just have one variable and introduce different flags. The patch also introduces additional flag EEH_FORCE_DISABLE, which will be used to disable EEH subsystem via boot parameter ("eeh=off") in future. Besides, the patch also introduces flag EEH_ENABLED, which is changed to disable or enable EEH functionality on the fly through debugfs entry in future. With the patch applied, the creteria to check the enabled EEH functionality is changed to: !EEH_FORCE_DISABLED && EEH_ENABLED : Enabled Other cases : Disabled Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 29 +++++++++++++++++++---------- arch/powerpc/kernel/eeh.c | 31 +++++++++++++++---------------- 2 files changed, 34 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index f0183e36f610..f4a93218fbcb 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -32,6 +32,12 @@ struct device_node; #ifdef CONFIG_EEH +/* EEH subsystem flags */ +#define EEH_ENABLED 0x1 /* EEH enabled */ +#define EEH_FORCE_DISABLED 0x2 /* EEH disabled */ +#define EEH_PROBE_MODE_DEV 0x4 /* From PCI device */ +#define EEH_PROBE_MODE_DEVTREE 0x8 /* From device tree */ + /* * The struct is used to trace PE related EEH functionality. * In theory, there will have one instance of the struct to @@ -173,37 +179,40 @@ struct eeh_ops { int (*restore_config)(struct device_node *dn); }; +extern int eeh_subsystem_flags; extern struct eeh_ops *eeh_ops; -extern bool eeh_subsystem_enabled; extern raw_spinlock_t confirm_error_lock; -extern int eeh_probe_mode; static inline bool eeh_enabled(void) { - return eeh_subsystem_enabled; + if ((eeh_subsystem_flags & EEH_FORCE_DISABLED) || + !(eeh_subsystem_flags & EEH_ENABLED)) + return false; + + return true; } static inline void eeh_set_enable(bool mode) { - eeh_subsystem_enabled = mode; + if (mode) + eeh_subsystem_flags |= EEH_ENABLED; + else + eeh_subsystem_flags &= ~EEH_ENABLED; } -#define EEH_PROBE_MODE_DEV (1<<0) /* From PCI device */ -#define EEH_PROBE_MODE_DEVTREE (1<<1) /* From device tree */ - static inline void eeh_probe_mode_set(int flag) { - eeh_probe_mode = flag; + eeh_subsystem_flags |= flag; } static inline int eeh_probe_mode_devtree(void) { - return (eeh_probe_mode == EEH_PROBE_MODE_DEVTREE); + return (eeh_subsystem_flags & EEH_PROBE_MODE_DEVTREE); } static inline int eeh_probe_mode_dev(void) { - return (eeh_probe_mode == EEH_PROBE_MODE_DEV); + return (eeh_subsystem_flags & EEH_PROBE_MODE_DEV); } static inline void eeh_serialize_lock(unsigned long *flags) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 69df8985fc8b..06d2b7c6b661 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -87,22 +87,21 @@ /* Time to wait for a PCI slot to report status, in milliseconds */ #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) -/* Platform dependent EEH operations */ -struct eeh_ops *eeh_ops = NULL; - -bool eeh_subsystem_enabled = false; -EXPORT_SYMBOL(eeh_subsystem_enabled); - /* - * EEH probe mode support. The intention is to support multiple - * platforms for EEH. Some platforms like pSeries do PCI emunation - * based on device tree. However, other platforms like powernv probe - * PCI devices from hardware. The flag is used to distinguish that. - * In addition, struct eeh_ops::probe would be invoked for particular - * OF node or PCI device so that the corresponding PE would be created - * there. + * EEH probe mode support, which is part of the flags, + * is to support multiple platforms for EEH. Some platforms + * like pSeries do PCI emunation based on device tree. + * However, other platforms like powernv probe PCI devices + * from hardware. The flag is used to distinguish that. + * In addition, struct eeh_ops::probe would be invoked for + * particular OF node or PCI device so that the corresponding + * PE would be created there. */ -int eeh_probe_mode; +int eeh_subsystem_flags; +EXPORT_SYMBOL(eeh_subsystem_flags); + +/* Platform dependent EEH operations */ +struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); @@ -842,8 +841,8 @@ int eeh_init(void) &hose_list, list_node) pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); } else { - pr_warning("%s: Invalid probe mode %d\n", - __func__, eeh_probe_mode); + pr_warn("%s: Invalid probe mode %x", + __func__, eeh_subsystem_flags); return -EINVAL; } -- cgit v1.2.3 From d2b0f6f77ee525811b6efe864efa6a4eb82eea73 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 24 Apr 2014 18:00:19 +1000 Subject: powerpc/eeh: No hotplug on permanently removed dev The issue was detected in a bit complicated test case where we have multiple hierarchical PEs shown as following figure: +-----------------+ | PE#3 p2p#0 | | p2p#1 | +-----------------+ | +-----------------+ | PE#4 pdev#0 | | pdev#1 | +-----------------+ PE#4 (have 2 PCI devices) is the child of PE#3, which has 2 p2p bridges. We accidentally had less-known scenario: PE#4 was removed permanently from the system because of permanent failure (e.g. exceeding the max allowd failure times in last hour), then we detects EEH errors on PE#3 and tried to recover it. However, eeh_dev instances for pdev#0/1 were not detached from PE#4, which was still connected to PE#3. All of that was because of the fact that we rely on count-based pcibios_release_device(), which isn't reliable enough. When doing recovery for PE#3, we still apply hotplug on PE#4 and pdev#0/1, which are not valid any more. Eventually, we run into kernel crash. The patch fixes above issue from two aspects. For unplug, we simply skip those permanently removed PE, whose state is (EEH_PE_STATE_ISOLATED && !EEH_PE_STATE_RECOVERING) and its frozen count should be greater than EEH_MAX_ALLOWED_FREEZES. For plug, we marked all permanently removed EEH devices with EEH_DEV_REMOVED and return 0xFF's on read its PCI config so that PCI core will omit them. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/include/asm/ppc-pci.h | 1 + arch/powerpc/kernel/eeh_driver.c | 48 ++++++++++++++++++++++++++++++------ arch/powerpc/kernel/eeh_pe.c | 47 +++++++++++++++++++++++++++++------ arch/powerpc/kernel/pci_of_scan.c | 9 +++++++ arch/powerpc/platforms/powernv/pci.c | 13 +++++++--- 6 files changed, 100 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index f4a93218fbcb..2841ecac4c47 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -98,6 +98,7 @@ struct eeh_pe { #define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ #define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ +#define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */ struct eeh_dev { int mode; /* EEH mode */ diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index ed57fa7920c8..db1e2b8eff3c 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -58,6 +58,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); void eeh_pe_state_clear(struct eeh_pe *pe, int state); +void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode); void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 1f1e2cc045a9..f99ba9b76322 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -171,6 +171,15 @@ static void eeh_enable_irq(struct pci_dev *dev) } } +static bool eeh_dev_removed(struct eeh_dev *edev) +{ + /* EEH device removed ? */ + if (!edev || (edev->mode & EEH_DEV_REMOVED)) + return true; + + return false; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -187,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - /* We might not have the associated PCI device, - * then we should continue for next one. - */ - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_frozen; driver = eeh_pcid_get(dev); @@ -230,6 +237,9 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; + if (!dev || eeh_dev_removed(edev)) + return NULL; + driver = eeh_pcid_get(dev); if (!driver) return NULL; @@ -267,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -307,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); @@ -343,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev) return NULL; + if (!dev || eeh_dev_removed(edev)) + return NULL; dev->error_state = pci_channel_io_perm_failure; driver = eeh_pcid_get(dev); @@ -380,6 +393,16 @@ static void *eeh_rmv_device(void *data, void *userdata) if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) return NULL; + /* + * We rely on count-based pcibios_release_device() to + * detach permanently offlined PEs. Unfortunately, that's + * not reliable enough. We might have the permanently + * offlined PEs attached, but we needn't take care of + * them and their child devices. + */ + if (eeh_dev_removed(edev)) + return NULL; + driver = eeh_pcid_get(dev); if (driver) { eeh_pcid_put(dev); @@ -694,8 +717,17 @@ perm_error: /* Notify all devices that they're about to go down. */ eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - /* Shut down the device drivers for good. */ + /* Mark the PE to be removed permanently */ + pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1; + + /* + * Shut down the device drivers for good. We mark + * all removed devices correctly to avoid access + * the their PCI config any more. + */ if (frozen_bus) { + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + pci_lock_rescan_remove(); pcibios_remove_pci_devices(frozen_bus); pci_unlock_rescan_remove(); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index f0c353fa655a..995c2a284630 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -503,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag) struct eeh_dev *edev, *tmp; struct pci_dev *pdev; - /* - * Mark the PE with the indicated state. Also, - * the associated PCI device will be put into - * I/O frozen state to avoid I/O accesses from - * the PCI device driver. - */ + /* Keep the state of permanently removed PE intact */ + if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && + (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + return NULL; + pe->state |= state; + + /* Offline PCI devices if applicable */ + if (state != EEH_PE_ISOLATED) + return NULL; + eeh_pe_for_each_dev(pe, edev, tmp) { pdev = eeh_dev_to_pci_dev(edev); if (pdev) @@ -532,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); } +static void *__eeh_pe_dev_mode_mark(void *data, void *flag) +{ + struct eeh_dev *edev = data; + int mode = *((int *)flag); + + edev->mode |= mode; + + return NULL; +} + +/** + * eeh_pe_dev_state_mark - Mark state for all device under the PE + * @pe: EEH PE + * + * Mark specific state for all child devices of the PE. + */ +void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode) +{ + eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode); +} + /** * __eeh_pe_state_clear - Clear state for the PE * @data: EEH PE @@ -546,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag) struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); + /* Keep the state of permanently removed PE intact */ + if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && + (state & EEH_PE_ISOLATED)) + return NULL; + pe->state &= ~state; - pe->check_count = 0; + + /* Clear check count since last isolation */ + if (state & EEH_PE_ISOLATED) + pe->check_count = 0; return NULL; } diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 83c26d829991..ea6470c21f4e 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -304,6 +304,9 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, struct pci_dev *dev = NULL; const __be32 *reg; int reglen, devfn; +#ifdef CONFIG_EEH + struct eeh_dev *edev = of_node_to_eeh_dev(dn); +#endif pr_debug(" * %s\n", dn->full_name); if (!of_device_is_available(dn)) @@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, return dev; } + /* Device removed permanently ? */ +#ifdef CONFIG_EEH + if (edev && (edev->mode & EEH_DEV_REMOVED)) + return NULL; +#endif + /* create a new pci_dev for this device */ dev = of_create_pci_dev(dn, bus, devfn); if (!dev) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f98cf99c9f8c..eefbfcc3fd8c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -441,11 +441,16 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose, if (!(phb->flags & PNV_PHB_FLAG_EEH)) return true; - /* PE reset ? */ + /* PE reset or device removed ? */ edev = of_node_to_eeh_dev(dn); - if (edev && edev->pe && - (edev->pe->state & EEH_PE_RESET)) - return false; + if (edev) { + if (edev->pe && + (edev->pe->state & EEH_PE_RESET)) + return false; + + if (edev->mode & EEH_DEV_REMOVED) + return false; + } return true; } -- cgit v1.2.3 From 26833a5029b710b12f00607fa255ce86909836ad Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 24 Apr 2014 18:00:23 +1000 Subject: powerpc/eeh: Make the delay for PE reset unified Basically, we have 3 types of resets to fulfil PE reset: fundamental, hot and PHB reset. For the later 2 cases, we need PCI bus reset hold and settlement delay as specified by PCI spec. PowerNV and pSeries platforms are running on top of different firmware and some of the delays have been covered by underly firmware (PowerNV). The patch makes the delays unified to be done in backend, instead of EEH core. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 10 ++++++++++ arch/powerpc/kernel/eeh.c | 13 ------------- arch/powerpc/platforms/powernv/eeh-ioda.c | 12 +++++++++++- arch/powerpc/platforms/pseries/eeh_pseries.c | 10 +++++++++- 4 files changed, 30 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 2841ecac4c47..b76f58c124ca 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -38,6 +38,16 @@ struct device_node; #define EEH_PROBE_MODE_DEV 0x4 /* From PCI device */ #define EEH_PROBE_MODE_DEVTREE 0x8 /* From device tree */ +/* + * Delay for PE reset, all in ms + * + * PCI specification has reset hold time of 100 milliseconds. + * We have 250 milliseconds here. The PCI bus settlement time + * is specified as 1.5 seconds and we have 1.8 seconds. + */ +#define EEH_PE_RST_HOLD_TIME 250 +#define EEH_PE_RST_SETTLE_TIME 1800 + /* * The struct is used to trace PE related EEH functionality. * In theory, there will have one instance of the struct to diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 1e409a2ff88b..3764fb788d6c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -639,20 +639,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) else eeh_ops->reset(pe, EEH_RESET_HOT); - /* The PCI bus requires that the reset be held high for at least - * a 100 milliseconds. We wait a bit longer 'just in case'. - */ -#define PCI_BUS_RST_HOLD_TIME_MSEC 250 - msleep(PCI_BUS_RST_HOLD_TIME_MSEC); - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - - /* After a PCI slot has been reset, the PCI Express spec requires - * a 1.5 second idle time for the bus to stabilize, before starting - * up traffic. - */ -#define PCI_BUS_SETTLE_TIME_MSEC 1800 - msleep(PCI_BUS_SETTLE_TIME_MSEC); } /** diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 0844e00ccdd8..268cd46af8f1 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -417,9 +417,13 @@ static int ioda_eeh_phb_reset(struct pci_controller *hose, int option) /* * Poll state of the PHB until the request is done - * successfully. + * successfully. The PHB reset is usually PHB complete + * reset followed by hot reset on root bus. So we also + * need the PCI bus settlement delay. */ rc = ioda_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) + msleep(EEH_PE_RST_SETTLE_TIME); out: if (rc != OPAL_SUCCESS) return -EIO; @@ -457,6 +461,8 @@ static int ioda_eeh_root_reset(struct pci_controller *hose, int option) /* Poll state of the PHB until the request is done */ rc = ioda_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) + msleep(EEH_PE_RST_SETTLE_TIME); out: if (rc != OPAL_SUCCESS) return -EIO; @@ -480,11 +486,15 @@ static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option) eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl |= PCI_BRIDGE_CTL_BUS_RESET; eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_SETTLE_TIME); break; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 2f1ba64fc831..0bec0c02c5e7 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -532,11 +532,19 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option) /* If fundamental-reset not supported, try hot-reset */ if (option == EEH_RESET_FUNDAMENTAL && ret == -8) { + option = EEH_RESET_HOT; ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), EEH_RESET_HOT); + BUID_LO(pe->phb->buid), option); } + /* We need reset hold or settlement delay */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + msleep(EEH_PE_RST_HOLD_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + return ret; } -- cgit v1.2.3 From d92a208d086063ecc785b4588f74ab42268cbc4b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 24 Apr 2014 18:00:24 +1000 Subject: powerpc/pci: Mask linkDown on resetting PCI bus The problem was initially reported by Wendy who tried pass through IPR adapter, which was connected to PHB root port directly, to KVM based guest. When doing that, pci_reset_bridge_secondary_bus() was called by VFIO driver and linkDown was detected by the root port. That caused all PEs to be frozen. The patch fixes the issue by routing the reset for the secondary bus of root port to underly firmware. For that, one more weak function pci_reset_secondary_bus() is introduced so that the individual platforms can override that and do specific reset for bridge's secondary bus. Reported-by: Wendy Xiong Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/machdep.h | 3 +++ arch/powerpc/kernel/pci-common.c | 20 ++++++++++++++++ arch/powerpc/platforms/powernv/eeh-ioda.c | 38 +++++++++++++++++++++++++++++-- arch/powerpc/platforms/powernv/pci-ioda.c | 1 + arch/powerpc/platforms/powernv/pci.h | 1 + drivers/pci/pci.c | 21 ++++++++++------- 6 files changed, 74 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 5b6c03f1058f..240b137ce0cf 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -241,6 +241,9 @@ struct machdep_calls { /* Called during PCI resource reassignment */ resource_size_t (*pcibios_window_alignment)(struct pci_bus *, unsigned long type); + /* Reset the secondary bus of bridge */ + void (*pcibios_reset_secondary_bus)(struct pci_dev *dev); + /* Called to shutdown machine specific hardware not already controlled * by other drivers. */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d9476c1fc959..f9ca5091840c 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -120,6 +121,25 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, return 1; } +void pcibios_reset_secondary_bus(struct pci_dev *dev) +{ + u16 ctrl; + + if (ppc_md.pcibios_reset_secondary_bus) { + ppc_md.pcibios_reset_secondary_bus(dev); + return; + } + + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + msleep(2); + + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + ssleep(1); +} + static resource_size_t pcibios_io_size(const struct pci_controller *hose) { #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 268cd46af8f1..58ef80987eed 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -474,6 +474,8 @@ static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option) { struct device_node *dn = pci_device_to_OF_node(dev); + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + int aer = edev ? edev->aer_cap : 0; u32 ctrl; pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", @@ -483,24 +485,56 @@ static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option) switch (option) { case EEH_RESET_FUNDAMENTAL: case EEH_RESET_HOT: + /* Don't report linkDown event */ + if (aer) { + eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl |= PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); + } + eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl |= PCI_BRIDGE_CTL_BUS_RESET; eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); - msleep(EEH_PE_RST_HOLD_TIME); + break; case EEH_RESET_DEACTIVATE: eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); - msleep(EEH_PE_RST_SETTLE_TIME); + + /* Continue reporting linkDown event */ + if (aer) { + eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl &= ~PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); + } + break; } return 0; } +void pnv_pci_reset_secondary_bus(struct pci_dev *dev) +{ + struct pci_controller *hose; + + if (pci_is_root_bus(dev->bus)) { + hose = pci_bus_to_host(dev->bus); + ioda_eeh_root_reset(hose, EEH_RESET_HOT); + ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); + } else { + ioda_eeh_bridge_reset(dev, EEH_RESET_HOT); + ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); + } +} + /** * ioda_eeh_reset - Reset the indicated PE * @pe: EEH PE diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 98824aa99173..a179ff00be3e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1386,6 +1386,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; + ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus; pci_add_flags(PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 39ec6978e809..34a09740aad3 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -204,5 +204,6 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, __be64 *startp, __be64 *endp, bool rm); +extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); #endif /* __POWERNV_PCI_H */ diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7325d43bf030..633382d227f4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3167,14 +3167,7 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) return 0; } -/** - * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge. - * @dev: Bridge device - * - * Use the bridge control register to assert reset on the secondary bus. - * Devices on the secondary bus are left in power-on state. - */ -void pci_reset_bridge_secondary_bus(struct pci_dev *dev) +void __weak pcibios_reset_secondary_bus(struct pci_dev *dev) { u16 ctrl; @@ -3199,6 +3192,18 @@ void pci_reset_bridge_secondary_bus(struct pci_dev *dev) */ ssleep(1); } + +/** + * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge. + * @dev: Bridge device + * + * Use the bridge control register to assert reset on the secondary bus. + * Devices on the secondary bus are left in power-on state. + */ +void pci_reset_bridge_secondary_bus(struct pci_dev *dev) +{ + pcibios_reset_secondary_bus(dev); +} EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus); static int pci_parent_bus_reset(struct pci_dev *dev, int probe) -- cgit v1.2.3 From b2b5efcf208ddc9444aca77336627428782a39f4 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 24 Apr 2014 18:00:27 +1000 Subject: powerpc/powernv: Fundamental reset on PLX ports The patch intends to support fundamental reset on PLX downstream ports. If the PCI device matches any one of the internal table, which includes PLX vendor ID, bridge device ID, register offset for fundamental reset and bit, fundamental reset will be done accordingly. Otherwise, it will fail back to hot reset. Additional flag (EEH_DEV_FRESET) is introduced to record the last reset type on the PCI bridge. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/platforms/powernv/eeh-ioda.c | 110 +++++++++++++++++++++++++----- 2 files changed, 95 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b76f58c124ca..d12529f34524 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -109,6 +109,7 @@ struct eeh_pe { #define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ #define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ #define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */ +#define EEH_DEV_FRESET (1 << 11) /* Fundamental reset */ struct eeh_dev { int mode; /* EEH mode */ diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 753f08e36dfa..79d0cdf786d0 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -477,49 +477,127 @@ out: return 0; } +static bool ioda_eeh_is_plx_dnport(struct pci_dev *dev, int *reg, + int *mask, int *len) +{ + unsigned short *pid; + unsigned short ids[] = { + 0x10b5, 0x8748, 0x0080, 0x0400, /* PLX#8748 */ + 0x0000, 0x0000, 0x0000, 0x0000, /* End flag */ + }; + + if (!pci_is_pcie(dev)) + return false; + if (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) + return false; + + pid = &ids[0]; + while (!reg) { + if (pid[0] == 0x0) + break; + + if (dev->vendor == pid[0] && + dev->device == pid[1]) { + *reg = pid[2]; + *mask = pid[3]; + *len = 2; + return true; + } + } + + *reg = PCI_BRIDGE_CONTROL; + *mask = PCI_BRIDGE_CTL_BUS_RESET; + *len = 2; + return false; +} + static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option) { struct device_node *dn = pci_device_to_OF_node(dev); struct eeh_dev *edev = of_node_to_eeh_dev(dn); int aer = edev ? edev->aer_cap : 0; - u32 ctrl; + int reg, mask, val, len; + bool is_plx_dnport; pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", __func__, pci_domain_nr(dev->bus), dev->bus->number, option); + + is_plx_dnport = ioda_eeh_is_plx_dnport(dev, ®, &mask, &len); + if (option == EEH_RESET_FUNDAMENTAL) + if (!is_plx_dnport || !edev) + option = EEH_RESET_HOT; + + if (option == EEH_RESET_HOT) { + reg = PCI_BRIDGE_CONTROL; + mask = PCI_BRIDGE_CTL_BUS_RESET; + len = 2; + } + + if (option == EEH_RESET_DEACTIVATE) { + if (!is_plx_dnport || !edev || + !(edev->mode & EEH_DEV_FRESET)) { + reg = PCI_BRIDGE_CONTROL; + mask = PCI_BRIDGE_CTL_BUS_RESET; + len = 2; + } + } + switch (option) { case EEH_RESET_FUNDAMENTAL: + edev->mode |= EEH_DEV_FRESET; + /* Fall through */ case EEH_RESET_HOT: - /* Don't report linkDown event */ if (aer) { + /* Mask receiver error */ + eeh_ops->read_config(dn, aer + PCI_ERR_COR_MASK, + 4, &val); + val |= PCI_ERR_COR_RCVR; + eeh_ops->write_config(dn, aer + PCI_ERR_COR_MASK, + 4, val); + + /* Mask linkDown */ eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, &ctrl); - ctrl |= PCI_ERR_UNC_SURPDN; + 4, &val); + val |= PCI_ERR_UNC_SURPDN; eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, ctrl); - } + 4, val); + } - eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); - ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); + eeh_ops->read_config(dn, reg, len, &val); + val |= mask; + eeh_ops->write_config(dn, reg, len, val); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); - ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); + eeh_ops->read_config(dn, reg, len, &val); + val &= ~mask; + eeh_ops->write_config(dn, reg, len, val); msleep(EEH_PE_RST_SETTLE_TIME); - /* Continue reporting linkDown event */ + if (edev) + edev->mode &= ~EEH_DEV_FRESET; if (aer) { + /* Clear receive error and enable it */ + eeh_ops->write_config(dn, aer + PCI_ERR_COR_STATUS, + 4, PCI_ERR_COR_RCVR); + eeh_ops->read_config(dn, aer + PCI_ERR_COR_MASK, + 4, &val); + val &= ~PCI_ERR_COR_RCVR; + eeh_ops->write_config(dn, aer + PCI_ERR_COR_MASK, + 4, val); + + /* Clear linkDown and enable it */ + eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_STATUS, + 4, PCI_ERR_UNC_SURPDN); eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, &ctrl); - ctrl &= ~PCI_ERR_UNC_SURPDN; + 4, &val); + val &= ~PCI_ERR_UNC_SURPDN; eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, ctrl); + 4, val); } break; -- cgit v1.2.3 From 2196c6f1ed66eef23df3b478cfe71661ae83726e Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 9 Apr 2014 22:48:55 +0530 Subject: powerpc/powernv: Return secondary CPUs to firmware before FW update Firmware update on PowerNV platform takes several minutes. During this time one CPU is stuck in FW and the kernel complains about "soft lockups". This patch returns all secondary CPUs to firmware before starting firmware update process. [ Reworked a bit and cleaned up -- BenH ] Signed-off-by: Vasant Hegde Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/platforms/powernv/opal-flash.c | 47 ++++++++++++++++++++++++++--- arch/powerpc/platforms/powernv/setup.c | 25 +++++++++++++-- 3 files changed, 66 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 66ad7a74116f..81720ff59a10 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -916,6 +916,7 @@ extern void opal_get_rtc_time(struct rtc_time *tm); extern unsigned long opal_get_boot_time(void); extern void opal_nvram_init(void); extern void opal_flash_init(void); +extern void opal_flash_term_callback(void); extern int opal_elog_init(void); extern void opal_platform_dump_init(void); extern void opal_sys_param_init(void); diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index dc487ff04704..145a80bc5354 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -290,11 +291,6 @@ static int opal_flash_update(int op) /* First entry address */ addr = __pa(list); - pr_alert("FLASH: Image is %u bytes\n", image_data.size); - pr_alert("FLASH: Image update requested\n"); - pr_alert("FLASH: Image will be updated during system reboot\n"); - pr_alert("FLASH: This will take several minutes. Do not power off!\n"); - flash: rc = opal_update_flash(addr); @@ -302,6 +298,47 @@ invalid_img: return rc; } +/* Return CPUs to OPAL before starting FW update */ +static void flash_return_cpu(void *info) +{ + int cpu = smp_processor_id(); + + if (!cpu_online(cpu)) + return; + + /* Disable IRQ */ + hard_irq_disable(); + + /* Return the CPU to OPAL */ + opal_return_cpu(); +} + +/* This gets called just before system reboots */ +void opal_flash_term_callback(void) +{ + struct cpumask mask; + + if (update_flash_data.status != FLASH_IMG_READY) + return; + + pr_alert("FLASH: Flashing new firmware\n"); + pr_alert("FLASH: Image is %u bytes\n", image_data.size); + pr_alert("FLASH: Performing flash and reboot/shutdown\n"); + pr_alert("FLASH: This will take several minutes. Do not power off!\n"); + + /* Small delay to help getting the above message out */ + msleep(500); + + /* Return secondary CPUs to firmware */ + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + if (!cpumask_empty(&mask)) + smp_call_function_many(&mask, + flash_return_cpu, NULL, false); + /* Hard disable interrupts */ + hard_irq_disable(); +} + /* * Show candidate image status */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 8723d32632f5..05d63aaeb147 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -98,11 +98,32 @@ static void pnv_show_cpuinfo(struct seq_file *m) of_node_put(root); } +static void pnv_prepare_going_down(void) +{ + /* + * Disable all notifiers from OPAL, we can't + * service interrupts anymore anyway + */ + opal_notifier_disable(); + + /* Soft disable interrupts */ + local_irq_disable(); + + /* + * Return secondary CPUs to firwmare if a flash update + * is pending otherwise we will get all sort of error + * messages about CPU being stuck etc.. This will also + * have the side effect of hard disabling interrupts so + * past this point, the kernel is effectively dead. + */ + opal_flash_term_callback(); +} + static void __noreturn pnv_restart(char *cmd) { long rc = OPAL_BUSY; - opal_notifier_disable(); + pnv_prepare_going_down(); while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_reboot(); @@ -119,7 +140,7 @@ static void __noreturn pnv_power_off(void) { long rc = OPAL_BUSY; - opal_notifier_disable(); + pnv_prepare_going_down(); while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_power_down(0); -- cgit v1.2.3 From 2299d03a632c7586403ab43a11b418ee1ae47f1a Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Tue, 11 Mar 2014 17:01:18 +0530 Subject: powerpc: powernv: Framework to show the correct clock in /proc/cpuinfo Currently, the code in setup-common.c for powerpc assumes that all clock rates are same in a smp system. This value is cached in the variable named ppc_proc_freq and is the value that is reported in /proc/cpuinfo. However on the PowerNV platform, the clock rate is same only across the threads of the same core. Hence the value that is reported in /proc/cpuinfo is incorrect on PowerNV platforms. We need a better way to query and report the correct value of the processor clock in /proc/cpuinfo. The patch achieves this by creating a machdep_call named get_proc_freq() which is expected to returns the frequency in Hz. The code in show_cpuinfo() can invoke this method to display the correct clock rate on platforms that have implemented this method. On the other powerpc platforms it can use the value cached in ppc_proc_freq. Signed-off-by: Gautham R. Shenoy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/machdep.h | 2 ++ arch/powerpc/kernel/setup-common.c | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 240b137ce0cf..374abc2e41d7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -113,6 +113,8 @@ struct machdep_calls { /* Optional, may be NULL. */ void (*show_cpuinfo)(struct seq_file *m); void (*show_percpuinfo)(struct seq_file *m, int i); + /* Returns the current operating frequency of "cpu" in Hz */ + unsigned long (*get_proc_freq)(unsigned int cpu); void (*init_IRQ)(void); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 79b7612ac6fa..3cf25c89469d 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -212,6 +212,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long cpu_id = (unsigned long)v - 1; unsigned int pvr; + unsigned long proc_freq; unsigned short maj; unsigned short min; @@ -263,12 +264,19 @@ static int show_cpuinfo(struct seq_file *m, void *v) #endif /* CONFIG_TAU */ /* - * Assume here that all clock rates are the same in a - * smp system. -- Cort + * Platforms that have variable clock rates, should implement + * the method ppc_md.get_proc_freq() that reports the clock + * rate of a given cpu. The rest can use ppc_proc_freq to + * report the clock rate that is same across all cpus. */ - if (ppc_proc_freq) + if (ppc_md.get_proc_freq) + proc_freq = ppc_md.get_proc_freq(cpu_id); + else + proc_freq = ppc_proc_freq; + + if (proc_freq) seq_printf(m, "clock\t\t: %lu.%06luMHz\n", - ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); + proc_freq / 1000000, proc_freq % 1000000); if (ppc_md.show_percpuinfo != NULL) ppc_md.show_percpuinfo(m, cpu_id); -- cgit v1.2.3 From 7f06f21d40a638e1ca759ceda0f21cd81082607e Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 28 Mar 2014 16:40:34 +1100 Subject: powerpc/tm: Add checking to treclaim/trechkpt If we do a treclaim and we are not in TM suspend mode, it results in a TM bad thing (ie. a 0x700 program check). Similarly if we do a trechkpt and we have an active transaction or TEXASR Failure Summary (FS) is not set, we also take a TM bad thing. This should never happen, but if it does (ie. a kernel bug), the cause is almost impossible to debug as the GPR state is mostly userspace and hence we don't get a call chain. This adds some checks in these cases case a BUG_ON() (in asm) in case we ever hit these cases. It moves the register saving around to preserve r1 till later also. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/tm.S | 38 +++++++++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e5d2e0bc7e03..29de0152878f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -215,6 +215,7 @@ #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ #define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ +#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 0535c7fdd12c..508c54b92fa6 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef CONFIG_VSX /* See fpu.S, this is borrowed from there */ @@ -175,6 +176,13 @@ dont_backup_vec: stfd fr0,FPSTATE_FPSCR(r7) dont_backup_fp: + /* Do sanity check on MSR to make sure we are suspended */ + li r7, (MSR_TS_S)@higher + srdi r6, r14, 32 + and r6, r6, r7 +1: tdeqi r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + /* The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. @@ -383,12 +391,10 @@ restore_gprs: /* ******************** CR,LR,CCR,MSR ********** */ ld r4, _CTR(r7) ld r5, _LINK(r7) - ld r6, _CCR(r7) ld r8, _XER(r7) mtctr r4 mtlr r5 - mtcr r6 mtxer r8 /* ******************** TAR ******************** */ @@ -404,7 +410,8 @@ restore_gprs: li r4, 0 mtmsrd r4, 1 - REST_4GPRS(0, r7) /* GPR0-3 */ + REST_GPR(0, r7) /* GPR0 */ + REST_2GPRS(2, r7) /* GPR2-3 */ REST_GPR(4, r7) /* GPR4 */ REST_4GPRS(8, r7) /* GPR8-11 */ REST_2GPRS(12, r7) /* GPR12-13 */ @@ -416,6 +423,31 @@ restore_gprs: mtspr SPRN_DSCR, r5 mtspr SPRN_PPR, r6 + /* Do final sanity check on TEXASR to make sure FS is set. Do this + * here before we load up the userspace r1 so any bugs we hit will get + * a call chain */ + mfspr r5, SPRN_TEXASR + srdi r5, r5, 16 + li r6, (TEXASR_FS)@h + and r6, r6, r5 +1: tdeqi r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + + /* Do final sanity check on MSR to make sure we are not transactional + * or suspended + */ + mfmsr r6 + li r5, (MSR_TS_MASK)@higher + srdi r6, r6, 32 + and r6, r6, r5 +1: tdnei r6, 0 + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + + /* Restore CR */ + ld r6, _CCR(r7) + mtcr r6 + + REST_GPR(1, r7) /* GPR1 */ REST_GPR(5, r7) /* GPR5-7 */ REST_GPR(6, r7) ld r7, GPR7(r7) -- cgit v1.2.3 From 00f554fadebb96877ad449758dc90303a9826afe Mon Sep 17 00:00:00 2001 From: Philippe Bergheaud Date: Wed, 30 Apr 2014 09:12:01 +1000 Subject: powerpc: memcpy optimization for 64bit LE Unaligned stores take alignment exceptions on POWER7 running in little-endian. This is a dumb little-endian base memcpy that prevents unaligned stores. Once booted the feature fixup code switches over to the VMX copy loops (which are already endian safe). The question is what we do before that switch over. The base 64bit memcpy takes alignment exceptions on POWER7 so we can't use it as is. Fixing the causes of alignment exception would slow it down, because we'd need to ensure all loads and stores are aligned either through rotate tricks or bytewise loads and stores. Either would be bad for all other 64bit platforms. [ I simplified the loop a bit - Anton ] Signed-off-by: Philippe Bergheaud Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/string.h | 4 ---- arch/powerpc/kernel/ppc_ksyms.c | 2 -- arch/powerpc/lib/Makefile | 2 -- arch/powerpc/lib/memcpy_64.S | 16 ++++++++++++++++ 4 files changed, 16 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 0dffad6bcc84..e40010abcaf1 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -10,9 +10,7 @@ #define __HAVE_ARCH_STRNCMP #define __HAVE_ARCH_STRCAT #define __HAVE_ARCH_MEMSET -#ifdef __BIG_ENDIAN__ #define __HAVE_ARCH_MEMCPY -#endif #define __HAVE_ARCH_MEMMOVE #define __HAVE_ARCH_MEMCMP #define __HAVE_ARCH_MEMCHR @@ -24,9 +22,7 @@ extern int strcmp(const char *,const char *); extern int strncmp(const char *, const char *, __kernel_size_t); extern char * strcat(char *, const char *); extern void * memset(void *,int,__kernel_size_t); -#ifdef __BIG_ENDIAN__ extern void * memcpy(void *,const void *,__kernel_size_t); -#endif extern void * memmove(void *,const void *,__kernel_size_t); extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 450850a49dce..48d17d6fca5b 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -155,9 +155,7 @@ EXPORT_SYMBOL(__cmpdi2); #endif long long __bswapdi2(long long); EXPORT_SYMBOL(__bswapdi2); -#ifdef __BIG_ENDIAN__ EXPORT_SYMBOL(memcpy); -#endif EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memcmp); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 95a20e17dbff..59fa2de9546d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -23,9 +23,7 @@ obj-y += checksum_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC64) += checksum_wrappers_64.o endif -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),) obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o -endif obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 72ad055168a3..dc4ba7953b92 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -12,12 +12,27 @@ .align 7 _GLOBAL(memcpy) BEGIN_FTR_SECTION +#ifdef __LITTLE_ENDIAN__ + cmpdi cr7,r5,0 +#else std r3,48(r1) /* save destination pointer for return value */ +#endif FTR_SECTION_ELSE #ifndef SELFTEST b memcpy_power7 #endif ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) +#ifdef __LITTLE_ENDIAN__ + /* dumb little-endian memcpy that will get replaced at runtime */ + addi r9,r3,-1 + addi r4,r4,-1 + beqlr cr7 + mtctr r5 +1: lbzu r10,1(r4) + stbu r10,1(r9) + bdnz 1b + blr +#else PPC_MTOCRF(0x01,r5) cmpldi cr1,r5,16 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry @@ -203,3 +218,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) stb r0,0(r3) 4: ld r3,48(r1) /* return dest pointer */ blr +#endif -- cgit v1.2.3 From 965b5608f7da181923d766b83ead06cbdc7dd4b3 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 20 May 2014 10:20:49 +1000 Subject: Revert "powerpc/powernv: Fundamental reset on PLX ports" This reverts commit b2b5efcf208ddc9444aca77336627428782a39f4. This code was way too board specific, there are quirks as to how the PERST line is wired on different boards, we'll have to revisit this using/creating appropriate firmware interfaces. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 1 - arch/powerpc/platforms/powernv/eeh-ioda.c | 110 +++++------------------------- 2 files changed, 16 insertions(+), 95 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d12529f34524..b76f58c124ca 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -109,7 +109,6 @@ struct eeh_pe { #define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ #define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ #define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */ -#define EEH_DEV_FRESET (1 << 11) /* Fundamental reset */ struct eeh_dev { int mode; /* EEH mode */ diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 79d0cdf786d0..753f08e36dfa 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -477,127 +477,49 @@ out: return 0; } -static bool ioda_eeh_is_plx_dnport(struct pci_dev *dev, int *reg, - int *mask, int *len) -{ - unsigned short *pid; - unsigned short ids[] = { - 0x10b5, 0x8748, 0x0080, 0x0400, /* PLX#8748 */ - 0x0000, 0x0000, 0x0000, 0x0000, /* End flag */ - }; - - if (!pci_is_pcie(dev)) - return false; - if (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM) - return false; - - pid = &ids[0]; - while (!reg) { - if (pid[0] == 0x0) - break; - - if (dev->vendor == pid[0] && - dev->device == pid[1]) { - *reg = pid[2]; - *mask = pid[3]; - *len = 2; - return true; - } - } - - *reg = PCI_BRIDGE_CONTROL; - *mask = PCI_BRIDGE_CTL_BUS_RESET; - *len = 2; - return false; -} - static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option) { struct device_node *dn = pci_device_to_OF_node(dev); struct eeh_dev *edev = of_node_to_eeh_dev(dn); int aer = edev ? edev->aer_cap : 0; - int reg, mask, val, len; - bool is_plx_dnport; + u32 ctrl; pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", __func__, pci_domain_nr(dev->bus), dev->bus->number, option); - - is_plx_dnport = ioda_eeh_is_plx_dnport(dev, ®, &mask, &len); - if (option == EEH_RESET_FUNDAMENTAL) - if (!is_plx_dnport || !edev) - option = EEH_RESET_HOT; - - if (option == EEH_RESET_HOT) { - reg = PCI_BRIDGE_CONTROL; - mask = PCI_BRIDGE_CTL_BUS_RESET; - len = 2; - } - - if (option == EEH_RESET_DEACTIVATE) { - if (!is_plx_dnport || !edev || - !(edev->mode & EEH_DEV_FRESET)) { - reg = PCI_BRIDGE_CONTROL; - mask = PCI_BRIDGE_CTL_BUS_RESET; - len = 2; - } - } - switch (option) { case EEH_RESET_FUNDAMENTAL: - edev->mode |= EEH_DEV_FRESET; - /* Fall through */ case EEH_RESET_HOT: + /* Don't report linkDown event */ if (aer) { - /* Mask receiver error */ - eeh_ops->read_config(dn, aer + PCI_ERR_COR_MASK, - 4, &val); - val |= PCI_ERR_COR_RCVR; - eeh_ops->write_config(dn, aer + PCI_ERR_COR_MASK, - 4, val); - - /* Mask linkDown */ eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, &val); - val |= PCI_ERR_UNC_SURPDN; + 4, &ctrl); + ctrl |= PCI_ERR_UNC_SURPDN; eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, val); - } + 4, ctrl); + } - eeh_ops->read_config(dn, reg, len, &val); - val |= mask; - eeh_ops->write_config(dn, reg, len, val); + eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(dn, reg, len, &val); - val &= ~mask; - eeh_ops->write_config(dn, reg, len, val); + eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); msleep(EEH_PE_RST_SETTLE_TIME); - if (edev) - edev->mode &= ~EEH_DEV_FRESET; + /* Continue reporting linkDown event */ if (aer) { - /* Clear receive error and enable it */ - eeh_ops->write_config(dn, aer + PCI_ERR_COR_STATUS, - 4, PCI_ERR_COR_RCVR); - eeh_ops->read_config(dn, aer + PCI_ERR_COR_MASK, - 4, &val); - val &= ~PCI_ERR_COR_RCVR; - eeh_ops->write_config(dn, aer + PCI_ERR_COR_MASK, - 4, val); - - /* Clear linkDown and enable it */ - eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_STATUS, - 4, PCI_ERR_UNC_SURPDN); eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, &val); - val &= ~PCI_ERR_UNC_SURPDN; + 4, &ctrl); + ctrl &= ~PCI_ERR_UNC_SURPDN; eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, val); + 4, ctrl); } break; -- cgit v1.2.3 From 04c32a516806ec74b62048baf4cddcbb840927db Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 29 Apr 2014 15:25:16 -0400 Subject: powerpc: Drop return value from set_breakpoint as it is unused None of the callers check the return value, so it might as well not have one at all. Signed-off-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/debug.h | 2 +- arch/powerpc/kernel/process.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index d2516308ed1e..1d7f966d3b18 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -46,7 +46,7 @@ static inline int debugger_break_match(struct pt_regs *regs) { return 0; } static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif -int set_breakpoint(struct arch_hw_breakpoint *brk); +void set_breakpoint(struct arch_hw_breakpoint *brk); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int signal_code, int brkpt); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2ae1b99166c6..f895a5062287 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -496,14 +496,14 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) return 0; } -int set_breakpoint(struct arch_hw_breakpoint *brk) +void set_breakpoint(struct arch_hw_breakpoint *brk) { __get_cpu_var(current_brk) = *brk; if (cpu_has_feature(CPU_FTR_DAWR)) - return set_dawr(brk); - - return set_dabr(brk); + set_dawr(brk); + else + set_dabr(brk); } #ifdef CONFIG_PPC64 -- cgit v1.2.3 From 21f585073d6347651f2262da187606fa1c4ee16d Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 29 Apr 2014 15:25:17 -0400 Subject: powerpc: Fix smp_processor_id() in preemptible splat in set_breakpoint Currently, on 8641D, which doesn't set CONFIG_HAVE_HW_BREAKPOINT we get the following splat: BUG: using smp_processor_id() in preemptible [00000000] code: login/1382 caller is set_breakpoint+0x1c/0xa0 CPU: 0 PID: 1382 Comm: login Not tainted 3.15.0-rc3-00041-g2aafe1a4d451 #1 Call Trace: [decd5d80] [c0008dc4] show_stack+0x50/0x158 (unreliable) [decd5dc0] [c03c6fa0] dump_stack+0x7c/0xdc [decd5de0] [c01f8818] check_preemption_disabled+0xf4/0x104 [decd5e00] [c00086b8] set_breakpoint+0x1c/0xa0 [decd5e10] [c00d4530] flush_old_exec+0x2bc/0x588 [decd5e40] [c011c468] load_elf_binary+0x2ac/0x1164 [decd5ec0] [c00d35f8] search_binary_handler+0xc4/0x1f8 [decd5ef0] [c00d4ee8] do_execve+0x3d8/0x4b8 [decd5f40] [c001185c] ret_from_syscall+0x0/0x38 --- Exception: c01 at 0xfeee554 LR = 0xfeee7d4 The call path in this case is: flush_thread --> set_debug_reg_defaults --> set_breakpoint --> __get_cpu_var Since preemption is enabled in the cleanup of flush thread, and there is no need to disable it, introduce the distinction between set_breakpoint and __set_breakpoint, leaving only the flush_thread instance as the current user of set_breakpoint. Signed-off-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/debug.h | 1 + arch/powerpc/include/asm/hw_breakpoint.h | 2 +- arch/powerpc/kernel/hw_breakpoint.c | 8 ++++---- arch/powerpc/kernel/process.c | 11 +++++++++-- arch/powerpc/kernel/signal.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- 6 files changed, 17 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 1d7f966d3b18..a954e4975049 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -47,6 +47,7 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif void set_breakpoint(struct arch_hw_breakpoint *brk); +void __set_breakpoint(struct arch_hw_breakpoint *brk); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int signal_code, int brkpt); diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index eb0f4ac75c4c..ac6432d9be46 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -79,7 +79,7 @@ static inline void hw_breakpoint_disable(void) brk.address = 0; brk.type = 0; brk.len = 0; - set_breakpoint(&brk); + __set_breakpoint(&brk); } extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index b0a1792279bb..0bb5918faaaf 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -72,7 +72,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) * If so, DABR will be populated in single_step_dabr_instruction(). */ if (current->thread.last_hit_ubp != bp) - set_breakpoint(info); + __set_breakpoint(info); return 0; } @@ -198,7 +198,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) info = counter_arch_bp(tsk->thread.last_hit_ubp); regs->msr &= ~MSR_SE; - set_breakpoint(info); + __set_breakpoint(info); tsk->thread.last_hit_ubp = NULL; } @@ -284,7 +284,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - set_breakpoint(info); + __set_breakpoint(info); out: rcu_read_unlock(); return rc; @@ -316,7 +316,7 @@ int __kprobes single_step_dabr_instruction(struct die_args *args) if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - set_breakpoint(info); + __set_breakpoint(info); current->thread.last_hit_ubp = NULL; /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f895a5062287..8a1edbe26b8f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -496,7 +496,7 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) return 0; } -void set_breakpoint(struct arch_hw_breakpoint *brk) +void __set_breakpoint(struct arch_hw_breakpoint *brk) { __get_cpu_var(current_brk) = *brk; @@ -506,6 +506,13 @@ void set_breakpoint(struct arch_hw_breakpoint *brk) set_dabr(brk); } +void set_breakpoint(struct arch_hw_breakpoint *brk) +{ + preempt_disable(); + __set_breakpoint(brk); + preempt_enable(); +} + #ifdef CONFIG_PPC64 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); #endif @@ -835,7 +842,7 @@ struct task_struct *__switch_to(struct task_struct *prev, */ #ifndef CONFIG_HAVE_HW_BREAKPOINT if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) - set_breakpoint(&new->thread.hw_brk); + __set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 8fc4177ed65a..1c794cef2883 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -134,7 +134,7 @@ static int do_signal(struct pt_regs *regs) */ if (current->thread.hw_brk.address && current->thread.hw_brk.type) - set_breakpoint(¤t->thread.hw_brk); + __set_breakpoint(¤t->thread.hw_brk); #endif /* Re-enable the breakpoints for the signal stack */ thread_change_pc(current, regs); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 08504e75b2c7..d3759b7a5535 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -759,7 +759,7 @@ static void insert_cpu_bpts(void) brk.address = dabr.address; brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; brk.len = 8; - set_breakpoint(&brk); + __set_breakpoint(&brk); } if (iabr && cpu_has_feature(CPU_FTR_IABR)) mtspr(SPRN_IABR, iabr->address -- cgit v1.2.3 From 1efc563865dbc3710c5af7bc1540360a6d06192c Mon Sep 17 00:00:00 2001 From: Jeff Bailey Date: Sat, 17 May 2014 15:05:43 +0000 Subject: powerpc: Clear ELF personality flag if ELFv2 is not requested. powerpc: Clear ELF personality flag if ELFv2 is not requested. The POWER kernel uses a personality flag to determine whether it should be setting up function descriptors or not (per the updated ABI). This flag wasn't being cleared on a new process but instead was being inherited. The visible effect was that an ELFv2 binary could not execve to an ELFv1 binary. Signed-off-by: Jeff Bailey arch/powerpc/include/asm/elf.h | 2 ++ 1 file changed, 2 insertions(+) Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/elf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 935b5e7a1436..888d8f3f2524 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -90,6 +90,8 @@ typedef elf_vrregset_t elf_fpxregset_t; do { \ if (((ex).e_flags & 0x3) == 2) \ set_thread_flag(TIF_ELF2ABI); \ + else \ + clear_thread_flag(TIF_ELF2ABI); \ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ set_thread_flag(TIF_32BIT); \ else \ -- cgit v1.2.3 From 8c272261194dfda11cc046fbe808e052f6f284eb Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Mon, 19 May 2014 11:14:23 -0700 Subject: powerpc/numa: Enable USE_PERCPU_NUMA_NODE_ID Based off 3bccd996 for ia64, convert powerpc to use the generic per-CPU topology tracking, specifically: initialize per cpu numa_node entry in start_secondary remove the powerpc cpu_to_node() define CONFIG_USE_PERCPU_NUMA_NODE_ID if NUMA Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 4 ++++ arch/powerpc/include/asm/topology.h | 13 ------------- arch/powerpc/kernel/smp.c | 6 ++++++ 3 files changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e0998997943b..9125964af409 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -453,6 +453,10 @@ config NODES_SHIFT default "4" depends on NEED_MULTIPLE_NODES +config USE_PERCPU_NUMA_NODE_ID + def_bool y + depends on NUMA + config ARCH_SELECT_MEMORY_MODEL def_bool y depends on PPC64 diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index c9202151079f..5ecf7ea52ad8 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -20,19 +20,6 @@ struct device_node; #include -static inline int cpu_to_node(int cpu) -{ - int nid; - - nid = numa_cpu_lookup_table[cpu]; - - /* - * During early boot, the numa-cpu lookup table might not have been - * setup for all CPUs yet. In such cases, default to node 0. - */ - return (nid < 0) ? 0 : nid; -} - #define parent_node(node) (node) #define cpumask_of_node(node) ((node) == -1 ? \ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e2a4232c5871..d7252adea759 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -390,6 +390,7 @@ void smp_prepare_boot_cpu(void) #ifdef CONFIG_PPC64 paca[boot_cpuid].__current = current; #endif + set_numa_node(numa_cpu_lookup_table[boot_cpuid]); current_set[boot_cpuid] = task_thread_info(current); } @@ -750,6 +751,11 @@ void start_secondary(void *unused) } traverse_core_siblings(cpu, true); + /* + * numa_node_id() works after this. + */ + set_numa_node(numa_cpu_lookup_table[cpu]); + smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); -- cgit v1.2.3 From 441c19c8a290f5f1e1b263691641124c84232b6e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 May 2014 18:15:25 +1000 Subject: powerpc/kvm/book3s_hv: Rework the secondary inhibit code As part of the support for split core on POWER8, we want to be able to block splitting of the core while KVM VMs are active. The logic to do that would be exactly the same as the code we currently have for inhibiting onlining of secondaries. Instead of adding an identical mechanism to block split core, rework the secondary inhibit code to be a "HV KVM is active" check. We can then use that in both the cpu hotplug code and the upcoming split core code. Signed-off-by: Michael Ellerman Signed-off-by: Michael Neuling Acked-by: Alexander Graf Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/kvm_ppc.h | 7 +++++++ arch/powerpc/include/asm/smp.h | 8 -------- arch/powerpc/kernel/smp.c | 34 +++------------------------------- arch/powerpc/kvm/book3s_hv.c | 8 ++++---- arch/powerpc/kvm/book3s_hv_builtin.c | 31 +++++++++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 43 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 4096f16502a9..2c8e39951ab5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -337,6 +337,10 @@ static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu); } +extern void kvm_hv_vm_activated(void); +extern void kvm_hv_vm_deactivated(void); +extern bool kvm_hv_mode_active(void); + #else static inline void __init kvm_cma_reserve(void) {} @@ -356,6 +360,9 @@ static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) { kvm_vcpu_kick(vcpu); } + +static inline bool kvm_hv_mode_active(void) { return false; } + #endif #ifdef CONFIG_KVM_XICS diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index ff51046b6466..5a6614a7f0b2 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -68,14 +68,6 @@ void generic_mach_cpu_die(void); void generic_set_cpu_dead(unsigned int cpu); void generic_set_cpu_up(unsigned int cpu); int generic_check_cpu_restart(unsigned int cpu); - -extern void inhibit_secondary_onlining(void); -extern void uninhibit_secondary_onlining(void); - -#else /* HOTPLUG_CPU */ -static inline void inhibit_secondary_onlining(void) {} -static inline void uninhibit_secondary_onlining(void) {} - #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4863ea14f270..5cdd9eb3b24c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -458,38 +459,9 @@ int generic_check_cpu_restart(unsigned int cpu) return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE; } -static atomic_t secondary_inhibit_count; - -/* - * Don't allow secondary CPU threads to come online - */ -void inhibit_secondary_onlining(void) -{ - /* - * This makes secondary_inhibit_count stable during cpu - * online/offline operations. - */ - get_online_cpus(); - - atomic_inc(&secondary_inhibit_count); - put_online_cpus(); -} -EXPORT_SYMBOL_GPL(inhibit_secondary_onlining); - -/* - * Allow secondary CPU threads to come online again - */ -void uninhibit_secondary_onlining(void) -{ - get_online_cpus(); - atomic_dec(&secondary_inhibit_count); - put_online_cpus(); -} -EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining); - -static int secondaries_inhibited(void) +static bool secondaries_inhibited(void) { - return atomic_read(&secondary_inhibit_count); + return kvm_hv_mode_active(); } #else /* HOTPLUG_CPU */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8227dba5af0f..d7b74f888ad8 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2317,10 +2317,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) spin_lock_init(&kvm->arch.slot_phys_lock); /* - * Don't allow secondary CPU threads to come online - * while any KVM VMs exist. + * Track that we now have a HV mode VM active. This blocks secondary + * CPU threads from coming online. */ - inhibit_secondary_onlining(); + kvm_hv_vm_activated(); return 0; } @@ -2336,7 +2336,7 @@ static void kvmppc_free_vcores(struct kvm *kvm) static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) { - uninhibit_secondary_onlining(); + kvm_hv_vm_deactivated(); kvmppc_free_vcores(kvm); if (kvm->arch.rma) { diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 8cd0daebb82d..7cde8a665205 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -181,3 +182,33 @@ void __init kvm_cma_reserve(void) kvm_cma_declare_contiguous(selected_size, align_size); } } + +/* + * When running HV mode KVM we need to block certain operations while KVM VMs + * exist in the system. We use a counter of VMs to track this. + * + * One of the operations we need to block is onlining of secondaries, so we + * protect hv_vm_count with get/put_online_cpus(). + */ +static atomic_t hv_vm_count; + +void kvm_hv_vm_activated(void) +{ + get_online_cpus(); + atomic_inc(&hv_vm_count); + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(kvm_hv_vm_activated); + +void kvm_hv_vm_deactivated(void) +{ + get_online_cpus(); + atomic_dec(&hv_vm_count); + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated); + +bool kvm_hv_mode_active(void) +{ + return atomic_read(&hv_vm_count) != 0; +} -- cgit v1.2.3 From 8d6f7c5aa3db6f3e5e43d09f8a0166c7d96f33f3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 May 2014 18:15:26 +1000 Subject: powerpc/powernv: Make it possible to skip the IRQHAPPENED check in power7_nap() To support split core we need to be able to force all secondaries into nap, so the core can detect they are idle and do an unsplit. Currently power7_nap() will return without napping if there is an irq pending. We want to ignore the pending irq and nap anyway, we will deal with the interrupt later. Signed-off-by: Michael Ellerman Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 2 +- arch/powerpc/kernel/idle_power7.S | 9 +++++++++ arch/powerpc/platforms/powernv/smp.c | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index d660dc36831a..6d59072e13a7 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -449,7 +449,7 @@ extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ -extern void power7_nap(void); +extern void power7_nap(int check_irq); extern void power7_sleep(void); extern void flush_instruction_cache(void); extern void hard_reset_now(void); diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index dca6e16c2436..2480256272d4 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -39,6 +39,10 @@ * Pass requested state in r3: * 0 - nap * 1 - sleep + * + * To check IRQ_HAPPENED in r4 + * 0 - don't check + * 1 - check */ _GLOBAL(power7_powersave_common) /* Use r3 to pass state nap/sleep/winkle */ @@ -71,6 +75,8 @@ _GLOBAL(power7_powersave_common) lbz r0,PACAIRQHAPPENED(r13) cmpwi cr0,r0,0 beq 1f + cmpwi cr0,r4,0 + beq 1f addi r1,r1,INT_FRAME_SIZE ld r0,16(r1) mtlr r0 @@ -114,15 +120,18 @@ _GLOBAL(power7_idle) lwz r4,ADDROFF(powersave_nap)(r3) cmpwi 0,r4,0 beqlr + li r3, 1 /* fall through */ _GLOBAL(power7_nap) + mr r4,r3 li r3,0 b power7_powersave_common /* No return */ _GLOBAL(power7_sleep) li r3,1 + li r4,0 b power7_powersave_common /* No return */ diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 1601a1ea02c4..65faf998fe2c 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -159,7 +159,7 @@ static void pnv_smp_cpu_kill_self(void) mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); while (!generic_check_cpu_restart(cpu)) { ppc64_runlatch_off(); - power7_nap(); + power7_nap(1); ppc64_runlatch_on(); if (!generic_check_cpu_restart(cpu)) { DBG("CPU%d Unexpected exit while offline !\n", cpu); -- cgit v1.2.3 From 5853aef1ac5c5d83076203e840ca463857a7515d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 May 2014 18:15:27 +1000 Subject: powerpc: Add threads_per_subcore On POWER8 we have a new concept of a subcore. This is what happens when you take a regular core and split it. A subcore is a grouping of two or four SMT threads, as well as a handfull of SPRs which allows the subcore to appear as if it were a core from the point of view of a guest. Unlike threads_per_core which is fixed at boot, threads_per_subcore can change while the system is running. Most code will not want to use threads_per_subcore. Signed-off-by: Michael Ellerman Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cputhreads.h | 7 +++++++ arch/powerpc/kernel/setup-common.c | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index ac3eedb9b74a..2bf8e9307be9 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -18,10 +18,12 @@ #ifdef CONFIG_SMP extern int threads_per_core; +extern int threads_per_subcore; extern int threads_shift; extern cpumask_t threads_core_mask; #else #define threads_per_core 1 +#define threads_per_subcore 1 #define threads_shift 0 #define threads_core_mask (CPU_MASK_CPU0) #endif @@ -74,6 +76,11 @@ static inline int cpu_thread_in_core(int cpu) return cpu & (threads_per_core - 1); } +static inline int cpu_thread_in_subcore(int cpu) +{ + return cpu & (threads_per_subcore - 1); +} + static inline int cpu_first_thread_sibling(int cpu) { return cpu & ~(threads_per_core - 1); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 3cf25c89469d..aa0f5edd8570 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -390,9 +390,10 @@ void __init check_for_initrd(void) #ifdef CONFIG_SMP -int threads_per_core, threads_shift; +int threads_per_core, threads_per_subcore, threads_shift; cpumask_t threads_core_mask; EXPORT_SYMBOL_GPL(threads_per_core); +EXPORT_SYMBOL_GPL(threads_per_subcore); EXPORT_SYMBOL_GPL(threads_shift); EXPORT_SYMBOL_GPL(threads_core_mask); @@ -401,6 +402,7 @@ static void __init cpu_init_thread_core_maps(int tpc) int i; threads_per_core = tpc; + threads_per_subcore = tpc; cpumask_clear(&threads_core_mask); /* This implementation only supports power of 2 number of threads -- cgit v1.2.3 From e2186023f2d81ee7bb42d2a7dec3d889df7cdace Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 23 May 2014 18:15:30 +1000 Subject: powerpc/powernv: Add support for POWER8 split core on powernv Upcoming POWER8 chips support a concept called split core. This is where the core can be split into subcores that although not full cores, are able to appear as full cores to a guest. The splitting & unsplitting procedure is mildly complicated, and explained at length in the comments within the patch. One notable detail is that when splitting or unsplitting we need to pull offline cpus out of their offline state to do work as part of the procedure. The interface for changing the split mode is via a sysfs file, eg: $ echo 2 > /sys/devices/system/cpu/subcores_per_core Currently supported values are '1', '2' and '4'. And indicate respectively that the core should be unsplit, split in half, and split in quarters. These modes correspond to threads_per_subcore of 8, 4 and 2. We do not allow changing the split mode while KVM VMs are active. This is to prevent the value changing while userspace is configuring the VM, and also to prevent the mode being changed in such a way that existing guests are unable to be run. CPU hotplug fixes by Srivatsa. max_cpus fixes by Mahesh. cpuset fixes by benh. Fix for irq race by paulus. The rest by mikey and mpe. Signed-off-by: Michael Ellerman Signed-off-by: Michael Neuling Signed-off-by: Srivatsa S. Bhat Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/reg.h | 9 + arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/powernv.h | 2 + arch/powerpc/platforms/powernv/smp.c | 18 +- arch/powerpc/platforms/powernv/subcore-asm.S | 95 +++++++ arch/powerpc/platforms/powernv/subcore.c | 392 +++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/subcore.h | 18 ++ 7 files changed, 527 insertions(+), 9 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/subcore-asm.S create mode 100644 arch/powerpc/platforms/powernv/subcore.c create mode 100644 arch/powerpc/platforms/powernv/subcore.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 29de0152878f..2cd799b382ec 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -225,6 +225,7 @@ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 #define SPRN_DAWR 0xB4 +#define SPRN_RPR 0xBA /* Relative Priority Register */ #define SPRN_CIABR 0xBB #define CIABR_PRIV 0x3 #define CIABR_PRIV_USER 1 @@ -273,8 +274,10 @@ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ #define SPRN_IC 0x350 /* Virtual Instruction Count */ #define SPRN_VTB 0x351 /* Virtual Time Base */ +#define SPRN_LDBAR 0x352 /* LD Base Address Register */ #define SPRN_PMICR 0x354 /* Power Management Idle Control Reg */ #define SPRN_PMSR 0x355 /* Power Management Status Reg */ +#define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */ #define SPRN_PMCR 0x374 /* Power Management Control Register */ /* HFSCR and FSCR bit numbers are the same */ @@ -434,6 +437,12 @@ #define HID0_BTCD (1<<1) /* Branch target cache disable */ #define HID0_NOPDST (1<<1) /* No-op dst, dstt, etc. instr. */ #define HID0_NOPTI (1<<0) /* No-op dcbt and dcbst instr. */ +/* POWER8 HID0 bits */ +#define HID0_POWER8_4LPARMODE __MASK(61) +#define HID0_POWER8_2LPARMODE __MASK(57) +#define HID0_POWER8_1TO2LPAR __MASK(52) +#define HID0_POWER8_1TO4LPAR __MASK(51) +#define HID0_POWER8_DYNLPARDIS __MASK(48) #define SPRN_HID1 0x3F1 /* Hardware Implementation Register 1 */ #ifdef CONFIG_6xx diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 63cebb9b4d45..4ad0d345bc96 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,7 +1,7 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o +obj-y += opal-msglog.o subcore.o subcore-asm.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 0051e108ef0f..75501bfede7f 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -25,4 +25,6 @@ static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) extern void pnv_lpc_init(void); +bool cpu_core_split_required(void); + #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 65faf998fe2c..0062a43a2e0d 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -161,15 +161,17 @@ static void pnv_smp_cpu_kill_self(void) ppc64_runlatch_off(); power7_nap(1); ppc64_runlatch_on(); - if (!generic_check_cpu_restart(cpu)) { + + /* Reenable IRQs briefly to clear the IPI that woke us */ + local_irq_enable(); + local_irq_disable(); + mb(); + + if (cpu_core_split_required()) + continue; + + if (!generic_check_cpu_restart(cpu)) DBG("CPU%d Unexpected exit while offline !\n", cpu); - /* We may be getting an IPI, so we re-enable - * interrupts to process it, it will be ignored - * since we aren't online (hopefully) - */ - local_irq_enable(); - local_irq_disable(); - } } mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1); DBG("CPU%d coming online...\n", cpu); diff --git a/arch/powerpc/platforms/powernv/subcore-asm.S b/arch/powerpc/platforms/powernv/subcore-asm.S new file mode 100644 index 000000000000..39bb24aa8f34 --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore-asm.S @@ -0,0 +1,95 @@ +/* + * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "subcore.h" + + +_GLOBAL(split_core_secondary_loop) + /* + * r3 = u8 *state, used throughout the routine + * r4 = temp + * r5 = temp + * .. + * r12 = MSR + */ + mfmsr r12 + + /* Disable interrupts so SRR0/1 don't get trashed */ + li r4,0 + ori r4,r4,MSR_EE|MSR_SE|MSR_BE|MSR_RI + andc r4,r12,r4 + sync + mtmsrd r4 + + /* Switch to real mode and leave interrupts off */ + li r5, MSR_IR|MSR_DR + andc r5, r4, r5 + + LOAD_REG_ADDR(r4, real_mode) + + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + rfid + b . /* prevent speculative execution */ + +real_mode: + /* Grab values from unsplit SPRs */ + mfspr r6, SPRN_LDBAR + mfspr r7, SPRN_PMMAR + mfspr r8, SPRN_PMCR + mfspr r9, SPRN_RPR + mfspr r10, SPRN_SDR1 + + /* Order reading the SPRs vs telling the primary we are ready to split */ + sync + + /* Tell thread 0 we are in real mode */ + li r4, SYNC_STEP_REAL_MODE + stb r4, 0(r3) + + li r5, (HID0_POWER8_4LPARMODE | HID0_POWER8_2LPARMODE)@highest + sldi r5, r5, 48 + + /* Loop until we see the split happen in HID0 */ +1: mfspr r4, SPRN_HID0 + and. r4, r4, r5 + beq 1b + + /* + * We only need to initialise the below regs once for each subcore, + * but it's simpler and harmless to do it on each thread. + */ + + /* Make sure various SPRS have sane values */ + li r4, 0 + mtspr SPRN_LPID, r4 + mtspr SPRN_PCR, r4 + mtspr SPRN_HDEC, r4 + + /* Restore SPR values now we are split */ + mtspr SPRN_LDBAR, r6 + mtspr SPRN_PMMAR, r7 + mtspr SPRN_PMCR, r8 + mtspr SPRN_RPR, r9 + mtspr SPRN_SDR1, r10 + + LOAD_REG_ADDR(r5, virtual_mode) + + /* Get out of real mode */ + mtspr SPRN_SRR0,r5 + mtspr SPRN_SRR1,r12 + rfid + b . /* prevent speculative execution */ + +virtual_mode: + blr diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c new file mode 100644 index 000000000000..894ecb3eb596 --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -0,0 +1,392 @@ +/* + * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "powernv: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "subcore.h" + + +/* + * Split/unsplit procedure: + * + * A core can be in one of three states, unsplit, 2-way split, and 4-way split. + * + * The mapping to subcores_per_core is simple: + * + * State | subcores_per_core + * ------------|------------------ + * Unsplit | 1 + * 2-way split | 2 + * 4-way split | 4 + * + * The core is split along thread boundaries, the mapping between subcores and + * threads is as follows: + * + * Unsplit: + * ---------------------------- + * Subcore | 0 | + * ---------------------------- + * Thread | 0 1 2 3 4 5 6 7 | + * ---------------------------- + * + * 2-way split: + * ------------------------------------- + * Subcore | 0 | 1 | + * ------------------------------------- + * Thread | 0 1 2 3 | 4 5 6 7 | + * ------------------------------------- + * + * 4-way split: + * ----------------------------------------- + * Subcore | 0 | 1 | 2 | 3 | + * ----------------------------------------- + * Thread | 0 1 | 2 3 | 4 5 | 6 7 | + * ----------------------------------------- + * + * + * Transitions + * ----------- + * + * It is not possible to transition between either of the split states, the + * core must first be unsplit. The legal transitions are: + * + * ----------- --------------- + * | | <----> | 2-way split | + * | | --------------- + * | Unsplit | + * | | --------------- + * | | <----> | 4-way split | + * ----------- --------------- + * + * Unsplitting + * ----------- + * + * Unsplitting is the simpler procedure. It requires thread 0 to request the + * unsplit while all other threads NAP. + * + * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells + * the hardware that if all threads except 0 are napping, the hardware should + * unsplit the core. + * + * Non-zero threads are sent to a NAP loop, they don't exit the loop until they + * see the core unsplit. + * + * Core 0 spins waiting for the hardware to see all the other threads napping + * and perform the unsplit. + * + * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them + * out of NAP. They will then see the core unsplit and exit the NAP loop. + * + * Splitting + * --------- + * + * The basic splitting procedure is fairly straight forward. However it is + * complicated by the fact that after the split occurs, the newly created + * subcores are not in a fully initialised state. + * + * Most notably the subcores do not have the correct value for SDR1, which + * means they must not be running in virtual mode when the split occurs. The + * subcores have separate timebases SPRs but these are pre-synchronised by + * opal. + * + * To begin with secondary threads are sent to an assembly routine. There they + * switch to real mode, so they are immune to the uninitialised SDR1 value. + * Once in real mode they indicate that they are in real mode, and spin waiting + * to see the core split. + * + * Thread 0 waits to see that all secondaries are in real mode, and then begins + * the splitting procedure. It firstly sets HID0_POWER8_DYNLPARDIS, which + * prevents the hardware from unsplitting. Then it sets the appropriate HID bit + * to request the split, and spins waiting to see that the split has happened. + * + * Concurrently the secondaries will notice the split. When they do they set up + * their SPRs, notably SDR1, and then they can return to virtual mode and exit + * the procedure. + */ + +/* Initialised at boot by subcore_init() */ +static int subcores_per_core; + +/* + * Used to communicate to offline cpus that we want them to pop out of the + * offline loop and do a split or unsplit. + * + * 0 - no split happening + * 1 - unsplit in progress + * 2 - split to 2 in progress + * 4 - split to 4 in progress + */ +static int new_split_mode; + +static cpumask_var_t cpu_offline_mask; + +struct split_state { + u8 step; + u8 master; +}; + +static DEFINE_PER_CPU(struct split_state, split_state); + +static void wait_for_sync_step(int step) +{ + int i, cpu = smp_processor_id(); + + for (i = cpu + 1; i < cpu + threads_per_core; i++) + while(per_cpu(split_state, i).step < step) + barrier(); + + /* Order the wait loop vs any subsequent loads/stores. */ + mb(); +} + +static void unsplit_core(void) +{ + u64 hid0, mask; + int i, cpu; + + mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE; + + cpu = smp_processor_id(); + if (cpu_thread_in_core(cpu) != 0) { + while (mfspr(SPRN_HID0) & mask) + power7_nap(0); + + per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; + return; + } + + hid0 = mfspr(SPRN_HID0); + hid0 &= ~HID0_POWER8_DYNLPARDIS; + mtspr(SPRN_HID0, hid0); + + while (mfspr(SPRN_HID0) & mask) + cpu_relax(); + + /* Wake secondaries out of NAP */ + for (i = cpu + 1; i < cpu + threads_per_core; i++) + smp_send_reschedule(i); + + wait_for_sync_step(SYNC_STEP_UNSPLIT); +} + +static void split_core(int new_mode) +{ + struct { u64 value; u64 mask; } split_parms[2] = { + { HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE }, + { HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE } + }; + int i, cpu; + u64 hid0; + + /* Convert new_mode (2 or 4) into an index into our parms array */ + i = (new_mode >> 1) - 1; + BUG_ON(i < 0 || i > 1); + + cpu = smp_processor_id(); + if (cpu_thread_in_core(cpu) != 0) { + split_core_secondary_loop(&per_cpu(split_state, cpu).step); + return; + } + + wait_for_sync_step(SYNC_STEP_REAL_MODE); + + /* Write new mode */ + hid0 = mfspr(SPRN_HID0); + hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; + mtspr(SPRN_HID0, hid0); + + /* Wait for it to happen */ + while (!(mfspr(SPRN_HID0) & split_parms[i].mask)) + cpu_relax(); +} + +static void cpu_do_split(int new_mode) +{ + /* + * At boot subcores_per_core will be 0, so we will always unsplit at + * boot. In the usual case where the core is already unsplit it's a + * nop, and this just ensures the kernel's notion of the mode is + * consistent with the hardware. + */ + if (subcores_per_core != 1) + unsplit_core(); + + if (new_mode != 1) + split_core(new_mode); + + mb(); + per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED; +} + +bool cpu_core_split_required(void) +{ + smp_rmb(); + + if (!new_split_mode) + return false; + + cpu_do_split(new_split_mode); + + return true; +} + +static int cpu_update_split_mode(void *data) +{ + int cpu, new_mode = *(int *)data; + + if (this_cpu_ptr(&split_state)->master) { + new_split_mode = new_mode; + smp_wmb(); + + cpumask_andnot(cpu_offline_mask, cpu_present_mask, + cpu_online_mask); + + /* This should work even though the cpu is offline */ + for_each_cpu(cpu, cpu_offline_mask) + smp_send_reschedule(cpu); + } + + cpu_do_split(new_mode); + + if (this_cpu_ptr(&split_state)->master) { + /* Wait for all cpus to finish before we touch subcores_per_core */ + for_each_present_cpu(cpu) { + if (cpu >= setup_max_cpus) + break; + + while(per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED) + barrier(); + } + + new_split_mode = 0; + + /* Make the new mode public */ + subcores_per_core = new_mode; + threads_per_subcore = threads_per_core / subcores_per_core; + + /* Make sure the new mode is written before we exit */ + mb(); + } + + return 0; +} + +static int set_subcores_per_core(int new_mode) +{ + struct split_state *state; + int cpu; + + if (kvm_hv_mode_active()) { + pr_err("Unable to change split core mode while KVM active.\n"); + return -EBUSY; + } + + /* + * We are only called at boot, or from the sysfs write. If that ever + * changes we'll need a lock here. + */ + BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3); + + for_each_present_cpu(cpu) { + state = &per_cpu(split_state, cpu); + state->step = SYNC_STEP_INITIAL; + state->master = 0; + } + + get_online_cpus(); + + /* This cpu will update the globals before exiting stop machine */ + this_cpu_ptr(&split_state)->master = 1; + + /* Ensure state is consistent before we call the other cpus */ + mb(); + + stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask); + + put_online_cpus(); + + return 0; +} + +static ssize_t __used store_subcores_per_core(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int rc; + + /* We are serialised by the attribute lock */ + + rc = sscanf(buf, "%lx", &val); + if (rc != 1) + return -EINVAL; + + switch (val) { + case 1: + case 2: + case 4: + if (subcores_per_core == val) + /* Nothing to do */ + goto out; + break; + default: + return -EINVAL; + } + + rc = set_subcores_per_core(val); + if (rc) + return rc; + +out: + return count; +} + +static ssize_t show_subcores_per_core(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%x\n", subcores_per_core); +} + +static DEVICE_ATTR(subcores_per_core, 0644, + show_subcores_per_core, store_subcores_per_core); + +static int subcore_init(void) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return 0; + + /* + * We need all threads in a core to be present to split/unsplit so + * continue only if max_cpus are aligned to threads_per_core. + */ + if (setup_max_cpus % threads_per_core) + return 0; + + BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL)); + + set_subcores_per_core(1); + + return device_create_file(cpu_subsys.dev_root, + &dev_attr_subcores_per_core); +} +machine_device_initcall(powernv, subcore_init); diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h new file mode 100644 index 000000000000..148abc91debf --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore.h @@ -0,0 +1,18 @@ +/* + * Copyright 2013, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* These are ordered and tested with <= */ +#define SYNC_STEP_INITIAL 0 +#define SYNC_STEP_UNSPLIT 1 /* Set by secondary when it sees unsplit */ +#define SYNC_STEP_REAL_MODE 2 /* Set by secondary when in real mode */ +#define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */ + +#ifndef __ASSEMBLY__ +void split_core_secondary_loop(u8 *state); +#endif -- cgit v1.2.3 From 4750afa2c56821c9184c4d1b6606c3c459ca73de Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Mon, 5 May 2014 10:47:14 +0530 Subject: powerpc: Fix comment around arch specific definition of RECLAIM_DISTANCE Commit 32e45ff43eaf5c17f changed the default value of RECLAIM_DISTANCE to 30. However the comment around arch specifc definition of RECLAIM_DISTANCE is not updated to reflect the same. Correct the value mentioned in the comment. Signed-off-by: Preeti U Murthy Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: KOSAKI Motohiro Acked-by: KOSAKI Motohiro Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 5ecf7ea52ad8..4d9fab0710e6 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -12,7 +12,7 @@ struct device_node; * Before going off node we want the VM to try and reclaim from the local * node. It does this if the remote distance is larger than RECLAIM_DISTANCE. * With the default REMOTE_DISTANCE of 20 and the default RECLAIM_DISTANCE of - * 20, we never reclaim and go off node straight away. + * 30, we never reclaim and go off node straight away. * * To fix this we choose a smaller value of RECLAIM_DISTANCE. */ -- cgit v1.2.3 From 1739ea9e13e636590dd56c2f4ca85e783da512e7 Mon Sep 17 00:00:00 2001 From: Sam bobroff Date: Wed, 21 May 2014 16:32:38 +1000 Subject: powerpc: Fix regression of per-CPU DSCR setting Since commit "efcac65 powerpc: Per process DSCR + some fixes (try#4)" it is no longer possible to set the DSCR on a per-CPU basis. The old behaviour was to minipulate the DSCR SPR directly but this is no longer sufficient: the value is quickly overwritten by context switching. This patch stores the per-CPU DSCR value in a kernel variable rather than directly in the SPR and it is used whenever a process has not set the DSCR itself. The sysfs interface (/sys/devices/system/cpu/cpuN/dscr) is unchanged. Writes to the old global default (/sys/devices/system/cpu/dscr_default) now set all of the per-CPU values and reads return the last written value. The new per-CPU default is added to the paca_struct and is used everywhere outside of sysfs.c instead of the old global default. Signed-off-by: Sam Bobroff Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/paca.h | 3 +++ arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/entry_64.S | 9 +-------- arch/powerpc/kernel/sysfs.c | 32 +++++++++++++++++++------------- arch/powerpc/kernel/tm.S | 16 ++++------------ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +-- 6 files changed, 29 insertions(+), 35 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 8e956a0b6e85..bb0bd25f20d0 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -92,7 +92,10 @@ struct paca_struct { struct slb_shadow *slb_shadow_ptr; struct dtl_entry *dispatch_log; struct dtl_entry *dispatch_log_end; +#endif /* CONFIG_PPC_STD_MMU_64 */ + u64 dscr_default; /* per-CPU default DSCR */ +#ifdef CONFIG_PPC_STD_MMU_64 /* * Now, starting in cacheline 2, the exception save areas */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index dba8140ebc20..cba2697406b7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -247,6 +247,7 @@ int main(void) #endif DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); + DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default)); DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9fde8a1bf1e1..911d45366f59 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -387,12 +387,6 @@ _GLOBAL(ret_from_kernel_thread) li r3,0 b syscall_exit - .section ".toc","aw" -DSCR_DEFAULT: - .tc dscr_default[TC],dscr_default - - .section ".text" - /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state @@ -577,11 +571,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #ifdef CONFIG_PPC64 BEGIN_FTR_SECTION lwz r6,THREAD_DSCR_INHERIT(r4) - ld r7,DSCR_DEFAULT@toc(2) ld r0,THREAD_DSCR(r4) cmpwi r6,0 bne 1f - ld r0,0(r7) + ld r0,PACA_DSCR(r13) 1: BEGIN_FTR_SECTION_NESTED(70) mfspr r8, SPRN_FSCR diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index e2a1d6fb3297..67fd2fd2620a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -484,7 +484,6 @@ SYSFS_PMCSETUP(pmc8, SPRN_PMC8); SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); SYSFS_SPRSETUP(purr, SPRN_PURR); SYSFS_SPRSETUP(spurr, SPRN_SPURR); -SYSFS_SPRSETUP(dscr, SPRN_DSCR); SYSFS_SPRSETUP(pir, SPRN_PIR); /* @@ -494,12 +493,27 @@ SYSFS_SPRSETUP(pir, SPRN_PIR); */ static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); -static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); -unsigned long dscr_default = 0; -EXPORT_SYMBOL(dscr_default); +static unsigned long dscr_default; + +static void read_dscr(void *val) +{ + *(unsigned long *)val = get_paca()->dscr_default; +} + +static void write_dscr(void *val) +{ + get_paca()->dscr_default = *(unsigned long *)val; + if (!current->thread.dscr_inherit) { + current->thread.dscr = *(unsigned long *)val; + mtspr(SPRN_DSCR, *(unsigned long *)val); + } +} + +SYSFS_SPRSETUP_SHOW_STORE(dscr); +static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr); static void add_write_permission_dev_attr(struct device_attribute *attr) { @@ -512,14 +526,6 @@ static ssize_t show_dscr_default(struct device *dev, return sprintf(buf, "%lx\n", dscr_default); } -static void update_dscr(void *dummy) -{ - if (!current->thread.dscr_inherit) { - current->thread.dscr = dscr_default; - mtspr(SPRN_DSCR, dscr_default); - } -} - static ssize_t __used store_dscr_default(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -532,7 +538,7 @@ static ssize_t __used store_dscr_default(struct device *dev, return -EINVAL; dscr_default = val; - on_each_cpu(update_dscr, NULL, 1); + on_each_cpu(write_dscr, &val, 1); return count; } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index ee061c3715de..2a324f4cb1b9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -78,12 +78,6 @@ _GLOBAL(tm_abort) TABORT(R3) blr - .section ".toc","aw" -DSCR_DEFAULT: - .tc dscr_default[TC],dscr_default - - .section ".text" - /* void tm_reclaim(struct thread_struct *thread, * unsigned long orig_msr, * uint8_t cause) @@ -298,9 +292,8 @@ dont_backup_fp: mtlr r0 ld r2, STK_GOT(r1) - /* Load system default DSCR */ - ld r4, DSCR_DEFAULT@toc(r2) - ld r0, 0(r4) + /* Load CPU's default DSCR */ + ld r0, PACA_DSCR(r13) mtspr SPRN_DSCR, r0 blr @@ -479,9 +472,8 @@ restore_gprs: mtlr r0 ld r2, STK_GOT(r1) - /* Load system default DSCR */ - ld r4, DSCR_DEFAULT@toc(r2) - ld r0, 0(r4) + /* Load CPU's default DSCR */ + ld r0, PACA_DSCR(r13) mtspr SPRN_DSCR, r0 blr diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9f0ad718e476..12f4ce5b4f78 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -286,8 +286,7 @@ kvm_start_guest: beq kvm_no_guest /* Set HSTATE_DSCR(r13) to something sensible */ - LOAD_REG_ADDR(r6, dscr_default) - ld r6, 0(r6) + ld r6, PACA_DSCR(r13) std r6, HSTATE_DSCR(r13) bl kvmppc_hv_entry -- cgit v1.2.3 From 4926616c77435e735c59288f838e7761baec4a6c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 20 May 2014 11:01:28 +1000 Subject: powerpc/powernv: Add calls to support little endian host When running as a powernv "host" system on P8, we need to switch the endianness of interrupt handlers. This does it via the appropriate call to the OPAL firmware which may result in just switching HID0:HILE but depending on the processor version might need to do a few more things. This call must be done early before any other processor has been brought out of firmware. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andy Whitcroft Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 7 +++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/opal.c | 22 ++++++++++++++++++++++ 3 files changed, 30 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 81720ff59a10..ea8bba7b66bd 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -154,6 +154,7 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_LPC_READ 67 #define OPAL_LPC_WRITE 68 #define OPAL_RETURN_CPU 69 +#define OPAL_REINIT_CPUS 70 #define OPAL_ELOG_READ 71 #define OPAL_ELOG_WRITE 72 #define OPAL_ELOG_ACK 73 @@ -725,6 +726,11 @@ struct OpalIoPhb3ErrorData { uint64_t pestB[OPAL_PHB3_NUM_PEST_REGS]; }; +enum { + OPAL_REINIT_CPUS_HILE_BE = (1 << 0), + OPAL_REINIT_CPUS_HILE_LE = (1 << 1), +}; + typedef struct oppanel_line { const char * line; uint64_t line_len; @@ -849,6 +855,7 @@ int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe, uint16_t *pci_error_type, uint16_t *severity); int64_t opal_pci_poll(uint64_t phb_id); int64_t opal_return_cpu(void); +int64_t opal_reinit_cpus(uint64_t flags); int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val); int64_t opal_xscom_write(uint32_t gcid, uint64_t pcb_addr, uint64_t val); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index b5ebc545a373..4abbff22a61f 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -124,6 +124,7 @@ OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE); OPAL_CALL(opal_lpc_read, OPAL_LPC_READ); OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE); OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU); +OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS); OPAL_CALL(opal_read_elog, OPAL_ELOG_READ); OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK); OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 360ad80c754c..539243e9dc23 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -57,6 +57,21 @@ static DEFINE_SPINLOCK(opal_notifier_lock); static uint64_t last_notified_mask = 0x0ul; static atomic_t opal_notifier_hold = ATOMIC_INIT(0); +static void opal_reinit_cores(void) +{ + /* Do the actual re-init, This will clobber all FPRs, VRs, etc... + * + * It will preserve non volatile GPRs and HSPRG0/1. It will + * also restore HIDs and other SPRs to their original value + * but it might clobber a bunch. + */ +#ifdef __BIG_ENDIAN__ + opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); +#else + opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE); +#endif +} + int __init early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data) { @@ -96,6 +111,13 @@ int __init early_init_dt_scan_opal(unsigned long node, printk("OPAL V1 detected !\n"); } + /* Reinit all cores with the right endian */ + opal_reinit_cores(); + + /* Restore some bits */ + if (cur_cpu_spec->cpu_restore) + cur_cpu_spec->cpu_restore(); + return 1; } -- cgit v1.2.3 From 5d73320a96fcce80286f1447864c481b5f0b96fa Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 4 Jun 2014 10:48:48 +1000 Subject: powerpc: 64bit sendfile is capped at 2GB commit 8f9c0119d7ba (compat: fs: Generic compat_sys_sendfile implementation) changed the PowerPC 64bit sendfile call from sys_sendile64 to sys_sendfile. Unfortunately this broke sendfile of lengths greater than 2G because sys_sendfile caps at MAX_NON_LFS. Restore what we had previously which fixes the bug. Cc: stable@vger.kernel.org Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/systbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index ac062f504736..35f8f2ffae8d 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -190,7 +190,7 @@ SYSCALL_SPU(getcwd) SYSCALL_SPU(capget) SYSCALL_SPU(capset) COMPAT_SYS(sigaltstack) -COMPAT_SYS_SPU(sendfile) +SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile) SYSCALL(ni_syscall) SYSCALL(ni_syscall) PPC_SYS(vfork) -- cgit v1.2.3 From 223ca9d855ce32a4cc2d2b961e6e9d1fb36872ba Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 4 Jun 2014 14:48:48 +1000 Subject: powerpc/powernv: Fix endian issues in memory error handling code struct OpalMemoryErrorData is passed to us from firmware, so we have to byteswap it. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 10 +++++----- arch/powerpc/platforms/powernv/opal-memory-errors.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index ea8bba7b66bd..cb15cbb51600 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -510,7 +510,7 @@ enum OpalMemErr_DynErrType { struct OpalMemoryErrorData { enum OpalMemErr_Version version:8; /* 0x00 */ enum OpalMemErrType type:8; /* 0x01 */ - uint16_t flags; /* 0x02 */ + __be16 flags; /* 0x02 */ uint8_t reserved_1[4]; /* 0x04 */ union { @@ -518,15 +518,15 @@ struct OpalMemoryErrorData { struct { enum OpalMemErr_ResilErrType resil_err_type:8; uint8_t reserved_1[7]; - uint64_t physical_address_start; - uint64_t physical_address_end; + __be64 physical_address_start; + __be64 physical_address_end; } resilience; /* Dynamic memory deallocation error info */ struct { enum OpalMemErr_DynErrType dyn_err_type:8; uint8_t reserved_1[7]; - uint64_t physical_address_start; - uint64_t physical_address_end; + __be64 physical_address_start; + __be64 physical_address_end; } dyn_dealloc; } u; }; diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index ec4132239cdf..b17a34b695ef 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -47,12 +47,12 @@ static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt) __func__, merr_evt->type); switch (merr_evt->type) { case OPAL_MEM_ERR_TYPE_RESILIENCE: - paddr_start = merr_evt->u.resilience.physical_address_start; - paddr_end = merr_evt->u.resilience.physical_address_end; + paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start); + paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end); break; case OPAL_MEM_ERR_TYPE_DYN_DALLOC: - paddr_start = merr_evt->u.dyn_dealloc.physical_address_start; - paddr_end = merr_evt->u.dyn_dealloc.physical_address_end; + paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start); + paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end); break; default: return; -- cgit v1.2.3 From a5d862576a64cb3e0c22dc9cc2170e4d750714f9 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 4 Jun 2014 17:50:47 +1000 Subject: powerpc: Allow ppc_md platform hook to override memory_block_size_bytes The pseries platform code unconditionally overrides memory_block_size_bytes regardless of the running platform. Create a ppc_md hook that so each platform can choose to do what it wants. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/machdep.h | 3 +++ arch/powerpc/kernel/setup_64.c | 10 ++++++++++ arch/powerpc/platforms/pseries/hotplug-memory.c | 17 +++-------------- arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 3 +++ 5 files changed, 21 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 374abc2e41d7..f92b0b54e921 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -98,6 +98,9 @@ struct machdep_calls { void (*iommu_save)(void); void (*iommu_restore)(void); #endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE + unsigned long (*memory_block_size)(void); +#endif #endif /* CONFIG_PPC64 */ void (*pci_dma_dev_setup)(struct pci_dev *dev); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 90b532ace0d5..ee082d771178 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -780,6 +781,15 @@ void __init setup_per_cpu_areas(void) } #endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +unsigned long memory_block_size_bytes(void) +{ + if (ppc_md.memory_block_size) + return ppc_md.memory_block_size(); + + return MIN_MEMORY_BLOCK_SIZE; +} +#endif #if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO) struct ppc_pci_io ppc_pci_io; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 7f75c94af822..7995135170a3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -21,7 +21,7 @@ #include #include -static unsigned long get_memblock_size(void) +unsigned long pseries_memory_block_size(void) { struct device_node *np; unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE; @@ -64,17 +64,6 @@ static unsigned long get_memblock_size(void) return memblock_size; } -/* WARNING: This is going to override the generic definition whenever - * pseries is built-in regardless of what platform is active at boot - * time. This is fine for now as this is the only "option" and it - * should work everywhere. If not, we'll have to turn this into a - * ppc_md. callback - */ -unsigned long memory_block_size_bytes(void) -{ - return get_memblock_size(); -} - #ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memory(u64 start, u64 size) { @@ -105,7 +94,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz if (!pfn_valid(start_pfn)) goto out; - block_sz = memory_block_size_bytes(); + block_sz = pseries_memory_block_size(); sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; nid = memory_add_physaddr_to_nid(base); @@ -201,7 +190,7 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) u32 *p; int i, rc = -EINVAL; - memblock_size = get_memblock_size(); + memblock_size = pseries_memory_block_size(); if (!memblock_size) return -EINVAL; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 99219530ea4a..361add62abf1 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -64,4 +64,6 @@ extern int dlpar_detach_node(struct device_node *); struct pci_host_bridge; int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); +unsigned long pseries_memory_block_size(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 215c3c269617..adc21a0e3410 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -810,4 +810,7 @@ define_machine(pseries) { #ifdef CONFIG_KEXEC .machine_kexec = pSeries_machine_kexec, #endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE + .memory_block_size = pseries_memory_block_size, +#endif }; -- cgit v1.2.3