From 1ea0d14e480c245683927eecc03a70faf06e80c8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Thu, 3 Sep 2009 12:27:15 -0700
Subject: x86/i386: Make sure stack-protector segment base is cache aligned

The Intel Optimization Reference Guide says:

    In Intel Atom microarchitecture, the address generation unit
    assumes that the segment base will be 0 by default. Non-zero
    segment base will cause load and store operations to experience
    a delay.
      - If the segment base isn't aligned to a cache line boundary,
        the max throughput of memory operations is reduced to one
        [e]very 9 cycles.
    [...]
    Assembly/Compiler Coding Rule 15. (H impact, ML generality)
    For Intel Atom processors, use segments with base set to 0
    whenever possible; avoid non-zero segment base address that is
    not aligned to cache line boundary at all cost.

We can't avoid having a non-zero base for the stack-protector
segment, but we can make it cache-aligned.

Signed-off-by: Jeremy Fitzhardinge
Cc:
LKML-Reference: <4AA01893.6000507@goop.org>
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/processor.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch/x86/include/asm/processor.h')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c7768269b1cf..e597ecc8753c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -403,7 +403,17 @@ extern unsigned long kernel_eflags;
 extern asmlinkage void ignore_sysret(void);
 #else	/* X86_64 */
 #ifdef CONFIG_CC_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, stack_canary);
+/*
+ * Make sure stack canary segment base is cached-aligned:
+ *   "For Intel Atom processors, avoid non zero segment base address
+ *    that is not aligned to cache line boundary at all cost."
+ * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
+ */
+struct stack_canary {
+	char __pad[20];		/* canary at %gs:20 */
+	unsigned long canary;
+};
+DECLARE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned;
 #endif
 #endif	/* X86_64 */
--
cgit v1.2.3
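The layout trick in this patch is that gcc's i386 stack-protector code reads the canary from the fixed address %gs:20, so the canary must stay 20 bytes past the segment base while the base itself moves onto a cache-line boundary. A minimal userspace sketch of that layout, not part of the patch; the names are hypothetical, a 64-byte cache line is assumed for illustration, and unsigned int stands in for i386's 4-byte unsigned long:

#include <stdalign.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors struct stack_canary: 20 bytes of padding keep the canary at
 * the offset the compiler hard-codes, while the object as a whole is
 * aligned so the (non-zero) segment base lands on a cache-line boundary. */
struct canary_sketch {
	char pad[20];
	unsigned int canary;	/* 4 bytes, like unsigned long on i386 */
};

static alignas(64) struct canary_sketch canary_area;

int main(void)
{
	printf("canary offset:     %zu\n",
	       offsetof(struct canary_sketch, canary));
	printf("base cache-aligned: %d\n",
	       ((uintptr_t)&canary_area % 64) == 0);
	return 0;
}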
From 53f824520b6d84ca5b4a8fd71addc91dbf64357e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Thu, 3 Sep 2009 14:31:44 -0700
Subject: x86/i386: Put aligned stack-canary in percpu shared_aligned section

Pack aligned things together into a special section to minimize
padding holes.

Suggested-by: Eric Dumazet
Signed-off-by: Jeremy Fitzhardinge
Cc: Tejun Heo
LKML-Reference: <4AA035C0.9070202@goop.org>
[ queued up in tip:x86/asm because it depends on this commit:
  x86/i386: Make sure stack-protector segment base is cache aligned ]
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/processor.h | 2 +-
 arch/x86/kernel/cpu/common.c     | 2 +-
 include/asm-generic/percpu.h     | 3 +++
 include/linux/percpu-defs.h      | 8 ++++++++
 4 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch/x86/include/asm/processor.h')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e597ecc8753c..ac7e79654f3a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -413,7 +413,7 @@ struct stack_canary {
 	char __pad[20];		/* canary at %gs:20 */
 	unsigned long canary;
 };
-DECLARE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned;
+DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 #endif	/* X86_64 */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7d84bc4c1188..f23e236391a3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1043,7 +1043,7 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);
 #else	/* CONFIG_X86_64 */
 
 #ifdef CONFIG_CC_STACKPROTECTOR
-DEFINE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned;
+DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 
 /* Make sure %fs and %gs are initialized properly in idle threads */
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index aa00800adacc..90079c373f1c 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -81,14 +81,17 @@ extern void setup_per_cpu_areas(void);
 
 #ifdef MODULE
 #define PER_CPU_SHARED_ALIGNED_SECTION ""
+#define PER_CPU_ALIGNED_SECTION ""
 #else
 #define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
+#define PER_CPU_ALIGNED_SECTION ".shared_aligned"
 #endif
 #define PER_CPU_FIRST_SECTION ".first"
 
 #else
 
 #define PER_CPU_SHARED_ALIGNED_SECTION ""
+#define PER_CPU_ALIGNED_SECTION ".shared_aligned"
 #define PER_CPU_FIRST_SECTION ""
 
 #endif
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 68438e18fff4..3058cf9dd3d4 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -66,6 +66,14 @@
 	DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
 	____cacheline_aligned_in_smp
 
+#define DECLARE_PER_CPU_ALIGNED(type, name)				\
+	DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION)	\
+	____cacheline_aligned
+
+#define DEFINE_PER_CPU_ALIGNED(type, name)				\
+	DEFINE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION)	\
+	____cacheline_aligned
+
 /*
  * Declaration/definition used for per-CPU variables that must be page aligned.
  */
--
cgit v1.2.3
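The new DECLARE/DEFINE_PER_CPU_ALIGNED pair is the general form of what the stack canary needs: per-CPU data that starts on its own cache line but is grouped into the .shared_aligned output section instead of leaving alignment holes in the ordinary per-CPU section. A usage sketch with a hypothetical variable, not taken from the patch, using only the macros the patch introduces:

#include <linux/percpu.h>

/* Hypothetical example: per-CPU statistics that should begin on their
 * own cache line but are only touched by the owning CPU. */
struct example_stats {
	unsigned long fast_hits;
	unsigned long slow_hits;
};

/* In a header shared by users of the variable: */
DECLARE_PER_CPU_ALIGNED(struct example_stats, example_stats);

/* In exactly one .c file: */
DEFINE_PER_CPU_ALIGNED(struct example_stats, example_stats);

static void example_bump(void)
{
	/* The usual per-CPU accessors still apply; only the placement
	 * and alignment of the variable change. Caller is assumed to
	 * have preemption disabled. */
	__get_cpu_var(example_stats).fast_hits++;
}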
From 5367b6887e7d8c870a5da7d9b8c6e9c207684e43 Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Thu, 10 Sep 2009 02:53:50 +0100
Subject: x86: Fix code patching for paravirt-alternatives on 486

As reported in and , kernels with paravirt-alternatives enabled
crash in text_poke_early() on at least some 486-class processors.

The problem is that text_poke_early() itself uses inline functions
affected by paravirt-alternatives and so will modify instructions
that have already been prefetched. Pentium and later processors
will invalidate the prefetched instructions in this case, but
486-class processors do not.

Change sync_core() to limit prefetching on 486-class (and 386-class)
processors, and move the call to sync_core() above the call to the
modifiable local_irq_restore().

Signed-off-by: Ben Hutchings
LKML-Reference: <1252547631.3423.134.camel@localhost>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/processor.h | 16 +++++++++++++---
 arch/x86/kernel/alternative.c    |  2 +-
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'arch/x86/include/asm/processor.h')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c7768269b1cf..2db56c57a281 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -703,13 +703,23 @@ static inline void cpu_relax(void)
 	rep_nop();
 }
 
-/* Stop speculative execution: */
+/* Stop speculative execution and prefetching of modified code. */
 static inline void sync_core(void)
 {
 	int tmp;
 
-	asm volatile("cpuid" : "=a" (tmp) : "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+#if defined(CONFIG_M386) || defined(CONFIG_M486)
+	if (boot_cpu_data.x86 < 5)
+		/* There is no speculative execution.
+		 * jmp is a barrier to prefetching. */
+		asm volatile("jmp 1f\n1:\n" ::: "memory");
+	else
+#endif
+		/* cpuid is a barrier to speculative execution.
+		 * Prefetched instructions are automatically
+		 * invalidated when modified. */
+		asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+			     : "ebx", "ecx", "edx", "memory");
 }
 
 static inline void __monitor(const void *eax, unsigned long ecx,
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f57658702571..b8ebd0b689b1 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -490,8 +490,8 @@ void *text_poke_early(void *addr, const void *opcode, size_t len)
 	unsigned long flags;
 	local_irq_save(flags);
 	memcpy(addr, opcode, len);
-	local_irq_restore(flags);
 	sync_core();
+	local_irq_restore(flags);
 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
 	   that causes hangs on some VIA CPUs. */
 	return addr;
--
cgit v1.2.3
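The essential ordering constraint can be read off the alternative.c hunk: the serializing step must run before the first call that may itself have been paravirt-patched. A simplified skeleton of that pattern, with a hypothetical function name; the real code is text_poke_early() above:

#include <linux/irqflags.h>
#include <linux/string.h>
#include <asm/processor.h>

/* Hypothetical wrapper illustrating the fixed ordering in text_poke_early(). */
static void *poke_text_early_sketch(void *addr, const void *opcode, size_t len)
{
	unsigned long flags;

	local_irq_save(flags);
	memcpy(addr, opcode, len);	/* rewrite the instruction bytes        */
	sync_core();			/* 386/486: jmp limits prefetching;     */
					/* 586+: cpuid serializes execution     */
	local_irq_restore(flags);	/* only now call code the patching      */
					/* may itself have rewritten            */
	return addr;
}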