From 80552f0f7aebdd8deda8ea455292cbfbf462d655 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 16 Apr 2019 10:22:57 -0400 Subject: mm/slab: Remove store_stackinfo() store_stackinfo() does not seem used in actual SLAB debugging. Potentially, it could be added to check_poison_obj() to provide more information but this seems like an overkill due to the declining popularity of SLAB, so just remove it instead. Signed-off-by: Qian Cai Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Acked-by: Vlastimil Babka Cc: Andrew Morton Cc: Andy Lutomirski Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Josh Poimboeuf Cc: linux-mm Cc: Pekka Enberg Cc: rientjes@google.com Cc: sean.j.christopherson@intel.com Link: https://lkml.kernel.org/r/20190416142258.18694-1-cai@lca.pw --- mm/slab.c | 48 ++++++------------------------------------------ 1 file changed, 6 insertions(+), 42 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 47a380a486ee..e79ef28396e2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1467,53 +1467,17 @@ static bool is_debug_pagealloc_cache(struct kmem_cache *cachep) } #ifdef CONFIG_DEBUG_PAGEALLOC -static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, - unsigned long caller) -{ - int size = cachep->object_size; - - addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; - - if (size < 5 * sizeof(unsigned long)) - return; - - *addr++ = 0x12345678; - *addr++ = caller; - *addr++ = smp_processor_id(); - size -= 3 * sizeof(unsigned long); - { - unsigned long *sptr = &caller; - unsigned long svalue; - - while (!kstack_end(sptr)) { - svalue = *sptr++; - if (kernel_text_address(svalue)) { - *addr++ = svalue; - size -= sizeof(unsigned long); - if (size <= sizeof(unsigned long)) - break; - } - } - - } - *addr++ = 0x87654321; -} - -static void slab_kernel_map(struct kmem_cache *cachep, void *objp, - int map, unsigned long caller) +static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map) { if (!is_debug_pagealloc_cache(cachep)) return; - if (caller) - store_stackinfo(cachep, objp, caller); - kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); } #else static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp, - int map, unsigned long caller) {} + int map) {} #endif @@ -1661,7 +1625,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, if (cachep->flags & SLAB_POISON) { check_poison_obj(cachep, objp); - slab_kernel_map(cachep, objp, 1, 0); + slab_kernel_map(cachep, objp, 1); } if (cachep->flags & SLAB_RED_ZONE) { if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) @@ -2434,7 +2398,7 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) /* need to poison the objs? 
*/ if (cachep->flags & SLAB_POISON) { poison_obj(cachep, objp, POISON_FREE); - slab_kernel_map(cachep, objp, 0, 0); + slab_kernel_map(cachep, objp, 0); } } #endif @@ -2813,7 +2777,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, if (cachep->flags & SLAB_POISON) { poison_obj(cachep, objp, POISON_FREE); - slab_kernel_map(cachep, objp, 0, caller); + slab_kernel_map(cachep, objp, 0); } return objp; } @@ -3077,7 +3041,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, return objp; if (cachep->flags & SLAB_POISON) { check_poison_obj(cachep, objp); - slab_kernel_map(cachep, objp, 1, 0); + slab_kernel_map(cachep, objp, 1); poison_obj(cachep, objp, POISON_INUSE); } if (cachep->flags & SLAB_STORE_USER) -- cgit v1.2.3 From 7dbcf2b0b770eeb803a416ee8dcbef78e6389d40 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:38 +0200 Subject: x86/irq/64: Limit IST stack overflow check to #DB stack Commit 37fe6a42b343 ("x86: Check stack overflow in detail") added a broad check for the full exception stack area, i.e. it considers the full exception stack area as valid. That's wrong in two aspects: 1) It does not check the individual areas one by one 2) #DF, NMI and #MCE are not enabling interrupts which means that a regular device interrupt cannot happen in their context. In fact if a device interrupt hits one of those IST stacks that's a bug because some code path enabled interrupts while handling the exception. Limit the check to the #DB stack and consider all other IST stacks as 'overflow' or invalid. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Mitsuo Hayasaka Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160143.682135110@linutronix.de --- arch/x86/kernel/irq_64.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0469cd078db1..b50ac9c7397b 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,9 +26,18 @@ int sysctl_panic_on_stackoverflow; /* * Probabilistic stack overflow check: * - * Only check the stack in process context, because everything else - * runs on the big interrupt stacks. Checking reliably is too expensive, - * so we just check from interrupts. + * Regular device interrupts can enter on the following stacks: + * + * - User stack + * + * - Kernel task stack + * + * - Interrupt stack if a device driver reenables interrupts + * which should only happen in really old drivers. + * + * - Debug IST stack + * + * All other contexts are invalid. */ static inline void stack_overflow_check(struct pt_regs *regs) { @@ -53,8 +62,8 @@ static inline void stack_overflow_check(struct pt_regs *regs) return; oist = this_cpu_ptr(&orig_ist); - estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; - estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; + estack_bottom = (u64)oist->ist[DEBUG_STACK]; + estack_top = estack_bottom - DEBUG_STKSZ + STACK_TOP_MARGIN; if (regs->sp >= estack_top && regs->sp <= estack_bottom) return; -- cgit v1.2.3 From fa33215422fd415a07ec2a00e9f1acdaf0fa8e94 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2019 17:59:39 +0200 Subject: x86/dumpstack: Fix off-by-one errors in stack identification The get_stack_info() function is off-by-one when checking whether an address is on a IRQ stack or a IST stack. 
This prevents an overflowed IRQ or IST stack from being dumped properly. [ tglx: Do the same for 32-bit ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160143.785651055@linutronix.de --- arch/x86/kernel/dumpstack_32.c | 4 ++-- arch/x86/kernel/dumpstack_64.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index cd53f3030e40..d305440ebe9c 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -41,7 +41,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) * This is a software stack, so 'end' can be a valid stack pointer. * It just means the stack is empty. */ - if (stack <= begin || stack > end) + if (stack < begin || stack > end) return false; info->type = STACK_TYPE_IRQ; @@ -66,7 +66,7 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) * This is a software stack, so 'end' can be a valid stack pointer. * It just means the stack is empty. */ - if (stack <= begin || stack > end) + if (stack < begin || stack > end) return false; info->type = STACK_TYPE_SOFTIRQ; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5cdb9e84da57..90f0fa88cbb3 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -65,7 +65,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) begin = end - (exception_stack_sizes[k] / sizeof(long)); regs = (struct pt_regs *)end - 1; - if (stack <= begin || stack >= end) + if (stack < begin || stack >= end) continue; info->type = STACK_TYPE_EXCEPTION + k; @@ -88,7 +88,7 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info) * This is a software stack, so 'end' can be a valid stack pointer. * It just means the stack is empty. */ - if (stack <= begin || stack > end) + if (stack < begin || stack >= end) return false; info->type = STACK_TYPE_IRQ; -- cgit v1.2.3 From 4f44b8f0b33b7111216f0fad353315f796b81617 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2019 17:59:40 +0200 Subject: x86/irq/64: Remove a hardcoded irq_stack_union access stack_overflow_check() is using both irq_stack_ptr and irq_stack_union to find the IRQ stack. That's going to break when vmapped irq stacks are introduced. Change it to just use irq_stack_ptr. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160143.872549191@linutronix.de --- arch/x86/kernel/irq_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index b50ac9c7397b..f6dcc8fea5c0 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -55,9 +55,8 @@ static inline void stack_overflow_check(struct pt_regs *regs) regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) + - STACK_TOP_MARGIN; irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); + irq_stack_top = irq_stack_bottom - IRQ_STACK_SIZE + STACK_TOP_MARGIN; if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) return; -- cgit v1.2.3 From df835e7083bee33e98635aca26b39b63ebc6cca7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:41 +0200 Subject: x86/irq/64: Sanitize the top/bottom confusion On x86, stacks go top to bottom, but the stack overflow check uses it the other way round, which is just confusing. Clean it up and sanitize the warning string a bit. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160143.961241397@linutronix.de --- arch/x86/kernel/irq_64.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f6dcc8fea5c0..cf200466d5c8 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -42,7 +42,7 @@ int sysctl_panic_on_stackoverflow; static inline void stack_overflow_check(struct pt_regs *regs) { #ifdef CONFIG_DEBUG_STACKOVERFLOW -#define STACK_TOP_MARGIN 128 +#define STACK_MARGIN 128 struct orig_ist *oist; u64 irq_stack_top, irq_stack_bottom; u64 estack_top, estack_bottom; @@ -51,25 +51,25 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (user_mode(regs)) return; - if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && + if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_MARGIN && regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); - irq_stack_top = irq_stack_bottom - IRQ_STACK_SIZE + STACK_TOP_MARGIN; - if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) + irq_stack_top = (u64)__this_cpu_read(irq_stack_ptr); + irq_stack_bottom = irq_stack_top - IRQ_STACK_SIZE + STACK_MARGIN; + if (regs->sp >= irq_stack_bottom && regs->sp <= irq_stack_top) return; oist = this_cpu_ptr(&orig_ist); - estack_bottom = (u64)oist->ist[DEBUG_STACK]; - estack_top = estack_bottom - DEBUG_STKSZ + STACK_TOP_MARGIN; - if (regs->sp >= estack_top && regs->sp <= estack_bottom) + estack_top = (u64)oist->ist[DEBUG_STACK]; + estack_bottom = estack_top - DEBUG_STKSZ + STACK_MARGIN; + if (regs->sp >= estack_bottom && regs->sp <= estack_top) return; - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx, irq stack:%Lx-%Lx, exception stack: %Lx-%Lx, ip:%pF)\n", current->comm, curbase, regs->sp, - irq_stack_top, irq_stack_bottom, - estack_top, estack_bottom, (void *)regs->ip); + irq_stack_bottom, irq_stack_top, + estack_bottom, estack_top, (void 
*)regs->ip); if (sysctl_panic_on_stackoverflow) panic("low stack detected by irq handler - check messages\n"); -- cgit v1.2.3 From 99d334511b337884cadbdfae28da912a4edb1001 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:42 +0200 Subject: x86/idt: Remove unused macro SISTG Commit d8ba61ba58c8 ("x86/entry/64: Don't use IST entry for #BP stack") removed the last user but left the macro around. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Dou Liyang Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.050689789@linutronix.de --- arch/x86/kernel/idt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 01adea278a71..2877606e97de 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -45,10 +45,6 @@ struct idt_data { #define ISTG(_vector, _addr, _ist) \ G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS) -/* System interrupt gate with interrupt stack */ -#define SISTG(_vector, _addr, _ist) \ - G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS) - /* Task gate */ #define TSKG(_vector, _gdt) \ G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3) -- cgit v1.2.3 From 6f36bd8d2e8c221eaaf4ce5b0ebbb11c00b0ac98 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:43 +0200 Subject: x86/64: Remove stale CURRENT_MASK Nothing uses that and before people get the wrong ideas, get rid of it. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: "Kirill A. Shutemov" Cc: Andy Lutomirski Cc: Baoquan He Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Qian Cai Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.139284839@linutronix.de --- arch/x86/include/asm/page_64_types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8f657286d599..bcd8c0518604 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,7 +14,6 @@ #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define CURRENT_MASK (~(THREAD_SIZE - 1)) #define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) -- cgit v1.2.3 From 30842211506e376b76394a9cb4e6d0c9d258b8d4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:44 +0200 Subject: x86/exceptions: Remove unused stack defines on 32bit Nothing requires those for 32bit builds. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Michal Hocko Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.227822695@linutronix.de --- arch/x86/include/asm/page_32_types.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0d5c739eebd7..57b5dc15ca7e 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -22,11 +22,7 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define DOUBLEFAULT_STACK 1 -#define NMI_STACK 0 -#define DEBUG_STACK 0 -#define MCE_STACK 0 -#define N_EXCEPTION_STACKS 1 +#define N_EXCEPTION_STACKS 1 #ifdef CONFIG_X86_PAE /* -- cgit v1.2.3 From 8f34c5b5afce91d171bb0802631197484cb69b8b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:45 +0200 Subject: x86/exceptions: Make IST index zero based The defines for the exception stack (IST) array in the TSS are using the SDM convention IST1 - IST7. That causes all sorts of code to subtract 1 for array indices related to IST. That's confusing at best and does not provide any value. Make the indices zero based and fixup the usage sites. The only code which needs to adjust the 0 based index is the interrupt descriptor setup which needs to add 1 now. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: Andy Lutomirski Cc: Baoquan He Cc: "Chang S. Bae" Cc: Dave Hansen Cc: Dominik Brodowski Cc: Dou Liyang Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: "Kirill A. Shutemov" Cc: Konrad Rzeszutek Wilk Cc: linux-doc@vger.kernel.org Cc: Nicolai Stange Cc: Peter Zijlstra Cc: Qian Cai Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.331772825@linutronix.de --- Documentation/x86/kernel-stacks | 8 ++++---- arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/page_64_types.h | 13 ++++++++----- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/dumpstack_64.c | 14 +++++++------- arch/x86/kernel/idt.c | 15 +++++++++------ arch/x86/kernel/irq_64.c | 2 +- arch/x86/mm/fault.c | 2 +- 8 files changed, 34 insertions(+), 28 deletions(-) diff --git a/Documentation/x86/kernel-stacks b/Documentation/x86/kernel-stacks index 9a0aa4d3a866..1b04596caea9 100644 --- a/Documentation/x86/kernel-stacks +++ b/Documentation/x86/kernel-stacks @@ -59,7 +59,7 @@ If that assumption is ever broken then the stacks will become corrupt. The currently assigned IST stacks are :- -* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). +* ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE). Used for interrupt 8 - Double Fault Exception (#DF). @@ -68,7 +68,7 @@ The currently assigned IST stacks are :- Using a separate stack allows the kernel to recover from it well enough in many cases to still output an oops. -* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE). +* ESTACK_NMI. EXCEPTION_STKSZ (PAGE_SIZE). Used for non-maskable interrupts (NMI). @@ -76,7 +76,7 @@ The currently assigned IST stacks are :- middle of switching stacks. Using IST for NMI events avoids making assumptions about the previous state of the kernel stack. -* DEBUG_STACK. DEBUG_STKSZ +* ESTACK_DB. DEBUG_STKSZ Used for hardware debug interrupts (interrupt 1) and for software debug interrupts (INT3). @@ -86,7 +86,7 @@ The currently assigned IST stacks are :- avoids making assumptions about the previous state of the kernel stack. -* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE). 
+* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE). Used for interrupt 18 - Machine Check Exception (#MC). diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1f0efdb7b629..fd0a50452cb3 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -841,7 +841,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt /* * Exception entry points. */ -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) /** * idtentry - Generate an IDT entry stub @@ -1129,7 +1129,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ hv_stimer0_callback_vector hv_stimer0_vector_handler #endif /* CONFIG_HYPERV */ -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=ESTACK_DB idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index bcd8c0518604..6ab2c54c1bf9 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -24,11 +24,14 @@ #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define DOUBLEFAULT_STACK 1 -#define NMI_STACK 2 -#define DEBUG_STACK 3 -#define MCE_STACK 4 -#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ +/* + * The index for the tss.ist[] array. The hardware limit is 7 entries. + */ +#define ESTACK_DF 0 +#define ESTACK_NMI 1 +#define ESTACK_DB 2 +#define ESTACK_MCE 3 +#define N_EXCEPTION_STACKS 4 /* * Set __PAGE_OFFSET to the most negative possible address + diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659..0e4cb718fc4a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -516,7 +516,7 @@ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); */ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ + [ESTACK_DB] = DEBUG_STKSZ }; #endif @@ -1760,7 +1760,7 @@ void cpu_init(void) estacks += exception_stack_sizes[v]; oist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; - if (v == DEBUG_STACK-1) + if (v == ESTACK_DB) per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; } } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 90f0fa88cbb3..455b47ef9250 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -18,16 +18,16 @@ #include -static char *exception_stack_names[N_EXCEPTION_STACKS] = { - [ DOUBLEFAULT_STACK-1 ] = "#DF", - [ NMI_STACK-1 ] = "NMI", - [ DEBUG_STACK-1 ] = "#DB", - [ MCE_STACK-1 ] = "#MC", +static const char *exception_stack_names[N_EXCEPTION_STACKS] = { + [ ESTACK_DF ] = "#DF", + [ ESTACK_NMI ] = "NMI", + [ ESTACK_DB ] = "#DB", + [ ESTACK_MCE ] = "#MC", }; -static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { +static const unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... 
N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ + [ESTACK_DB] = DEBUG_STKSZ }; const char *stack_type_name(enum stack_type type) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 2877606e97de..2188f734ec61 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -41,9 +41,12 @@ struct idt_data { #define SYSG(_vector, _addr) \ G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS) -/* Interrupt gate with interrupt stack */ +/* + * Interrupt gate with interrupt stack. The _ist index is the index in + * the tss.ist[] array, but for the descriptor it needs to start at 1. + */ #define ISTG(_vector, _addr, _ist) \ - G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS) + G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS) /* Task gate */ #define TSKG(_vector, _gdt) \ @@ -180,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; * cpu_init() when the TSS has been initialized. */ static const __initconst struct idt_data ist_idts[] = { - ISTG(X86_TRAP_DB, debug, DEBUG_STACK), - ISTG(X86_TRAP_NMI, nmi, NMI_STACK), - ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), + ISTG(X86_TRAP_DB, debug, ESTACK_DB), + ISTG(X86_TRAP_NMI, nmi, ESTACK_NMI), + ISTG(X86_TRAP_DF, double_fault, ESTACK_DF), #ifdef CONFIG_X86_MCE - ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), + ISTG(X86_TRAP_MC, &machine_check, ESTACK_MCE), #endif }; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index cf200466d5c8..182e8b245e06 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -61,7 +61,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) return; oist = this_cpu_ptr(&orig_ist); - estack_top = (u64)oist->ist[DEBUG_STACK]; + estack_top = (u64)oist->ist[ESTACK_DB]; estack_bottom = estack_top - DEBUG_STKSZ + STACK_MARGIN; if (regs->sp >= estack_bottom && regs->sp <= estack_top) return; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 667f1da36208..0524e1d74f24 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -793,7 +793,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (is_vmalloc_addr((void *)address) && (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { - unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); + unsigned long stack = this_cpu_read(orig_ist.ist[ESTACK_DF]) - sizeof(void *); /* * We're likely to be running with very little stack space * left. It's plausible that we'd hit this condition but -- cgit v1.2.3 From 881a463cf21dbf83aab2cf6c9a359f34f88c2491 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:46 +0200 Subject: x86/cpu_entry_area: Cleanup setup functions No point in retrieving the entry area pointer over and over. Do it once and use unsigned int for 'cpu' everywhere. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.419653165@linutronix.de --- arch/x86/mm/cpu_entry_area.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 19c6abf9ea31..c2a54f75d335 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -52,10 +52,10 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } -static void __init percpu_setup_debug_store(int cpu) +static void __init percpu_setup_debug_store(unsigned int cpu) { #ifdef CONFIG_CPU_SUP_INTEL - int npages; + unsigned int npages; void *cea; if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) @@ -79,8 +79,9 @@ static void __init percpu_setup_debug_store(int cpu) } /* Setup the fixmap mappings only once per-processor */ -static void __init setup_cpu_entry_area(int cpu) +static void __init setup_cpu_entry_area(unsigned int cpu) { + struct cpu_entry_area *cea = get_cpu_entry_area(cpu); #ifdef CONFIG_X86_64 /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ pgprot_t gdt_prot = PAGE_KERNEL_RO; @@ -101,10 +102,9 @@ static void __init setup_cpu_entry_area(int cpu) pgprot_t tss_prot = PAGE_KERNEL; #endif - cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu), - gdt_prot); + cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); - cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page, + cea_map_percpu_pages(&cea->entry_stack_page, per_cpu_ptr(&entry_stack_storage, cpu), 1, PAGE_KERNEL); @@ -128,19 +128,18 @@ static void __init setup_cpu_entry_area(int cpu) BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); - cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss, - &per_cpu(cpu_tss_rw, cpu), + cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu), sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); #ifdef CONFIG_X86_32 - per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); + per_cpu(cpu_entry_area, cpu) = cea; #endif #ifdef CONFIG_X86_64 BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); BUILD_BUG_ON(sizeof(exception_stacks) != sizeof(((struct cpu_entry_area *)0)->exception_stacks)); - cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks, + cea_map_percpu_pages(&cea->exception_stacks, &per_cpu(exception_stacks, cpu), sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); #endif -- cgit v1.2.3 From 019b17b3ffe48100e52f609ca1c6ed6e5a40cba1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:47 +0200 Subject: x86/exceptions: Add structs for exception stacks At the moment everything assumes a full linear mapping of the various exception stacks. Adding guard pages to the cpu entry area mapping of the exception stacks will break that assumption. As a preparatory step convert both the real storage and the effective mapping in the cpu entry area from character arrays to structures. To ensure that both arrays have the same ordering and the same size of the individual stacks fill the members with a macro. The guard size is the only difference between the two resulting structures. For now both have guard size 0 until the preparation of all usage sites is done. Provide a couple of helper macros which are used in the following conversions. 
Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dave Hansen Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.506807893@linutronix.de --- arch/x86/include/asm/cpu_entry_area.h | 52 +++++++++++++++++++++++++++++++---- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/mm/cpu_entry_area.c | 8 ++---- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 29c706415443..af8c312673de 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -7,6 +7,51 @@ #include #include +#ifdef CONFIG_X86_64 + +/* Macro to enforce the same ordering and stack sizes */ +#define ESTACKS_MEMBERS(guardsize) \ + char DF_stack_guard[guardsize]; \ + char DF_stack[EXCEPTION_STKSZ]; \ + char NMI_stack_guard[guardsize]; \ + char NMI_stack[EXCEPTION_STKSZ]; \ + char DB_stack_guard[guardsize]; \ + char DB_stack[DEBUG_STKSZ]; \ + char MCE_stack_guard[guardsize]; \ + char MCE_stack[EXCEPTION_STKSZ]; \ + char IST_top_guard[guardsize]; \ + +/* The exception stacks' physical storage. No guard pages required */ +struct exception_stacks { + ESTACKS_MEMBERS(0) +}; + +/* + * The effective cpu entry area mapping with guard pages. Guard size is + * zero until the code which makes assumptions about linear mappings is + * cleaned up. + */ +struct cea_exception_stacks { + ESTACKS_MEMBERS(0) +}; + +#define CEA_ESTACK_SIZE(st) \ + sizeof(((struct cea_exception_stacks *)0)->st## _stack) + +#define CEA_ESTACK_BOT(ceastp, st) \ + ((unsigned long)&(ceastp)->st## _stack) + +#define CEA_ESTACK_TOP(ceastp, st) \ + (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st)) + +#define CEA_ESTACK_OFFS(st) \ + offsetof(struct cea_exception_stacks, st## _stack) + +#define CEA_ESTACK_PAGES \ + (sizeof(struct cea_exception_stacks) / PAGE_SIZE) + +#endif + /* * cpu_entry_area is a percpu region that contains things needed by the CPU * and early entry/exit code. Real types aren't used for all fields here @@ -32,12 +77,9 @@ struct cpu_entry_area { #ifdef CONFIG_X86_64 /* - * Exception stacks used for IST entries. - * - * In the future, this should have a separate slot for each stack - * with guard pages between them. + * Exception stacks used for IST entries with guard pages. 
*/ - char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; + struct cea_exception_stacks estacks; #endif #ifdef CONFIG_CPU_SUP_INTEL /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0e4cb718fc4a..24b801ea7522 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1754,7 +1754,7 @@ void cpu_init(void) * set up and load the per-CPU TSS */ if (!oist->ist[0]) { - char *estacks = get_cpu_entry_area(cpu)->exception_stacks; + char *estacks = (char *)&get_cpu_entry_area(cpu)->estacks; for (v = 0; v < N_EXCEPTION_STACKS; v++) { estacks += exception_stack_sizes[v]; diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index c2a54f75d335..6a09b84c13fe 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -13,8 +13,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); #ifdef CONFIG_X86_64 -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); #endif struct cpu_entry_area *get_cpu_entry_area(int cpu) @@ -138,9 +137,8 @@ static void __init setup_cpu_entry_area(unsigned int cpu) #ifdef CONFIG_X86_64 BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); BUILD_BUG_ON(sizeof(exception_stacks) != - sizeof(((struct cpu_entry_area *)0)->exception_stacks)); - cea_map_percpu_pages(&cea->exception_stacks, - &per_cpu(exception_stacks, cpu), + sizeof(((struct cpu_entry_area *)0)->estacks)); + cea_map_percpu_pages(&cea->estacks, &per_cpu(exception_stacks, cpu), sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); #endif percpu_setup_debug_store(cpu); -- cgit v1.2.3 From a4af767ae59cc579569bbfe49513a0037d5989ee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:48 +0200 Subject: x86/cpu_entry_area: Prepare for IST guard pages To allow guard pages between the IST stacks each stack needs to be mapped individually. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.592691557@linutronix.de --- arch/x86/mm/cpu_entry_area.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 6a09b84c13fe..2b1407662a6d 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -77,6 +77,34 @@ static void __init percpu_setup_debug_store(unsigned int cpu) #endif } +#ifdef CONFIG_X86_64 + +#define cea_map_stack(name) do { \ + npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \ + cea_map_percpu_pages(cea->estacks.name## _stack, \ + estacks->name## _stack, npages, PAGE_KERNEL); \ + } while (0) + +static void __init percpu_setup_exception_stacks(unsigned int cpu) +{ + struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu); + struct cpu_entry_area *cea = get_cpu_entry_area(cpu); + unsigned int npages; + + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + /* + * The exceptions stack mappings in the per cpu area are protected + * by guard pages so each stack must be mapped separately. 
+ */ + cea_map_stack(DF); + cea_map_stack(NMI); + cea_map_stack(DB); + cea_map_stack(MCE); +} +#else +static inline void percpu_setup_exception_stacks(unsigned int cpu) {} +#endif + /* Setup the fixmap mappings only once per-processor */ static void __init setup_cpu_entry_area(unsigned int cpu) { @@ -134,13 +162,8 @@ static void __init setup_cpu_entry_area(unsigned int cpu) per_cpu(cpu_entry_area, cpu) = cea; #endif -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); - BUILD_BUG_ON(sizeof(exception_stacks) != - sizeof(((struct cpu_entry_area *)0)->estacks)); - cea_map_percpu_pages(&cea->estacks, &per_cpu(exception_stacks, cpu), - sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); -#endif + percpu_setup_exception_stacks(cpu); + percpu_setup_debug_store(cpu); } -- cgit v1.2.3 From 7623f37e411156e6e09b95cf5c76e509c5fda640 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:49 +0200 Subject: x86/cpu_entry_area: Provide exception stack accessor Store a pointer to the per cpu entry area exception stack mappings to allow fast retrieval. Required for converting various places from using the shadow IST array to directly doing address calculations on the actual mapping address. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.680960459@linutronix.de --- arch/x86/include/asm/cpu_entry_area.h | 4 ++++ arch/x86/mm/cpu_entry_area.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index af8c312673de..9b406f067ecf 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -99,6 +99,7 @@ struct cpu_entry_area { #define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); +DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); @@ -118,4 +119,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu) return &get_cpu_entry_area(cpu)->entry_stack_page.stack; } +#define __this_cpu_ist_top_va(name) \ + CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name) + #endif diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 2b1407662a6d..a00d0d059c8a 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -14,6 +14,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage) #ifdef CONFIG_X86_64 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); +DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); #endif struct cpu_entry_area *get_cpu_entry_area(int cpu) @@ -92,6 +93,9 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu) unsigned int npages; BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + + per_cpu(cea_exception_stacks, cpu) = &cea->estacks; + /* * The exceptions stack mappings in the per cpu area are protected * by guard pages so each stack must be mapped separately. 
-- cgit v1.2.3 From d876b67343a648f3613506c7dbfed088fa0c875b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:50 +0200 Subject: x86/traps: Use cpu_entry_area instead of orig_ist The orig_ist[] array is a shadow copy of the IST array in the TSS. The reason why it exists is that older kernels used two TSS variants with different pointers into the debug stack. orig_ist[] contains the real starting points. There is no point anymore to do so because the same information can be retrieved using the base address of the cpu entry area mapping and the offsets of the various exception stacks. No functional change. Preparation for removing orig_ist. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.784487230@linutronix.de --- arch/x86/mm/fault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0524e1d74f24..06c089513d39 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -28,6 +28,7 @@ #include /* vma_pkey() */ #include /* efi_recover_from_page_fault()*/ #include /* store_idt(), ... */ +#include /* exception stack */ #define CREATE_TRACE_POINTS #include @@ -793,7 +794,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (is_vmalloc_addr((void *)address) && (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { - unsigned long stack = this_cpu_read(orig_ist.ist[ESTACK_DF]) - sizeof(void *); + unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *); /* * We're likely to be running with very little stack space * left. It's plausible that we'd hit this condition but -- cgit v1.2.3 From bf5882abab773afd1277415e2f826b21de28f30d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:51 +0200 Subject: x86/irq/64: Use cpu entry area instead of orig_ist The orig_ist[] array is a shadow copy of the IST array in the TSS. The reason why it exists is that older kernels used two TSS variants with different pointers into the debug stack. orig_ist[] contains the real starting points. There is no point anymore to do so because the same information can be retrieved using the base address of the cpu entry area mapping and the offsets of the various exception stacks. No functional change. Preparation for removing orig_ist. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.885741626@linutronix.de --- arch/x86/kernel/irq_64.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 182e8b245e06..7eb6f8d11bfd 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,6 +18,8 @@ #include #include #include + +#include #include #include @@ -43,10 +45,9 @@ static inline void stack_overflow_check(struct pt_regs *regs) { #ifdef CONFIG_DEBUG_STACKOVERFLOW #define STACK_MARGIN 128 - struct orig_ist *oist; - u64 irq_stack_top, irq_stack_bottom; - u64 estack_top, estack_bottom; + u64 irq_stack_top, irq_stack_bottom, estack_top, estack_bottom; u64 curbase = (u64)task_stack_page(current); + struct cea_exception_stacks *estacks; if (user_mode(regs)) return; @@ -60,9 +61,9 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (regs->sp >= irq_stack_bottom && regs->sp <= irq_stack_top) return; - oist = this_cpu_ptr(&orig_ist); - estack_top = (u64)oist->ist[ESTACK_DB]; - estack_bottom = estack_top - DEBUG_STKSZ + STACK_MARGIN; + estacks = __this_cpu_read(cea_exception_stacks); + estack_top = CEA_ESTACK_TOP(estacks, DB); + estack_bottom = CEA_ESTACK_BOT(estacks, DB) + STACK_MARGIN; if (regs->sp >= estack_bottom && regs->sp <= estack_top) return; -- cgit v1.2.3 From afcd21dad88b68d646e91ed36948117d58b4c197 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:52 +0200 Subject: x86/dumpstack/64: Use cpu_entry_area instead of orig_ist The orig_ist[] array is a shadow copy of the IST array in the TSS. The reason why it exists is that older kernels used two TSS variants with different pointers into the debug stack. orig_ist[] contains the real starting points. There is no point anymore to do so because the same information can be retrieved using the base address of the cpu entry area mapping and the offsets of the various exception stacks. No functional change. Preparation for removing orig_ist. Cc: Josh Poimboeuf Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160144.974900463@linutronix.de --- arch/x86/kernel/dumpstack_64.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 455b47ef9250..f6fbd0438f9e 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -16,6 +16,7 @@ #include #include +#include #include static const char *exception_stack_names[N_EXCEPTION_STACKS] = { @@ -25,11 +26,6 @@ static const char *exception_stack_names[N_EXCEPTION_STACKS] = { [ ESTACK_MCE ] = "#MC", }; -static const unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... 
N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [ESTACK_DB] = DEBUG_STKSZ -}; - const char *stack_type_name(enum stack_type type) { BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); @@ -52,25 +48,44 @@ const char *stack_type_name(enum stack_type type) return NULL; } +struct estack_layout { + unsigned int begin; + unsigned int end; +}; + +#define ESTACK_ENTRY(x) { \ + .begin = offsetof(struct cea_exception_stacks, x## _stack), \ + .end = offsetof(struct cea_exception_stacks, x## _stack_guard) \ + } + +static const struct estack_layout layout[N_EXCEPTION_STACKS] = { + [ ESTACK_DF ] = ESTACK_ENTRY(DF), + [ ESTACK_NMI ] = ESTACK_ENTRY(NMI), + [ ESTACK_DB ] = ESTACK_ENTRY(DB), + [ ESTACK_MCE ] = ESTACK_ENTRY(MCE), +}; + static bool in_exception_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin, *end; + unsigned long estacks, begin, end, stk = (unsigned long)stack; struct pt_regs *regs; - unsigned k; + unsigned int k; BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); + estacks = (unsigned long)__this_cpu_read(cea_exception_stacks); + for (k = 0; k < N_EXCEPTION_STACKS; k++) { - end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k]; - begin = end - (exception_stack_sizes[k] / sizeof(long)); + begin = estacks + layout[k].begin; + end = estacks + layout[k].end; regs = (struct pt_regs *)end - 1; - if (stack < begin || stack >= end) + if (stk < begin || stk >= end) continue; info->type = STACK_TYPE_EXCEPTION + k; - info->begin = begin; - info->end = end; + info->begin = (unsigned long *)begin; + info->end = (unsigned long *)end; info->next_sp = (unsigned long *)regs->sp; return true; -- cgit v1.2.3 From f6ef73224a0f0400c3979c8bc68b383f9d2eb9d8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:53 +0200 Subject: x86/cpu: Prepare TSS.IST setup for guard pages Convert the TSS.IST setup code to use the cpu entry area information directly instead of assuming a linear mapping of the IST stacks. The store to orig_ist[] is no longer required as there are no users anymore. This is the last preparatory step towards IST guard pages. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.061686012@linutronix.de --- arch/x86/kernel/cpu/common.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 24b801ea7522..4b01b71415f5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -507,19 +507,6 @@ void load_percpu_segment(int cpu) DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); #endif -#ifdef CONFIG_X86_64 -/* - * Special IST stacks which the CPU switches to when it calls - * an IST-marked descriptor entry. Up to 7 stacks (hardware - * limit), all of them are 4K, except the debug stack which - * is 8K. - */ -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [ESTACK_DB] = DEBUG_STKSZ -}; -#endif - /* Load the original GDT from the per-cpu structure */ void load_direct_gdt(int cpu) { @@ -1690,17 +1677,14 @@ static void setup_getcpu(int cpu) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. 
- * A lot of state is already set up in PDA init for 64 bit */ #ifdef CONFIG_X86_64 void cpu_init(void) { - struct orig_ist *oist; + int cpu = raw_smp_processor_id(); struct task_struct *me; struct tss_struct *t; - unsigned long v; - int cpu = raw_smp_processor_id(); int i; wait_for_master_cpu(cpu); @@ -1715,7 +1699,6 @@ void cpu_init(void) load_ucode_ap(); t = &per_cpu(cpu_tss_rw, cpu); - oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA if (this_cpu_read(numa_node) == 0 && @@ -1753,16 +1736,12 @@ void cpu_init(void) /* * set up and load the per-CPU TSS */ - if (!oist->ist[0]) { - char *estacks = (char *)&get_cpu_entry_area(cpu)->estacks; - - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - estacks += exception_stack_sizes[v]; - oist->ist[v] = t->x86_tss.ist[v] = - (unsigned long)estacks; - if (v == ESTACK_DB) - per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; - } + if (!t->x86_tss.ist[0]) { + t->x86_tss.ist[ESTACK_DF] = __this_cpu_ist_top_va(DF); + t->x86_tss.ist[ESTACK_NMI] = __this_cpu_ist_top_va(NMI); + t->x86_tss.ist[ESTACK_DB] = __this_cpu_ist_top_va(DB); + t->x86_tss.ist[ESTACK_MCE] = __this_cpu_ist_top_va(MCE); + per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[ESTACK_DB]; } t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; -- cgit v1.2.3 From 4d68c3d0ecd5fcba8876e8a58ac41ffb360de43e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:54 +0200 Subject: x86/cpu: Remove orig_ist array All users gone. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dave Hansen Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Pingfan Liu Cc: Pu Wen Cc: Sean Christopherson Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.151435667@linutronix.de --- arch/x86/include/asm/processor.h | 9 --------- arch/x86/kernel/cpu/common.c | 6 ------ 2 files changed, 15 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2bb3a648fc12..8fcfcd1a8375 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -374,16 +374,7 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 #endif -/* - * Save the original ist values for checking stack pointers during debugging - */ -struct orig_ist { - unsigned long ist[7]; -}; - #ifdef CONFIG_X86_64 -DECLARE_PER_CPU(struct orig_ist, orig_ist); - union irq_stack_union { char irq_stack[IRQ_STACK_SIZE]; /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b01b71415f5..8243f198fb7f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1549,12 +1549,6 @@ void syscall_init(void) X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - static DEFINE_PER_CPU(unsigned long, debug_stack_addr); DEFINE_PER_CPU(int, debug_stack_usage); -- cgit v1.2.3 From 3207426925d2b4da390be8068df1d1c2b36e5918 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:55 +0200 Subject: x86/exceptions: Disconnect IST index and stack order The entry order of the TSS.IST array and the order of the stack storage/mapping are not required to be the same. 
With the upcoming split of the debug stack this is going to fall apart as the number of TSS.IST array entries stays the same while the actual stacks are increasing. Make them separate so that code like dumpstack can just utilize the mapping order. The IST index is solely required for the actual TSS.IST array initialization. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Baoquan He Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: Dou Liyang Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jann Horn Cc: Josh Poimboeuf Cc: Kees Cook Cc: "Kirill A. Shutemov" Cc: Konrad Rzeszutek Wilk Cc: Nicolai Stange Cc: Peter Zijlstra Cc: Qian Cai Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.241588113@linutronix.de --- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/cpu_entry_area.h | 11 +++++++++++ arch/x86/include/asm/page_64_types.h | 9 ++++----- arch/x86/include/asm/stacktrace.h | 2 ++ arch/x86/kernel/cpu/common.c | 10 +++++----- arch/x86/kernel/idt.c | 8 ++++---- 6 files changed, 27 insertions(+), 15 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index fd0a50452cb3..5c0348504a4b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1129,7 +1129,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ hv_stimer0_callback_vector hv_stimer0_vector_handler #endif /* CONFIG_HYPERV */ -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=ESTACK_DB +idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 9b406f067ecf..310eeb62d418 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -35,6 +35,17 @@ struct cea_exception_stacks { ESTACKS_MEMBERS(0) }; +/* + * The exception stack ordering in [cea_]exception_stacks + */ +enum exception_stack_ordering { + ESTACK_DF, + ESTACK_NMI, + ESTACK_DB, + ESTACK_MCE, + N_EXCEPTION_STACKS +}; + #define CEA_ESTACK_SIZE(st) \ sizeof(((struct cea_exception_stacks *)0)->st## _stack) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 6ab2c54c1bf9..056de887b220 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -27,11 +27,10 @@ /* * The index for the tss.ist[] array. The hardware limit is 7 entries. 
*/ -#define ESTACK_DF 0 -#define ESTACK_NMI 1 -#define ESTACK_DB 2 -#define ESTACK_MCE 3 -#define N_EXCEPTION_STACKS 4 +#define IST_INDEX_DF 0 +#define IST_INDEX_NMI 1 +#define IST_INDEX_DB 2 +#define IST_INDEX_MCE 3 /* * Set __PAGE_OFFSET to the most negative possible address + diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f335aad404a4..d6d758a187b6 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -9,6 +9,8 @@ #include #include + +#include #include enum stack_type { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8243f198fb7f..143aceaf9a9a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1731,11 +1731,11 @@ void cpu_init(void) * set up and load the per-CPU TSS */ if (!t->x86_tss.ist[0]) { - t->x86_tss.ist[ESTACK_DF] = __this_cpu_ist_top_va(DF); - t->x86_tss.ist[ESTACK_NMI] = __this_cpu_ist_top_va(NMI); - t->x86_tss.ist[ESTACK_DB] = __this_cpu_ist_top_va(DB); - t->x86_tss.ist[ESTACK_MCE] = __this_cpu_ist_top_va(MCE); - per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[ESTACK_DB]; + t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); + t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); + t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); + t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); + per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[IST_INDEX_DB]; } t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 2188f734ec61..6d8917875f44 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -183,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; * cpu_init() when the TSS has been initialized. */ static const __initconst struct idt_data ist_idts[] = { - ISTG(X86_TRAP_DB, debug, ESTACK_DB), - ISTG(X86_TRAP_NMI, nmi, ESTACK_NMI), - ISTG(X86_TRAP_DF, double_fault, ESTACK_DF), + ISTG(X86_TRAP_DB, debug, IST_INDEX_DB), + ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI), + ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF), #ifdef CONFIG_X86_MCE - ISTG(X86_TRAP_MC, &machine_check, ESTACK_MCE), + ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE), #endif }; -- cgit v1.2.3 From 1bdb67e5aa2d5d43c48cb7d93393fcba276c9e71 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:56 +0200 Subject: x86/exceptions: Enable IST guard pages All usage sites which expected that the exception stacks in the CPU entry area are mapped linearly are fixed up. Enable guard pages between the IST stacks. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Sean Christopherson Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.349862042@linutronix.de --- arch/x86/include/asm/cpu_entry_area.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 310eeb62d418..9c96406e6d2b 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -26,13 +26,9 @@ struct exception_stacks { ESTACKS_MEMBERS(0) }; -/* - * The effective cpu entry area mapping with guard pages. Guard size is - * zero until the code which makes assumptions about linear mappings is - * cleaned up. - */ +/* The effective cpu entry area mapping with guard pages. 
*/ struct cea_exception_stacks { - ESTACKS_MEMBERS(0) + ESTACKS_MEMBERS(PAGE_SIZE) }; /* -- cgit v1.2.3 From 2a594d4ccf3f10f80b77d71bd3dad10813ac0137 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:57 +0200 Subject: x86/exceptions: Split debug IST stack The debug IST stack is actually two separate debug stacks to handle #DB recursion. This is required because the CPU starts always at top of stack on exception entry, which means on #DB recursion the second #DB would overwrite the stack of the first. The low level entry code therefore adjusts the top of stack on entry so a secondary #DB starts from a different stack page. But the stack pages are adjacent without a guard page between them. Split the debug stack into 3 stacks which are separated by guard pages. The 3rd stack is never mapped into the cpu_entry_area and is only there to catch triple #DB nesting: --- top of DB_stack <- Initial stack --- end of DB_stack guard page --- top of DB1_stack <- Top of stack after entering first #DB --- end of DB1_stack guard page --- top of DB2_stack <- Top of stack after entering second #DB --- end of DB2_stack guard page If DB2 would not act as the final guard hole, a second #DB would point the top of #DB stack to the stack below #DB1 which would be valid and not catch the not so desired triple nesting. The backing store does not allocate any memory for DB2 and its guard page as it is not going to be mapped into the cpu_entry_area. - Adjust the low level entry code so it adjusts top of #DB with the offset between the stacks instead of exception stack size. - Make the dumpstack code aware of the new stacks. - Adjust the in_debug_stack() implementation and move it into the NMI code where it belongs. As this is NMI hotpath code, it just checks the full area between top of DB_stack and bottom of DB1_stack without checking for the guard page. That's correct because the NMI cannot hit a stackpointer pointing to the guard page between DB and DB1 stack. Even if it would, then the NMI operation still is unaffected, but the resume of the debug exception on the topmost DB stack will crash by touching the guard page. [ bp: Make exception_stack_names static const char * const ] Suggested-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Cc: Andy Lutomirski Cc: Baoquan He Cc: "Chang S. Bae" Cc: Dave Hansen Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: "Kirill A. 
Shutemov" Cc: Konrad Rzeszutek Wilk Cc: linux-doc@vger.kernel.org Cc: Masahiro Yamada Cc: Peter Zijlstra Cc: Qian Cai Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.439944544@linutronix.de --- Documentation/x86/kernel-stacks | 7 ++++++- arch/x86/entry/entry_64.S | 8 ++++---- arch/x86/include/asm/cpu_entry_area.h | 14 ++++++++++---- arch/x86/include/asm/debugreg.h | 2 -- arch/x86/include/asm/page_64_types.h | 3 --- arch/x86/kernel/asm-offsets_64.c | 2 ++ arch/x86/kernel/cpu/common.c | 11 ----------- arch/x86/kernel/dumpstack_64.c | 12 ++++++++---- arch/x86/kernel/nmi.c | 20 +++++++++++++++++++- arch/x86/mm/cpu_entry_area.c | 4 +++- 10 files changed, 52 insertions(+), 31 deletions(-) diff --git a/Documentation/x86/kernel-stacks b/Documentation/x86/kernel-stacks index 1b04596caea9..d1bfb0b95ee0 100644 --- a/Documentation/x86/kernel-stacks +++ b/Documentation/x86/kernel-stacks @@ -76,7 +76,7 @@ The currently assigned IST stacks are :- middle of switching stacks. Using IST for NMI events avoids making assumptions about the previous state of the kernel stack. -* ESTACK_DB. DEBUG_STKSZ +* ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE). Used for hardware debug interrupts (interrupt 1) and for software debug interrupts (INT3). @@ -86,6 +86,11 @@ The currently assigned IST stacks are :- avoids making assumptions about the previous state of the kernel stack. + To handle nested #DB correctly there exist two instances of DB stacks. On + #DB entry the IST stackpointer for #DB is switched to the second instance + so a nested #DB starts from a clean stack. The nested #DB switches + the IST stackpointer to a guard hole to catch triple nesting. + * ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE). Used for interrupt 18 - Machine Check Exception (#MC). diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5c0348504a4b..ee649f1f279e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. 
*/ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -925,13 +925,13 @@ ENTRY(\sym) .endif .if \shift_ist != -1 - subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) + subq $\ist_offset, CPU_TSS_IST(\shift_ist) .endif call \do_sym .if \shift_ist != -1 - addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) + addq $\ist_offset, CPU_TSS_IST(\shift_ist) .endif /* these procedures expect "no swapgs" flag in ebx */ @@ -1129,7 +1129,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ hv_stimer0_callback_vector hv_stimer0_vector_handler #endif /* CONFIG_HYPERV */ -idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB +idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 9c96406e6d2b..cff3f3f3bfe0 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -10,25 +10,29 @@ #ifdef CONFIG_X86_64 /* Macro to enforce the same ordering and stack sizes */ -#define ESTACKS_MEMBERS(guardsize) \ +#define ESTACKS_MEMBERS(guardsize, db2_holesize)\ char DF_stack_guard[guardsize]; \ char DF_stack[EXCEPTION_STKSZ]; \ char NMI_stack_guard[guardsize]; \ char NMI_stack[EXCEPTION_STKSZ]; \ + char DB2_stack_guard[guardsize]; \ + char DB2_stack[db2_holesize]; \ + char DB1_stack_guard[guardsize]; \ + char DB1_stack[EXCEPTION_STKSZ]; \ char DB_stack_guard[guardsize]; \ - char DB_stack[DEBUG_STKSZ]; \ + char DB_stack[EXCEPTION_STKSZ]; \ char MCE_stack_guard[guardsize]; \ char MCE_stack[EXCEPTION_STKSZ]; \ char IST_top_guard[guardsize]; \ /* The exception stacks' physical storage. No guard pages required */ struct exception_stacks { - ESTACKS_MEMBERS(0) + ESTACKS_MEMBERS(0, 0) }; /* The effective cpu entry area mapping with guard pages. 
*/ struct cea_exception_stacks { - ESTACKS_MEMBERS(PAGE_SIZE) + ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ) }; /* @@ -37,6 +41,8 @@ struct cea_exception_stacks { enum exception_stack_ordering { ESTACK_DF, ESTACK_NMI, + ESTACK_DB2, + ESTACK_DB1, ESTACK_DB, ESTACK_MCE, N_EXCEPTION_STACKS diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 9e5ca30738e5..1a8609a15856 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void) { __this_cpu_dec(debug_stack_usage); } -int is_debug_stack(unsigned long addr); void debug_stack_set_zero(void); void debug_stack_reset(void); #else /* !X86_64 */ -static inline int is_debug_stack(unsigned long addr) { return 0; } static inline void debug_stack_set_zero(void) { } static inline void debug_stack_reset(void) { } static inline void debug_stack_usage_inc(void) { } diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 056de887b220..793c14c372cb 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -18,9 +18,6 @@ #define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) -#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) -#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) - #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index ddced33184b5..f5281567e28e 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -68,6 +68,8 @@ int main(void) #undef ENTRY OFFSET(TSS_ist, tss_struct, x86_tss.ist); + DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) - + offsetof(struct cea_exception_stacks, DB1_stack)); BLANK(); #ifdef CONFIG_STACKPROTECTOR diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 143aceaf9a9a..88cab45707a9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1549,17 +1549,7 @@ void syscall_init(void) X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } -static DEFINE_PER_CPU(unsigned long, debug_stack_addr); DEFINE_PER_CPU(int, debug_stack_usage); - -int is_debug_stack(unsigned long addr) -{ - return __this_cpu_read(debug_stack_usage) || - (addr <= __this_cpu_read(debug_stack_addr) && - addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ)); -} -NOKPROBE_SYMBOL(is_debug_stack); - DEFINE_PER_CPU(u32, debug_idt_ctr); void debug_stack_set_zero(void) @@ -1735,7 +1725,6 @@ void cpu_init(void) t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); - per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[IST_INDEX_DB]; } t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index f6fbd0438f9e..fca97bd3d8ae 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -19,16 +19,18 @@ #include #include -static const char *exception_stack_names[N_EXCEPTION_STACKS] = { +static const char * const exception_stack_names[] = { [ ESTACK_DF ] = "#DF", [ ESTACK_NMI ] = "NMI", + [ ESTACK_DB2 ] = "#DB2", + [ ESTACK_DB1 ] = "#DB1", [ ESTACK_DB ] = "#DB", [ ESTACK_MCE ] = "#MC", }; const char *stack_type_name(enum stack_type type) { - BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); 
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); if (type == STACK_TYPE_IRQ) return "IRQ"; @@ -58,9 +60,11 @@ struct estack_layout { .end = offsetof(struct cea_exception_stacks, x## _stack_guard) \ } -static const struct estack_layout layout[N_EXCEPTION_STACKS] = { +static const struct estack_layout layout[] = { [ ESTACK_DF ] = ESTACK_ENTRY(DF), [ ESTACK_NMI ] = ESTACK_ENTRY(NMI), + [ ESTACK_DB2 ] = { .begin = 0, .end = 0}, + [ ESTACK_DB1 ] = ESTACK_ENTRY(DB1), [ ESTACK_DB ] = ESTACK_ENTRY(DB), [ ESTACK_MCE ] = ESTACK_ENTRY(MCE), }; @@ -71,7 +75,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) struct pt_regs *regs; unsigned int k; - BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); + BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); estacks = (unsigned long)__this_cpu_read(cea_exception_stacks); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 18bc9b51ac9b..3755d0310026 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -21,13 +21,14 @@ #include #include #include +#include #include #if defined(CONFIG_EDAC) #include #endif -#include +#include #include #include #include @@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2); * switch back to the original IDT. */ static DEFINE_PER_CPU(int, update_debug_stack); + +static bool notrace is_debug_stack(unsigned long addr) +{ + struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks); + unsigned long top = CEA_ESTACK_TOP(cs, DB); + unsigned long bot = CEA_ESTACK_BOT(cs, DB1); + + if (__this_cpu_read(debug_stack_usage)) + return true; + /* + * Note, this covers the guard page between DB and DB1 as well to + * avoid two checks. But by all means @addr can never point into + * the guard page. + */ + return addr >= bot && addr < top; +} +NOKPROBE_SYMBOL(is_debug_stack); #endif dotraplinkage notrace void diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index a00d0d059c8a..752ad11d6868 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -98,10 +98,12 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu) /* * The exceptions stack mappings in the per cpu area are protected - * by guard pages so each stack must be mapped separately. + * by guard pages so each stack must be mapped separately. DB2 is + * not mapped; it just exists to catch triple nesting of #DB. */ cea_map_stack(DF); cea_map_stack(NMI); + cea_map_stack(DB1); cea_map_stack(DB); cea_map_stack(MCE); } -- cgit v1.2.3 From c450c8f532b63475b30e29bc600c25ab0a4ab282 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:58 +0200 Subject: x86/dumpstack/64: Speedup in_exception_stack() The current implementation of in_exception_stack() iterates over the exception stacks array. Most of the time this is an useless exercise, but even for the actual use cases (perf and ftrace) it takes at least 2 iterations to get to the NMI stack. As the exception stacks and the guard pages are page aligned the loop can be avoided completely. Add a initial check whether the stack pointer is inside the full exception stack area and leave early if not. Create a lookup table which describes the stack area. The table index is the page offset from the beginning of the exception stacks. So for any given stack pointer the page offset is computed and a lookup in the description table is performed. If it is inside a guard page, return. If not, use the descriptor to fill in the info structure. 
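(Illustrative aside, not part of the patch: the stand-alone C sketch below mimics the page-granular descriptor lookup described above. The area layout, names and sizes are invented for the example; the real table is generated from the cpu_entry_area stack offsets and types.)

/*
 * Toy model of the lookup-table idea: one descriptor per page of a
 * pretend "exception stack area". Guard pages have size 0.
 */
#include <stdio.h>
#include <stdbool.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define AREA_PAGES	8	/* pretend the whole area spans 8 pages */

struct page_desc {
	unsigned int offs;	/* offset of the stack from area start */
	unsigned int size;	/* size of the stack, 0 == guard page */
};

/* Filled "at compile time": pages 1,2 are stack A, page 4 is stack B. */
static const struct page_desc table[AREA_PAGES] = {
	[1] = { .offs = 1 * PAGE_SIZE, .size = 2 * PAGE_SIZE },
	[2] = { .offs = 1 * PAGE_SIZE, .size = 2 * PAGE_SIZE },
	[4] = { .offs = 4 * PAGE_SIZE, .size = 1 * PAGE_SIZE },
};

static bool lookup(unsigned long area_base, unsigned long sp)
{
	unsigned long k;

	/* Bail if @sp is outside the whole area. */
	if (sp < area_base || sp >= area_base + AREA_PAGES * PAGE_SIZE)
		return false;

	/* Page offset from the start of the area selects the descriptor. */
	k = (sp - area_base) >> PAGE_SHIFT;
	if (!table[k].size)
		return false;		/* guard page */

	printf("sp %#lx -> stack at [%#lx, %#lx)\n", sp,
	       area_base + table[k].offs,
	       area_base + table[k].offs + table[k].size);
	return true;
}

int main(void)
{
	unsigned long base = 0x100000;

	lookup(base, base + 0x1800);	/* inside stack A */
	lookup(base, base + 0x3008);	/* guard page, no output */
	lookup(base, base + 0x4010);	/* inside stack B */
	return 0;
}

Compiled with any C compiler, this prints which pretend stack a pointer falls on and stays silent for guard pages, which is the same early-exit behaviour in_exception_stack() gains from the real table.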
The table is filled at compile time and for the !KASAN case the interesting page descriptors exactly fit into a single cache line. Just the last guard page descriptor is in the next cacheline, but that should not be accessed in the regular case. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Josh Poimboeuf Cc: "H. Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.543320386@linutronix.de --- arch/x86/kernel/dumpstack_64.c | 82 ++++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index fca97bd3d8ae..f356d3ea0c70 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -50,52 +50,72 @@ const char *stack_type_name(enum stack_type type) return NULL; } -struct estack_layout { - unsigned int begin; - unsigned int end; +/** + * struct estack_pages - Page descriptor for exception stacks + * @offs: Offset from the start of the exception stack area + * @size: Size of the exception stack + * @type: Type to store in the stack_info struct + */ +struct estack_pages { + u32 offs; + u16 size; + u16 type; }; -#define ESTACK_ENTRY(x) { \ - .begin = offsetof(struct cea_exception_stacks, x## _stack), \ - .end = offsetof(struct cea_exception_stacks, x## _stack_guard) \ - } +#define EPAGERANGE(st) \ + [PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \ + PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \ + .offs = CEA_ESTACK_OFFS(st), \ + .size = CEA_ESTACK_SIZE(st), \ + .type = STACK_TYPE_EXCEPTION + ESTACK_ ##st, } -static const struct estack_layout layout[] = { - [ ESTACK_DF ] = ESTACK_ENTRY(DF), - [ ESTACK_NMI ] = ESTACK_ENTRY(NMI), - [ ESTACK_DB2 ] = { .begin = 0, .end = 0}, - [ ESTACK_DB1 ] = ESTACK_ENTRY(DB1), - [ ESTACK_DB ] = ESTACK_ENTRY(DB), - [ ESTACK_MCE ] = ESTACK_ENTRY(MCE), +/* + * Array of exception stack page descriptors. If the stack is larger than + * PAGE_SIZE, all pages covering a particular stack will have the same + * info. The guard pages including the not mapped DB2 stack are zeroed + * out. + */ +static const +struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = { + EPAGERANGE(DF), + EPAGERANGE(NMI), + EPAGERANGE(DB1), + EPAGERANGE(DB), + EPAGERANGE(MCE), }; static bool in_exception_stack(unsigned long *stack, struct stack_info *info) { - unsigned long estacks, begin, end, stk = (unsigned long)stack; + unsigned long begin, end, stk = (unsigned long)stack; + const struct estack_pages *ep; struct pt_regs *regs; unsigned int k; BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); - estacks = (unsigned long)__this_cpu_read(cea_exception_stacks); - - for (k = 0; k < N_EXCEPTION_STACKS; k++) { - begin = estacks + layout[k].begin; - end = estacks + layout[k].end; - regs = (struct pt_regs *)end - 1; + begin = (unsigned long)__this_cpu_read(cea_exception_stacks); + end = begin + sizeof(struct cea_exception_stacks); + /* Bail if @stack is outside the exception stack area. */ + if (stk < begin || stk >= end) + return false; - if (stk < begin || stk >= end) - continue; + /* Calc page offset from start of exception stacks */ + k = (stk - begin) >> PAGE_SHIFT; + /* Lookup the page descriptor */ + ep = &estack_pages[k]; + /* Guard page? 
*/ + if (!ep->size) + return false; - info->type = STACK_TYPE_EXCEPTION + k; - info->begin = (unsigned long *)begin; - info->end = (unsigned long *)end; - info->next_sp = (unsigned long *)regs->sp; + begin += (unsigned long)ep->offs; + end = begin + (unsigned long)ep->size; + regs = (struct pt_regs *)end - 1; - return true; - } - - return false; + info->type = ep->type; + info->begin = (unsigned long *)begin; + info->end = (unsigned long *)end; + info->next_sp = (unsigned long *)regs->sp; + return true; } static bool in_irq_stack(unsigned long *stack, struct stack_info *info) -- cgit v1.2.3 From aa641c287b2f7676f6f0064a8351daf08eca6b0a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 17:59:59 +0200 Subject: x86/irq/32: Define IRQ_STACK_SIZE On 32-bit IRQ_STACK_SIZE is the same as THREAD_SIZE. To allow sharing struct irq_stack with 32-bit, define IRQ_STACK_SIZE for 32-bit and use it for struct irq_stack. No functional change. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Michal Hocko Cc: Nick Desaulniers Cc: Sean Christopherson Cc: Suravee Suthikulpanit Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.632513987@linutronix.de --- arch/x86/include/asm/page_32_types.h | 2 ++ arch/x86/include/asm/processor.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 57b5dc15ca7e..565ad755c785 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -22,6 +22,8 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define IRQ_STACK_SIZE THREAD_SIZE + #define N_EXCEPTION_STACKS 1 #ifdef CONFIG_X86_PAE diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8fcfcd1a8375..8b4bf732e1b5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -422,8 +422,8 @@ DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); * per-CPU IRQ handling stacks */ struct irq_stack { - u32 stack[THREAD_SIZE/sizeof(u32)]; -} __aligned(THREAD_SIZE); + u32 stack[IRQ_STACK_SIZE / sizeof(u32)]; +} __aligned(IRQ_STACK_SIZE); DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); DECLARE_PER_CPU(struct irq_stack *, softirq_stack); -- cgit v1.2.3 From 231c4846b106d526fa212b02b37447d3f2fcc99d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:00 +0200 Subject: x86/irq/32: Make irq stack a character array There is no reason to have an u32 array in struct irq_stack. The only purpose of the array is to size the struct properly. Preparatory change for sharing struct irq_stack with 64-bit. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Andy Lutomirski Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Nick Desaulniers Cc: Pingfan Liu Cc: Pu Wen Cc: Sean Christopherson Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.736241969@linutronix.de --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8b4bf732e1b5..ff3f469ab2d4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -422,7 +422,7 @@ DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); * per-CPU IRQ handling stacks */ struct irq_stack { - u32 stack[IRQ_STACK_SIZE / sizeof(u32)]; + char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); -- cgit v1.2.3 From a754fe2b76d1d6bce7069657bba975034f3ad961 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:01 +0200 Subject: x86/irq/32: Rename hard/softirq_stack to hard/softirq_stack_ptr The percpu storage holds a pointer to the stack not the stack itself. Rename it before sharing struct irq_stack with 64-bit. No functional changes. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Nick Desaulniers Cc: Nicolai Stange Cc: Sean Christopherson Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.824805922@linutronix.de --- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/kernel/dumpstack_32.c | 4 ++-- arch/x86/kernel/irq_32.c | 19 ++++++++++--------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ff3f469ab2d4..0b7d866a1ad5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -425,8 +425,8 @@ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); -DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); -DECLARE_PER_CPU(struct irq_stack *, softirq_stack); +DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); +DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* X86_64 */ extern unsigned int fpu_kernel_xstate_size; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index d305440ebe9c..64a59d726639 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -34,7 +34,7 @@ const char *stack_type_name(enum stack_type type) static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack); + unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* @@ -59,7 +59,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack); + unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 95600a99ae93..f37489c806fa 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -51,8 +51,8 @@ static inline int check_stack_overflow(void) { return 0; } static inline void print_stack_overflow(void) { } #endif 
-DEFINE_PER_CPU(struct irq_stack *, hardirq_stack); -DEFINE_PER_CPU(struct irq_stack *, softirq_stack); +DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); +DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr); static void call_on_stack(void *func, void *stack) { @@ -76,7 +76,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) u32 *isp, *prev_esp, arg1; curstk = (struct irq_stack *) current_stack(); - irqstk = __this_cpu_read(hardirq_stack); + irqstk = __this_cpu_read(hardirq_stack_ptr); /* * this is where we switch to the IRQ stack. However, if we are @@ -113,21 +113,22 @@ void irq_ctx_init(int cpu) { struct irq_stack *irqstk; - if (per_cpu(hardirq_stack, cpu)) + if (per_cpu(hardirq_stack_ptr, cpu)) return; irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER)); - per_cpu(hardirq_stack, cpu) = irqstk; + per_cpu(hardirq_stack_ptr, cpu) = irqstk; irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER)); - per_cpu(softirq_stack, cpu) = irqstk; + per_cpu(softirq_stack_ptr, cpu) = irqstk; - printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", - cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); + pr_debug("CPU %u irqstacks, hard=%p soft=%p\n", + cpu, per_cpu(hardirq_stack_ptr, cpu), + per_cpu(softirq_stack_ptr, cpu)); } void do_softirq_own_stack(void) @@ -135,7 +136,7 @@ void do_softirq_own_stack(void) struct irq_stack *irqstk; u32 *isp, *prev_esp; - irqstk = __this_cpu_read(softirq_stack); + irqstk = __this_cpu_read(softirq_stack_ptr); /* build the stack frame on the softirq stack */ isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); -- cgit v1.2.3 From 758a2e312228410f2f5092ade558109e93dc3ee8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:02 +0200 Subject: x86/irq/64: Rename irq_stack_ptr to hardirq_stack_ptr Preparatory patch to share code with 32bit. No functional changes. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jiri Kosina Cc: Josh Poimboeuf Cc: Konrad Rzeszutek Wilk Cc: Michal Hocko Cc: Mike Rapoport Cc: Nick Desaulniers Cc: Nicolai Stange Cc: Peter Zijlstra Cc: Pingfan Liu Cc: Sean Christopherson Cc: Stephen Rothwell Cc: Vlastimil Babka Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160145.912584074@linutronix.de --- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/dumpstack_64.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- arch/x86/kernel/setup_percpu.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ee649f1f279e..726abbe6c6d8 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -431,7 +431,7 @@ END(irq_entries_start) */ movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8) - movq PER_CPU_VAR(irq_stack_ptr), %rsp + movq PER_CPU_VAR(hardirq_stack_ptr), %rsp #ifdef CONFIG_DEBUG_ENTRY /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0b7d866a1ad5..5e3dd4e2136d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -396,7 +396,7 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu) return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); } -DECLARE_PER_CPU(char *, irq_stack_ptr); +DECLARE_PER_CPU(char *, hardirq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern asmlinkage void ignore_sysret(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 88cab45707a9..13ec72bb8f36 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1510,7 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(char *, irq_stack_ptr) = +DEFINE_PER_CPU(char *, hardirq_stack_ptr) = init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index f356d3ea0c70..753b8cfe8b8a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -120,7 +120,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) static bool in_irq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr); + unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long)); /* diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 7eb6f8d11bfd..f0c7356c8969 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -56,7 +56,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_top = (u64)__this_cpu_read(irq_stack_ptr); + irq_stack_top = (u64)__this_cpu_read(hardirq_stack_ptr); irq_stack_bottom = irq_stack_top - IRQ_STACK_SIZE + STACK_MARGIN; if (regs->sp >= irq_stack_bottom && regs->sp <= irq_stack_top) return; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4bf46575568a..657343ecc2da 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -245,7 +245,7 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_logical_apicid, cpu); #endif #ifdef CONFIG_X86_64 - per_cpu(irq_stack_ptr, cpu) = + 
per_cpu(hardirq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE; #endif -- cgit v1.2.3 From 451f743a64e1cf979f5fe21a1b2a015feb559f72 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:03 +0200 Subject: x86/irq/32: Invoke irq_ctx_init() from init_IRQ() irq_ctx_init() is invoked from native_init_IRQ() or from xen_init_IRQ() code. There is no reason to have this split. The interrupt stacks must be allocated no matter what. Invoke it from init_IRQ() before invoking the native or XEN init implementation. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Reviewed-by: Juergen Gross Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Abraham Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Mike Rapoport Cc: Nicolai Stange Cc: Sean Christopherson Cc: Stefano Stabellini Cc: Stephen Rothwell Cc: x86-ml Cc: xen-devel@lists.xenproject.org Link: https://lkml.kernel.org/r/20190414160146.001162606@linutronix.de --- arch/x86/kernel/irqinit.c | 4 ++-- drivers/xen/events/events_base.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index a0693b71cfc1..26b5cb5386b9 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -91,6 +91,8 @@ void __init init_IRQ(void) for (i = 0; i < nr_legacy_irqs(); i++) per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i); + irq_ctx_init(smp_processor_id()); + x86_init.irqs.intr_init(); } @@ -104,6 +106,4 @@ void __init native_init_IRQ(void) if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); - - irq_ctx_init(smp_processor_id()); } diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 117e76b2f939..084e45882c73 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1687,7 +1687,6 @@ void __init xen_init_IRQ(void) #ifdef CONFIG_X86 if (xen_pv_domain()) { - irq_ctx_init(smp_processor_id()); if (xen_initial_domain()) pci_xen_initial_domain(); } -- cgit v1.2.3 From 66c7ceb47f628c8bd4f84a6d01c2725ded6a342d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:04 +0200 Subject: x86/irq/32: Handle irq stack allocation failure proper irq_ctx_init() crashes hard on page allocation failures. While that's ok during early boot, it's just wrong in the CPU hotplug bringup code. Check the page allocation failure and return -ENOMEM and handle it at the call sites. On early boot the only way out is to BUG(), but on CPU hotplug there is no reason to crash, so just abort the operation. Rename the function to something more sensible while at it. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Alison Schofield Cc: Andrew Morton Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Boris Ostrovsky Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Nicolai Stange Cc: Pu Wen Cc: Sean Christopherson Cc: Shaokun Zhang Cc: Stefano Stabellini Cc: Suravee Suthikulpanit Cc: x86-ml Cc: xen-devel@lists.xenproject.org Cc: Yazen Ghannam Cc: Yi Wang Cc: Zhenzhong Duan Link: https://lkml.kernel.org/r/20190414160146.089060584@linutronix.de --- arch/x86/include/asm/irq.h | 4 ++-- arch/x86/include/asm/smp.h | 2 +- arch/x86/kernel/irq_32.c | 32 ++++++++++++++++---------------- arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/smpboot.c | 15 ++++++++++++--- arch/x86/xen/smp_pv.c | 4 +++- 6 files changed, 35 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index fbb16e6b6c18..d751e8440a6b 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -17,9 +17,9 @@ static inline int irq_canonicalize(int irq) } #ifdef CONFIG_X86_32 -extern void irq_ctx_init(int cpu); +extern int irq_init_percpu_irqstack(unsigned int cpu); #else -# define irq_ctx_init(cpu) do { } while (0) +static inline int irq_init_percpu_irqstack(unsigned int cpu) { return 0; } #endif #define __ARCH_HAS_DO_SOFTIRQ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 2e95b6c1bca3..da545df207b2 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void); void native_smp_prepare_cpus(unsigned int max_cpus); void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); -void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); +int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); int common_cpu_die(unsigned int cpu); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index f37489c806fa..fc34816c6f04 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -107,28 +107,28 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) } /* - * allocate per-cpu stacks for hardirq and for softirq processing + * Allocate per-cpu stacks for hardirq and softirq processing */ -void irq_ctx_init(int cpu) +int irq_init_percpu_irqstack(unsigned int cpu) { - struct irq_stack *irqstk; + int node = cpu_to_node(cpu); + struct page *ph, *ps; if (per_cpu(hardirq_stack_ptr, cpu)) - return; - - irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), - THREADINFO_GFP, - THREAD_SIZE_ORDER)); - per_cpu(hardirq_stack_ptr, cpu) = irqstk; + return 0; - irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), - THREADINFO_GFP, - THREAD_SIZE_ORDER)); - per_cpu(softirq_stack_ptr, cpu) = irqstk; + ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); + if (!ph) + return -ENOMEM; + ps = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); + if (!ps) { + __free_pages(ph, THREAD_SIZE_ORDER); + return -ENOMEM; + } - pr_debug("CPU %u irqstacks, hard=%p soft=%p\n", - cpu, per_cpu(hardirq_stack_ptr, cpu), - per_cpu(softirq_stack_ptr, cpu)); + per_cpu(hardirq_stack_ptr, cpu) = page_address(ph); + per_cpu(softirq_stack_ptr, cpu) = page_address(ps); + return 0; } void do_softirq_own_stack(void) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 26b5cb5386b9..16919a9671fa 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -91,7 +91,7 @@ void __init init_IRQ(void) for (i = 0; i < nr_legacy_irqs(); i++) per_cpu(vector_irq, 
0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i); - irq_ctx_init(smp_processor_id()); + BUG_ON(irq_init_percpu_irqstack(smp_processor_id())); x86_init.irqs.intr_init(); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ce1a67b70168..c92b21f9e9dc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -935,20 +935,27 @@ out: return boot_error; } -void common_cpu_up(unsigned int cpu, struct task_struct *idle) +int common_cpu_up(unsigned int cpu, struct task_struct *idle) { + int ret; + /* Just in case we booted with a single CPU. */ alternatives_enable_smp(); per_cpu(current_task, cpu) = idle; + /* Initialize the interrupt stack(s) */ + ret = irq_init_percpu_irqstack(cpu); + if (ret) + return ret; + #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ - irq_ctx_init(cpu); per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); #else initial_gs = per_cpu_offset(cpu); #endif + return 0; } /* @@ -1106,7 +1113,9 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) /* the FPU context is blank, nobody can own it */ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; - common_cpu_up(cpu, tidle); + err = common_cpu_up(cpu, tidle); + if (err) + return err; err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); if (err) { diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 145506f9fdbe..590fcf863006 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -361,7 +361,9 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) { int rc; - common_cpu_up(cpu, idle); + rc = common_cpu_up(cpu, idle); + if (rc) + return rc; xen_setup_runstate_info(cpu); -- cgit v1.2.3 From 0ac26104208450d35c4e68754ce0c67b3a4d7802 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:05 +0200 Subject: x86/irq/64: Init hardirq_stack_ptr during CPU hotplug Preparatory change for disentangling the irq stack union as a prerequisite for irq stacks with guard pages. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Konrad Rzeszutek Wilk Cc: Nicolai Stange Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Sean Christopherson Cc: x86-ml Cc: Yi Wang Link: https://lkml.kernel.org/r/20190414160146.177558566@linutronix.de --- arch/x86/include/asm/irq.h | 4 ---- arch/x86/kernel/cpu/common.c | 4 +--- arch/x86/kernel/irq_64.c | 15 +++++++++++++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index d751e8440a6b..8f95686ec27e 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq) return ((irq == 2) ? 
9 : irq); } -#ifdef CONFIG_X86_32 extern int irq_init_percpu_irqstack(unsigned int cpu); -#else -static inline int irq_init_percpu_irqstack(unsigned int cpu) { return 0; } -#endif #define __ARCH_HAS_DO_SOFTIRQ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 13ec72bb8f36..1222080838da 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1510,9 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(char *, hardirq_stack_ptr) = - init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; - +DEFINE_PER_CPU(char *, hardirq_stack_ptr); DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f0c7356c8969..c0bea0d7d76a 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -87,3 +87,18 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) generic_handle_irq_desc(desc); return true; } + +static int map_irq_stack(unsigned int cpu) +{ + void *va = per_cpu_ptr(irq_stack_union.irq_stack, cpu); + + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; + return 0; +} + +int irq_init_percpu_irqstack(unsigned int cpu) +{ + if (per_cpu(hardirq_stack_ptr, cpu)) + return 0; + return map_irq_stack(cpu); +} -- cgit v1.2.3 From e6401c13093173aad709a5c6de00cf8d692ee786 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2019 18:00:06 +0200 Subject: x86/irq/64: Split the IRQ stack into its own pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the IRQ stack is hardcoded as the first page of the percpu area, and the stack canary lives on the IRQ stack. The former gets in the way of adding an IRQ stack guard page, and the latter is a potential weakness in the stack canary mechanism. Split the IRQ stack into its own private percpu pages. [ tglx: Make 64 and 32 bit share struct irq_stack ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Ard Biesheuvel Cc: Boris Ostrovsky Cc: Brijesh Singh Cc: "Chang S. Bae" Cc: Dominik Brodowski Cc: Feng Tang Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jan Beulich Cc: Jiri Kosina Cc: Joerg Roedel Cc: Jordan Borgner Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Maran Wilson Cc: Masahiro Yamada Cc: Michal Hocko Cc: Mike Rapoport Cc: Nick Desaulniers Cc: Nicolai Stange Cc: Peter Zijlstra Cc: Pu Wen Cc: "Rafael Ávila de Espíndola" Cc: Sean Christopherson Cc: Stefano Stabellini Cc: Vlastimil Babka Cc: x86-ml Cc: xen-devel@lists.xenproject.org Link: https://lkml.kernel.org/r/20190414160146.267376656@linutronix.de --- arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/processor.h | 32 ++++++++++++++------------------ arch/x86/include/asm/stackprotector.h | 6 +++--- arch/x86/kernel/asm-offsets_64.c | 2 +- arch/x86/kernel/cpu/common.c | 8 ++++---- arch/x86/kernel/head_64.S | 2 +- arch/x86/kernel/irq_64.c | 5 ++++- arch/x86/kernel/setup_percpu.c | 5 ----- arch/x86/kernel/vmlinux.lds.S | 7 ++++--- arch/x86/tools/relocs.c | 2 +- arch/x86/xen/xen-head.S | 10 +++++----- 11 files changed, 39 insertions(+), 44 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 726abbe6c6d8..cfe4d6ea258d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -298,7 +298,7 @@ ENTRY(__switch_to_asm) #ifdef CONFIG_STACKPROTECTOR movq TASK_stack_canary(%rsi), %rbx - movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif #ifdef CONFIG_RETPOLINE @@ -430,7 +430,7 @@ END(irq_entries_start) * it before we actually move ourselves to the IRQ stack. */ - movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8) + movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8) movq PER_CPU_VAR(hardirq_stack_ptr), %rsp #ifdef CONFIG_DEBUG_ENTRY diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5e3dd4e2136d..7e99ef67bff0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); #define __KERNEL_TSS_LIMIT \ (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) +/* Per CPU interrupt stacks */ +struct irq_stack { + char stack[IRQ_STACK_SIZE]; +} __aligned(IRQ_STACK_SIZE); + +DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); + #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #else @@ -375,28 +382,24 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #endif #ifdef CONFIG_X86_64 -union irq_stack_union { - char irq_stack[IRQ_STACK_SIZE]; +struct fixed_percpu_data { /* * GCC hardcodes the stack canary as %gs:40. Since the * irq_stack is the object at %gs:0, we reserve the bottom * 48 bytes of the irq stack for the canary. 
*/ - struct { - char gs_base[40]; - unsigned long stack_canary; - }; + char gs_base[40]; + unsigned long stack_canary; }; -DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; -DECLARE_INIT_PER_CPU(irq_stack_union); +DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; +DECLARE_INIT_PER_CPU(fixed_percpu_data); static inline unsigned long cpu_kernelmode_gs_base(int cpu) { - return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); + return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); } -DECLARE_PER_CPU(char *, hardirq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern asmlinkage void ignore_sysret(void); @@ -418,14 +421,7 @@ struct stack_canary { }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif -/* - * per-CPU IRQ handling stacks - */ -struct irq_stack { - char stack[IRQ_STACK_SIZE]; -} __aligned(IRQ_STACK_SIZE); - -DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); +/* Per CPU softirq stack pointer */ DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* X86_64 */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 8ec97a62c245..91e29b6a86a5 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -13,7 +13,7 @@ * On x86_64, %gs is shared by percpu area and stack canary. All * percpu symbols are zero based and %gs points to the base of percpu * area. The first occupant of the percpu area is always - * irq_stack_union which contains stack_canary at offset 40. Userland + * fixed_percpu_data which contains stack_canary at offset 40. Userland * %gs is always saved and restored on kernel entry and exit using * swapgs, so stack protector doesn't add any complexity there. * @@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void) u64 tsc; #ifdef CONFIG_X86_64 - BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); + BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40); #endif /* * We both use the random pool and the current TSC as a source @@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void) current->stack_canary = canary; #ifdef CONFIG_X86_64 - this_cpu_write(irq_stack_union.stack_canary, canary); + this_cpu_write(fixed_percpu_data.stack_canary, canary); #else this_cpu_write(stack_canary.canary, canary); #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f5281567e28e..d3d075226c0a 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -73,7 +73,7 @@ int main(void) BLANK(); #ifdef CONFIG_STACKPROTECTOR - DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary)); + DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary)); BLANK(); #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1222080838da..801c6f040faa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1498,9 +1498,9 @@ static __init int setup_clearcpuid(char *arg) __setup("clearcpuid=", setup_clearcpuid); #ifdef CONFIG_X86_64 -DEFINE_PER_CPU_FIRST(union irq_stack_union, - irq_stack_union) __aligned(PAGE_SIZE) __visible; -EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); +DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, + fixed_percpu_data) __aligned(PAGE_SIZE) __visible; +EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); /* * The following percpu variables are hot. 
Align current_task to @@ -1510,7 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(char *, hardirq_stack_ptr); +DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d1dbe8e4eb82..bcd206c8ac90 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -265,7 +265,7 @@ ENDPROC(start_cpu0) GLOBAL(initial_code) .quad x86_64_start_kernel GLOBAL(initial_gs) - .quad INIT_PER_CPU_VAR(irq_stack_union) + .quad INIT_PER_CPU_VAR(fixed_percpu_data) GLOBAL(initial_stack) /* * The SIZEOF_PTREGS gap is a convention which helps the in-kernel diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index c0bea0d7d76a..c0f89d136b80 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -23,6 +23,9 @@ #include #include +DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible; +DECLARE_INIT_PER_CPU(irq_stack_backing_store); + int sysctl_panic_on_stackoverflow; /* @@ -90,7 +93,7 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) static int map_irq_stack(unsigned int cpu) { - void *va = per_cpu_ptr(irq_stack_union.irq_stack, cpu); + void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; return 0; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 657343ecc2da..86663874ef04 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -244,11 +244,6 @@ void __init setup_per_cpu_areas(void) per_cpu(x86_cpu_to_logical_apicid, cpu) = early_per_cpu_map(x86_cpu_to_logical_apicid, cpu); #endif -#ifdef CONFIG_X86_64 - per_cpu(hardirq_stack_ptr, cpu) = - per_cpu(irq_stack_union.irq_stack, cpu) + - IRQ_STACK_SIZE; -#endif #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bad8c51fee6e..a5af9a7c4be4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -403,7 +403,8 @@ SECTIONS */ #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load INIT_PER_CPU(gdt_page); -INIT_PER_CPU(irq_stack_union); +INIT_PER_CPU(fixed_percpu_data); +INIT_PER_CPU(irq_stack_backing_store); /* * Build-time check on the image size: @@ -412,8 +413,8 @@ INIT_PER_CPU(irq_stack_union); "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -. = ASSERT((irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +. 
= ASSERT((fixed_percpu_data == 0), + "fixed_percpu_data is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index b629f6992d9f..efa483205e43 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -738,7 +738,7 @@ static void percpu_init(void) * __per_cpu_load * * The "gold" linker incorrectly associates: - * init_per_cpu__irq_stack_union + * init_per_cpu__fixed_percpu_data * init_per_cpu__gdt_page */ static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 5077ead5e59c..c1d8b90aa4e2 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -40,13 +40,13 @@ ENTRY(startup_xen) #ifdef CONFIG_X86_64 /* Set up %gs. * - * The base of %gs always points to the bottom of the irqstack - * union. If the stack protector canary is enabled, it is - * located at %gs:40. Note that, on SMP, the boot cpu uses - * init data section till per cpu areas are set up. + * The base of %gs always points to fixed_percpu_data. If the + * stack protector canary is enabled, it is located at %gs:40. + * Note that, on SMP, the boot cpu uses init data section until + * the per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx - movq $INIT_PER_CPU_VAR(irq_stack_union),%rax + movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax cdq wrmsr #endif -- cgit v1.2.3 From 18b7a6bef62de1d598fbff23b52114b7775ecf00 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 14 Apr 2019 18:00:07 +0200 Subject: x86/irq/64: Remap the IRQ stack with guard pages The IRQ stack lives in percpu space, so an IRQ handler that overflows it will overwrite other data structures. Use vmap() to remap the IRQ stack so that it will have the usual guard pages that vmap()/vmalloc() allocations have. With this, the kernel will panic immediately on an IRQ stack overflow. [ tglx: Move the map code to a proper place and invoke it only when a CPU is about to be brought online. No point in installing the map at early boot for all possible CPUs. Fail the CPU bringup if the vmap() fails as done for all other preparatory stages in CPU hotplug. ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160146.363733568@linutronix.de --- arch/x86/kernel/irq_64.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index c0f89d136b80..f107eb2021f6 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -91,6 +91,35 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) return true; } +#ifdef CONFIG_VMAP_STACK +/* + * VMAP the backing store with guard pages + */ +static int map_irq_stack(unsigned int cpu) +{ + char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu); + struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE]; + void *va; + int i; + + for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) { + phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT)); + + pages[i] = pfn_to_page(pa >> PAGE_SHIFT); + } + + va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + if (!va) + return -ENOMEM; + + per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; + return 0; +} +#else +/* + * If VMAP stacks are disabled due to KASAN, just use the per cpu + * backing store without guard pages. + */ static int map_irq_stack(unsigned int cpu) { void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); @@ -98,6 +127,7 @@ static int map_irq_stack(unsigned int cpu) per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; return 0; } +#endif int irq_init_percpu_irqstack(unsigned int cpu) { -- cgit v1.2.3 From 117ed45485413b1977bfc638c32bf5b01d53c62b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Apr 2019 18:00:08 +0200 Subject: x86/irq/64: Remove stack overflow debug code All stack types on x86 64-bit have guard pages now. So there is no point in executing probabilistic overflow checks as the guard pages are a accurate and reliable overflow prevention. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Nicolai Stange Cc: Sean Christopherson Cc: x86-ml Link: https://lkml.kernel.org/r/20190414160146.466354762@linutronix.de --- arch/x86/Kconfig | 2 +- arch/x86/kernel/irq_64.c | 56 ------------------------------------------------ 2 files changed, 1 insertion(+), 57 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ad92419be19..fd06614b09a7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -14,6 +14,7 @@ config X86_32 select ARCH_WANT_IPC_PARSE_VERSION select CLKSRC_I8253 select CLONE_BACKWARDS + select HAVE_DEBUG_STACKOVERFLOW select MODULES_USE_ELF_REL select OLD_SIGACTION @@ -138,7 +139,6 @@ config X86 select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK - select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f107eb2021f6..6bf6517a05bb 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,64 +26,8 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible; DECLARE_INIT_PER_CPU(irq_stack_backing_store); -int sysctl_panic_on_stackoverflow; - -/* - * Probabilistic stack overflow check: - * - * Regular device interrupts can enter on the following stacks: - * - * - User stack - * - * - Kernel task stack - * - * - Interrupt stack if a device driver reenables interrupts - * which should only happen in really old drivers. - * - * - Debug IST stack - * - * All other contexts are invalid. - */ -static inline void stack_overflow_check(struct pt_regs *regs) -{ -#ifdef CONFIG_DEBUG_STACKOVERFLOW -#define STACK_MARGIN 128 - u64 irq_stack_top, irq_stack_bottom, estack_top, estack_bottom; - u64 curbase = (u64)task_stack_page(current); - struct cea_exception_stacks *estacks; - - if (user_mode(regs)) - return; - - if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_MARGIN && - regs->sp <= curbase + THREAD_SIZE) - return; - - irq_stack_top = (u64)__this_cpu_read(hardirq_stack_ptr); - irq_stack_bottom = irq_stack_top - IRQ_STACK_SIZE + STACK_MARGIN; - if (regs->sp >= irq_stack_bottom && regs->sp <= irq_stack_top) - return; - - estacks = __this_cpu_read(cea_exception_stacks); - estack_top = CEA_ESTACK_TOP(estacks, DB); - estack_bottom = CEA_ESTACK_BOT(estacks, DB) + STACK_MARGIN; - if (regs->sp >= estack_bottom && regs->sp <= estack_top) - return; - - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx, irq stack:%Lx-%Lx, exception stack: %Lx-%Lx, ip:%pF)\n", - current->comm, curbase, regs->sp, - irq_stack_bottom, irq_stack_top, - estack_bottom, estack_top, (void *)regs->ip); - - if (sysctl_panic_on_stackoverflow) - panic("low stack detected by irq handler - check messages\n"); -#endif -} - bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { - stack_overflow_check(regs); - if (IS_ERR_OR_NULL(desc)) return false; -- cgit v1.2.3 From 2c4645439e8f2f6e7c37f158feae6f6a82baa910 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Mon, 22 Apr 2019 10:49:43 +0800 Subject: x86/irq: Fix outdated comments INVALIDATE_TLB_VECTOR_START has been removed by: 52aec3308db8("x86/tlb: replace INVALIDATE_TLB_VECTOR by CALL_FUNCTION_VECTOR") while VSYSCALL_EMU_VECTO(204) has also been removed, by: 3ae36655b97a("x86-64: Rework vsyscall emulation and add vsyscall= parameter") so update the comments in accordingly. 
Signed-off-by: Jiang Biao Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Link: http://lkml.kernel.org/r/20190422024943.71918-1-benbjiang@tencent.com [ Improved the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 548d90bbf919..889f8b1b5b7f 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -18,8 +18,8 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts - * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts + * Vectors 129 ... LOCAL_TIMER_VECTOR-1 + * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. * -- cgit v1.2.3
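As a stand-alone illustration of the guard-page mechanism the "x86/irq/64: Remap the IRQ stack with guard pages" patch above relies on, here is a minimal user-space sketch. It uses mmap()/mprotect() rather than the kernel's vmap(), and the sizes are arbitrary; the only point is that an access past the end of the stack lands in an unmapped page and faults immediately instead of corrupting adjacent data.

/*
 * User-space demonstration of a guard page below a stack-like buffer.
 * Not kernel code: the kernel achieves the same effect with vmap() and
 * the cpu_entry_area mappings.
 */
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <sys/mman.h>
#include <unistd.h>

static void segv(int sig)
{
	(void)sig;
	/* Expected: the write below ran into the PROT_NONE guard page. */
	write(STDOUT_FILENO, "hit the guard page\n", 19);
	_exit(0);
}

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t stksz = 4 * page;
	char *area, *stack;

	signal(SIGSEGV, segv);

	/* One extra page at the bottom acts as the guard. */
	area = mmap(NULL, stksz + page, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (area == MAP_FAILED)
		return 1;
	if (mprotect(area, page, PROT_NONE))
		return 1;

	stack = area + page;		/* usable stack starts above the guard */
	memset(stack, 0, stksz);	/* fine: stays within the mapping */
	printf("normal use ok, now overflowing downwards...\n");
	fflush(stdout);
	stack[-1] = 0;			/* "overflow": lands in the guard page */
	return 0;
}

Running it prints the "normal use" line and then reports the fault from the guard page, mirroring why the probabilistic stack_overflow_check() could be removed once every x86-64 stack type is backed by guard pages.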