From 946c191161cef10c667b5ee3179db1714fa5b7c0 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 20 Oct 2016 11:34:40 -0500 Subject: x86/entry/unwind: Create stack frames for saved interrupt registers With frame pointers, when a task is interrupted, its stack is no longer completely reliable because the function could have been interrupted before it had a chance to save the previous frame pointer on the stack. So the caller of the interrupted function could get skipped by a stack trace. This is problematic for live patching, which needs to know whether a stack trace of a sleeping task can be relied upon. There's currently no way to detect if a sleeping task was interrupted by a page fault exception or preemption before it went to sleep. Another issue is that when dumping the stack of an interrupted task, the unwinder has no way of knowing where the saved pt_regs registers are, so it can't print them. This solves those issues by encoding the pt_regs pointer in the frame pointer on entry from an interrupt or an exception. This patch also updates the unwinder to be able to decode it, because otherwise the unwinder would be broken by this change. Note that this causes a change in the behavior of the unwinder: each instance of a pt_regs on the stack is now considered a "frame". So callers of unwind_get_return_address() will now get an occasional 'regs->ip' address that would have previously been skipped over. Suggested-by: Andy Lutomirski Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8b9f84a21e39d249049e0547b559ff8da0df0988.1476973742.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/unwind.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 46de9ac4b990..c5a7f3a930dd 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -13,6 +13,7 @@ struct unwind_state { int graph_idx; #ifdef CONFIG_FRAME_POINTER unsigned long *bp; + struct pt_regs *regs; #else unsigned long *sp; #endif @@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) if (unwind_done(state)) return NULL; - return state->bp + 1; + return state->regs ? &state->regs->ip : state->bp + 1; +} + +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +{ + if (unwind_done(state)) + return NULL; + + return state->regs; } #else /* !CONFIG_FRAME_POINTER */ @@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) return NULL; } +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +{ + return NULL; +} + #endif /* CONFIG_FRAME_POINTER */ #endif /* _ASM_X86_UNWIND_H */ -- cgit v1.2.3 From bb5e5ce545f2031c96f7901cd8d1698ea3ca4c9c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 25 Oct 2016 09:51:12 -0500 Subject: x86/dumpstack: Remove kernel text addresses from stack dump Printing kernel text addresses in stack dumps is of questionable value, especially now that address randomization is becoming common. It can be a security issue because it leaks kernel addresses. It also affects the usefulness of the stack dump. Linus says: "I actually spend time cleaning up commit messages in logs, because useless data that isn't actually information (random hex numbers) is actively detrimental. It makes commit logs less legible. It also makes it harder to parse dumps. It's not useful. That makes it actively bad. I probably look at more oops reports than most people. I have not found the hex numbers useful for the last five years, because they are just randomized crap. The stack content thing just makes code scroll off the screen etc, for example." The only real downside to removing these addresses is that they can be used to disambiguate duplicate symbol names. However such cases are rare, and the context of the stack dump should be enough to be able to figure it out. There's now a 'faddr2line' script which can be used to convert a function address to a file name and line: $ ./scripts/faddr2line ~/k/vmlinux write_sysrq_trigger+0x51/0x60 write_sysrq_trigger+0x51/0x60: write_sysrq_trigger at drivers/tty/sysrq.c:1098 Or gdb can be used: $ echo "list *write_sysrq_trigger+0x51" |gdb ~/k/vmlinux |grep "is in" (gdb) 0xffffffff815b5d83 is in driver_probe_device (/home/jpoimboe/git/linux/drivers/base/dd.c:378). (But note that when there are duplicate symbol names, gdb will only show the first symbol it finds. faddr2line is recommended over gdb because it handles duplicates and it also does function size checking.) Here's an example of what a stack dump looks like after this change: BUG: unable to handle kernel NULL pointer dereference at (null) IP: sysrq_handle_crash+0x45/0x80 PGD 36bfa067 [ 29.650644] PUD 7aca3067 Oops: 0002 [#1] PREEMPT SMP Modules linked in: ... CPU: 1 PID: 786 Comm: bash Tainted: G E 4.9.0-rc1+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 task: ffff880078582a40 task.stack: ffffc90000ba8000 RIP: 0010:sysrq_handle_crash+0x45/0x80 RSP: 0018:ffffc90000babdc8 EFLAGS: 00010296 RAX: ffff880078582a40 RBX: 0000000000000063 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000292 RBP: ffffc90000babdc8 R08: 0000000b31866061 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000007 R14: ffffffff81ee8680 R15: 0000000000000000 FS: 00007ffb43869700(0000) GS:ffff88007d400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007a3e9000 CR4: 00000000001406e0 Stack: ffffc90000babe00 ffffffff81572d08 ffffffff81572bd5 0000000000000002 0000000000000000 ffff880079606600 00007ffb4386e000 ffffc90000babe20 ffffffff81573201 ffff880036a3fd00 fffffffffffffffb ffffc90000babe40 Call Trace: __handle_sysrq+0x138/0x220 ? __handle_sysrq+0x5/0x220 write_sysrq_trigger+0x51/0x60 proc_reg_write+0x42/0x70 __vfs_write+0x37/0x140 ? preempt_count_sub+0xa1/0x100 ? __sb_start_write+0xf5/0x210 ? vfs_write+0x183/0x1a0 vfs_write+0xb8/0x1a0 SyS_write+0x58/0xc0 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x7ffb42f55940 RSP: 002b:00007ffd33bb6b18 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007ffb42f55940 RDX: 0000000000000002 RSI: 00007ffb4386e000 RDI: 0000000000000001 RBP: 0000000000000011 R08: 00007ffb4321ea40 R09: 00007ffb43869700 R10: 00007ffb43869700 R11: 0000000000000246 R12: 0000000000778a10 R13: 00007ffd33bb5c00 R14: 0000000000000007 R15: 0000000000000010 Code: 34 e8 d0 34 bc ff 48 c7 c2 3b 2b 57 81 be 01 00 00 00 48 c7 c7 e0 dd e5 81 e8 a8 55 ba ff c7 05 0e 3f de 00 01 00 00 00 0f ae f8 04 25 00 00 00 00 01 5d c3 e8 4c 49 bc ff 84 c0 75 c3 48 c7 RIP: sysrq_handle_crash+0x45/0x80 RSP: ffffc90000babdc8 CR2: 0000000000000000 Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/69329cb29b8f324bb5fcea14d61d224807fb6488.1477405374.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kdebug.h | 1 - arch/x86/kernel/dumpstack.c | 18 ++++-------------- arch/x86/kernel/process_32.c | 7 +++---- arch/x86/kernel/process_64.c | 6 +++--- arch/x86/mm/fault.c | 3 +-- arch/x86/platform/uv/uv_nmi.c | 4 ++-- 6 files changed, 13 insertions(+), 26 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index d31881188431..29a594a3b82a 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -21,7 +21,6 @@ enum die_val { DIE_NMIUNKNOWN, }; -extern void printk_address(unsigned long address); extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 64281a1d4e48..f967652500fa 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -46,14 +46,7 @@ static void printk_stack_address(unsigned long address, int reliable, char *log_lvl) { touch_nmi_watchdog(); - printk("%s [<%p>] %s%pB\n", - log_lvl, (void *)address, reliable ? "" : "? ", - (void *)address); -} - -void printk_address(unsigned long address) -{ - pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address); + printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, @@ -275,14 +268,11 @@ int __die(const char *str, struct pt_regs *regs, long err) sp = kernel_stack_pointer(regs); savesegment(ss, ss); } - printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); - print_symbol("%s", regs->ip); - printk(" SS:ESP %04x:%08lx\n", ss, sp); + printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n", + (void *)regs->ip, ss, sp); #else /* Executive summary in case the oops scrolled away */ - printk(KERN_ALERT "RIP "); - printk_address(regs->ip); - printk(" RSP <%016lx>\n", regs->sp); + printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp); #endif return 0; } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index bd7be8efdc4c..e3223bc78cb6 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -72,10 +72,9 @@ void __show_regs(struct pt_regs *regs, int all) savesegment(gs, gs); } - printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", - (u16)regs->cs, regs->ip, regs->flags, - smp_processor_id()); - print_symbol("EIP is at %s\n", regs->ip); + printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip); + printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags, + smp_processor_id()); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f1c36da4c9b5..c99f1ca35eb5 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -61,10 +61,10 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int fsindex, gsindex; unsigned int ds, cs, es; - printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] %pS\n", regs->cs & 0xffff, - regs->ip, (void *)regs->ip); + printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff, + (void *)regs->ip); printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, - regs->sp, regs->flags); + regs->sp, regs->flags); if (regs->orig_ax != -1) pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); else diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9f72ca3b2669..17c55a536fdd 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -679,8 +679,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, printk(KERN_CONT "paging request"); printk(KERN_CONT " at %p\n", (void *) address); - printk(KERN_ALERT "IP:"); - printk_address(regs->ip); + printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip); dump_pagetable(address); } diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index cd5173a2733f..8410e7d0a5b5 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -387,8 +387,8 @@ static void uv_nmi_dump_cpu_ip_hdr(void) /* Dump Instruction Pointer info */ static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs) { - pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm); - printk_address(regs->ip); + pr_info("UV: %4d %6d %-32.32s %pS", + cpu, current->pid, current->comm, (void *)regs->ip); } /* -- cgit v1.2.3 From 0ee1dd9f5e7eae4e55f95935b72d4beecb03de9c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 25 Oct 2016 09:51:13 -0500 Subject: x86/dumpstack: Remove raw stack dump For mostly historical reasons, the x86 oops dump shows the raw stack values: ... [registers] Stack: ffff880079af7350 ffff880079905400 0000000000000000 ffffc900008f3ae0 ffffffffa0196610 0000000000000001 00010000ffffffff 0000000087654321 0000000000000002 0000000000000000 0000000000000000 0000000000000000 Call Trace: ... This seems to be an artifact from long ago, and probably isn't needed anymore. It generally just adds noise to the dump, and it can be actively harmful because it leaks kernel addresses. Linus says: "The stack dump actually goes back to forever, and it used to be useful back in 1992 or so. But it used to be useful mainly because stacks were simpler and we didn't have very good call traces anyway. I definitely remember having used them - I just do not remember having used them in the last ten+ years. Of course, it's still true that if you can trigger an oops, you've likely already lost the security game, but since the stack dump is so useless, let's aim to just remove it and make games like the above harder." This also removes the related 'kstack=' cmdline option and the 'kstack_depth_to_print' sysctl. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/e83bd50df52d8fe88e94d2566426ae40d813bf8f.1477405374.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 3 -- Documentation/sysctl/kernel.txt | 8 ----- Documentation/x86/x86_64/boot-options.txt | 4 --- arch/x86/include/asm/stacktrace.h | 5 --- arch/x86/kernel/dumpstack.c | 21 ++---------- arch/x86/kernel/dumpstack_32.c | 33 +------------------ arch/x86/kernel/dumpstack_64.c | 53 +------------------------------ kernel/sysctl.c | 7 ---- 8 files changed, 4 insertions(+), 130 deletions(-) (limited to 'arch/x86/include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 37babf91f2cb..049a9172ed22 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1958,9 +1958,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. kmemcheck=2 (one-shot mode) Default: 2 (one-shot mode) - kstack=N [X86] Print N words from the kernel stack - in oops dumps. - kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. Default is 0 (don't ignore, but inject #GP) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index ffab8b5caa60..065f18478c1c 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -40,7 +40,6 @@ show up in /proc/sys/kernel: - hung_task_warnings - kexec_load_disabled - kptr_restrict -- kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] - modprobe ==> Documentation/debugging-modules.txt - modules_disabled @@ -395,13 +394,6 @@ When kptr_restrict is set to (2), kernel pointers printed using ============================================================== -kstack_depth_to_print: (X86 only) - -Controls the number of words to print when dumping the raw -kernel stack. - -============================================================== - l2cr: (PPC only) This flag controls the L2 cache of G3 processor boards. If diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 0965a71f9942..61b611e9eeaf 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -277,10 +277,6 @@ IOMMU (input/output memory management unit) space might stop working. Use this option if you have devices that are accessed from userspace directly on some PCI host bridge. -Debugging - - kstack=N Print N words from the kernel stack in oops dumps. - Miscellaneous nogbpages diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 37f2e0b377ad..1e375b05cfa8 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -43,8 +43,6 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len) addr + len > begin && addr + len <= end); } -extern int kstack_depth_to_print; - #ifdef CONFIG_X86_32 #define STACKSLOTS_PER_LINE 8 #else @@ -86,9 +84,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl); -void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, char *log_lvl); - extern unsigned int code_bytes; /* The form of the top of the frame on the stack */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index f967652500fa..499aa6f0fde5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -22,7 +22,6 @@ int panic_on_unrecovered_nmi; int panic_on_io_nmi; unsigned int code_bytes = 64; -int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; static int die_counter; bool in_task_stack(unsigned long *stack, struct task_struct *task, @@ -171,12 +170,12 @@ void show_stack(struct task_struct *task, unsigned long *sp) if (!sp && task == current) sp = get_stack_pointer(current, NULL); - show_stack_log_lvl(task, NULL, sp, KERN_DEFAULT); + show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT); } void show_stack_regs(struct pt_regs *regs) { - show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT); + show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); } static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -295,22 +294,6 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, sig); } -static int __init kstack_setup(char *s) -{ - ssize_t ret; - unsigned long val; - - if (!s) - return -EINVAL; - - ret = kstrtoul(s, 0, &val); - if (ret) - return ret; - kstack_depth_to_print = val; - return 0; -} -early_param("kstack", kstack_setup); - static int __init code_bytes_setup(char *s) { ssize_t ret; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 06eb322b5f9f..90cf460d50bd 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -121,36 +121,6 @@ unknown: return -EINVAL; } -void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, char *log_lvl) -{ - unsigned long *stack; - int i; - - if (!try_get_task_stack(task)) - return; - - sp = sp ? : get_stack_pointer(task, regs); - - stack = sp; - for (i = 0; i < kstack_depth_to_print; i++) { - if (kstack_end(stack)) - break; - if ((i % STACKSLOTS_PER_LINE) == 0) { - if (i != 0) - pr_cont("\n"); - printk("%s %08lx", log_lvl, *stack++); - } else - pr_cont(" %08lx", *stack++); - touch_nmi_watchdog(); - } - pr_cont("\n"); - show_trace_log_lvl(task, regs, sp, log_lvl); - - put_task_stack(task); -} - - void show_regs(struct pt_regs *regs) { int i; @@ -168,8 +138,7 @@ void show_regs(struct pt_regs *regs) unsigned char c; u8 *ip; - pr_emerg("Stack:\n"); - show_stack_log_lvl(current, regs, NULL, KERN_EMERG); + show_trace_log_lvl(current, regs, NULL, KERN_EMERG); pr_emerg("Code:"); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 36cf1a498227..310abf4542dc 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -140,56 +140,6 @@ unknown: return -EINVAL; } -void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, char *log_lvl) -{ - unsigned long *irq_stack_end; - unsigned long *irq_stack; - unsigned long *stack; - int i; - - if (!try_get_task_stack(task)) - return; - - irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr); - irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long)); - - sp = sp ? : get_stack_pointer(task, regs); - - stack = sp; - for (i = 0; i < kstack_depth_to_print; i++) { - unsigned long word; - - if (stack >= irq_stack && stack <= irq_stack_end) { - if (stack == irq_stack_end) { - stack = (unsigned long *) (irq_stack_end[-1]); - pr_cont(" "); - } - } else { - if (kstack_end(stack)) - break; - } - - if (probe_kernel_address(stack, word)) - break; - - if ((i % STACKSLOTS_PER_LINE) == 0) { - if (i != 0) - pr_cont("\n"); - printk("%s %016lx", log_lvl, word); - } else - pr_cont(" %016lx", word); - - stack++; - touch_nmi_watchdog(); - } - - pr_cont("\n"); - show_trace_log_lvl(task, regs, sp, log_lvl); - - put_task_stack(task); -} - void show_regs(struct pt_regs *regs) { int i; @@ -207,8 +157,7 @@ void show_regs(struct pt_regs *regs) unsigned char c; u8 *ip; - printk(KERN_DEFAULT "Stack:\n"); - show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT); + show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); printk(KERN_DEFAULT "Code: "); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 706309f9ed84..17a5a8253294 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -989,13 +989,6 @@ static struct ctl_table kern_table[] = { .mode = 0444, .proc_handler = proc_dointvec, }, - { - .procname = "kstack_depth_to_print", - .data = &kstack_depth_to_print, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "io_delay_type", .data = &io_delay_type, -- cgit v1.2.3 From a01aa6c9f40fe03c82032e7f8b3bcf1e6c93ac0e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 27 Oct 2016 17:15:15 +0300 Subject: x86/prctl/uapi: Remove #ifdef for CHECKPOINT_RESTORE As userspace knows nothing about kernel config, thus #ifdefs around ABI prctl constants makes them invisible to userspace. Let it be clean'n'simple: remove #ifdefs. If kernel has CONFIG_CHECKPOINT_RESTORE disabled, sys_prctl() will return -EINVAL for those prctls. Reported-by: Paul Bolle Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Brian Gerst Cc: Cyrill Gorcunov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Cc: oleg@redhat.com Fixes: 2eefd8789698 ("x86/arch_prctl/vdso: Add ARCH_MAP_VDSO_*") Link: http://lkml.kernel.org/r/20161027141516.28447-2-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/include/uapi/asm/prctl.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index ae135de547f5..835aa51c7f6e 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,10 +6,8 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 -#ifdef CONFIG_CHECKPOINT_RESTORE -# define ARCH_MAP_VDSO_X32 0x2001 -# define ARCH_MAP_VDSO_32 0x2002 -# define ARCH_MAP_VDSO_64 0x2003 -#endif +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 #endif /* _ASM_X86_PRCTL_H */ -- cgit v1.2.3 From 47f10a36003eaf493125a5e6687dd1ff775bfd8c Mon Sep 17 00:00:00 2001 From: He Chen Date: Fri, 11 Nov 2016 17:25:34 +0800 Subject: x86/cpuid: Cleanup cpuid_regs definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cpuid_regs is defined multiple times as structure and enum. Rename the enum and move all of it to processor.h so we don't end up with more instances. Rename the misnomed register enumeration from CR_* to the obvious CPUID_*. [ tglx: Rewrote changelog ] Signed-off-by: He Chen Reviewed-by: Borislav Petkov Cc: Luwei Kang Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Piotr Luc Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/1478856336-9388-2-git-send-email-he.chen@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/pt.c | 45 +++++++++++++++++----------------------- arch/x86/include/asm/processor.h | 11 ++++++++++ arch/x86/kernel/cpu/scattered.c | 28 ++++++++++--------------- arch/x86/kernel/cpuid.c | 4 ---- 4 files changed, 41 insertions(+), 47 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index c5047b8f777b..1c1b9fe705c8 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -36,13 +36,6 @@ static DEFINE_PER_CPU(struct pt, pt_ctx); static struct pt_pmu pt_pmu; -enum cpuid_regs { - CR_EAX = 0, - CR_ECX, - CR_EDX, - CR_EBX -}; - /* * Capabilities of Intel PT hardware, such as number of address bits or * supported output schemes, are cached and exported to userspace as "caps" @@ -64,21 +57,21 @@ static struct pt_cap_desc { u8 reg; u32 mask; } pt_caps[] = { - PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff), - PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)), - PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)), - PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)), - PT_CAP(mtc, 0, CR_EBX, BIT(3)), - PT_CAP(ptwrite, 0, CR_EBX, BIT(4)), - PT_CAP(power_event_trace, 0, CR_EBX, BIT(5)), - PT_CAP(topa_output, 0, CR_ECX, BIT(0)), - PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)), - PT_CAP(single_range_output, 0, CR_ECX, BIT(2)), - PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)), - PT_CAP(num_address_ranges, 1, CR_EAX, 0x3), - PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000), - PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff), - PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000), + PT_CAP(max_subleaf, 0, CPUID_EAX, 0xffffffff), + PT_CAP(cr3_filtering, 0, CPUID_EBX, BIT(0)), + PT_CAP(psb_cyc, 0, CPUID_EBX, BIT(1)), + PT_CAP(ip_filtering, 0, CPUID_EBX, BIT(2)), + PT_CAP(mtc, 0, CPUID_EBX, BIT(3)), + PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)), + PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)), + PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)), + PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)), + PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)), + PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)), + PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3), + PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000), + PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff), + PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000), }; static u32 pt_cap_get(enum pt_capabilities cap) @@ -213,10 +206,10 @@ static int __init pt_pmu_hw_init(void) for (i = 0; i < PT_CPUID_LEAVES; i++) { cpuid_count(20, i, - &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM], - &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM], - &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM], - &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]); + &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM], + &pt_pmu.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM], + &pt_pmu.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM], + &pt_pmu.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM]); } ret = -ENOMEM; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 984a7bf17f6a..8f6ac5be84b7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -137,6 +137,17 @@ struct cpuinfo_x86 { u32 microcode; }; +struct cpuid_regs { + u32 eax, ebx, ecx, edx; +}; + +enum cpuid_regs_idx { + CPUID_EAX = 0, + CPUID_EBX, + CPUID_ECX, + CPUID_EDX, +}; + #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 #define X86_VENDOR_AMD 2 diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 1db8dc490b66..dbb470e839f8 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -17,13 +17,6 @@ struct cpuid_bit { u32 sub_leaf; }; -enum cpuid_regs { - CR_EAX = 0, - CR_ECX, - CR_EDX, - CR_EBX -}; - void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; @@ -31,14 +24,14 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, - { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, - { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, - { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, - { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 }, + { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, + { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, + { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { 0, 0, 0, 0, 0 } }; @@ -50,8 +43,9 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) max_level > (cb->level | 0xffff)) continue; - cpuid_count(cb->level, cb->sub_leaf, ®s[CR_EAX], - ®s[CR_EBX], ®s[CR_ECX], ®s[CR_EDX]); + cpuid_count(cb->level, cb->sub_leaf, ®s[CPUID_EAX], + ®s[CPUID_EBX], ®s[CPUID_ECX], + ®s[CPUID_EDX]); if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 2836de390f95..9095c80723d6 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -46,10 +46,6 @@ static struct class *cpuid_class; -struct cpuid_regs { - u32 eax, ebx, ecx, edx; -}; - static void cpuid_smp_cpuid(void *cmd_block) { struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block; -- cgit v1.2.3 From 47bdf3378d62a627cfb8a54e1180c08d67078b61 Mon Sep 17 00:00:00 2001 From: He Chen Date: Fri, 11 Nov 2016 17:25:35 +0800 Subject: x86/cpuid: Provide get_scattered_cpuid_leaf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sparse populated CPUID leafs are collected in a software provided leaf to avoid bloat of the x86_capability array, but there is no way to rebuild the real leafs (e.g. for KVM CPUID enumeration) other than rereading the CPUID leaf from the CPU. While this is possible it is problematic as it does not take software disabled features into account. If a feature is disabled on the host it should not be exposed to a guest either. Add get_scattered_cpuid_leaf() which rebuilds the leaf from the scattered cpuid table information and the active CPU features. [ tglx: Rewrote changelog ] Signed-off-by: He Chen Reviewed-by: Borislav Petkov Cc: Luwei Kang Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Piotr Luc Cc: Borislav Petkov Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/1478856336-9388-3-git-send-email-he.chen@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/processor.h | 3 +++ arch/x86/kernel/cpu/scattered.c | 49 ++++++++++++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 12 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8f6ac5be84b7..e7f8c62701d4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -189,6 +189,9 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); +extern u32 get_scattered_cpuid_leaf(unsigned int level, + unsigned int sub_leaf, + enum cpuid_regs_idx reg); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbb470e839f8..d1316f9c8329 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -17,24 +17,25 @@ struct cpuid_bit { u32 sub_leaf; }; +/* Please keep the leaf sorted by cpuid_bit.level for faster search. */ +static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, + { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, + { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, + { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, + { 0, 0, 0, 0, 0 } +}; + void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { u32 max_level; u32 regs[4]; const struct cpuid_bit *cb; - static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, - { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, - { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, - { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, - { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, - { 0, 0, 0, 0, 0 } - }; - for (cb = cpuid_bits; cb->feature; cb++) { /* Verify that the level is valid */ @@ -51,3 +52,27 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) set_cpu_cap(c, cb->feature); } } + +u32 get_scattered_cpuid_leaf(unsigned int level, unsigned int sub_leaf, + enum cpuid_regs_idx reg) +{ + const struct cpuid_bit *cb; + u32 cpuid_val = 0; + + for (cb = cpuid_bits; cb->feature; cb++) { + + if (level > cb->level) + continue; + + if (level < cb->level) + break; + + if (reg == cb->reg && sub_leaf == cb->sub_leaf) { + if (cpu_has(&boot_cpu_data, cb->feature)) + cpuid_val |= BIT(cb->bit); + } + } + + return cpuid_val; +} +EXPORT_SYMBOL_GPL(get_scattered_cpuid_leaf); -- cgit v1.2.3 From a8d9df5a509a232a959e4ef2e281f7ecd77810d6 Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Wed, 16 Nov 2016 12:11:00 -0800 Subject: x86/cpufeatures: Enable new AVX512 cpu features Add a few new AVX512 instruction groups/features for enumeration in /proc/cpuinfo: AVX512IFMA and AVX512VBMI. Clear the flags in fpu_xstate_clear_all_cpu_caps(). CPUID.(EAX=7,ECX=0):EBX[bit 21] AVX512IFMA CPUID.(EAX=7,ECX=0):ECX[bit 1] AVX512VBMI Detailed information of cpuid bits for the features can be found at https://bugzilla.kernel.org/show_bug.cgi?id=187891 Signed-off-by: Gayatri Kammela Reviewed-by: Borislav Petkov Cc: Ravi Shankar Cc: Fenghua Yu Cc: mingo@elte.hu Link: http://lkml.kernel.org/r/1479327060-18668-1-git-send-email-gayatri.kammela@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/fpu/xstate.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a39629206864..a3e8cd762a23 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -226,6 +226,7 @@ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ @@ -279,6 +280,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 095ef7ddd6ae..ce47452879fd 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -65,6 +65,7 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_AVX); setup_clear_cpu_cap(X86_FEATURE_AVX2); setup_clear_cpu_cap(X86_FEATURE_AVX512F); + setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA); setup_clear_cpu_cap(X86_FEATURE_AVX512PF); setup_clear_cpu_cap(X86_FEATURE_AVX512ER); setup_clear_cpu_cap(X86_FEATURE_AVX512CD); @@ -73,6 +74,7 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_AVX512VL); setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_XGETBV1); + setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI); setup_clear_cpu_cap(X86_FEATURE_PKU); setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); -- cgit v1.2.3 From a582c540ac1b10f0a7d37415e04c4af42409fd08 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 16 Nov 2016 10:23:27 -0800 Subject: x86/vdso: Use RDPID in preference to LSL when available RDPID is a new instruction that reads MSR_TSC_AUX quickly. This should be considerably faster than reading the GDT. Add a cpufeature for it and use it from __vdso_getcpu() when available. Tested-by: Megha Dey Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4f6c3a22012d10f1c65b9ca15800e01b42c7d39d.1479320367.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/vgtod.h | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a3e8cd762a23..eac7572bf8bb 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -283,6 +283,7 @@ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index e728699db774..3a01996db58f 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void) * works on all CPUs. This is volatile so that it orders * correctly wrt barrier() and to keep gcc from cleverly * hoisting it out of the calling function. + * + * If RDPID is available, use it. */ - asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + alternative_io ("lsl %[p],%[seg]", + ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + X86_FEATURE_RDPID, + [p] "=a" (p), [seg] "r" (__PER_CPU_SEG)); return p; } -- cgit v1.2.3 From 3d02a9c48d479eb58841805baaf93c5a084b6010 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 18 Nov 2016 11:46:23 -0600 Subject: x86/dumpstack: Make stack name tags more comprehensible NMI stack dumps are bracketed by the following tags: ... The ending tag is kind of confusing if you don't already know what "EOE" means (end of exception). The same ending tag is also used to mark the end of all other exceptions' stacks. For example: <#DF> ... And similarly, "EOI" is used as the ending tag for interrupts: ... Change the tags to be more comprehensible by making them symmetrical and more XML-esque: ... <#DF> ... ... Signed-off-by: Josh Poimboeuf Acked-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/180196e3754572540b595bc56b947d43658979a7.1479491159.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 3 +-- arch/x86/kernel/dumpstack.c | 12 ++++++------ arch/x86/kernel/dumpstack_32.c | 19 ++++++++----------- arch/x86/kernel/dumpstack_64.c | 22 ++++++++-------------- 4 files changed, 23 insertions(+), 33 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 1e375b05cfa8..a3269c897ec5 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -30,8 +30,7 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); -void stack_type_str(enum stack_type type, const char **begin, - const char **end); +const char *stack_type_name(enum stack_type type); static inline bool on_stack(struct stack_info *info, void *addr, size_t len) { diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1e057b01b648..0e5c9d0f6c28 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -76,7 +76,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - hardirq stack */ for (regs = NULL; stack; stack = stack_info.next_sp) { - const char *str_begin, *str_end; + const char *stack_name; /* * If we overflowed the task stack into a guard page, jump back @@ -88,9 +88,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, if (get_stack_info(stack, task, &stack_info, &visit_mask)) break; - stack_type_str(stack_info.type, &str_begin, &str_end); - if (str_begin) - printk("%s <%s>\n", log_lvl, str_begin); + stack_name = stack_type_name(stack_info.type); + if (stack_name) + printk("%s <%s>\n", log_lvl, stack_name); /* * Scan the stack, printing any text addresses we find. At the @@ -155,8 +155,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, __show_regs(regs, 0); } - if (str_end) - printk("%s <%s>\n", log_lvl, str_end); + if (stack_name) + printk("%s \n", log_lvl, stack_name); } } diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index d7d20999d68c..bb3b5b9a6899 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -16,18 +16,15 @@ #include -void stack_type_str(enum stack_type type, const char **begin, const char **end) +const char *stack_type_name(enum stack_type type) { - switch (type) { - case STACK_TYPE_IRQ: - case STACK_TYPE_SOFTIRQ: - *begin = "IRQ"; - *end = "EOI"; - break; - default: - *begin = NULL; - *end = NULL; - } + if (type == STACK_TYPE_IRQ) + return "IRQ"; + + if (type == STACK_TYPE_SOFTIRQ) + return "SOFTIRQ"; + + return NULL; } static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index ab0f8b90b51b..fac189efcc34 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -28,23 +28,17 @@ static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { [DEBUG_STACK - 1] = DEBUG_STKSZ }; -void stack_type_str(enum stack_type type, const char **begin, const char **end) +const char *stack_type_name(enum stack_type type) { BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); - switch (type) { - case STACK_TYPE_IRQ: - *begin = "IRQ"; - *end = "EOI"; - break; - case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST: - *begin = exception_stack_names[type - STACK_TYPE_EXCEPTION]; - *end = "EOE"; - break; - default: - *begin = NULL; - *end = NULL; - } + if (type == STACK_TYPE_IRQ) + return "IRQ"; + + if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) + return exception_stack_names[type - STACK_TYPE_EXCEPTION]; + + return NULL; } static bool in_exception_stack(unsigned long *stack, struct stack_info *info) -- cgit v1.2.3