diff options
author | Andy Lutomirski <luto@kernel.org> | 2015-07-15 20:29:35 +0300 |
---|---|---|
committer | Jiri Slaby <jslaby@suse.cz> | 2015-08-25 17:56:58 +0300 |
commit | e0de15fc45a83f94d1ef578f54b427b86a33ab21 (patch) | |
tree | e921cde405a8d50a534b9a3cfdde6420ebf86093 /arch/x86 | |
parent | 3682103fe7e9170cfb806b7ec9dcb3008db51443 (diff) | |
download | linux-e0de15fc45a83f94d1ef578f54b427b86a33ab21.tar.xz |
x86/nmi/64: Switch stacks on userspace NMI entry
commit 9b6e6a8334d56354853f9c255d1395c2ba570e0a upstream.
Returning to userspace is tricky: IRET can fail, and ESPFIX can
rearrange the stack prior to IRET.
The NMI nesting fixup relies on a precise stack layout and
atomic IRET. Rather than trying to teach the NMI nesting fixup
to handle ESPFIX and failed IRET, punt: run NMIs that came from
user mode on the normal kernel stack.
This will make some nested NMIs visible to C code, but the C
code is okay with that.
As a side effect, this should speed up perf: it eliminates an
RDMSR when NMIs come from user mode.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/kernel/entry_64.S | 65 |
1 files changed, 61 insertions, 4 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fd27e5b86d7b..691337073e1f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1706,19 +1706,76 @@ ENTRY(nmi) * a nested NMI that updated the copy interrupt stack frame, a * jump will be made to the repeat_nmi code that will handle the second * NMI. + * + * However, espfix prevents us from directly returning to userspace + * with a single IRET instruction. Similarly, IRET to user mode + * can fault. We therefore handle NMIs from user space like + * other IST entries. */ /* Use %rdx as out temp variable throughout */ pushq_cfi %rdx CFI_REL_OFFSET rdx, 0 + testb $3, CS-RIP+8(%rsp) + jz .Lnmi_from_kernel + + /* + * NMI from user mode. We need to run on the thread stack, but we + * can't go through the normal entry paths: NMIs are masked, and + * we don't want to enable interrupts, because then we'll end + * up in an awkward situation in which IRQs are on but NMIs + * are off. + */ + + SWAPGS + cld + movq %rsp, %rdx + movq PER_CPU_VAR(kernel_stack), %rsp + addq $KERNEL_STACK_OFFSET, %rsp + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ + pushq 2*8(%rdx) /* pt_regs->cs */ + pushq 1*8(%rdx) /* pt_regs->rip */ + pushq $-1 /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq (%rdx) /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + /* - * If %cs was not the kernel segment, then the NMI triggered in user - * space, which means it is definitely not nested. + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're + * done with the NMI stack. */ - cmpl $__KERNEL_CS, 16(%rsp) - jne first_nmi + movq %rsp, %rdi + movq $-1, %rsi + call do_nmi + + /* + * Return back to user mode. We must *not* do the normal exit + * work, because we don't want to enable interrupts. Fortunately, + * do_nmi doesn't modify pt_regs. + */ + SWAPGS + + addq $6*8, %rsp /* skip bx, bp, and r12-r15 */ + jmp restore_args + +.Lnmi_from_kernel: /* * Check the special variable on the stack to see if NMIs are * executing. |