Diffstat (limited to 'arch/x86/entry')
-rw-r--r--  arch/x86/entry/calling.h                 | 17
-rw-r--r--  arch/x86/entry/common.c                  | 13
-rw-r--r--  arch/x86/entry/entry_32.S                |  6
-rw-r--r--  arch/x86/entry/entry_64.S                | 29
-rw-r--r--  arch/x86/entry/syscall_64.c              | 25
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl   |  4
-rw-r--r--  arch/x86/entry/syscalls/syscalltbl.sh    | 35
-rw-r--r--  arch/x86/entry/thunk_32.S                |  2
-rw-r--r--  arch/x86/entry/thunk_64.S                |  4
-rw-r--r--  arch/x86/entry/vdso/vma.c                |  2
10 files changed, 99 insertions, 38 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 830bd984182b..515c0ceeb4a3 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -314,6 +314,23 @@ For 32-bit we have the following conventions - kernel is built with
 
 #endif
 
+/*
+ * Mitigate Spectre v1 for conditional swapgs code paths.
+ *
+ * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
+ * prevent a speculative swapgs when coming from kernel space.
+ *
+ * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
+ * to prevent the swapgs from getting speculatively skipped when coming from
+ * user space.
+ */
+.macro FENCE_SWAPGS_USER_ENTRY
+	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
+.endm
+.macro FENCE_SWAPGS_KERNEL_ENTRY
+	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
+.endm
+
 .macro STACKLEAK_ERASE_NOCLOBBER
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	PUSH_AND_CLEAR_REGS
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 536b574b6161..3f8e22615812 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
 		nr = syscall_trace_enter(regs);
 
-	/*
-	 * NB: Native and x32 syscalls are dispatched from the same
-	 * table.  The only functional difference is the x32 bit in
-	 * regs->orig_ax, which changes the behavior of some syscalls.
-	 */
-	nr &= __SYSCALL_MASK;
 	if (likely(nr < NR_syscalls)) {
 		nr = array_index_nospec(nr, NR_syscalls);
 		regs->ax = sys_call_table[nr](regs);
+#ifdef CONFIG_X86_X32_ABI
+	} else if (likely((nr & __X32_SYSCALL_BIT) &&
+			  (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
+		nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
+					X32_NR_syscalls);
+		regs->ax = x32_sys_call_table[nr](regs);
+#endif
 	}
 
 	syscall_return_slowpath(regs);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4f86928246e7..f83ca5aa8b77 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -63,7 +63,7 @@
  * enough to patch inline, increasing performance.
  */
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 # define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
 # define preempt_stop(clobbers)
@@ -1084,7 +1084,7 @@ restore_all:
 	INTERRUPT_RETURN
 
 restore_all_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	cmpl	$0, PER_CPU_VAR(__preempt_count)
 	jnz	.Lno_preempt
@@ -1364,7 +1364,7 @@ ENTRY(xen_hypervisor_callback)
 ENTRY(xen_do_upcall)
 1:	mov	%esp, %eax
 	call	xen_evtchn_do_upcall
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 	call	xen_maybe_preempt_hcall
 #endif
 	jmp	ret_from_intr
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3f5a978a02a7..b7c3ea4cb19d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -519,7 +519,7 @@ ENTRY(interrupt_entry)
 	testb	$3, CS-ORIG_RAX+8(%rsp)
 	jz	1f
 	SWAPGS
-
+	FENCE_SWAPGS_USER_ENTRY
 	/*
 	 * Switch to the thread stack. The IRET frame and orig_ax are
 	 * on the stack, as well as the return address. RDI..R12 are
@@ -549,8 +549,10 @@ ENTRY(interrupt_entry)
 	UNWIND_HINT_FUNC
 
 	movq	(%rdi), %rdi
+	jmp	2f
 1:
-
+	FENCE_SWAPGS_KERNEL_ENTRY
+2:
 	PUSH_AND_CLEAR_REGS save_ret=1
 	ENCODE_FRAME_POINTER 8
 
@@ -662,7 +664,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 
 /* Returning to kernel space */
 retint_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	/* Interrupts are off */
 	/* Check if we need preemption */
 	btl	$9, EFLAGS(%rsp)		/* were interrupts off? */
@@ -1056,10 +1058,10 @@ ENTRY(native_load_gs_index)
 ENDPROC(native_load_gs_index)
 EXPORT_SYMBOL(native_load_gs_index)
 
-	_ASM_EXTABLE(.Lgs_change, bad_gs)
+	_ASM_EXTABLE(.Lgs_change, .Lbad_gs)
 	.section .fixup, "ax"
 	/* running with kernelgs */
-bad_gs:
+.Lbad_gs:
 	SWAPGS					/* switch back to user gs */
 .macro ZAP_GS
 	/* This can't be a string because the preprocessor needs to see it. */
@@ -1113,7 +1115,7 @@ ENTRY(xen_do_hypervisor_callback)	/* do_hypervisor_callback(struct *pt_regs) */
 	call	xen_evtchn_do_upcall
 	LEAVE_IRQ_STACK
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 	call	xen_maybe_preempt_hcall
 #endif
 	jmp	error_exit
@@ -1238,6 +1240,13 @@ ENTRY(paranoid_entry)
 	 */
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
+	/*
+	 * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
+	 * unconditional CR3 write, even in the PTI case.  So do an lfence
+	 * to prevent GS speculation, regardless of whether PTI is enabled.
+	 */
+	FENCE_SWAPGS_KERNEL_ENTRY
+
 	ret
 END(paranoid_entry)
 
@@ -1288,6 +1297,7 @@ ENTRY(error_entry)
 	 * from user mode due to an IRET fault.
	 */
 	SWAPGS
+	FENCE_SWAPGS_USER_ENTRY
 	/* We have user CR3.  Change to kernel CR3. */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
@@ -1301,6 +1311,8 @@
 	pushq	%r12
 	ret
 
+.Lerror_entry_done_lfence:
+	FENCE_SWAPGS_KERNEL_ENTRY
 .Lerror_entry_done:
 	ret
 
@@ -1318,7 +1330,7 @@ ENTRY(error_entry)
 	cmpq	%rax, RIP+8(%rsp)
 	je	.Lbstep_iret
 	cmpq	$.Lgs_change, RIP+8(%rsp)
-	jne	.Lerror_entry_done
+	jne	.Lerror_entry_done_lfence
 
 	/*
 	 * hack: .Lgs_change can fail with user gsbase.  If this happens, fix up
@@ -1326,6 +1338,7 @@
 	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	SWAPGS
+	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	jmp .Lerror_entry_done
 
@@ -1340,6 +1353,7 @@
 	 * gsbase and CR3.  Switch to kernel gsbase and CR3:
 	 */
 	SWAPGS
+	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 	/*
@@ -1431,6 +1445,7 @@ ENTRY(nmi)
 
 	swapgs
 	cld
+	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index d5252bc1e380..b1bf31713374 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -10,10 +10,13 @@
 /* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
 extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
 #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
+#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
+#undef __SYSCALL_X32
 
 #define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_X32(nr, sym, qual)
 
 asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	/*
@@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
+
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#ifdef CONFIG_X86_X32_ABI
+
+#define __SYSCALL_64(nr, sym, qual)
+#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,
+
+asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
+	/*
+	 * Smells like a compiler bug -- it doesn't work
+	 * when the & below is removed.
+	 */
+	[0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
+#include <asm/syscalls_64.h>
+};
+
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#endif
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c00019abd076..3fe02546aed3 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -186,11 +186,11 @@
 172	i386	prctl			sys_prctl			__ia32_sys_prctl
 173	i386	rt_sigreturn		sys_rt_sigreturn		sys32_rt_sigreturn
 174	i386	rt_sigaction		sys_rt_sigaction		__ia32_compat_sys_rt_sigaction
-175	i386	rt_sigprocmask		sys_rt_sigprocmask		__ia32_sys_rt_sigprocmask
+175	i386	rt_sigprocmask		sys_rt_sigprocmask		__ia32_compat_sys_rt_sigprocmask
 176	i386	rt_sigpending		sys_rt_sigpending		__ia32_compat_sys_rt_sigpending
 177	i386	rt_sigtimedwait		sys_rt_sigtimedwait_time32	__ia32_compat_sys_rt_sigtimedwait_time32
 178	i386	rt_sigqueueinfo		sys_rt_sigqueueinfo		__ia32_compat_sys_rt_sigqueueinfo
-179	i386	rt_sigsuspend		sys_rt_sigsuspend		__ia32_sys_rt_sigsuspend
+179	i386	rt_sigsuspend		sys_rt_sigsuspend		__ia32_compat_sys_rt_sigsuspend
 180	i386	pread64			sys_pread64			__ia32_compat_sys_x86_pread
 181	i386	pwrite64		sys_pwrite64			__ia32_compat_sys_x86_pwrite
 182	i386	chown			sys_chown16			__ia32_sys_chown16
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 94fcd1951aca..1af2be39e7d9 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -1,13 +1,13 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 in="$1"
 out="$2"
 
 syscall_macro() {
-    abi="$1"
-    nr="$2"
-    entry="$3"
+    local abi="$1"
+    local nr="$2"
+    local entry="$3"
 
     # Entry can be either just a function name or "function/qualifier"
     real_entry="${entry%%/*}"
@@ -21,14 +21,14 @@ syscall_macro() {
 }
 
 emit() {
-    abi="$1"
-    nr="$2"
-    entry="$3"
-    compat="$4"
-    umlentry=""
+    local abi="$1"
+    local nr="$2"
+    local entry="$3"
+    local compat="$4"
+    local umlentry=""
 
-    if [ "$abi" = "64" -a -n "$compat" ]; then
-	echo "a compat entry for a 64-bit syscall makes no sense" >&2
+    if [ "$abi" != "I386" -a -n "$compat" ]; then
+	echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
 	exit 1
     fi
 
@@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | (
     while read nr abi name entry compat; do
	abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
	if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
-	    # COMMON is the same as 64, except that we don't expect X32
-	    # programs to use it.  Our expectation has nothing to do with
-	    # any generated code, so treat them the same.
	    emit 64 "$nr" "$entry" "$compat"
+	    if [ "$abi" = "COMMON" ]; then
+		# COMMON means that this syscall exists in the same form for
+		# 64-bit and X32.
+		echo "#ifdef CONFIG_X86_X32_ABI"
+		emit X32 "$nr" "$entry" "$compat"
+		echo "#endif"
+	    fi
	elif [ "$abi" = "X32" ]; then
-	    # X32 is equivalent to 64 on an X32-compatible kernel.
	    echo "#ifdef CONFIG_X86_X32_ABI"
-	    emit 64 "$nr" "$entry" "$compat"
+	    emit X32 "$nr" "$entry" "$compat"
	    echo "#endif"
	elif [ "$abi" = "I386" ]; then
	    emit "$abi" "$nr" "$entry" "$compat"
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index cb3464525b37..2713490611a3 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -34,7 +34,7 @@
 	THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
 #endif
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	THUNK ___preempt_schedule, preempt_schedule
 	THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
 	EXPORT_SYMBOL(___preempt_schedule)
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index cc20465b2867..ea5c4167086c 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -46,7 +46,7 @@
 	THUNK lockdep_sys_exit_thunk,lockdep_sys_exit
 #endif
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	THUNK ___preempt_schedule, preempt_schedule
 	THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
 	EXPORT_SYMBOL(___preempt_schedule)
@@ -55,7 +55,7 @@
 
 #if defined(CONFIG_TRACE_IRQFLAGS) \
  || defined(CONFIG_DEBUG_LOCK_ALLOC) \
- || defined(CONFIG_PREEMPT)
+ || defined(CONFIG_PREEMPTION)
 .L_restore:
 	popq %r11
 	popq %r10
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 349a61d8bf34..f5937742b290 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -122,7 +122,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
 
 		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
 			return vmf_insert_pfn(vma, vmf->address,
-					vmalloc_to_pfn(tsc_pg));
+					virt_to_phys(tsc_pg) >> PAGE_SHIFT);
 	}
 
 	return VM_FAULT_SIGBUS;