diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-24 13:22:39 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-24 13:22:39 +0300 |
commit | ba9f6f8954afa5224e3ed60332f7b92242b7ed0f (patch) | |
tree | e6513afc476231dc2242728ffbf51353936b46af /arch/x86 | |
parent | a978a5b8d83f795e107a2ff759b28643739be70e (diff) | |
parent | a36700589b85443e28170be59fa11c8a104130a5 (diff) | |
download | linux-ba9f6f8954afa5224e3ed60332f7b92242b7ed0f.tar.xz |
Merge branch 'siginfo-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull siginfo updates from Eric Biederman:
"I have been slowly sorting out siginfo and this is the culmination of
that work.
The primary result is in several ways the signal infrastructure has
been made less error prone. The code has been updated so that manually
specifying SEND_SIG_FORCED is never necessary. The conversion to the
new siginfo sending functions is now complete, which makes it
difficult to send a signal without filling in the proper siginfo
fields.
At the tail end of the patchset comes the optimization of decreasing
the size of struct siginfo in the kernel from 128 bytes to about 48
bytes on 64bit. The fundamental observation that enables this is by
definition none of the known ways to use struct siginfo uses the extra
bytes.
This comes at the cost of a small user space observable difference.
For the rare case of siginfo being injected into the kernel only what
can be copied into kernel_siginfo is delivered to the destination, the
rest of the bytes are set to 0. For cases where the signal and the
si_code are known this is safe, because we know those bytes are not
used. For cases where the signal and si_code combination is unknown
the bits that won't fit into struct kernel_siginfo are tested to
verify they are zero, and the send fails if they are not.
I made an extensive search through userspace code and I could not find
anything that would break because of the above change. If it turns out
I did break something it will take just the revert of a single change
to restore kernel_siginfo to the same size as userspace siginfo.
Testing did reveal dependencies on preferring the signo passed to
sigqueueinfo over si->signo, so bit the bullet and added the
complexity necessary to handle that case.
Testing also revealed bad things can happen if a negative signal
number is passed into the system calls. Something no sane application
will do but something a malicious program or a fuzzer might do. So I
have fixed the code that performs the bounds checks to ensure negative
signal numbers are handled"
* 'siginfo-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (80 commits)
signal: Guard against negative signal numbers in copy_siginfo_from_user32
signal: Guard against negative signal numbers in copy_siginfo_from_user
signal: In sigqueueinfo prefer sig not si_signo
signal: Use a smaller struct siginfo in the kernel
signal: Distinguish between kernel_siginfo and siginfo
signal: Introduce copy_siginfo_from_user and use it's return value
signal: Remove the need for __ARCH_SI_PREABLE_SIZE and SI_PAD_SIZE
signal: Fail sigqueueinfo if si_signo != sig
signal/sparc: Move EMT_TAGOVF into the generic siginfo.h
signal/unicore32: Use force_sig_fault where appropriate
signal/unicore32: Generate siginfo in ucs32_notify_die
signal/unicore32: Use send_sig_fault where appropriate
signal/arc: Use force_sig_fault where appropriate
signal/arc: Push siginfo generation into unhandled_exception
signal/ia64: Use force_sig_fault where appropriate
signal/ia64: Use the force_sig(SIGSEGV,...) in ia64_rt_sigreturn
signal/ia64: Use the generic force_sigsegv in setup_frame
signal/arm/kvm: Use send_sig_mceerr
signal/arm: Use send_sig_fault where appropriate
signal/arm: Use force_sig_fault where appropriate
...
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/entry/vsyscall/vsyscall_64.c | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/compat.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/mpx.h | 12 | ||||
-rw-r--r-- | arch/x86/include/asm/ptrace.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/trace/mpx.h | 4 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/siginfo.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 29 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 176 | ||||
-rw-r--r-- | arch/x86/kernel/umip.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/uprobes.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 11 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 167 | ||||
-rw-r--r-- | arch/x86/mm/mpx.c | 30 |
13 files changed, 162 insertions, 292 deletions
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 82ed001e8909..85fd85d52ffd 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -100,20 +100,13 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) */ if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { - siginfo_t info; struct thread_struct *thread = ¤t->thread; thread->error_code = 6; /* user fault, no page, write */ thread->cr2 = ptr; thread->trap_nr = X86_TRAP_PF; - clear_siginfo(&info); - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *)ptr; - - force_sig_info(SIGSEGV, &info, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr, current); return false; } else { return true; diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index fb97cf7c4137..a0f46bdd9f24 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -240,6 +240,6 @@ static inline bool in_compat_syscall(void) struct compat_siginfo; int __copy_siginfo_to_user32(struct compat_siginfo __user *to, - const siginfo_t *from, bool x32_ABI); + const kernel_siginfo_t *from, bool x32_ABI); #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 61eb4b63c5ec..d0b1434fb0b6 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -57,8 +57,14 @@ #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) #define MPX_BNDSTA_ERROR_CODE 0x3 +struct mpx_fault_info { + void __user *addr; + void __user *lower; + void __user *upper; +}; + #ifdef CONFIG_X86_INTEL_MPX -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs); +int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); int mpx_handle_bd_fault(void); static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { @@ -78,9 +84,9 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags); #else -static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) +static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) { - return NULL; + return -EINVAL; } static inline int mpx_handle_bd_fault(void) { diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index f236bcd5485d..143c99499531 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -293,7 +293,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, #define arch_has_block_step() (boot_cpu_data.x86 >= 6) #endif -#define ARCH_HAS_USER_SINGLE_STEP_INFO +#define ARCH_HAS_USER_SINGLE_STEP_REPORT /* * When hitting ptrace_stop(), we cannot return using SYSRET because diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h index 7bd92db09e8d..54133017267c 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h @@ -11,12 +11,12 @@ TRACE_EVENT(mpx_bounds_register_exception, - TP_PROTO(void *addr_referenced, + TP_PROTO(void __user *addr_referenced, const struct mpx_bndreg *bndreg), TP_ARGS(addr_referenced, bndreg), TP_STRUCT__entry( - __field(void *, addr_referenced) + __field(void __user *, addr_referenced) __field(u64, lower_bound) __field(u64, upper_bound) ), diff --git a/arch/x86/include/uapi/asm/siginfo.h b/arch/x86/include/uapi/asm/siginfo.h index b3d157957177..6642d8be40c4 100644 --- a/arch/x86/include/uapi/asm/siginfo.h +++ b/arch/x86/include/uapi/asm/siginfo.h @@ -7,8 +7,6 @@ typedef long long __kernel_si_clock_t __attribute__((aligned(4))); # define __ARCH_SI_CLOCK_T __kernel_si_clock_t # define __ARCH_SI_ATTRIBUTES __attribute__((aligned(8))) -# else /* x86-64 */ -# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) # endif #endif diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d8f49c7384a3..ffae9b9740fd 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1359,33 +1359,18 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -static void fill_sigtrap_info(struct task_struct *tsk, - struct pt_regs *regs, - int error_code, int si_code, - struct siginfo *info) +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code, int si_code) { tsk->thread.trap_nr = X86_TRAP_DB; tsk->thread.error_code = error_code; - info->si_signo = SIGTRAP; - info->si_code = si_code; - info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; -} - -void user_single_step_siginfo(struct task_struct *tsk, - struct pt_regs *regs, - struct siginfo *info) -{ - fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); + /* Send us the fake SIGTRAP */ + force_sig_fault(SIGTRAP, si_code, + user_mode(regs) ? (void __user *)regs->ip : NULL, tsk); } -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code, int si_code) +void user_single_step_report(struct pt_regs *regs) { - struct siginfo info; - - clear_siginfo(&info); - fill_sigtrap_info(tsk, regs, error_code, si_code, &info); - /* Send us the fake SIGTRAP */ - force_sig_info(SIGTRAP, &info, tsk); + send_sigtrap(current, regs, 0, TRAP_BRKPT); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5bd0a997d81e..8f6dcd88202e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -189,7 +189,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr) } static nokprobe_inline int -do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, +do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, struct pt_regs *regs, long error_code) { if (v8086_mode(regs)) { @@ -202,10 +202,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, error_code, trapnr)) return 0; } - return -1; - } - - if (!user_mode(regs)) { + } else if (!user_mode(regs)) { if (fixup_exception(regs, trapnr, error_code, 0)) return 0; @@ -214,49 +211,6 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, die(str, regs, error_code); } - return -1; -} - -static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, - siginfo_t *info) -{ - unsigned long siaddr; - int sicode; - - switch (trapnr) { - default: - return SEND_SIG_PRIV; - - case X86_TRAP_DE: - sicode = FPE_INTDIV; - siaddr = uprobe_get_trap_addr(regs); - break; - case X86_TRAP_UD: - sicode = ILL_ILLOPN; - siaddr = uprobe_get_trap_addr(regs); - break; - case X86_TRAP_AC: - sicode = BUS_ADRALN; - siaddr = 0; - break; - } - - info->si_signo = signr; - info->si_errno = 0; - info->si_code = sicode; - info->si_addr = (void __user *)siaddr; - return info; -} - -static void -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) -{ - struct task_struct *tsk = current; - - - if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) - return; /* * We want error_code and trap_nr set for userspace faults and * kernelspace faults which result in die(), but not @@ -269,24 +223,45 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; + return -1; +} + +static void show_signal(struct task_struct *tsk, int signr, + const char *type, const char *desc, + struct pt_regs *regs, long error_code) +{ if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { - pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, + pr_info("%s[%d] %s%s ip:%lx sp:%lx error:%lx", + tsk->comm, task_pid_nr(tsk), type, desc, regs->ip, regs->sp, error_code); print_vma_addr(KERN_CONT " in ", regs->ip); pr_cont("\n"); } +} + +static void +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, + long error_code, int sicode, void __user *addr) +{ + struct task_struct *tsk = current; + + + if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) + return; + + show_signal(tsk, signr, "trap ", str, regs, error_code); - force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); + if (!sicode) + force_sig(signr, tsk); + else + force_sig_fault(signr, sicode, addr, tsk); } NOKPROBE_SYMBOL(do_trap); static void do_error_trap(struct pt_regs *regs, long error_code, char *str, - unsigned long trapnr, int signr) + unsigned long trapnr, int signr, int sicode, void __user *addr) { - siginfo_t info; - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); /* @@ -299,26 +274,26 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); - clear_siginfo(&info); - do_trap(trapnr, signr, str, regs, error_code, - fill_trap_info(regs, signr, trapnr, &info)); + do_trap(trapnr, signr, str, regs, error_code, sicode, addr); } } -#define DO_ERROR(trapnr, signr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - do_error_trap(regs, error_code, str, trapnr, signr); \ +#define IP ((void __user *)uprobe_get_trap_addr(regs)) +#define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ +dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ } -DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) -DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) -DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) -DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) -DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) -DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) -DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) +DO_ERROR(X86_TRAP_DE, SIGFPE, FPE_INTDIV, IP, "divide error", divide_error) +DO_ERROR(X86_TRAP_OF, SIGSEGV, 0, NULL, "overflow", overflow) +DO_ERROR(X86_TRAP_UD, SIGILL, ILL_ILLOPN, IP, "invalid opcode", invalid_op) +DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS) +DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present) +DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment) +DO_ERROR(X86_TRAP_AC, SIGBUS, BUS_ADRALN, NULL, "alignment check", alignment_check) +#undef IP #ifdef CONFIG_VMAP_STACK __visible void __noreturn handle_stack_overflow(const char *message, @@ -459,7 +434,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { const struct mpx_bndcsr *bndcsr; - siginfo_t *info; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); if (notify_die(DIE_TRAP, "bounds", regs, error_code, @@ -497,8 +471,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) goto exit_trap; break; /* Success, it was handled */ case 1: /* Bound violation. */ - info = mpx_generate_siginfo(regs); - if (IS_ERR(info)) { + { + struct task_struct *tsk = current; + struct mpx_fault_info mpx; + + if (mpx_fault_info(&mpx, regs)) { /* * We failed to decode the MPX instruction. Act as if * the exception was not caused by MPX. @@ -507,14 +484,20 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) } /* * Success, we decoded the instruction and retrieved - * an 'info' containing the address being accessed + * an 'mpx' containing the address being accessed * which caused the exception. This information * allows and application to possibly handle the * #BR exception itself. */ - do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, info); - kfree(info); + if (!do_trap_no_signal(tsk, X86_TRAP_BR, "bounds", regs, + error_code)) + break; + + show_signal(tsk, SIGSEGV, "trap ", "bounds", regs, error_code); + + force_sig_bnderr(mpx.addr, mpx.lower, mpx.upper); break; + } case 0: /* No exception caused by Intel MPX operations. */ goto exit_trap; default: @@ -531,12 +514,13 @@ exit_trap: * up here if the kernel has MPX turned off at compile * time.. */ - do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); + do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); } dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { + const char *desc = "general protection fault"; struct task_struct *tsk; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); @@ -570,25 +554,18 @@ do_general_protection(struct pt_regs *regs, long error_code) kprobe_fault_handler(regs, X86_TRAP_GP)) return; - if (notify_die(DIE_GPF, "general protection fault", regs, error_code, + if (notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) - die("general protection fault", regs, error_code); + die(desc, regs, error_code); return; } tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx", - tsk->comm, task_pid_nr(tsk), - regs->ip, regs->sp, error_code); - print_vma_addr(KERN_CONT " in ", regs->ip); - pr_cont("\n"); - } + show_signal(tsk, SIGSEGV, "", desc, regs, error_code); - force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); + force_sig(SIGSEGV, tsk); } NOKPROBE_SYMBOL(do_general_protection); @@ -631,7 +608,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) goto exit; cond_local_irq_enable(regs); - do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); + do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, 0, NULL); cond_local_irq_disable(regs); exit: @@ -845,7 +822,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) { struct task_struct *task = current; struct fpu *fpu = &task->thread.fpu; - siginfo_t info; + int si_code; char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : "simd exception"; @@ -871,18 +848,14 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.trap_nr = trapnr; task->thread.error_code = error_code; - clear_siginfo(&info); - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *)uprobe_get_trap_addr(regs); - - info.si_code = fpu__exception_code(fpu, trapnr); + si_code = fpu__exception_code(fpu, trapnr); /* Retry when we get spurious exceptions: */ - if (!info.si_code) + if (!si_code) return; - force_sig_info(SIGFPE, &info, task); + force_sig_fault(SIGFPE, si_code, + (void __user *)uprobe_get_trap_addr(regs), task); } dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) @@ -942,20 +915,13 @@ NOKPROBE_SYMBOL(do_device_not_available); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { - siginfo_t info; - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); local_irq_enable(); - clear_siginfo(&info); - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_BADSTK; - info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - &info); + ILL_BADSTK, (void __user *)NULL); } } #endif diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index ff20b35e98dd..f8f3cfda01ae 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -271,19 +271,13 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst, */ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) { - siginfo_t info; struct task_struct *tsk = current; tsk->thread.cr2 = (unsigned long)addr; tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; tsk->thread.trap_nr = X86_TRAP_PF; - clear_siginfo(&info); - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = addr; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, tsk); if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) return; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index deb576b23b7c..843feb94a950 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -1086,7 +1086,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n", current->pid, regs->sp, regs->ip); - force_sig_info(SIGSEGV, SEND_SIG_FORCED, current); + force_sig(SIGSEGV, current); } return -1; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 51b953ad9d4e..e843ec46609d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3125,16 +3125,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) { - siginfo_t info; - - clear_siginfo(&info); - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_MCEERR_AR; - info.si_addr = (void __user *)address; - info.si_addr_lsb = PAGE_SHIFT; - - send_sig_info(SIGBUS, &info, tsk); + send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk); } static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2b1519bc5381..b24eb4eb9984 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -157,79 +157,6 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) return prefetch; } -/* - * A protection key fault means that the PKRU value did not allow - * access to some PTE. Userspace can figure out what PKRU was - * from the XSAVE state, and this function fills out a field in - * siginfo so userspace can discover which protection key was set - * on the PTE. - * - * If we get here, we know that the hardware signaled a X86_PF_PK - * fault and that there was a VMA once we got in the fault - * handler. It does *not* guarantee that the VMA we find here - * was the one that we faulted on. - * - * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); - * 2. T1 : set PKRU to deny access to pkey=4, touches page - * 3. T1 : faults... - * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); - * 5. T1 : enters fault handler, takes mmap_sem, etc... - * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really - * faulted on a pte with its pkey=4. - */ -static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, - u32 *pkey) -{ - /* This is effectively an #ifdef */ - if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return; - - /* Fault not from Protection Keys: nothing to do */ - if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) - return; - /* - * force_sig_info_fault() is called from a number of - * contexts, some of which have a VMA and some of which - * do not. The X86_PF_PK handing happens after we have a - * valid VMA, so we should never reach this without a - * valid VMA. - */ - if (!pkey) { - WARN_ONCE(1, "PKU fault with no VMA passed in"); - info->si_pkey = 0; - return; - } - /* - * si_pkey should be thought of as a strong hint, but not - * absolutely guranteed to be 100% accurate because of - * the race explained above. - */ - info->si_pkey = *pkey; -} - -static void -force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk, u32 *pkey, int fault) -{ - unsigned lsb = 0; - siginfo_t info; - - clear_siginfo(&info); - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)address; - if (fault & VM_FAULT_HWPOISON_LARGE) - lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); - if (fault & VM_FAULT_HWPOISON) - lsb = PAGE_SHIFT; - info.si_addr_lsb = lsb; - - fill_sig_info_pkey(si_signo, si_code, &info, pkey); - - force_sig_info(si_signo, &info, tsk); -} - DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); @@ -734,8 +661,8 @@ no_context(struct pt_regs *regs, unsigned long error_code, tsk->thread.cr2 = address; /* XXX: hwpoison faults will set the wrong code. */ - force_sig_info_fault(signal, si_code, address, - tsk, NULL, 0); + force_sig_fault(signal, si_code, (void __user *)address, + tsk); } /* @@ -862,7 +789,7 @@ static bool is_vsyscall_vaddr(unsigned long vaddr) static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, u32 *pkey, int si_code) + unsigned long address, u32 pkey, int si_code) { struct task_struct *tsk = current; @@ -898,7 +825,10 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_PF; - force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0); + if (si_code == SEGV_PKUERR) + force_sig_pkuerr((void __user *)address, pkey); + + force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk); return; } @@ -911,35 +841,29 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, static noinline void bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, u32 *pkey) + unsigned long address) { - __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR); + __bad_area_nosemaphore(regs, error_code, address, 0, SEGV_MAPERR); } static void __bad_area(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma, int si_code) + unsigned long address, u32 pkey, int si_code) { struct mm_struct *mm = current->mm; - u32 pkey; - - if (vma) - pkey = vma_pkey(vma); - /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ up_read(&mm->mmap_sem); - __bad_area_nosemaphore(regs, error_code, address, - (vma) ? &pkey : NULL, si_code); + __bad_area_nosemaphore(regs, error_code, address, pkey, si_code); } static noinline void bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) { - __bad_area(regs, error_code, address, NULL, SEGV_MAPERR); + __bad_area(regs, error_code, address, 0, SEGV_MAPERR); } static inline bool bad_area_access_from_pkeys(unsigned long error_code, @@ -968,18 +892,40 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, * But, doing it this way allows compiler optimizations * if pkeys are compiled out. */ - if (bad_area_access_from_pkeys(error_code, vma)) - __bad_area(regs, error_code, address, vma, SEGV_PKUERR); - else - __bad_area(regs, error_code, address, vma, SEGV_ACCERR); + if (bad_area_access_from_pkeys(error_code, vma)) { + /* + * A protection key fault means that the PKRU value did not allow + * access to some PTE. Userspace can figure out what PKRU was + * from the XSAVE state. This function captures the pkey from + * the vma and passes it to userspace so userspace can discover + * which protection key was set on the PTE. + * + * If we get here, we know that the hardware signaled a X86_PF_PK + * fault and that there was a VMA once we got in the fault + * handler. It does *not* guarantee that the VMA we find here + * was the one that we faulted on. + * + * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); + * 2. T1 : set PKRU to deny access to pkey=4, touches page + * 3. T1 : faults... + * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); + * 5. T1 : enters fault handler, takes mmap_sem, etc... + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really + * faulted on a pte with its pkey=4. + */ + u32 pkey = vma_pkey(vma); + + __bad_area(regs, error_code, address, pkey, SEGV_PKUERR); + } else { + __bad_area(regs, error_code, address, 0, SEGV_ACCERR); + } } static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, - u32 *pkey, unsigned int fault) + unsigned int fault) { struct task_struct *tsk = current; - int code = BUS_ADRERR; /* Kernel mode? Handle exceptions or die: */ if (!(error_code & X86_PF_USER)) { @@ -997,18 +943,25 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, #ifdef CONFIG_MEMORY_FAILURE if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { - printk(KERN_ERR + unsigned lsb = 0; + + pr_err( "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", tsk->comm, tsk->pid, address); - code = BUS_MCEERR_AR; + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, tsk); + return; } #endif - force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk); } static noinline void mm_fault_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, u32 *pkey, vm_fault_t fault) + unsigned long address, vm_fault_t fault) { if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) { no_context(regs, error_code, address, 0, 0); @@ -1032,9 +985,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) - do_sigbus(regs, error_code, address, pkey, fault); + do_sigbus(regs, error_code, address, fault); else if (fault & VM_FAULT_SIGSEGV) - bad_area_nosemaphore(regs, error_code, address, pkey); + bad_area_nosemaphore(regs, error_code, address); else BUG(); } @@ -1267,7 +1220,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock: */ - bad_area_nosemaphore(regs, hw_error_code, address, NULL); + bad_area_nosemaphore(regs, hw_error_code, address); } NOKPROBE_SYMBOL(do_kern_addr_fault); @@ -1283,7 +1236,6 @@ void do_user_addr_fault(struct pt_regs *regs, struct mm_struct *mm; vm_fault_t fault, major = 0; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - u32 pkey; tsk = current; mm = tsk->mm; @@ -1304,7 +1256,7 @@ void do_user_addr_fault(struct pt_regs *regs, * pages in the user address space. */ if (unlikely(smap_violation(hw_error_code, regs))) { - bad_area_nosemaphore(regs, hw_error_code, address, NULL); + bad_area_nosemaphore(regs, hw_error_code, address); return; } @@ -1313,7 +1265,7 @@ void do_user_addr_fault(struct pt_regs *regs, * in a region with pagefaults disabled then we must not take the fault */ if (unlikely(faulthandler_disabled() || !mm)) { - bad_area_nosemaphore(regs, hw_error_code, address, NULL); + bad_area_nosemaphore(regs, hw_error_code, address); return; } @@ -1403,7 +1355,7 @@ void do_user_addr_fault(struct pt_regs *regs, * Fault from code in kernel from * which we do not expect faults. */ - bad_area_nosemaphore(regs, sw_error_code, address, NULL); + bad_area_nosemaphore(regs, sw_error_code, address); return; } retry: @@ -1467,10 +1419,7 @@ good_area: * (potentially after handling any pending signal during the return to * userland). The return to userland is identified whenever * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. - * Thus we have to be careful about not touching vma after handling the - * fault, so we read the pkey beforehand. */ - pkey = vma_pkey(vma); fault = handle_mm_fault(vma, address, flags); major |= fault & VM_FAULT_MAJOR; @@ -1499,7 +1448,7 @@ good_area: up_read(&mm->mmap_sem); if (unlikely(fault & VM_FAULT_ERROR)) { - mm_fault_error(regs, sw_error_code, address, &pkey, fault); + mm_fault_error(regs, sw_error_code, address, fault); return; } diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index e500949bae24..2385538e8065 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -118,14 +118,11 @@ bad_opcode: * anything it wants in to the instructions. We can not * trust anything about it. They might not be valid * instructions or might encode invalid registers, etc... - * - * The caller is expected to kfree() the returned siginfo_t. */ -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) +int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) { const struct mpx_bndreg_state *bndregs; const struct mpx_bndreg *bndreg; - siginfo_t *info = NULL; struct insn insn; uint8_t bndregno; int err; @@ -153,11 +150,6 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) /* now go select the individual register in the set of 4 */ bndreg = &bndregs->bndreg[bndregno]; - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - err = -ENOMEM; - goto err_out; - } /* * The registers are always 64-bit, but the upper 32 * bits are ignored in 32-bit mode. Also, note that the @@ -168,27 +160,23 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) * complains when casting from integers to different-size * pointers. */ - info->si_lower = (void __user *)(unsigned long)bndreg->lower_bound; - info->si_upper = (void __user *)(unsigned long)~bndreg->upper_bound; - info->si_addr_lsb = 0; - info->si_signo = SIGSEGV; - info->si_errno = 0; - info->si_code = SEGV_BNDERR; - info->si_addr = insn_get_addr_ref(&insn, regs); + info->lower = (void __user *)(unsigned long)bndreg->lower_bound; + info->upper = (void __user *)(unsigned long)~bndreg->upper_bound; + info->addr = insn_get_addr_ref(&insn, regs); + /* * We were not able to extract an address from the instruction, * probably because there was something invalid in it. */ - if (info->si_addr == (void __user *)-1) { + if (info->addr == (void __user *)-1) { err = -EINVAL; goto err_out; } - trace_mpx_bounds_register_exception(info->si_addr, bndreg); - return info; + trace_mpx_bounds_register_exception(info->addr, bndreg); + return 0; err_out: /* info might be NULL, but kfree() handles that */ - kfree(info); - return ERR_PTR(err); + return err; } static __user void *mpx_get_bounds_dir(void) |