From af085d9084b48530153f51e6cad19fd0b1a13ed7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Feb 2017 19:42:28 -0600 Subject: stacktrace/x86: add function for detecting reliable stack traces For live patching and possibly other use cases, a stack trace is only useful if it can be assured that it's completely reliable. Add a new save_stack_trace_tsk_reliable() function to achieve that. Note that if the target task isn't the current task, and the target task is allowed to run, then it could be writing the stack while the unwinder is reading it, resulting in possible corruption. So the caller of save_stack_trace_tsk_reliable() must ensure that the task is either 'current' or inactive. save_stack_trace_tsk_reliable() relies on the x86 unwinder's detection of pt_regs on the stack. If the pt_regs are not user-mode registers from a syscall, then they indicate an in-kernel interrupt or exception (e.g. preemption or a page fault), in which case the stack is considered unreliable due to the nature of frame pointers. It also relies on the x86 unwinder's detection of other issues, such as: - corrupted stack data - stack grows the wrong way - stack walk doesn't reach the bottom - user didn't provide a large enough entries array Such issues are reported by checking unwind_error() and !unwind_done(). Also add CONFIG_HAVE_RELIABLE_STACKTRACE so arch-independent code can determine at build time whether the function is implemented. Signed-off-by: Josh Poimboeuf Reviewed-by: Miroslav Benes Acked-by: Ingo Molnar # for the x86 changes Signed-off-by: Jiri Kosina --- arch/x86/Kconfig | 1 + arch/x86/include/asm/unwind.h | 6 +++ arch/x86/kernel/stacktrace.c | 96 +++++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/unwind_frame.c | 2 + 4 files changed, 104 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc98d5a294ee..2a26852c11b6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -160,6 +160,7 @@ config X86 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER && STACK_VALIDATION select HAVE_STACK_VALIDATION if X86_64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 6fa75b17aec3..137e9cce2ab4 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -11,6 +11,7 @@ struct unwind_state { unsigned long stack_mask; struct task_struct *task; int graph_idx; + bool error; #ifdef CONFIG_FRAME_POINTER unsigned long *bp, *orig_sp; struct pt_regs *regs; @@ -40,6 +41,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, __unwind_start(state, task, regs, first_frame); } +static inline bool unwind_error(struct unwind_state *state) +{ + return state->error; +} + #ifdef CONFIG_FRAME_POINTER static inline diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 8e2b79b88e51..8dabd7bf1673 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -76,6 +76,101 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE + +#define STACKTRACE_DUMP_ONCE(task) ({ \ + static bool __section(.data.unlikely) __dumped; \ + \ + if (!__dumped) { \ + __dumped = true; \ + WARN_ON(1); \ + show_stack(task, NULL); \ + } \ +}) + +static int __save_stack_trace_reliable(struct stack_trace *trace, + struct task_struct *task) +{ + struct unwind_state state; + struct pt_regs *regs; + unsigned long addr; + + for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); + unwind_next_frame(&state)) { + + regs = unwind_get_entry_regs(&state); + if (regs) { + /* + * Kernel mode registers on the stack indicate an + * in-kernel interrupt or exception (e.g., preemption + * or a page fault), which can make frame pointers + * unreliable. + */ + if (!user_mode(regs)) + return -EINVAL; + + /* + * The last frame contains the user mode syscall + * pt_regs. Skip it and finish the unwind. + */ + unwind_next_frame(&state); + if (!unwind_done(&state)) { + STACKTRACE_DUMP_ONCE(task); + return -EINVAL; + } + break; + } + + addr = unwind_get_return_address(&state); + + /* + * A NULL or invalid return address probably means there's some + * generated code which __kernel_text_address() doesn't know + * about. + */ + if (!addr) { + STACKTRACE_DUMP_ONCE(task); + return -EINVAL; + } + + if (save_stack_address(trace, addr, false)) + return -EINVAL; + } + + /* Check for stack corruption */ + if (unwind_error(&state)) { + STACKTRACE_DUMP_ONCE(task); + return -EINVAL; + } + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; + + return 0; +} + +/* + * This function returns an error if it detects any unreliable features of the + * stack. Otherwise it guarantees that the stack trace is reliable. + * + * If the task is not 'current', the caller *must* ensure the task is inactive. + */ +int save_stack_trace_tsk_reliable(struct task_struct *tsk, + struct stack_trace *trace) +{ + int ret; + + if (!try_get_task_stack(tsk)) + return -EINVAL; + + ret = __save_stack_trace_reliable(trace, tsk); + + put_task_stack(tsk); + + return ret; +} +#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ + /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ struct stack_frame_user { @@ -138,4 +233,3 @@ void save_stack_trace_user(struct stack_trace *trace) if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } - diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 478d15dbaee4..5ed43910e04b 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -225,6 +225,8 @@ bool unwind_next_frame(struct unwind_state *state) return true; bad_address: + state->error = true; + /* * When unwinding a non-current task, the task might actually be * running on another CPU, in which case it could be modifying its -- cgit v1.2.3 From 3a404842547c92e71127870a613319a29cdebe49 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Feb 2017 19:42:29 -0600 Subject: x86/entry: define _TIF_ALLWORK_MASK flags explicitly The _TIF_ALLWORK_MASK macro automatically includes the least-significant 16 bits of the thread_info flags, which is less than obvious and tends to create confusion and surprises when reading or modifying the code. Define the flags explicitly. Signed-off-by: Josh Poimboeuf Reviewed-by: Petr Mladek Reviewed-by: Miroslav Benes Reviewed-by: Kamalesh Babulal Acked-by: Ingo Molnar # for the x86 changes Signed-off-by: Jiri Kosina --- arch/x86/include/asm/thread_info.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ad6f5eb07a95..207d0d981287 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -73,9 +73,6 @@ struct thread_info { * thread information flags * - these are process state flags that various assembly files * may need to access - * - pending work-to-be-done flags are in LSW - * - other flags in MSW - * Warning: layout of LSW is hardcoded in entry.S */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ @@ -103,8 +100,8 @@ struct thread_info { #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -133,8 +130,10 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ - ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ - _TIF_NOHZ) + (_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \ + _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \ + _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \ + _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ -- cgit v1.2.3 From afb94c9e0b413bbdea779192eaca076c43ede031 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Feb 2017 19:42:31 -0600 Subject: livepatch/x86: add TIF_PATCH_PENDING thread flag Add the TIF_PATCH_PENDING thread flag to enable the new livepatch per-task consistency model for x86_64. The bit getting set indicates the thread has a pending patch which needs to be applied when the thread exits the kernel. The bit is placed in the _TIF_ALLWORK_MASK macro, which results in exit_to_usermode_loop() calling klp_update_patch_state() when it's set. Signed-off-by: Josh Poimboeuf Acked-by: Andy Lutomirski Reviewed-by: Petr Mladek Reviewed-by: Miroslav Benes Reviewed-by: Kamalesh Babulal Acked-by: Ingo Molnar # for the x86 changes Signed-off-by: Jiri Kosina --- arch/x86/entry/common.c | 9 ++++++--- arch/x86/include/asm/thread_info.h | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 370c42c7f046..cdefcfdd9e63 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -130,14 +131,13 @@ static long syscall_trace_enter(struct pt_regs *regs) #define EXIT_TO_USERMODE_LOOP_FLAGS \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) + _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING) static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) { /* * In order to return to user mode, we need to have IRQs off with - * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY, - * _TIF_UPROBE, or _TIF_NEED_RESCHED set. Several of these flags + * none of EXIT_TO_USERMODE_LOOP_FLAGS set. Several of these flags * can be set at any time on preemptable kernels if we have IRQs on, * so we need to loop. Disabling preemption wouldn't help: doing the * work to clear some of the flags can sleep. @@ -164,6 +164,9 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_USER_RETURN_NOTIFY) fire_user_return_notifiers(); + if (cached_flags & _TIF_PATCH_PENDING) + klp_update_patch_state(current); + /* Disable IRQs and retry */ local_irq_disable(); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 207d0d981287..83372dc43943 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -84,6 +84,7 @@ struct thread_info { #define TIF_SECCOMP 8 /* secure computing */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ +#define TIF_PATCH_PENDING 13 /* pending live patching update */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_NOHZ 19 /* in adaptive nohz mode */ @@ -107,6 +108,7 @@ struct thread_info { #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) +#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_NOHZ (1 << TIF_NOHZ) @@ -133,7 +135,7 @@ struct thread_info { (_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \ _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \ _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \ - _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT) + _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ -- cgit v1.2.3