diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/syscall.h | 24 | ||||
-rw-r--r-- | include/linux/context_tracking.h | 6 | ||||
-rw-r--r-- | include/linux/entry-common.h | 171 | ||||
-rw-r--r-- | include/linux/entry-kvm.h | 4 | ||||
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | include/linux/sched/signal.h | 20 | ||||
-rw-r--r-- | include/linux/seccomp.h | 2 | ||||
-rw-r--r-- | include/linux/syscall_user_dispatch.h | 40 | ||||
-rw-r--r-- | include/linux/thread_info.h | 50 | ||||
-rw-r--r-- | include/linux/tracehook.h | 44 | ||||
-rw-r--r-- | include/trace/syscall.h | 6 | ||||
-rw-r--r-- | include/uapi/asm-generic/siginfo.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/prctl.h | 5 |
13 files changed, 308 insertions, 69 deletions
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index f3135e734387..524218ae3825 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -43,9 +43,9 @@ int syscall_get_nr(struct task_struct *task, struct pt_regs *regs); * @regs: task_pt_regs() of @task * * It's only valid to call this when @task is stopped for system - * call exit tracing (due to TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT), - * after tracehook_report_syscall_entry() returned nonzero to prevent - * the system call from taking place. + * call exit tracing (due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT), after tracehook_report_syscall_entry() + * returned nonzero to prevent the system call from taking place. * * This rolls back the register state in @regs so it's as if the * system call instruction was a no-op. The registers containing @@ -63,7 +63,8 @@ void syscall_rollback(struct task_struct *task, struct pt_regs *regs); * Returns 0 if the system call succeeded, or -ERRORCODE if it failed. * * It's only valid to call this when @task is stopped for tracing on exit - * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + * from a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. */ long syscall_get_error(struct task_struct *task, struct pt_regs *regs); @@ -76,7 +77,8 @@ long syscall_get_error(struct task_struct *task, struct pt_regs *regs); * This value is meaningless if syscall_get_error() returned nonzero. * * It's only valid to call this when @task is stopped for tracing on exit - * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + * from a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. */ long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs); @@ -93,7 +95,8 @@ long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs); * code; the user sees a failed system call with this errno code. * * It's only valid to call this when @task is stopped for tracing on exit - * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + * from a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. */ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val); @@ -108,7 +111,8 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, * @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on - * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. */ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args); @@ -123,7 +127,8 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, * The first argument gets value @args[0], and so on. * * It's only valid to call this when @task is stopped for tracing on - * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT. + * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_AUDIT. */ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, const unsigned long *args); @@ -135,7 +140,8 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, * Returns the AUDIT_ARCH_* based on the system call convention in use. * * It's only valid to call this when @task is stopped on entry to a system - * call, due to %TIF_SYSCALL_TRACE, %TIF_SYSCALL_AUDIT, or %TIF_SECCOMP. + * call, due to %SYSCALL_WORK_SYSCALL_TRACE, %SYSCALL_WORK_SYSCALL_AUDIT, or + * %SYSCALL_WORK_SECCOMP. * * Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must * provide an implementation of this. diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d53cd331c4dd..bceb06498521 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -51,7 +51,8 @@ static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; - if (!context_tracking_enabled()) + if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) || + !context_tracking_enabled()) return 0; prev_ctx = this_cpu_read(context_tracking.state); @@ -63,7 +64,8 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { - if (context_tracking_enabled()) { + if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) && + context_tracking_enabled()) { if (prev_ctx != CONTEXT_KERNEL) context_tracking_enter(prev_ctx); } diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 474f29638d2c..7c581a4c3797 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -13,22 +13,6 @@ * Define dummy _TIF work flags if not defined by the architecture or for * disabled functionality. */ -#ifndef _TIF_SYSCALL_EMU -# define _TIF_SYSCALL_EMU (0) -#endif - -#ifndef _TIF_SYSCALL_TRACEPOINT -# define _TIF_SYSCALL_TRACEPOINT (0) -#endif - -#ifndef _TIF_SECCOMP -# define _TIF_SECCOMP (0) -#endif - -#ifndef _TIF_SYSCALL_AUDIT -# define _TIF_SYSCALL_AUDIT (0) -#endif - #ifndef _TIF_PATCH_PENDING # define _TIF_PATCH_PENDING (0) #endif @@ -37,28 +21,36 @@ # define _TIF_UPROBE (0) #endif +#ifndef _TIF_NOTIFY_SIGNAL +# define _TIF_NOTIFY_SIGNAL (0) +#endif + /* - * TIF flags handled in syscall_enter_from_user_mode() + * SYSCALL_WORK flags handled in syscall_enter_from_user_mode() */ -#ifndef ARCH_SYSCALL_ENTER_WORK -# define ARCH_SYSCALL_ENTER_WORK (0) +#ifndef ARCH_SYSCALL_WORK_ENTER +# define ARCH_SYSCALL_WORK_ENTER (0) #endif -#define SYSCALL_ENTER_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ - _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \ - ARCH_SYSCALL_ENTER_WORK) - /* - * TIF flags handled in syscall_exit_to_user_mode() + * SYSCALL_WORK flags handled in syscall_exit_to_user_mode() */ -#ifndef ARCH_SYSCALL_EXIT_WORK -# define ARCH_SYSCALL_EXIT_WORK (0) +#ifndef ARCH_SYSCALL_WORK_EXIT +# define ARCH_SYSCALL_WORK_EXIT (0) #endif -#define SYSCALL_EXIT_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK) +#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \ + SYSCALL_WORK_SYSCALL_TRACEPOINT | \ + SYSCALL_WORK_SYSCALL_TRACE | \ + SYSCALL_WORK_SYSCALL_EMU | \ + SYSCALL_WORK_SYSCALL_AUDIT | \ + SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ + ARCH_SYSCALL_WORK_ENTER) +#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \ + SYSCALL_WORK_SYSCALL_TRACE | \ + SYSCALL_WORK_SYSCALL_AUDIT | \ + SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ + ARCH_SYSCALL_WORK_EXIT) /* * TIF flags handled in exit_to_user_mode_loop() @@ -69,7 +61,7 @@ #define EXIT_TO_USER_MODE_WORK \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \ + _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) /** @@ -110,6 +102,27 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs #endif /** + * enter_from_user_mode - Establish state when coming from user mode + * + * Syscall/interrupt entry disables interrupts, but user mode is traced as + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. + * + * 1) Tell lockdep that interrupts are disabled + * 2) Invoke context tracking if enabled to reactivate RCU + * 3) Trace interrupts off state + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct and interrupts are still + * disabled. The subsequent functions can be instrumented. + * + * This is invoked when there is architecture specific functionality to be + * done between establishing state and enabling interrupts. The caller must + * enable interrupts before invoking syscall_enter_from_user_mode_work(). + */ +void enter_from_user_mode(struct pt_regs *regs); + +/** * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts * @regs: Pointer to currents pt_regs * @@ -118,7 +131,8 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs * function returns all state is correct, interrupts are enabled and the * subsequent functions can be instrumented. * - * This handles lockdep, RCU (context tracking) and tracing state. + * This handles lockdep, RCU (context tracking) and tracing state, i.e. + * the functionality provided by enter_from_user_mode(). * * This is invoked when there is extra architecture specific functionality * to be done between establishing state and handling user mode entry work. @@ -144,8 +158,8 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); * * It handles the following work items: * - * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(), - * __secure_computing(), trace_sys_enter() + * 1) syscall_work flag dependent invocations of + * arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter() * 2) Invocation of audit_syscall_entry() */ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); @@ -259,12 +273,13 @@ static __always_inline void arch_exit_to_user_mode(void) { } #endif /** - * arch_do_signal - Architecture specific signal delivery function + * arch_do_signal_or_restart - Architecture specific signal delivery function * @regs: Pointer to currents pt_regs + * @has_signal: actual signal to handle * * Invoked from exit_to_user_mode_loop(). */ -void arch_do_signal(struct pt_regs *regs); +void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); /** * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() @@ -286,6 +301,41 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) #endif /** + * exit_to_user_mode - Fixup state when exiting to user mode + * + * Syscall/interrupt exit enables interrupts, but the kernel state is + * interrupts disabled when this is invoked. Also tell RCU about it. + * + * 1) Trace interrupts on state + * 2) Invoke context tracking if enabled to adjust RCU state + * 3) Invoke architecture specific last minute exit code, e.g. speculation + * mitigations, etc.: arch_exit_to_user_mode() + * 4) Tell lockdep that interrupts are enabled + * + * Invoked from architecture specific code when syscall_exit_to_user_mode() + * is not suitable as the last step before returning to userspace. Must be + * invoked with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke syscall_exit_to_user_mode_work() before this. + */ +void exit_to_user_mode(void); + +/** + * syscall_exit_to_user_mode_work - Handle work before returning to user mode + * @regs: Pointer to currents pt_regs + * + * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling + * exit_to_user_mode() to perform the final transition to user mode. + * + * Calling convention is the same as for syscall_exit_to_user_mode() and it + * returns with all work handled and interrupts disabled. The caller must + * invoke exit_to_user_mode() before actually switching to user mode to + * make the final state transitions. Interrupts must stay disabled between + * return from this function and the invocation of exit_to_user_mode(). + */ +void syscall_exit_to_user_mode_work(struct pt_regs *regs); + +/** * syscall_exit_to_user_mode - Handle work before returning to user mode * @regs: Pointer to currents pt_regs * @@ -307,8 +357,12 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) * - Architecture specific one time work arch_exit_to_user_mode_prepare() * - Address limit and lockdep checks * - * 3) Final transition (lockdep, tracing, context tracking, RCU). Invokes - * arch_exit_to_user_mode() to handle e.g. speculation mitigations + * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the + * functionality in exit_to_user_mode(). + * + * This is a combination of syscall_exit_to_user_mode_work() (1,2) and + * exit_to_user_mode(). This function is preferred unless there is a + * compelling architectural reason to use the seperate functions. */ void syscall_exit_to_user_mode(struct pt_regs *regs); @@ -341,8 +395,26 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs); void irqentry_exit_to_user_mode(struct pt_regs *regs); #ifndef irqentry_state +/** + * struct irqentry_state - Opaque object for exception state storage + * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the + * exit path has to invoke rcu_irq_exit(). + * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that + * lockdep state is restored correctly on exit from nmi. + * + * This opaque object is filled in by the irqentry_*_enter() functions and + * must be passed back into the corresponding irqentry_*_exit() functions + * when the exception is complete. + * + * Callers of irqentry_*_[enter|exit]() must consider this structure opaque + * and all members private. Descriptions of the members are provided to aid in + * the maintenance of the irqentry_*() functions. + */ typedef struct irqentry_state { - bool exit_rcu; + union { + bool exit_rcu; + bool lockdep; + }; } irqentry_state_t; #endif @@ -392,7 +464,7 @@ void irqentry_exit_cond_resched(void); * @state: Return value from matching call to irqentry_enter() * * Depending on the return target (kernel/user) this runs the necessary - * preemption and work checks if possible and reguired and returns to + * preemption and work checks if possible and required and returns to * the caller with interrupts disabled and no further work pending. * * This is the last action before returning to the low level ASM code which @@ -402,4 +474,23 @@ void irqentry_exit_cond_resched(void); */ void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state); +/** + * irqentry_nmi_enter - Handle NMI entry + * @regs: Pointer to currents pt_regs + * + * Similar to irqentry_enter() but taking care of the NMI constraints. + */ +irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs); + +/** + * irqentry_nmi_exit - Handle return from NMI handling + * @regs: Pointer to pt_regs (NMI entry regs) + * @irq_state: Return value from matching call to irqentry_nmi_enter() + * + * Last action before returning to the low level assembly code. + * + * Counterpart to irqentry_nmi_enter(). + */ +void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state); + #endif diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 0cef17afb41a..9b93f8584ff7 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -11,8 +11,8 @@ # define ARCH_XFER_TO_GUEST_MODE_WORK (0) #endif -#define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +#define XFER_TO_GUEST_MODE_WORK \ + (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) struct kvm_vcpu; diff --git a/include/linux/sched.h b/include/linux/sched.h index 76cd21fa5501..1cac7efabc83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -28,6 +28,7 @@ #include <linux/sched/prio.h> #include <linux/sched/types.h> #include <linux/signal_types.h> +#include <linux/syscall_user_dispatch.h> #include <linux/mm_types_task.h> #include <linux/task_io_accounting.h> #include <linux/posix-timers.h> @@ -987,6 +988,7 @@ struct task_struct { unsigned int sessionid; #endif struct seccomp seccomp; + struct syscall_user_dispatch syscall_dispatch; /* Thread group tracking: */ u64 parent_exec_id; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1bad18a1d8ba..bd5afa076189 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -353,11 +353,25 @@ static inline int restart_syscall(void) return -ERESTARTNOINTR; } -static inline int signal_pending(struct task_struct *p) +static inline int task_sigpending(struct task_struct *p) { return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } +static inline int signal_pending(struct task_struct *p) +{ +#if defined(TIF_NOTIFY_SIGNAL) + /* + * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same + * behavior in terms of ensuring that we break out of wait loops + * so that notify signal callbacks can be processed. + */ + if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) + return 1; +#endif + return task_sigpending(p); +} + static inline int __fatal_signal_pending(struct task_struct *p) { return unlikely(sigismember(&p->pending.signal, SIGKILL)); @@ -365,7 +379,7 @@ static inline int __fatal_signal_pending(struct task_struct *p) static inline int fatal_signal_pending(struct task_struct *p) { - return signal_pending(p) && __fatal_signal_pending(p); + return task_sigpending(p) && __fatal_signal_pending(p); } static inline int signal_pending_state(long state, struct task_struct *p) @@ -502,7 +516,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize); static inline void restore_saved_sigmask_unless(bool interrupted) { if (interrupted) - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); + WARN_ON(!signal_pending(current)); else restore_saved_sigmask(); } diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 02aef2844c38..47763f3999f7 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -42,7 +42,7 @@ struct seccomp { extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) { - if (unlikely(test_thread_flag(TIF_SECCOMP))) + if (unlikely(test_syscall_work(SECCOMP))) return __secure_computing(NULL); return 0; } diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h new file mode 100644 index 000000000000..a0ae443fb7df --- /dev/null +++ b/include/linux/syscall_user_dispatch.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Collabora Ltd. + */ +#ifndef _SYSCALL_USER_DISPATCH_H +#define _SYSCALL_USER_DISPATCH_H + +#include <linux/thread_info.h> + +#ifdef CONFIG_GENERIC_ENTRY + +struct syscall_user_dispatch { + char __user *selector; + unsigned long offset; + unsigned long len; + bool on_dispatch; +}; + +int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, + unsigned long len, char __user *selector); + +#define clear_syscall_work_syscall_user_dispatch(tsk) \ + clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH) + +#else +struct syscall_user_dispatch {}; + +static inline int set_syscall_user_dispatch(unsigned long mode, unsigned long offset, + unsigned long len, char __user *selector) +{ + return -EINVAL; +} + +static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *tsk) +{ +} + +#endif /* CONFIG_GENERIC_ENTRY */ + +#endif /* _SYSCALL_USER_DISPATCH_H */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e93e249a4e9b..c8a974cead73 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -35,6 +35,24 @@ enum { GOOD_STACK, }; +#ifdef CONFIG_GENERIC_ENTRY +enum syscall_work_bit { + SYSCALL_WORK_BIT_SECCOMP, + SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT, + SYSCALL_WORK_BIT_SYSCALL_TRACE, + SYSCALL_WORK_BIT_SYSCALL_EMU, + SYSCALL_WORK_BIT_SYSCALL_AUDIT, + SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH, +}; + +#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) +#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT) +#define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE) +#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) +#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) +#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH) +#endif + #include <asm/thread_info.h> #ifdef __KERNEL__ @@ -97,6 +115,38 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) +#ifdef CONFIG_GENERIC_ENTRY +#define set_syscall_work(fl) \ + set_bit(SYSCALL_WORK_BIT_##fl, ¤t_thread_info()->syscall_work) +#define test_syscall_work(fl) \ + test_bit(SYSCALL_WORK_BIT_##fl, ¤t_thread_info()->syscall_work) +#define clear_syscall_work(fl) \ + clear_bit(SYSCALL_WORK_BIT_##fl, ¤t_thread_info()->syscall_work) + +#define set_task_syscall_work(t, fl) \ + set_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) +#define test_task_syscall_work(t, fl) \ + test_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) +#define clear_task_syscall_work(t, fl) \ + clear_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work) + +#else /* CONFIG_GENERIC_ENTRY */ + +#define set_syscall_work(fl) \ + set_ti_thread_flag(current_thread_info(), TIF_##fl) +#define test_syscall_work(fl) \ + test_ti_thread_flag(current_thread_info(), TIF_##fl) +#define clear_syscall_work(fl) \ + clear_ti_thread_flag(current_thread_info(), TIF_##fl) + +#define set_task_syscall_work(t, fl) \ + set_ti_thread_flag(task_thread_info(t), TIF_##fl) +#define test_task_syscall_work(t, fl) \ + test_ti_thread_flag(task_thread_info(t), TIF_##fl) +#define clear_task_syscall_work(t, fl) \ + clear_ti_thread_flag(task_thread_info(t), TIF_##fl) +#endif /* !CONFIG_GENERIC_ENTRY */ + #define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b480e1a07ed8..54b925224a13 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -83,11 +83,12 @@ static inline int ptrace_report_syscall(struct pt_regs *regs, * tracehook_report_syscall_entry - task is about to attempt a system call * @regs: user register state of current task * - * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set, - * when the current task has just entered the kernel for a system call. - * Full user register state is available here. Changing the values - * in @regs can affect the system call number and arguments to be tried. - * It is safe to block here, preventing the system call from beginning. + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just + * entered the kernel for a system call. Full user register state is + * available here. Changing the values in @regs can affect the system + * call number and arguments to be tried. It is safe to block here, + * preventing the system call from beginning. * * Returns zero normally, or nonzero if the calling arch code should abort * the system call. That must prevent normal entry so no system call is @@ -109,15 +110,15 @@ static inline __must_check int tracehook_report_syscall_entry( * @regs: user register state of current task * @step: nonzero if simulating single-step or block-step * - * This will be called if %TIF_SYSCALL_TRACE has been set, when the - * current task has just finished an attempted system call. Full + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when + * the current task has just finished an attempted system call. Full * user register state is available here. It is safe to block here, * preventing signals from being processed. * * If @step is nonzero, this report is also in lieu of the normal * trap that would follow the system call instruction because * user_enable_block_step() or user_enable_single_step() was used. - * In this case, %TIF_SYSCALL_TRACE might not be set. + * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. * * Called without locks, just before checking for pending signals. */ @@ -198,4 +199,31 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) blkcg_maybe_throttle_current(); } +/* + * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This + * is currently used by TWA_SIGNAL based task_work, which requires breaking + * wait loops to ensure that task_work is noticed and run. + */ +static inline void tracehook_notify_signal(void) +{ +#if defined(TIF_NOTIFY_SIGNAL) + clear_thread_flag(TIF_NOTIFY_SIGNAL); + smp_mb__after_atomic(); + if (current->task_works) + task_work_run(); +#endif +} + +/* + * Called when we have work to process from exit_to_user_mode_loop() + */ +static inline void set_notify_signal(struct task_struct *task) +{ +#if defined(TIF_NOTIFY_SIGNAL) + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && + !wake_up_state(task, TASK_INTERRUPTIBLE)) + kick_process(task); +#endif +} + #endif /* <linux/tracehook.h> */ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index dc8ac27d27c1..8e193f3a33b3 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -37,10 +37,10 @@ struct syscall_metadata { #if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS) static inline void syscall_tracepoint_update(struct task_struct *p) { - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) - set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); + if (test_syscall_work(SYSCALL_TRACEPOINT)) + set_task_syscall_work(p, SYSCALL_TRACEPOINT); else - clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); + clear_task_syscall_work(p, SYSCALL_TRACEPOINT); } #else static inline void syscall_tracepoint_update(struct task_struct *p) diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 7aacf9389010..d2597000407a 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -286,7 +286,8 @@ typedef struct siginfo { * SIGSYS si_codes */ #define SYS_SECCOMP 1 /* seccomp triggered */ -#define NSIGSYS 1 +#define SYS_USER_DISPATCH 2 /* syscall user dispatch triggered */ +#define NSIGSYS 2 /* * SIGEMT si_codes diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 7f0827705c9a..90deb41c8a34 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -247,4 +247,9 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +/* Dispatch syscalls to a userspace handler */ +#define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 + #endif /* _LINUX_PRCTL_H */ |