summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/asm-generic/syscall.h 24
-rw-r--r-- include/linux/context_tracking.h 6
-rw-r--r-- include/linux/entry-common.h 171
-rw-r--r-- include/linux/entry-kvm.h 4
-rw-r--r-- include/linux/sched.h 2
-rw-r--r-- include/linux/sched/signal.h 20
-rw-r--r-- include/linux/seccomp.h 2
-rw-r--r-- include/linux/syscall_user_dispatch.h 40
-rw-r--r-- include/linux/thread_info.h 50
-rw-r--r-- include/linux/tracehook.h 44
-rw-r--r-- include/trace/syscall.h 6
-rw-r--r-- include/uapi/asm-generic/siginfo.h 3
-rw-r--r-- include/uapi/linux/prctl.h 5
13 files changed, 308 insertions, 69 deletions
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index f3135e734387..524218ae3825 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -43,9 +43,9 @@ int syscall_get_nr(struct task_struct *task, struct pt_regs *regs);
* @regs: task_pt_regs() of @task
*
* It's only valid to call this when @task is stopped for system
- * call exit tracing (due to TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT),
- * after tracehook_report_syscall_entry() returned nonzero to prevent
- * the system call from taking place.
+ * call exit tracing (due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT), after tracehook_report_syscall_entry()
+ * returned nonzero to prevent the system call from taking place.
*
* This rolls back the register state in @regs so it's as if the
* system call instruction was a no-op. The registers containing
@@ -63,7 +63,8 @@ void syscall_rollback(struct task_struct *task, struct pt_regs *regs);
* Returns 0 if the system call succeeded, or -ERRORCODE if it failed.
*
* It's only valid to call this when @task is stopped for tracing on exit
- * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
+ * from a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT.
*/
long syscall_get_error(struct task_struct *task, struct pt_regs *regs);
@@ -76,7 +77,8 @@ long syscall_get_error(struct task_struct *task, struct pt_regs *regs);
* This value is meaningless if syscall_get_error() returned nonzero.
*
* It's only valid to call this when @task is stopped for tracing on exit
- * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
+ * from a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT.
*/
long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs);
@@ -93,7 +95,8 @@ long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs);
* code; the user sees a failed system call with this errno code.
*
* It's only valid to call this when @task is stopped for tracing on exit
- * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
+ * from a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT.
*/
void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
int error, long val);
@@ -108,7 +111,8 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
* @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
- * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
+ * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT.
*/
void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
unsigned long *args);
@@ -123,7 +127,8 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
* The first argument gets value @args[0], and so on.
*
* It's only valid to call this when @task is stopped for tracing on
- * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
+ * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT.
*/
void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
const unsigned long *args);
@@ -135,7 +140,8 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
* Returns the AUDIT_ARCH_* based on the system call convention in use.
*
* It's only valid to call this when @task is stopped on entry to a system
- * call, due to %TIF_SYSCALL_TRACE, %TIF_SYSCALL_AUDIT, or %TIF_SECCOMP.
+ * call, due to %SYSCALL_WORK_SYSCALL_TRACE, %SYSCALL_WORK_SYSCALL_AUDIT, or
+ * %SYSCALL_WORK_SECCOMP.
*
* Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must
* provide an implementation of this.
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d53cd331c4dd..bceb06498521 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -51,7 +51,8 @@ static inline enum ctx_state exception_enter(void)
{
enum ctx_state prev_ctx;
- if (!context_tracking_enabled())
+ if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) ||
+ !context_tracking_enabled())
return 0;
prev_ctx = this_cpu_read(context_tracking.state);
@@ -63,7 +64,8 @@ static inline enum ctx_state exception_enter(void)
static inline void exception_exit(enum ctx_state prev_ctx)
{
- if (context_tracking_enabled()) {
+ if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) &&
+ context_tracking_enabled()) {
if (prev_ctx != CONTEXT_KERNEL)
context_tracking_enter(prev_ctx);
}
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 474f29638d2c..7c581a4c3797 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -13,22 +13,6 @@
* Define dummy _TIF work flags if not defined by the architecture or for
* disabled functionality.
*/
-#ifndef _TIF_SYSCALL_EMU
-# define _TIF_SYSCALL_EMU (0)
-#endif
-
-#ifndef _TIF_SYSCALL_TRACEPOINT
-# define _TIF_SYSCALL_TRACEPOINT (0)
-#endif
-
-#ifndef _TIF_SECCOMP
-# define _TIF_SECCOMP (0)
-#endif
-
-#ifndef _TIF_SYSCALL_AUDIT
-# define _TIF_SYSCALL_AUDIT (0)
-#endif
-
#ifndef _TIF_PATCH_PENDING
# define _TIF_PATCH_PENDING (0)
#endif
@@ -37,28 +21,36 @@
# define _TIF_UPROBE (0)
#endif
+#ifndef _TIF_NOTIFY_SIGNAL
+# define _TIF_NOTIFY_SIGNAL (0)
+#endif
+
/*
- * TIF flags handled in syscall_enter_from_user_mode()
+ * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
*/
-#ifndef ARCH_SYSCALL_ENTER_WORK
-# define ARCH_SYSCALL_ENTER_WORK (0)
+#ifndef ARCH_SYSCALL_WORK_ENTER
+# define ARCH_SYSCALL_WORK_ENTER (0)
#endif
-#define SYSCALL_ENTER_WORK \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
- _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \
- ARCH_SYSCALL_ENTER_WORK)
-
/*
- * TIF flags handled in syscall_exit_to_user_mode()
+ * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
*/
-#ifndef ARCH_SYSCALL_EXIT_WORK
-# define ARCH_SYSCALL_EXIT_WORK (0)
+#ifndef ARCH_SYSCALL_WORK_EXIT
+# define ARCH_SYSCALL_WORK_EXIT (0)
#endif
-#define SYSCALL_EXIT_WORK \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
- _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK)
+#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \
+ SYSCALL_WORK_SYSCALL_TRACEPOINT | \
+ SYSCALL_WORK_SYSCALL_TRACE | \
+ SYSCALL_WORK_SYSCALL_EMU | \
+ SYSCALL_WORK_SYSCALL_AUDIT | \
+ SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
+ ARCH_SYSCALL_WORK_ENTER)
+#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \
+ SYSCALL_WORK_SYSCALL_TRACE | \
+ SYSCALL_WORK_SYSCALL_AUDIT | \
+ SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
+ ARCH_SYSCALL_WORK_EXIT)
/*
* TIF flags handled in exit_to_user_mode_loop()
@@ -69,7 +61,7 @@
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \
+ _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
@@ -110,6 +102,27 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
#endif
/**
+ * enter_from_user_mode - Establish state when coming from user mode
+ *
+ * Syscall/interrupt entry disables interrupts, but user mode is traced as
+ * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
+ *
+ * 1) Tell lockdep that interrupts are disabled
+ * 2) Invoke context tracking if enabled to reactivate RCU
+ * 3) Trace interrupts off state
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * disabled. The calling code has to be non-instrumentable. When the
+ * function returns all state is correct and interrupts are still
+ * disabled. The subsequent functions can be instrumented.
+ *
+ * This is invoked when there is architecture specific functionality to be
+ * done between establishing state and enabling interrupts. The caller must
+ * enable interrupts before invoking syscall_enter_from_user_mode_work().
+ */
+void enter_from_user_mode(struct pt_regs *regs);
+
+/**
* syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
* @regs: Pointer to current's pt_regs
*
@@ -118,7 +131,8 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
* function returns all state is correct, interrupts are enabled and the
* subsequent functions can be instrumented.
*
- * This handles lockdep, RCU (context tracking) and tracing state.
+ * This handles lockdep, RCU (context tracking) and tracing state, i.e.
+ * the functionality provided by enter_from_user_mode().
*
* This is invoked when there is extra architecture specific functionality
* to be done between establishing state and handling user mode entry work.
@@ -144,8 +158,8 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
*
* It handles the following work items:
*
- * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
- * __secure_computing(), trace_sys_enter()
+ * 1) syscall_work flag dependent invocations of
+ * arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter()
* 2) Invocation of audit_syscall_entry()
*/
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
@@ -259,12 +273,13 @@ static __always_inline void arch_exit_to_user_mode(void) { }
#endif
/**
- * arch_do_signal - Architecture specific signal delivery function
+ * arch_do_signal_or_restart - Architecture specific signal delivery function
* @regs: Pointer to current's pt_regs
+ * @has_signal: whether there is an actual signal to handle
*
* Invoked from exit_to_user_mode_loop().
*/
-void arch_do_signal(struct pt_regs *regs);
+void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal);
/**
* arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit()
@@ -286,6 +301,41 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
#endif
/**
+ * exit_to_user_mode - Fixup state when exiting to user mode
+ *
+ * Syscall/interrupt exit enables interrupts, but the kernel state is
+ * interrupts disabled when this is invoked. Also tell RCU about it.
+ *
+ * 1) Trace interrupts on state
+ * 2) Invoke context tracking if enabled to adjust RCU state
+ * 3) Invoke architecture specific last minute exit code, e.g. speculation
+ * mitigations, etc.: arch_exit_to_user_mode()
+ * 4) Tell lockdep that interrupts are enabled
+ *
+ * Invoked from architecture specific code when syscall_exit_to_user_mode()
+ * is not suitable as the last step before returning to userspace. Must be
+ * invoked with interrupts disabled and the caller must be
+ * non-instrumentable.
+ * The caller has to invoke syscall_exit_to_user_mode_work() before this.
+ */
+void exit_to_user_mode(void);
+
+/**
+ * syscall_exit_to_user_mode_work - Handle work before returning to user mode
+ * @regs: Pointer to current's pt_regs
+ *
+ * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
+ * exit_to_user_mode() to perform the final transition to user mode.
+ *
+ * Calling convention is the same as for syscall_exit_to_user_mode() and it
+ * returns with all work handled and interrupts disabled. The caller must
+ * invoke exit_to_user_mode() before actually switching to user mode to
+ * make the final state transitions. Interrupts must stay disabled between
+ * return from this function and the invocation of exit_to_user_mode().
+ */
+void syscall_exit_to_user_mode_work(struct pt_regs *regs);
+
+/**
* syscall_exit_to_user_mode - Handle work before returning to user mode
* @regs: Pointer to current's pt_regs
*
@@ -307,8 +357,12 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
* - Architecture specific one time work arch_exit_to_user_mode_prepare()
* - Address limit and lockdep checks
*
- * 3) Final transition (lockdep, tracing, context tracking, RCU). Invokes
- * arch_exit_to_user_mode() to handle e.g. speculation mitigations
+ * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
+ * functionality in exit_to_user_mode().
+ *
+ * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
+ * exit_to_user_mode(). This function is preferred unless there is a
+ * compelling architectural reason to use the separate functions.
*/
void syscall_exit_to_user_mode(struct pt_regs *regs);
@@ -341,8 +395,26 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs);
void irqentry_exit_to_user_mode(struct pt_regs *regs);
#ifndef irqentry_state
+/**
+ * struct irqentry_state - Opaque object for exception state storage
+ * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
+ * exit path has to invoke rcu_irq_exit().
+ * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
+ * lockdep state is restored correctly on exit from nmi.
+ *
+ * This opaque object is filled in by the irqentry_*_enter() functions and
+ * must be passed back into the corresponding irqentry_*_exit() functions
+ * when the exception is complete.
+ *
+ * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
+ * and all members private. Descriptions of the members are provided to aid in
+ * the maintenance of the irqentry_*() functions.
+ */
typedef struct irqentry_state {
- bool exit_rcu;
+ union {
+ bool exit_rcu;
+ bool lockdep;
+ };
} irqentry_state_t;
#endif
@@ -392,7 +464,7 @@ void irqentry_exit_cond_resched(void);
* @state: Return value from matching call to irqentry_enter()
*
* Depending on the return target (kernel/user) this runs the necessary
- * preemption and work checks if possible and reguired and returns to
+ * preemption and work checks if possible and required and returns to
* the caller with interrupts disabled and no further work pending.
*
* This is the last action before returning to the low level ASM code which
@@ -402,4 +474,23 @@ void irqentry_exit_cond_resched(void);
*/
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
+/**
+ * irqentry_nmi_enter - Handle NMI entry
+ * @regs: Pointer to current's pt_regs
+ *
+ * Similar to irqentry_enter() but taking care of the NMI constraints.
+ */
+irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
+
+/**
+ * irqentry_nmi_exit - Handle return from NMI handling
+ * @regs: Pointer to pt_regs (NMI entry regs)
+ * @irq_state: Return value from matching call to irqentry_nmi_enter()
+ *
+ * Last action before returning to the low level assembly code.
+ *
+ * Counterpart to irqentry_nmi_enter().
+ */
+void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
+
#endif
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 0cef17afb41a..9b93f8584ff7 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -11,8 +11,8 @@
# define ARCH_XFER_TO_GUEST_MODE_WORK (0)
#endif
-#define XFER_TO_GUEST_MODE_WORK \
- (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define XFER_TO_GUEST_MODE_WORK \
+ (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
_TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
struct kvm_vcpu;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 76cd21fa5501..1cac7efabc83 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -28,6 +28,7 @@
#include <linux/sched/prio.h>
#include <linux/sched/types.h>
#include <linux/signal_types.h>
+#include <linux/syscall_user_dispatch.h>
#include <linux/mm_types_task.h>
#include <linux/task_io_accounting.h>
#include <linux/posix-timers.h>
@@ -987,6 +988,7 @@ struct task_struct {
unsigned int sessionid;
#endif
struct seccomp seccomp;
+ struct syscall_user_dispatch syscall_dispatch;
/* Thread group tracking: */
u64 parent_exec_id;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 1bad18a1d8ba..bd5afa076189 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -353,11 +353,25 @@ static inline int restart_syscall(void)
return -ERESTARTNOINTR;
}
-static inline int signal_pending(struct task_struct *p)
+static inline int task_sigpending(struct task_struct *p)
{
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
+static inline int signal_pending(struct task_struct *p)
+{
+#if defined(TIF_NOTIFY_SIGNAL)
+ /*
+ * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same
+ * behavior in terms of ensuring that we break out of wait loops
+ * so that notify signal callbacks can be processed.
+ */
+ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL)))
+ return 1;
+#endif
+ return task_sigpending(p);
+}
+
static inline int __fatal_signal_pending(struct task_struct *p)
{
return unlikely(sigismember(&p->pending.signal, SIGKILL));
@@ -365,7 +379,7 @@ static inline int __fatal_signal_pending(struct task_struct *p)
static inline int fatal_signal_pending(struct task_struct *p)
{
- return signal_pending(p) && __fatal_signal_pending(p);
+ return task_sigpending(p) && __fatal_signal_pending(p);
}
static inline int signal_pending_state(long state, struct task_struct *p)
@@ -502,7 +516,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize);
static inline void restore_saved_sigmask_unless(bool interrupted)
{
if (interrupted)
- WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+ WARN_ON(!signal_pending(current));
else
restore_saved_sigmask();
}
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 02aef2844c38..47763f3999f7 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -42,7 +42,7 @@ struct seccomp {
extern int __secure_computing(const struct seccomp_data *sd);
static inline int secure_computing(void)
{
- if (unlikely(test_thread_flag(TIF_SECCOMP)))
+ if (unlikely(test_syscall_work(SECCOMP)))
return __secure_computing(NULL);
return 0;
}
diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h
new file mode 100644
index 000000000000..a0ae443fb7df
--- /dev/null
+++ b/include/linux/syscall_user_dispatch.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Collabora Ltd.
+ */
+#ifndef _SYSCALL_USER_DISPATCH_H
+#define _SYSCALL_USER_DISPATCH_H
+
+#include <linux/thread_info.h>
+
+#ifdef CONFIG_GENERIC_ENTRY
+
+struct syscall_user_dispatch {
+ char __user *selector;
+ unsigned long offset;
+ unsigned long len;
+ bool on_dispatch;
+};
+
+int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
+ unsigned long len, char __user *selector);
+
+#define clear_syscall_work_syscall_user_dispatch(tsk) \
+ clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH)
+
+#else
+struct syscall_user_dispatch {};
+
+static inline int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
+ unsigned long len, char __user *selector)
+{
+ return -EINVAL;
+}
+
+static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *tsk)
+{
+}
+
+#endif /* CONFIG_GENERIC_ENTRY */
+
+#endif /* _SYSCALL_USER_DISPATCH_H */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e93e249a4e9b..c8a974cead73 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -35,6 +35,24 @@ enum {
GOOD_STACK,
};
+#ifdef CONFIG_GENERIC_ENTRY
+enum syscall_work_bit {
+ SYSCALL_WORK_BIT_SECCOMP,
+ SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT,
+ SYSCALL_WORK_BIT_SYSCALL_TRACE,
+ SYSCALL_WORK_BIT_SYSCALL_EMU,
+ SYSCALL_WORK_BIT_SYSCALL_AUDIT,
+ SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH,
+};
+
+#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP)
+#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT)
+#define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE)
+#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU)
+#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT)
+#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH)
+#endif
+
#include <asm/thread_info.h>
#ifdef __KERNEL__
@@ -97,6 +115,38 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
#define test_thread_flag(flag) \
test_ti_thread_flag(current_thread_info(), flag)
+#ifdef CONFIG_GENERIC_ENTRY
+#define set_syscall_work(fl) \
+ set_bit(SYSCALL_WORK_BIT_##fl, &current_thread_info()->syscall_work)
+#define test_syscall_work(fl) \
+ test_bit(SYSCALL_WORK_BIT_##fl, &current_thread_info()->syscall_work)
+#define clear_syscall_work(fl) \
+ clear_bit(SYSCALL_WORK_BIT_##fl, &current_thread_info()->syscall_work)
+
+#define set_task_syscall_work(t, fl) \
+ set_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work)
+#define test_task_syscall_work(t, fl) \
+ test_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work)
+#define clear_task_syscall_work(t, fl) \
+ clear_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work)
+
+#else /* CONFIG_GENERIC_ENTRY */
+
+#define set_syscall_work(fl) \
+ set_ti_thread_flag(current_thread_info(), TIF_##fl)
+#define test_syscall_work(fl) \
+ test_ti_thread_flag(current_thread_info(), TIF_##fl)
+#define clear_syscall_work(fl) \
+ clear_ti_thread_flag(current_thread_info(), TIF_##fl)
+
+#define set_task_syscall_work(t, fl) \
+ set_ti_thread_flag(task_thread_info(t), TIF_##fl)
+#define test_task_syscall_work(t, fl) \
+ test_ti_thread_flag(task_thread_info(t), TIF_##fl)
+#define clear_task_syscall_work(t, fl) \
+ clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
+#endif /* !CONFIG_GENERIC_ENTRY */
+
#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b480e1a07ed8..54b925224a13 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -83,11 +83,12 @@ static inline int ptrace_report_syscall(struct pt_regs *regs,
* tracehook_report_syscall_entry - task is about to attempt a system call
* @regs: user register state of current task
*
- * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set,
- * when the current task has just entered the kernel for a system call.
- * Full user register state is available here. Changing the values
- * in @regs can affect the system call number and arguments to be tried.
- * It is safe to block here, preventing the system call from beginning.
+ * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just
+ * entered the kernel for a system call. Full user register state is
+ * available here. Changing the values in @regs can affect the system
+ * call number and arguments to be tried. It is safe to block here,
+ * preventing the system call from beginning.
*
* Returns zero normally, or nonzero if the calling arch code should abort
* the system call. That must prevent normal entry so no system call is
@@ -109,15 +110,15 @@ static inline __must_check int tracehook_report_syscall_entry(
* @regs: user register state of current task
* @step: nonzero if simulating single-step or block-step
*
- * This will be called if %TIF_SYSCALL_TRACE has been set, when the
- * current task has just finished an attempted system call. Full
+ * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when
+ * the current task has just finished an attempted system call. Full
* user register state is available here. It is safe to block here,
* preventing signals from being processed.
*
* If @step is nonzero, this report is also in lieu of the normal
* trap that would follow the system call instruction because
* user_enable_block_step() or user_enable_single_step() was used.
- * In this case, %TIF_SYSCALL_TRACE might not be set.
+ * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set.
*
* Called without locks, just before checking for pending signals.
*/
@@ -198,4 +199,31 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
blkcg_maybe_throttle_current();
}
+/*
+ * Called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This
+ * is currently used by TWA_SIGNAL based task_work, which requires breaking
+ * wait loops to ensure that task_work is noticed and run.
+ */
+static inline void tracehook_notify_signal(void)
+{
+#if defined(TIF_NOTIFY_SIGNAL)
+ clear_thread_flag(TIF_NOTIFY_SIGNAL);
+ smp_mb__after_atomic();
+ if (current->task_works)
+ task_work_run();
+#endif
+}
+
+/*
+ * Called to notify @task of work to be processed in exit_to_user_mode_loop()
+ */
+static inline void set_notify_signal(struct task_struct *task)
+{
+#if defined(TIF_NOTIFY_SIGNAL)
+ if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
+ !wake_up_state(task, TASK_INTERRUPTIBLE))
+ kick_process(task);
+#endif
+}
+
#endif /* <linux/tracehook.h> */
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index dc8ac27d27c1..8e193f3a33b3 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -37,10 +37,10 @@ struct syscall_metadata {
#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS)
static inline void syscall_tracepoint_update(struct task_struct *p)
{
- if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
- set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+ if (test_syscall_work(SYSCALL_TRACEPOINT))
+ set_task_syscall_work(p, SYSCALL_TRACEPOINT);
else
- clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+ clear_task_syscall_work(p, SYSCALL_TRACEPOINT);
}
#else
static inline void syscall_tracepoint_update(struct task_struct *p)
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 7aacf9389010..d2597000407a 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -286,7 +286,8 @@ typedef struct siginfo {
* SIGSYS si_codes
*/
#define SYS_SECCOMP 1 /* seccomp triggered */
-#define NSIGSYS 1
+#define SYS_USER_DISPATCH 2 /* syscall user dispatch triggered */
+#define NSIGSYS 2
/*
* SIGEMT si_codes
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 7f0827705c9a..90deb41c8a34 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -247,4 +247,9 @@ struct prctl_mm_map {
#define PR_SET_IO_FLUSHER 57
#define PR_GET_IO_FLUSHER 58
+/* Dispatch syscalls to a userspace handler */
+#define PR_SET_SYSCALL_USER_DISPATCH 59
+# define PR_SYS_DISPATCH_OFF 0
+# define PR_SYS_DISPATCH_ON 1
+
#endif /* _LINUX_PRCTL_H */