From d7a5da7a0f7fa7ff081140c4f6f971db98882703 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Dec 2025 17:52:04 +0100 Subject: rseq: Add fields and constants for time slice extension Aside of a Kconfig knob add the following items: - Two flag bits for the rseq user space ABI, which allow user space to query the availability and enablement without a syscall. - A new member to the user space ABI struct rseq, which is going to be used to communicate request and grant between kernel and user space. - A rseq state struct to hold the kernel state of this - Documentation of the new mechanism Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20251215155708.669472597@linutronix.de --- include/uapi/linux/rseq.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 1b76d508400c..6afc219d1545 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -23,9 +23,15 @@ enum rseq_flags { }; enum rseq_cs_flags_bit { + /* Historical and unsupported bits */ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, + /* (3) Intentional gap to put new bits into a separate byte */ + + /* User read only feature flags */ + RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT = 4, + RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT = 5, }; enum rseq_cs_flags { @@ -35,6 +41,11 @@ enum rseq_cs_flags { (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), + + RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE = + (1U << RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT), + RSEQ_CS_FLAG_SLICE_EXT_ENABLED = + (1U << RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT), }; /* @@ -53,6 +64,27 @@ struct rseq_cs { __u64 abort_ip; } __attribute__((aligned(4 * sizeof(__u64)))); +/** + * rseq_slice_ctrl - Time slice extension control structure + * @all: Compound value + * @request: Request for a time slice extension + * @granted: Granted time slice extension + * + * @request is set by user space and can be cleared by user space or kernel + * space. @granted is set and cleared by the kernel and must only be read + * by user space. + */ +struct rseq_slice_ctrl { + union { + __u32 all; + struct { + __u8 request; + __u8 granted; + __u16 __reserved; + }; + }; +}; + /* * struct rseq is aligned on 4 * 8 bytes to ensure it is always * contained within a single cache-line. @@ -141,6 +173,12 @@ struct rseq { */ __u32 mm_cid; + /* + * Time slice extension control structure. CPU local updates from + * kernel and user space. + */ + struct rseq_slice_ctrl slice_ctrl; + /* * Flexible array member at end of structure, after last feature field. */ -- cgit v1.2.3 From 28621ec2d46c6adf7d33a6facbd83e2fa566bd34 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Dec 2025 17:52:12 +0100 Subject: rseq: Add prctl() to enable time slice extensions Implement a prctl() so that tasks can enable the time slice extension mechanism. This fails, when time slice extensions are disabled at compile time or on the kernel command line and when no rseq pointer is registered in the kernel. That allows to implement a single trivial check in the exit to user mode hotpath, to decide whether the whole mechanism needs to be invoked. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20251215155708.858717691@linutronix.de --- include/linux/rseq.h | 9 ++++++++ include/uapi/linux/prctl.h | 10 +++++++++ kernel/rseq.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/sys.c | 6 ++++++ 4 files changed, 77 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/linux/rseq.h b/include/linux/rseq.h index 2266f4dc77b6..3c194a02ad0a 100644 --- a/include/linux/rseq.h +++ b/include/linux/rseq.h @@ -163,4 +163,13 @@ void rseq_syscall(struct pt_regs *regs); static inline void rseq_syscall(struct pt_regs *regs) { } #endif /* !CONFIG_DEBUG_RSEQ */ +#ifdef CONFIG_RSEQ_SLICE_EXTENSION +int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3); +#else /* CONFIG_RSEQ_SLICE_EXTENSION */ +static inline int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3) +{ + return -ENOTSUPP; +} +#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ + #endif /* _LINUX_RSEQ_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 51c4e8c82b1e..79944b7ae50a 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -386,4 +386,14 @@ struct prctl_mm_map { # define PR_FUTEX_HASH_SET_SLOTS 1 # define PR_FUTEX_HASH_GET_SLOTS 2 +/* RSEQ time slice extensions */ +#define PR_RSEQ_SLICE_EXTENSION 79 +# define PR_RSEQ_SLICE_EXTENSION_GET 1 +# define PR_RSEQ_SLICE_EXTENSION_SET 2 +/* + * Bits for RSEQ_SLICE_EXTENSION_GET/SET + * PR_RSEQ_SLICE_EXT_ENABLE: Enable + */ +# define PR_RSEQ_SLICE_EXT_ENABLE 0x01 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/rseq.c b/kernel/rseq.c index 415d75b6df2c..09848bb14ec2 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -71,6 +71,7 @@ #define RSEQ_BUILD_SLOW_PATH #include +#include #include #include #include @@ -501,6 +502,57 @@ efault: #ifdef CONFIG_RSEQ_SLICE_EXTENSION DEFINE_STATIC_KEY_TRUE(rseq_slice_extension_key); +int rseq_slice_extension_prctl(unsigned long arg2, unsigned long arg3) +{ + switch (arg2) { + case PR_RSEQ_SLICE_EXTENSION_GET: + if (arg3) + return -EINVAL; + return current->rseq.slice.state.enabled ? PR_RSEQ_SLICE_EXT_ENABLE : 0; + + case PR_RSEQ_SLICE_EXTENSION_SET: { + u32 rflags, valid = RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; + bool enable = !!(arg3 & PR_RSEQ_SLICE_EXT_ENABLE); + + if (arg3 & ~PR_RSEQ_SLICE_EXT_ENABLE) + return -EINVAL; + if (!rseq_slice_extension_enabled()) + return -ENOTSUPP; + if (!current->rseq.usrptr) + return -ENXIO; + + /* No change? */ + if (enable == !!current->rseq.slice.state.enabled) + return 0; + + if (get_user(rflags, ¤t->rseq.usrptr->flags)) + goto die; + + if (current->rseq.slice.state.enabled) + valid |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED; + + if ((rflags & valid) != valid) + goto die; + + rflags &= ~RSEQ_CS_FLAG_SLICE_EXT_ENABLED; + rflags |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; + if (enable) + rflags |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED; + + if (put_user(rflags, ¤t->rseq.usrptr->flags)) + goto die; + + current->rseq.slice.state.enabled = enable; + return 0; + } + default: + return -EINVAL; + } +die: + force_sig(SIGSEGV); + return -EFAULT; +} + static int __init rseq_slice_cmdline(char *str) { bool on; diff --git a/kernel/sys.c b/kernel/sys.c index 8b58eece4e58..af71987df81c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -2868,6 +2869,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_FUTEX_HASH: error = futex_hash_prctl(arg2, arg3, arg4); break; + case PR_RSEQ_SLICE_EXTENSION: + if (arg4 || arg5) + return -EINVAL; + error = rseq_slice_extension_prctl(arg2, arg3); + break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); error = -EINVAL; -- cgit v1.2.3 From d6200245c75e832af2087bc60ba2e6641a90eee9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 19 Jan 2026 11:23:57 +0100 Subject: rseq: Allow registering RSEQ with slice extension Since glibc cares about the number of syscalls required to initialize a new thread, allow initializing rseq with slice extension on. This avoids having to do another prctl(). Requested-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260121143207.814193010@infradead.org --- include/uapi/linux/rseq.h | 3 ++- kernel/rseq.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 6afc219d1545..863c4a00a66b 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -19,7 +19,8 @@ enum rseq_cpu_id_state { }; enum rseq_flags { - RSEQ_FLAG_UNREGISTER = (1 << 0), + RSEQ_FLAG_UNREGISTER = (1 << 0), + RSEQ_FLAG_SLICE_EXT_DEFAULT_ON = (1 << 1), }; enum rseq_cs_flags_bit { diff --git a/kernel/rseq.c b/kernel/rseq.c index 275d70114107..1c5490a172a8 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -424,7 +424,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 return 0; } - if (unlikely(flags)) + if (unlikely(flags & ~(RSEQ_FLAG_SLICE_EXT_DEFAULT_ON))) return -EINVAL; if (current->rseq.usrptr) { @@ -459,8 +459,12 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 if (!access_ok(rseq, rseq_len)) return -EFAULT; - if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) + if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) { rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE; + if (rseq_slice_extension_enabled() && + (flags & RSEQ_FLAG_SLICE_EXT_DEFAULT_ON)) + rseqfl |= RSEQ_CS_FLAG_SLICE_EXT_ENABLED; + } scoped_user_write_access(rseq, efault) { /* @@ -488,6 +492,10 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32 current->rseq.len = rseq_len; current->rseq.sig = sig; +#ifdef CONFIG_RSEQ_SLICE_EXTENSION + current->rseq.slice.state.enabled = !!(rseqfl & RSEQ_CS_FLAG_SLICE_EXT_ENABLED); +#endif + /* * If rseq was previously inactive, and has just been * registered, ensure the cpu_id_start and cpu_id fields -- cgit v1.2.3