summaryrefslogtreecommitdiff
path: root/include/linux/rseq.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-05-09 05:42:10 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2026-05-09 05:42:10 +0300
commit7f0023215262221ca08d56be2203e8a4770be033 (patch)
tree33c8dec5486e41d6b1e29dbaab172b3cbc0ddc41 /include/linux/rseq.h
parente5cf0260a7472b4f34a46c418c14bec272aac404 (diff)
parent9f6d929ee2c6f0266edb564bcd2bd47fd6e884a8 (diff)
downloadlinux-7f0023215262221ca08d56be2203e8a4770be033.tar.xz
Merge tag 'sched-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar: - Fix spurious failures in rseq self-tests (Mark Brown) - Fix rseq rseq::cpu_id_start ABI regression due to TCMalloc's creative use of the supposedly read-only field The fix is to introduce a new ABI variant based on a new (larger) rseq area registration size, to keep the TCMalloc use of rseq backwards compatible on new kernels (Thomas Gleixner) - Fix wakeup_preempt_fair() for not waking up task (Vincent Guittot) - Fix s64 mult overflow in vruntime_eligible() (Zhan Xusheng) * tag 'sched-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/fair: Fix wakeup_preempt_fair() for not waking up task sched/fair: Fix overflow in vruntime_eligible() selftests/rseq: Expand for optimized RSEQ ABI v2 rseq: Reenable performance optimizations conditionally rseq: Implement read only ABI enforcement for optimized RSEQ V2 mode selftests/rseq: Validate legacy behavior selftests/rseq: Make registration flexible for legacy and optimized mode selftests/rseq: Skip tests if time slice extensions are not available rseq: Revert to historical performance killing behaviour rseq: Don't advertise time slice extensions if disabled rseq: Protect rseq_reset() against interrupts rseq: Set rseq::cpu_id_start to 0 on unregistration selftests/rseq: Don't run tests with runner scripts outside of the scripts
Diffstat (limited to 'include/linux/rseq.h')
-rw-r--r--include/linux/rseq.h37
1 files changed, 26 insertions, 11 deletions
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index b9d62fc2140d..7ef79b25e714 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -9,6 +9,11 @@
void __rseq_handle_slowpath(struct pt_regs *regs);
+static __always_inline bool rseq_v2(struct task_struct *t)
+{
+ return IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY) && likely(t->rseq.event.has_rseq > 1);
+}
+
/* Invoked from resume_user_mode_work() */
static inline void rseq_handle_slowpath(struct pt_regs *regs)
{
@@ -16,8 +21,7 @@ static inline void rseq_handle_slowpath(struct pt_regs *regs)
if (current->rseq.event.slowpath)
__rseq_handle_slowpath(regs);
} else {
- /* '&' is intentional to spare one conditional branch */
- if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
+ if (current->rseq.event.sched_switch && current->rseq.event.has_rseq)
__rseq_handle_slowpath(regs);
}
}
@@ -30,9 +34,9 @@ void __rseq_signal_deliver(int sig, struct pt_regs *regs);
*/
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
- if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
- /* '&' is intentional to spare one conditional branch */
- if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
+ if (rseq_v2(current)) {
+ /* has_rseq is implied in rseq_v2() */
+ if (current->rseq.event.user_irq)
__rseq_signal_deliver(ksig->sig, regs);
} else {
if (current->rseq.event.has_rseq)
@@ -50,15 +54,22 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t)
{
struct rseq_event *ev = &t->rseq.event;
- if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /*
+ * Only apply the user_irq optimization for RSEQ ABI V2 registrations.
+ * Legacy users like TCMalloc rely on the original ABI V1 behaviour
+ * which updates IDs on every context swtich.
+ */
+ if (rseq_v2(t)) {
/*
- * Avoid a boat load of conditionals by using simple logic
- * to determine whether NOTIFY_RESUME needs to be raised.
+ * Avoid a boat load of conditionals by using simple logic to
+ * determine whether TIF_NOTIFY_RESUME or TIF_RSEQ needs to be
+ * raised.
*
- * It's required when the CPU or MM CID has changed or
- * the entry was from user space.
+ * It's required when the CPU or MM CID has changed or the entry
+ * was via interrupt from user space. ev->has_rseq does not have
+ * to be evaluated here because rseq_v2() implies has_rseq.
*/
- bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
+ bool raise = ev->user_irq | ev->ids_changed;
if (raise) {
ev->sched_switch = true;
@@ -66,6 +77,7 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t)
}
} else {
if (ev->has_rseq) {
+ t->rseq.event.ids_changed = true;
t->rseq.event.sched_switch = true;
rseq_raise_notify_resume(t);
}
@@ -119,6 +131,8 @@ static inline void rseq_virt_userspace_exit(void)
static inline void rseq_reset(struct task_struct *t)
{
+ /* Protect against preemption and membarrier IPI */
+ guard(irqsave)();
memset(&t->rseq, 0, sizeof(t->rseq));
t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
}
@@ -159,6 +173,7 @@ static inline unsigned int rseq_alloc_align(void)
}
#else /* CONFIG_RSEQ */
+static inline bool rseq_v2(struct task_struct *t) { return false; }
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }