From c27cea4416a396a1c5b6b3529dd925f92a69e7d3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 29 Dec 2025 11:10:56 -0800 Subject: rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast This commit saves more than 500 lines of RCU code by re-implementing RCU Tasks Trace in terms of SRCU-fast. Follow-up work will remove more code that does not cause problems by its presence, but that is no longer required. This variant places smp_mb() in rcu_read_{,un}lock_trace(), and in the same place that srcu_read_{,un}lock() would put them. These smp_mb() calls will be removed on common-case architectures in a later commit. In the meantime, it serves to enforce ordering between the underlying srcu_read_{,un}lock_fast() markers and the intervening critical section, even on architectures that permit attaching tracepoints on regions of code not watched by RCU. Such architectures defeat SRCU-fast's use of implicit single-instruction, interrupts-disabled, and atomic-operation RCU read-side critical sections, which have no effect when RCU is not watching. The aforementioned later commit will insert these smp_mb() calls only on architectures that have not used noinstr to prevent attaching tracepoints to code where RCU is not watching. [ paulmck: Apply kernel test robot, Boqun Feng, and Zqiang feedback. ] [ paulmck: Split out Tiny SRCU fixes per Andrii Nakryiko feedback. ] Signed-off-by: Paul E. 
McKenney Tested-by: kernel test robot Cc: Andrii Nakryiko Cc: Alexei Starovoitov Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Reviewed-by: Joel Fernandes Signed-off-by: Boqun Feng --- include/linux/rcupdate_trace.h | 107 ++++++++++++++++++++++++++++++----------- include/linux/sched.h | 1 + 2 files changed, 80 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index e6c44eb428ab..3f46cbe67000 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -12,28 +12,28 @@ #include #include -extern struct lockdep_map rcu_trace_lock_map; +#ifdef CONFIG_TASKS_TRACE_RCU +extern struct srcu_struct rcu_tasks_trace_srcu_struct; +#endif // #ifdef CONFIG_TASKS_TRACE_RCU -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) static inline int rcu_read_lock_trace_held(void) { - return lock_is_held(&rcu_trace_lock_map); + return srcu_read_lock_held(&rcu_tasks_trace_srcu_struct); } -#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) static inline int rcu_read_lock_trace_held(void) { return 1; } -#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif // #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) #ifdef CONFIG_TASKS_TRACE_RCU -void rcu_read_unlock_trace_special(struct task_struct *t); - /** * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section * @@ -50,12 +50,14 @@ static inline void rcu_read_lock_trace(void) { struct task_struct *t = current; - WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1); - barrier(); - if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && - t->trc_reader_special.b.need_mb) - smp_mb(); // Pairs with update-side barriers - rcu_lock_acquire(&rcu_trace_lock_map); + if (t->trc_reader_nesting++) { + // In case we interrupted a Tasks Trace RCU reader. 
+ rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); + return; + } + barrier(); // nesting before scp to protect against interrupt handler. + t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); + smp_mb(); // Placeholder for more selective ordering } /** @@ -69,26 +71,75 @@ static inline void rcu_read_lock_trace(void) */ static inline void rcu_read_unlock_trace(void) { - int nesting; + struct srcu_ctr __percpu *scp; struct task_struct *t = current; - rcu_lock_release(&rcu_trace_lock_map); - nesting = READ_ONCE(t->trc_reader_nesting) - 1; - barrier(); // Critical section before disabling. - // Disable IPI-based setting of .need_qs. - WRITE_ONCE(t->trc_reader_nesting, INT_MIN + nesting); - if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) { - WRITE_ONCE(t->trc_reader_nesting, nesting); - return; // We assume shallow reader nesting. - } - WARN_ON_ONCE(nesting != 0); - rcu_read_unlock_trace_special(t); + smp_mb(); // Placeholder for more selective ordering + scp = t->trc_reader_scp; + barrier(); // scp before nesting to protect against interrupt handler. + if (!--t->trc_reader_nesting) + srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); + else + srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); +} + +/** + * call_rcu_tasks_trace() - Queue a callback trace task-based grace period + * @rhp: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a trace rcu-tasks + * grace period elapses, in other words after all currently executing + * trace rcu-tasks read-side critical sections have completed. These + * read-side critical sections are delimited by calls to rcu_read_lock_trace() + * and rcu_read_unlock_trace(). + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. 
+ */ +static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func) +{ + call_srcu(&rcu_tasks_trace_srcu_struct, rhp, func); +} + +/** + * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period + * + * Control will return to the caller some time after a trace rcu-tasks + * grace period has elapsed, in other words after all currently executing + * trace rcu-tasks read-side critical sections have elapsed. These read-side + * critical sections are delimited by calls to rcu_read_lock_trace() + * and rcu_read_unlock_trace(). + * + * This is a very specialized primitive, intended only for a few uses in + * tracing and other situations requiring manipulation of function preambles + * and profiling hooks. The synchronize_rcu_tasks_trace() function is not + * (yet) intended for heavy use from multiple CPUs. + * + * See the description of synchronize_rcu() for more detailed information + * on memory ordering guarantees. + */ +static inline void synchronize_rcu_tasks_trace(void) +{ + synchronize_srcu(&rcu_tasks_trace_srcu_struct); } -void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); -void synchronize_rcu_tasks_trace(void); -void rcu_barrier_tasks_trace(void); +/** + * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks. + * + * Note that rcu_barrier_tasks_trace() is not obligated to actually wait, + * for example, if there are no pending callbacks. + */ +static inline void rcu_barrier_tasks_trace(void) +{ + srcu_barrier(&rcu_tasks_trace_srcu_struct); +} + +// Placeholders to enable stepwise transition. 
+void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq); +void __init rcu_tasks_trace_suppress_unused(void); struct task_struct *get_rcu_tasks_trace_gp_kthread(void); + #else /* * The BPF JIT forms these addresses even when it doesn't call these diff --git a/include/linux/sched.h b/include/linux/sched.h index d395f2810fac..fe39d422b37d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -945,6 +945,7 @@ struct task_struct { #ifdef CONFIG_TASKS_TRACE_RCU int trc_reader_nesting; + struct srcu_ctr __percpu *trc_reader_scp; int trc_ipi_to_cpu; union rcu_special trc_reader_special; struct list_head trc_holdout_list; -- cgit v1.2.3 From a73fc3dcc60b6d7a2075e2fbdca64fd53600f855 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 29 Dec 2025 11:10:58 -0800 Subject: rcu: Clean up after the SRCU-fastification of RCU Tasks Trace Now that RCU Tasks Trace has been re-implemented in terms of SRCU-fast, the ->trc_ipi_to_cpu, ->trc_blkd_cpu, ->trc_blkd_node, ->trc_holdout_list, and ->trc_reader_special task_struct fields are no longer used. In addition, the rcu_tasks_trace_qs(), rcu_tasks_trace_qs_blkd(), exit_tasks_rcu_finish_trace(), and rcu_spawn_tasks_trace_kthread(), show_rcu_tasks_trace_gp_kthread(), rcu_tasks_trace_get_gp_data(), rcu_tasks_trace_torture_stats_print(), and get_rcu_tasks_trace_gp_kthread() functions and all the other functions that they invoke are no longer used. Also, the TRC_NEED_QS and TRC_NEED_QS_CHECKED CPP macros are no longer used. Neither are the rcu_tasks_trace_lazy_ms and rcu_task_ipi_delay rcupdate module parameters and the TASKS_TRACE_RCU_READ_MB Kconfig option. This commit therefore removes all of them. [ paulmck: Apply Alexei Starovoitov feedback. ] Signed-off-by: Paul E. 
McKenney Cc: Andrii Nakryiko Cc: Alexei Starovoitov Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Reviewed-by: Joel Fernandes Signed-off-by: Boqun Feng --- Documentation/admin-guide/kernel-parameters.txt | 15 ---- include/linux/rcupdate.h | 31 +-------- include/linux/rcupdate_trace.h | 2 - include/linux/sched.h | 5 -- init/init_task.c | 3 - kernel/fork.c | 3 - kernel/rcu/Kconfig | 18 ----- kernel/rcu/rcu.h | 9 --- kernel/rcu/rcuscale.c | 7 -- kernel/rcu/rcutorture.c | 2 - kernel/rcu/tasks.h | 79 +--------------------- .../selftests/rcutorture/configs/rcu/TRACE01 | 1 - .../selftests/rcutorture/configs/rcu/TRACE02 | 1 - 13 files changed, 2 insertions(+), 174 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a8d0afde7f85..1b8e5cadbecb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6249,13 +6249,6 @@ Kernel parameters dynamically) adjusted. This parameter is intended for use in testing. - rcupdate.rcu_task_ipi_delay= [KNL] - Set time in jiffies during which RCU tasks will - avoid sending IPIs, starting with the beginning - of a given grace period. Setting a large - number avoids disturbing real-time workloads, - but lengthens grace periods. - rcupdate.rcu_task_lazy_lim= [KNL] Number of callbacks on a given CPU that will cancel laziness on that CPU. Use -1 to disable @@ -6299,14 +6292,6 @@ Kernel parameters of zero will disable batching. Batching is always disabled for synchronize_rcu_tasks(). - rcupdate.rcu_tasks_trace_lazy_ms= [KNL] - Set timeout in milliseconds RCU Tasks - Trace asynchronous callback batching for - call_rcu_tasks_trace(). A negative value - will take the default. A value of zero will - disable batching. Batching is always disabled - for synchronize_rcu_tasks_trace(). 
- rcupdate.rcu_self_test= [KNL] Run the RCU early boot self tests diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c5b30054cd01..bd5a420cf09a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -175,36 +175,7 @@ void rcu_tasks_torture_stats_print(char *tt, char *tf); # define synchronize_rcu_tasks synchronize_rcu # endif -# ifdef CONFIG_TASKS_TRACE_RCU -// Bits for ->trc_reader_special.b.need_qs field. -#define TRC_NEED_QS 0x1 // Task needs a quiescent state. -#define TRC_NEED_QS_CHECKED 0x2 // Task has been checked for needing quiescent state. - -u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new); -void rcu_tasks_trace_qs_blkd(struct task_struct *t); - -# define rcu_tasks_trace_qs(t) \ - do { \ - int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ - \ - if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \ - likely(!___rttq_nesting)) { \ - rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \ - } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ - !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ - rcu_tasks_trace_qs_blkd(t); \ - } \ - } while (0) -void rcu_tasks_trace_torture_stats_print(char *tt, char *tf); -# else -# define rcu_tasks_trace_qs(t) do { } while (0) -# endif - -#define rcu_tasks_qs(t, preempt) \ -do { \ - rcu_tasks_classic_qs((t), (preempt)); \ - rcu_tasks_trace_qs(t); \ -} while (0) +#define rcu_tasks_qs(t, preempt) rcu_tasks_classic_qs((t), (preempt)) # ifdef CONFIG_TASKS_RUDE_RCU void synchronize_rcu_tasks_rude(void); diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 3f46cbe67000..0bd47f12ecd1 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -136,9 +136,7 @@ static inline void rcu_barrier_tasks_trace(void) } // Placeholders to enable stepwise transition. 
-void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq); void __init rcu_tasks_trace_suppress_unused(void); -struct task_struct *get_rcu_tasks_trace_gp_kthread(void); #else /* diff --git a/include/linux/sched.h b/include/linux/sched.h index fe39d422b37d..56156643ccac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -946,11 +946,6 @@ struct task_struct { #ifdef CONFIG_TASKS_TRACE_RCU int trc_reader_nesting; struct srcu_ctr __percpu *trc_reader_scp; - int trc_ipi_to_cpu; - union rcu_special trc_reader_special; - struct list_head trc_holdout_list; - struct list_head trc_blkd_node; - int trc_blkd_cpu; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ struct sched_info sched_info; diff --git a/init/init_task.c b/init/init_task.c index 49b13d7c3985..db92c404d59a 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -195,9 +195,6 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { #endif #ifdef CONFIG_TASKS_TRACE_RCU .trc_reader_nesting = 0, - .trc_reader_special.s = 0, - .trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list), - .trc_blkd_node = LIST_HEAD_INIT(init_task.trc_blkd_node), #endif #ifdef CONFIG_CPUSETS .mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq, diff --git a/kernel/fork.c b/kernel/fork.c index b1f3915d5f8e..d7ed107cbb47 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1828,9 +1828,6 @@ static inline void rcu_copy_process(struct task_struct *p) #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU p->trc_reader_nesting = 0; - p->trc_reader_special.s = 0; - INIT_LIST_HEAD(&p->trc_holdout_list); - INIT_LIST_HEAD(&p->trc_blkd_node); #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ } diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 4d9b21f69eaa..8d5a1ecb7d56 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -313,24 +313,6 @@ config RCU_NOCB_CPU_CB_BOOST Say Y here if you want to set RT priority for offloading kthreads. 
Say N here if you are building a !PREEMPT_RT kernel and are unsure. -config TASKS_TRACE_RCU_READ_MB - bool "Tasks Trace RCU readers use memory barriers in user and idle" - depends on RCU_EXPERT && TASKS_TRACE_RCU - default PREEMPT_RT || NR_CPUS < 8 - help - Use this option to further reduce the number of IPIs sent - to CPUs executing in userspace or idle during tasks trace - RCU grace periods. Given that a reasonable setting of - the rcupdate.rcu_task_ipi_delay kernel boot parameter - eliminates such IPIs for many workloads, proper setting - of this Kconfig option is important mostly for aggressive - real-time installations and for battery-powered devices, - hence the default chosen above. - - Say Y here if you hate IPIs. - Say N here if you hate read-side memory barriers. - Take the default if you are unsure. - config RCU_LAZY bool "RCU callback lazy invocation functionality" depends on RCU_NOCB_CPU diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 9cf01832a6c3..dc5d614b372c 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -544,10 +544,6 @@ struct task_struct *get_rcu_tasks_rude_gp_kthread(void); void rcu_tasks_rude_get_gp_data(int *flags, unsigned long *gp_seq); #endif // # ifdef CONFIG_TASKS_RUDE_RCU -#ifdef CONFIG_TASKS_TRACE_RCU -void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq); -#endif - #ifdef CONFIG_TASKS_RCU_GENERIC void tasks_cblist_init_generic(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ @@ -673,11 +669,6 @@ void show_rcu_tasks_rude_gp_kthread(void); #else static inline void show_rcu_tasks_rude_gp_kthread(void) {} #endif -#if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_TRACE_RCU) -void show_rcu_tasks_trace_gp_kthread(void); -#else -static inline void show_rcu_tasks_trace_gp_kthread(void) {} -#endif #ifdef CONFIG_TINY_RCU static inline bool rcu_cpu_beenfullyonline(int cpu) { return true; } diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 7484d8ad5767..1c50f89fbd6f 100644 --- 
a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -400,11 +400,6 @@ static void tasks_trace_scale_read_unlock(int idx) rcu_read_unlock_trace(); } -static void rcu_tasks_trace_scale_stats(void) -{ - rcu_tasks_trace_torture_stats_print(scale_type, SCALE_FLAG); -} - static struct rcu_scale_ops tasks_tracing_ops = { .ptype = RCU_TASKS_FLAVOR, .init = rcu_sync_scale_init, @@ -416,8 +411,6 @@ static struct rcu_scale_ops tasks_tracing_ops = { .gp_barrier = rcu_barrier_tasks_trace, .sync = synchronize_rcu_tasks_trace, .exp_sync = synchronize_rcu_tasks_trace, - .rso_gp_kthread = get_rcu_tasks_trace_gp_kthread, - .stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_trace_scale_stats, .name = "tasks-tracing" }; diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 07e51974b06b..78a6ebe77d35 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1180,8 +1180,6 @@ static struct rcu_torture_ops tasks_tracing_ops = { .exp_sync = synchronize_rcu_tasks_trace, .call = call_rcu_tasks_trace, .cb_barrier = rcu_barrier_tasks_trace, - .gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread, - .get_gp_data = rcu_tasks_trace_get_gp_data, .cbflood_max = 50000, .irq_capable = 1, .slow_gps = 1, diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 1fe789c99f36..1249b47f0a8d 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -161,11 +161,6 @@ static void tasks_rcu_exit_srcu_stall(struct timer_list *unused); static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall); #endif -/* Avoid IPIing CPUs early in the grace period. */ -#define RCU_TASK_IPI_DELAY (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) ? HZ / 2 : 0) -static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY; -module_param(rcu_task_ipi_delay, int, 0644); - /* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. 
*/ #define RCU_TASK_BOOT_STALL_TIMEOUT (HZ * 30) #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) @@ -800,8 +795,6 @@ static void rcu_tasks_torture_stats_print_generic(struct rcu_tasks *rtp, char *t #endif // #ifndef CONFIG_TINY_RCU -static void exit_tasks_rcu_finish_trace(struct task_struct *t); - #if defined(CONFIG_TASKS_RCU) //////////////////////////////////////////////////////////////////////// @@ -1321,13 +1314,11 @@ void exit_tasks_rcu_finish(void) raw_spin_lock_irqsave_rcu_node(rtpcp, flags); list_del_init(&t->rcu_tasks_exit_list); raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); - - exit_tasks_rcu_finish_trace(t); } #else /* #ifdef CONFIG_TASKS_RCU */ void exit_tasks_rcu_start(void) { } -void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); } +void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_RUDE_RCU @@ -1475,69 +1466,6 @@ void __init rcu_tasks_trace_suppress_unused(void) #endif // #ifndef CONFIG_TINY_RCU } -/* - * Do a cmpxchg() on ->trc_reader_special.b.need_qs, allowing for - * the four-byte operand-size restriction of some platforms. - * - * Returns the old value, which is often ignored. - */ -u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new) -{ - return cmpxchg(&t->trc_reader_special.b.need_qs, old, new); -} -EXPORT_SYMBOL_GPL(rcu_trc_cmpxchg_need_qs); - -/* Add a newly blocked reader task to its CPU's list. */ -void rcu_tasks_trace_qs_blkd(struct task_struct *t) -{ -} -EXPORT_SYMBOL_GPL(rcu_tasks_trace_qs_blkd); - -/* Communicate task state back to the RCU tasks trace stall warning request. */ -struct trc_stall_chk_rdr { - int nesting; - int ipi_to_cpu; - u8 needqs; -}; - -/* Report any needed quiescent state for this exiting task. 
*/ -static void exit_tasks_rcu_finish_trace(struct task_struct *t) -{ -} - -int rcu_tasks_trace_lazy_ms = -1; -module_param(rcu_tasks_trace_lazy_ms, int, 0444); - -static int __init rcu_spawn_tasks_trace_kthread(void) -{ - return 0; -} - -#if !defined(CONFIG_TINY_RCU) -void show_rcu_tasks_trace_gp_kthread(void) -{ -} -EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread); - -void rcu_tasks_trace_torture_stats_print(char *tt, char *tf) -{ -} -EXPORT_SYMBOL_GPL(rcu_tasks_trace_torture_stats_print); -#endif // !defined(CONFIG_TINY_RCU) - -struct task_struct *get_rcu_tasks_trace_gp_kthread(void) -{ - return NULL; -} -EXPORT_SYMBOL_GPL(get_rcu_tasks_trace_gp_kthread); - -void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq) -{ -} -EXPORT_SYMBOL_GPL(rcu_tasks_trace_get_gp_data); - -#else /* #ifdef CONFIG_TASKS_TRACE_RCU */ -static void exit_tasks_rcu_finish_trace(struct task_struct *t) { } #endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */ #ifndef CONFIG_TINY_RCU @@ -1545,7 +1473,6 @@ void show_rcu_tasks_gp_kthreads(void) { show_rcu_tasks_classic_gp_kthread(); show_rcu_tasks_rude_gp_kthread(); - show_rcu_tasks_trace_gp_kthread(); } #endif /* #ifndef CONFIG_TINY_RCU */ @@ -1684,10 +1611,6 @@ static int __init rcu_init_tasks_generic(void) rcu_spawn_tasks_rude_kthread(); #endif -#ifdef CONFIG_TASKS_TRACE_RCU - rcu_spawn_tasks_trace_kthread(); -#endif - // Run the self-tests. 
rcu_tasks_initiate_self_tests(); diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 index 85b407467454..18efab346381 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 @@ -10,5 +10,4 @@ CONFIG_PROVE_LOCKING=n #CHECK#CONFIG_PROVE_RCU=n CONFIG_FORCE_TASKS_TRACE_RCU=y #CHECK#CONFIG_TASKS_TRACE_RCU=y -CONFIG_TASKS_TRACE_RCU_READ_MB=y CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 index 9003c56cd764..8da390e82829 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 @@ -9,6 +9,5 @@ CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y CONFIG_FORCE_TASKS_TRACE_RCU=y #CHECK#CONFIG_TASKS_TRACE_RCU=y -CONFIG_TASKS_TRACE_RCU_READ_MB=n CONFIG_RCU_EXPERT=y CONFIG_DEBUG_OBJECTS=y -- cgit v1.2.3 From 1a72f4bb6f3eaa5af674cb10802f7064bf71d10a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 29 Dec 2025 11:11:00 -0800 Subject: rcu: Add noinstr-fast rcu_read_{,un}lock_tasks_trace() APIs When expressing RCU Tasks Trace in terms of SRCU-fast, it was necessary to keep a nesting count and per-CPU srcu_ctr structure pointer in the task_struct structure, which is slow to access. But an alternative is to instead make rcu_read_lock_tasks_trace() and rcu_read_unlock_tasks_trace(), which match the underlying SRCU-fast semantics, avoiding the task_struct accesses. When all callers have switched to the new API, the previous rcu_read_lock_trace() and rcu_read_unlock_trace() APIs will be removed. The rcu_read_{,un}lock_{,tasks_}trace() functions need to use smp_mb() only if invoked where RCU is not watching, that is, from locations where a call to rcu_is_watching() would return false. 
In architectures that define the ARCH_WANTS_NO_INSTR Kconfig option, use of noinstr and friends ensures that tracing happens only where RCU is watching, so those architectures can dispense entirely with the read-side calls to smp_mb(). Other architectures include these read-side calls by default, but in many installations there might be either larger than average tolerance for risk, prohibition of removing tracing on a running system, or careful review and approval of removing of tracing. Such installations can build their kernels with CONFIG_TASKS_TRACE_RCU_NO_MB=y to avoid those read-side calls to smp_mb(), thus accepting responsibility for run-time removal of tracing from code regions that RCU is not watching. Those wishing to disable read-side memory barriers for an entire architecture can select this TASKS_TRACE_RCU_NO_MB Kconfig option, hence the polarity. [ paulmck: Apply Peter Zijlstra feedback. ] Signed-off-by: Paul E. McKenney Cc: Andrii Nakryiko Cc: Alexei Starovoitov Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Reviewed-by: Joel Fernandes Signed-off-by: Boqun Feng --- include/linux/rcupdate_trace.h | 65 ++++++++++++++++++++++++++++++++++++------ kernel/rcu/Kconfig | 23 +++++++++++++++ 2 files changed, 80 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index 0bd47f12ecd1..f47ba9c07460 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -34,6 +34,53 @@ static inline int rcu_read_lock_trace_held(void) #ifdef CONFIG_TASKS_TRACE_RCU +/** + * rcu_read_lock_tasks_trace - mark beginning of RCU-trace read-side critical section + * + * When synchronize_rcu_tasks_trace() is invoked by one task, then that + * task is guaranteed to block until all other tasks exit their read-side + * critical sections. 
Similarly, if call_rcu_trace() is invoked on one + * task while other tasks are within RCU read-side critical sections, + * invocation of the corresponding RCU callback is deferred until after + * all the other tasks exit their critical sections. + * + * For more details, please see the documentation for + * srcu_read_lock_fast(). For a description of how implicit RCU + * readers provide the needed ordering for architectures defining the + * ARCH_WANTS_NO_INSTR Kconfig option (and thus promising never to trace + * code where RCU is not watching), please see the __srcu_read_lock_fast() + * (non-kerneldoc) header comment. Otherwise, the smp_mb() below provides + * the needed ordering. + */ +static inline struct srcu_ctr __percpu *rcu_read_lock_tasks_trace(void) +{ + struct srcu_ctr __percpu *ret = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); + + rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Provide ordering on noinstr-incomplete architectures. + return ret; +} + +/** + * rcu_read_unlock_tasks_trace - mark end of RCU-trace read-side critical section + * @scp: return value from corresponding rcu_read_lock_tasks_trace(). + * + * Pairs with the preceding call to rcu_read_lock_tasks_trace() that + * returned the value passed in via scp. + * + * For more details, please see the documentation for rcu_read_unlock(). + * For memory-ordering information, please see the header comment for the + * rcu_read_lock_tasks_trace() function. + */ +static inline void rcu_read_unlock_tasks_trace(struct srcu_ctr __percpu *scp) +{ + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Provide ordering on noinstr-incomplete architectures. 
+ __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); + srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); +} + /** * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section * @@ -50,14 +97,15 @@ static inline void rcu_read_lock_trace(void) { struct task_struct *t = current; + rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); if (t->trc_reader_nesting++) { // In case we interrupted a Tasks Trace RCU reader. - rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); return; } barrier(); // nesting before scp to protect against interrupt handler. - t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); - smp_mb(); // Placeholder for more selective ordering + t->trc_reader_scp = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Placeholder for more selective ordering } /** @@ -74,13 +122,14 @@ static inline void rcu_read_unlock_trace(void) struct srcu_ctr __percpu *scp; struct task_struct *t = current; - smp_mb(); // Placeholder for more selective ordering scp = t->trc_reader_scp; barrier(); // scp before nesting to protect against interrupt handler. 
- if (!--t->trc_reader_nesting) - srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); - else - srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); + if (!--t->trc_reader_nesting) { + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Placeholder for more selective ordering + __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); + } + srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); } /** diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index c381a3130116..762299291e09 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -142,6 +142,29 @@ config TASKS_TRACE_RCU default n select IRQ_WORK +config TASKS_TRACE_RCU_NO_MB + bool "Override RCU Tasks Trace inclusion of read-side memory barriers" + depends on RCU_EXPERT && TASKS_TRACE_RCU + default ARCH_WANTS_NO_INSTR + help + This option prevents the use of read-side memory barriers in + rcu_read_lock_tasks_trace() and rcu_read_unlock_tasks_trace() + even in kernels built with CONFIG_ARCH_WANTS_NO_INSTR=n, that is, + in kernels that do not have noinstr set up in entry/exit code. + By setting this option, you are promising to carefully review + use of ftrace, BPF, and friends to ensure that no tracing + operation is attached to a function that runs in that portion + of the entry/exit code that RCU does not watch, that is, + where rcu_is_watching() returns false. Alternatively, you + might choose to never remove traces except by rebooting. + + Those wishing to disable read-side memory barriers for an entire + architecture can select this Kconfig option, hence the polarity. + + Say Y here if you need speed and will review use of tracing. + Say N here for certain esoteric testing of RCU itself. + Take the default if you are unsure. + config RCU_STALL_COMMON def_bool TREE_RCU help -- cgit v1.2.3 From a525ccd4d3e91ff123960e44a8892fb76c8217ea Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 29 Dec 2025 11:11:03 -0800 Subject: srcu: Create an rcu_tasks_trace_expedite_current() function This commit creates an rcu_tasks_trace_expedite_current() function that expedites the current (and possibly the next) RCU Tasks Trace grace period. If the current RCU Tasks Trace grace period is already waiting, that wait will complete before the expediting takes effect. If there is no RCU Tasks Trace grace period in flight, this function might well create one. Signed-off-by: Paul E. McKenney Cc: Andrii Nakryiko Cc: Alexei Starovoitov Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Reviewed-by: Joel Fernandes Signed-off-by: Boqun Feng --- include/linux/rcupdate_trace.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index f47ba9c07460..cee89e51e45c 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -184,6 +184,20 @@ static inline void rcu_barrier_tasks_trace(void) srcu_barrier(&rcu_tasks_trace_srcu_struct); } +/** + * rcu_tasks_trace_expedite_current - Expedite the current Tasks Trace RCU grace period + * + * Cause the current Tasks Trace RCU grace period to become expedited. + * The grace period following the current one might also be expedited. + * If there is no current grace period, one might be created. If the + * current grace period is currently sleeping, that sleep will complete + * before expediting will take effect. + */ +static inline void rcu_tasks_trace_expedite_current(void) +{ + srcu_expedite_current(&rcu_tasks_trace_srcu_struct); +} + // Placeholders to enable stepwise transition. void __init rcu_tasks_trace_suppress_unused(void); -- cgit v1.2.3