From e2f08cefcf483f56366e4daa292bb9ba113ec138 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 May 2026 10:43:49 -0700 Subject: srcu: Fix kerneldoc header comment typo in srcu_down_read_fast() s/srcu_read_lock_safe()/srcu_read_lock_fast_updown(), there being no such thing as srcu_read_lock_safe(). Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/srcu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 81b1938512d5..a54ce9e808b9 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -397,7 +397,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast_notrace(struct srcu_ * * The same srcu_struct may be used concurrently by srcu_down_read_fast() * and srcu_read_lock_fast(). However, the same definition/initialization - * requirements called out for srcu_read_lock_safe() apply. + * requirements called out for srcu_read_lock_fast_updown() apply. */ static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires_shared(ssp) { -- cgit v1.2.3 From 012c889690edc14d724a5880b4d0fe01c1fbb488 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 May 2026 10:43:50 -0700 Subject: checkpatch: Undeprecate rcu_read_lock_trace() and rcu_read_unlock_trace() It turns out that there are BPF use cases that rely on nesting RCU Tasks Trace readers. These use cases are well-served by the old rcu_read_lock_trace() and rcu_read_unlock_trace() functions that maintain a nesting counter in the task_struct structure. But these use cases incur a performance penalty when using the shiny new rcu_read_lock_tasks_trace() and rcu_read_unlock_tasks_trace() functions, which nest in the same way that SRCU does. This means that rcu_read_lock_trace() and rcu_read_unlock_trace() will be with us for some time. Therefore, remove the checkpatch.pl deprecation. 
Also, the rcu_read_lock_tasks_trace() and rcu_read_unlock_tasks_trace() functions are intended for use only by BPF. Therefore, add them to the list of functions that checkpatch complains about outside of BPF (and of course, RCU). Reported-by: Puranjay Mohan Signed-off-by: Paul E. McKenney Cc: Andy Whitcroft Cc: Joe Perches Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Uladzislau Rezki (Sony) --- scripts/checkpatch.pl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 0492d6afc9a1..cc5bbd70cb84 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -865,8 +865,6 @@ our %deprecated_apis = ( "DEFINE_IDR" => "DEFINE_XARRAY", "idr_init" => "xa_init", "idr_init_base" => "xa_init_flags", - "rcu_read_lock_trace" => "rcu_read_lock_tasks_trace", - "rcu_read_unlock_trace" => "rcu_read_unlock_tasks_trace", ); #Create a search pattern for all these strings to speed up a loop below @@ -7596,12 +7594,15 @@ sub process { # Complain about RCU Tasks Trace used outside of BPF (and of course, RCU). our $rcu_trace_funcs = qr{(?x: + rcu_read_lock_tasks_trace | rcu_read_lock_trace | rcu_read_lock_trace_held | rcu_read_unlock_trace | + rcu_read_unlock_tasks_trace | call_rcu_tasks_trace | synchronize_rcu_tasks_trace | rcu_barrier_tasks_trace | + rcu_tasks_trace_expedite_current | rcu_request_urgent_qs_task )}; our $rcu_trace_paths = qr{(?x: -- cgit v1.2.3 From 5f136351edd37d779e45704d573f5b6d6cab1e6e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 May 2026 10:43:51 -0700 Subject: rcu: Simplify rcu_do_batch() by applying clamp() This commit replaces a nested ?: sequence with clamp(). This does not reduce the number of lines of code, but it does simplify the line that it modifies. Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. 
McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 55df6d37145e..e46a5124c3eb 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2584,7 +2584,7 @@ static void rcu_do_batch(struct rcu_data *rdp) const long npj = NSEC_PER_SEC / HZ; long rrn = READ_ONCE(rcu_resched_ns); - rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn; + rrn = clamp(rrn, NSEC_PER_MSEC, NSEC_PER_SEC); tlimit = local_clock() + rrn; jlimit = jiffies + (rrn + npj + 1) / npj; jlimit_check = true; -- cgit v1.2.3 From d9b4d36b8c8fb6006e7b62dd0ddc218c87e8196a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 May 2026 10:43:52 -0700 Subject: rcu: Simplify param_set_next_fqs_jiffies() by applying clamp_val() This commit replaces a nested ?: sequence with clamp_val(). This does not reduce the number of lines of code, but it does simplify the line that it modifies. Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e46a5124c3eb..09f0cef5014c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -492,7 +492,7 @@ static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param int ret = kstrtoul(val, 0, &j); if (!ret) { - WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1)); + WRITE_ONCE(*(ulong *)kp->arg, clamp_val(j, 1, HZ)); adjust_jiffies_till_sched_qs(); } return ret; -- cgit v1.2.3 From e6cb527255c9f873851bd18d2bc375f6e6abb311 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 8 May 2026 10:43:53 -0700 Subject: rcu: Document rcu_access_pointer() feeding into cmpxchg() This commit documents the rcu_access_pointer() use case for fetching the old value of an RCU-protected pointer within a lockless updater for use by an atomic cmpxchg() operation. Reviewed-by: Frederic Weisbecker Reported-by: Maxim Mikityanskiy Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/rcupdate.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index bfa765132de8..5e95acc33989 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -592,11 +592,13 @@ context_unsafe( \ * lockdep checks for being in an RCU read-side critical section. This is * useful when the value of this pointer is accessed, but the pointer is * not dereferenced, for example, when testing an RCU-protected pointer - * against NULL. Although rcu_access_pointer() may also be used in cases - * where update-side locks prevent the value of the pointer from changing, - * you should instead use rcu_dereference_protected() for this use case. - * Within an RCU read-side critical section, there is little reason to - * use rcu_access_pointer(). + * against NULL. Within an RCU read-side critical section, there is little + * reason to use rcu_access_pointer(). Although rcu_access_pointer() may + * also be used in cases where update-side locks prevent the value of the + * pointer from changing, you should instead use rcu_dereference_protected() + * for this use case. It is also permissible to use rcu_access_pointer() + * within lockless updaters to obtain the old value for an atomic operation, + * for example, for cmpxchg(). 
* * It is usually best to test the rcu_access_pointer() return value * directly in order to avoid accidental dereferences being introduced -- cgit v1.2.3 From eceb65975256d7f7a5e5a5a2f4b7865eb32eaaf7 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 11 Mar 2026 19:58:11 +0100 Subject: rcu: Latch normal synchronize_rcu() path on flood Currently, rcu_normal_wake_from_gp is only enabled by default on small systems (<= 16 CPUs) or when a user explicitly enables it. Introduce an adaptive latching mechanism: * Track the number of in-flight synchronize_rcu() requests using a new rcu_sr_normal_count counter; * When the count reaches RCU_SR_NORMAL_LATCH_THR (64), set rcu_sr_normal_latched, diverting new requests onto the scaled wait_rcu_gp() path; * Clear the latch only when the pending requests are fully drained (nr == 0); * Enable rcu_normal_wake_from_gp by default for all systems, relying on this dynamic throttling instead of static CPU limits. Testing (synthetic flood workload): * Kernel version: 6.19.0-rc6 * Number of CPUs: 1536 * 60K concurrent synchronize_rcu() calls Perf (cycles, system-wide): total cycles: 932020263832 rcu_sr_normal_add_req(): 2650282811 cycles (~0.28%) Perf report excerpt: 0.01% 0.01% sync_test/... [k] rcu_sr_normal_add_req Measured overhead of rcu_sr_normal_add_req() remained ~0.28% of total CPU cycles in this synthetic stress test.
Reviewed-by: Frederic Weisbecker Tested-by: Samir M Suggested-by: Joel Fernandes Signed-off-by: Uladzislau Rezki (Sony) --- Documentation/admin-guide/kernel-parameters.txt | 10 ++--- kernel/rcu/tree.c | 52 ++++++++++++++++++------- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4d0f545fb3ec..d5db2e85d551 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5862,13 +5862,13 @@ Kernel parameters use a call_rcu[_hurry]() path. Please note, this is for a normal grace period. - How to enable it: + How to disable it: - echo 1 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp - or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=1" + echo 0 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp + or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=0" - Default is 1 if num_possible_cpus() <= 16 and it is not explicitly - disabled by the boot parameter passing 0. + Default is 1 if it is not explicitly disabled by the boot parameter + passing 0. rcuscale.gp_async= [KNL] Measure performance of asynchronous diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 09f0cef5014c..afb9e7db8f78 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1632,17 +1632,21 @@ static void rcu_sr_put_wait_head(struct llist_node *node) atomic_set_release(&sr_wn->inuse, 0); } -/* Enable rcu_normal_wake_from_gp automatically on small systems. */ -#define WAKE_FROM_GP_CPU_THRESHOLD 16 - -static int rcu_normal_wake_from_gp = -1; +static int rcu_normal_wake_from_gp = 1; module_param(rcu_normal_wake_from_gp, int, 0644); static struct workqueue_struct *sync_wq; +#define RCU_SR_NORMAL_LATCH_THR 64 + +/* Number of in-flight synchronize_rcu() calls queued on srs_next. 
*/ +static atomic_long_t rcu_sr_normal_count; +static int rcu_sr_normal_latched; /* 0/1 */ + static void rcu_sr_normal_complete(struct llist_node *node) { struct rcu_synchronize *rs = container_of( (struct rcu_head *) node, struct rcu_synchronize, head); + long nr; WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && !poll_state_synchronize_rcu_full(&rs->oldstate), @@ -1650,6 +1654,15 @@ static void rcu_sr_normal_complete(struct llist_node *node) /* Finally. */ complete(&rs->completion); + nr = atomic_long_dec_return(&rcu_sr_normal_count); + WARN_ON_ONCE(nr < 0); + + /* + * Unlatch: switch back to normal path when fully + * drained and if it has been latched. + */ + if (nr == 0) + (void)cmpxchg_relaxed(&rcu_sr_normal_latched, 1, 0); } static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work) @@ -1795,6 +1808,24 @@ static bool rcu_sr_normal_gp_init(void) static void rcu_sr_normal_add_req(struct rcu_synchronize *rs) { + /* + * Increment before publish to avoid a complete + * vs enqueue race on latch. + */ + long nr = atomic_long_inc_return(&rcu_sr_normal_count); + + /* + * Latch when threshold is reached. Checking for an exact match + * restricts cmpxchg() to a single context. + * + * This latch is intentionally relaxed and best-effort. Concurrent + * set/clear can race and temporarily lose the latch, which is OK + * because it only selects between the fast and fallback paths. + */ + if (nr == RCU_SR_NORMAL_LATCH_THR) + (void)cmpxchg_relaxed(&rcu_sr_normal_latched, 0, 1); + + /* Publish for the GP kthread/worker. 
*/ llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next); } @@ -3278,14 +3309,15 @@ static void synchronize_rcu_normal(void) { struct rcu_synchronize rs; + init_rcu_head_on_stack(&rs.head); trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("request")); - if (READ_ONCE(rcu_normal_wake_from_gp) < 1) { + if (READ_ONCE(rcu_normal_wake_from_gp) < 1 || + READ_ONCE(rcu_sr_normal_latched)) { wait_rcu_gp(call_rcu_hurry); goto trace_complete_out; } - init_rcu_head_on_stack(&rs.head); init_completion(&rs.completion); /* @@ -3302,10 +3334,10 @@ static void synchronize_rcu_normal(void) /* Now we can wait. */ wait_for_completion(&rs.completion); - destroy_rcu_head_on_stack(&rs.head); trace_complete_out: trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("complete")); + destroy_rcu_head_on_stack(&rs.head); } /** @@ -4904,12 +4936,6 @@ void __init rcu_init(void) sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); WARN_ON(!sync_wq); - /* Respect if explicitly disabled via a boot parameter. */ - if (rcu_normal_wake_from_gp < 0) { - if (num_possible_cpus() <= WAKE_FROM_GP_CPU_THRESHOLD) - rcu_normal_wake_from_gp = 1; - } - /* Fill in default value for rcutree.qovld boot parameter. */ /* -After- the rcu_node ->lock fields are initialized! */ if (qovld < 0) -- cgit v1.2.3 From 42c5468f9cdc0c892fec3c0916b3ac5b670775af Mon Sep 17 00:00:00 2001 From: Zqiang Date: Thu, 23 Apr 2026 19:19:30 +0800 Subject: rcu-tasks: Fix possible boot-time tests failed for the call_rcu_tasks() The following scenarios will cause the call_rcu_tasks() boot-time tests failed: CPU0 CPU1 rcu_init_tasks_generic() ->rcu_tasks_initiate_self_tests() ->call_rcu_tasks_trace(&tests[1].rh, test_rcu_tasks_callback) ->call_rcu_tasks_generic() ->havekthread = smp_load_acquire(&rtp->kthread_ptr) "The havekthread is false" .... 
rcu_tasks_kthread() ->smp_store_release(&rtp->kthread_ptr, current) ->rcu_tasks_one_gp() ->rcuwait_wait_event() ->rcu_tasks_need_gpcb() ->for (cpu = 0; cpu < dequeue_limit; cpu++) ->rcu_segcblist_n_cbs(&rtpcp->cblist) == 0 ->schedule() ->raw_spin_trylock_rcu_node() ->needwake = (func == wakeme_after_rcu) || (rcu_segcblist_n_cbs(&rtpcp->cblist) == rcu_task_lazy_lim) "The rcu_task_lazy_lim default value is 32 and the func pointer is test_rcu_tasks_callback, so needwake is false." ->if (havekthread && !needwake && !timer_pending(&rtpcp->lazy_timer)) "havekthread is false, so this branch is not taken." .... "Because needwake is false, rtp_irq_work is not queued, even though rtp->kthread_ptr already exists at this point." ->if (needwake && READ_ONCE(rtp->kthread_ptr)) ->irq_work_queue(&rtpcp->rtp_irq_work) In the above scenario, if call_rcu_tasks() is not called again afterward, rcu_tasks_kthread never gets a chance to be woken up and test_rcu_tasks_callback() is never invoked, so the boot-time tests can fail. This commit therefore checks the havekthread variable: if it is false and rtpcp->cblist is empty, needwake is set to true, so that if rtp->kthread_ptr exists by then, rtpcp->rtp_irq_work can be queued to wake up rcu_tasks_kthread. Signed-off-by: Zqiang Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tasks.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 48f0d803c8e2..f4da5fad70f5 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -373,7 +373,8 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, // Queuing callbacks before initialization not yet supported.
if (WARN_ON_ONCE(!rcu_segcblist_is_enabled(&rtpcp->cblist))) rcu_segcblist_init(&rtpcp->cblist); - needwake = (func == wakeme_after_rcu) || + needwake = (!havekthread && rcu_segcblist_empty(&rtpcp->cblist)) || + (func == wakeme_after_rcu) || (rcu_segcblist_n_cbs(&rtpcp->cblist) == rcu_task_lazy_lim); if (havekthread && !needwake && !timer_pending(&rtpcp->lazy_timer)) { if (rtp->lazy_jiffies) -- cgit v1.2.3 From 002668809b068c528838c1ab1ff46c87bdbb095d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 May 2026 21:01:28 +0200 Subject: rcu/nocb: reduce stack usage in nocb_gp_wait() When CONFIG_UBSAN_ALIGNMENT is enabled, the stack usage of nocb_gp_wait() grows above typical warning limits: In file included from kernel/rcu/tree.c:4930: kernel/rcu/tree_nocb.h: In function 'rcu_nocb_gp_kthread': kernel/rcu/tree_nocb.h:866:1: error: the frame size of 1968 bytes is larger than 1280 bytes [-Werror=frame-larger-than=] Apparently, the problem is passing rcu_data from a 'void *' pointer, which gcc assumes may be misaligned. When the function is not inlined into rcu_nocb_gp_kthread(), that is no longer visible to gcc. Add a 'noinline_for_stack' annotation that leads to skipping a lot of the alignment sanitizer checks and keeps the stack usage 60% lower here. Reviewed-by: Kunwu Chan Reviewed-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Arnd Bergmann Signed-off-by: Uladzislau Rezki (Sony) --- kernel/rcu/tree_nocb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 1047b30cd46b..373b877cf171 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -655,7 +655,7 @@ static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu) * No-CBs GP kthreads come here to wait for additional callbacks to show up * or for grace periods to end. 
*/ -static void nocb_gp_wait(struct rcu_data *my_rdp) +static noinline_for_stack void nocb_gp_wait(struct rcu_data *my_rdp) { bool bypass = false; int __maybe_unused cpu = my_rdp->cpu; -- cgit v1.2.3