summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederic Weisbecker <frederic@kernel.org>2024-10-10 19:36:09 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-10-17 16:26:52 +0300
commite66b1e01f2eb3209d08122572f41f7838b79540d (patch)
tree1845c1fcd9f353179a2b7d67626c18d1ccd960c2
parent29e8d96d44f51cf89a62dd042be35d052833b95c (diff)
downloadlinux-e66b1e01f2eb3209d08122572f41f7838b79540d.tar.xz
rcu/nocb: Fix rcuog wake-up from offline softirq
[ Upstream commit f7345ccc62a4b880cf76458db5f320725f28e400 ] After a CPU has set itself offline and before it eventually calls rcutree_report_cpu_dead(), there are still opportunities for callbacks to be enqueued, for example from a softirq. When that happens on NOCB, the rcuog wake-up is deferred through an IPI to an online CPU in order not to call into the scheduler and risk arming the RT-bandwidth after hrtimers have been migrated out and disabled. But performing a synchronized IPI from a softirq is buggy as reported in the following scenario: WARNING: CPU: 1 PID: 26 at kernel/smp.c:633 smp_call_function_single Modules linked in: rcutorture torture CPU: 1 UID: 0 PID: 26 Comm: migration/1 Not tainted 6.11.0-rc1-00012-g9139f93209d1 #1 Stopper: multi_cpu_stop+0x0/0x320 <- __stop_cpus+0xd0/0x120 RIP: 0010:smp_call_function_single <IRQ> swake_up_one_online __call_rcu_nocb_wake __call_rcu_common ? rcu_torture_one_read call_timer_fn __run_timers run_timer_softirq handle_softirqs irq_exit_rcu ? tick_handle_periodic sysvec_apic_timer_interrupt </IRQ> Fix this with forcing deferred rcuog wake up through the NOCB timer when the CPU is offline. The actual wake up will happen from rcutree_report_cpu_dead(). Reported-by: kernel test robot <oliver.sang@intel.com> Closes: https://lore.kernel.org/oe-lkp/202409231644.4c55582d-lkp@intel.com Fixes: 9139f93209d1 ("rcu/nocb: Fix RT throttling hrtimer armed from offline CPU") Reviewed-by: "Joel Fernandes (Google)" <joel@joelfernandes.org> Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Neeraj Upadhyay <neeraj.upadhyay@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--kernel/rcu/tree_nocb.h8
1 files changed, 7 insertions, 1 deletions
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 2686ba122fa0..3630f712358e 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -569,13 +569,19 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
- } else if (!irqs_disabled_flags(flags)) {
+ } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) {
/* ... if queue was empty ... */
rcu_nocb_unlock(rdp);
wake_nocb_gp(rdp, false);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty"));
} else {
+ /*
+ * Don't do the wake-up upfront on fragile paths.
+ * Also offline CPUs can't call swake_up_one_online() from
+ * (soft-)IRQs. Rely on the final deferred wake-up from
+ * rcutree_report_cpu_dead()
+ */
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred"));