From e2c73a6860bdf54f2c6bf8cddc34ddc91a1343e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2021 14:18:51 -0700 Subject: rcu: Remove the RCU_FAST_NO_HZ Kconfig option All of the uses of CONFIG_RCU_FAST_NO_HZ=y that I have seen involve systems with RCU callbacks offloaded. In this situation, all that this Kconfig option does is slow down idle entry/exit with an additional allways-taken early exit. If this is the only use case, then this Kconfig option nothing but an attractive nuisance that needs to go away. This commit therefore removes the RCU_FAST_NO_HZ Kconfig option. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 185 +---------------------------------------------- 1 file changed, 2 insertions(+), 183 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 5199559fbbf0..19f7d578cedb 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -51,8 +51,6 @@ static void __init rcu_bootup_announce_oddness(void) RCU_FANOUT); if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); - if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) - pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); if (IS_ENABLED(CONFIG_PROVE_RCU)) pr_info("\tRCU lockdep checking is enabled.\n"); if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) @@ -1253,16 +1251,14 @@ static void __init rcu_spawn_boost_kthreads(void) #endif /* #else #ifdef CONFIG_RCU_BOOST */ -#if !defined(CONFIG_RCU_FAST_NO_HZ) - /* * Check to see if any future non-offloaded RCU-related work will need * to be done by the current CPU, even if none need be done immediately, * returning 1 if so. This function is part of the RCU implementation; * it is -not- an exported member of the RCU API. * - * Because we not have RCU_FAST_NO_HZ, just check whether or not this - * CPU has RCU callbacks queued. + * Just check whether or not this CPU has non-offloaded RCU callbacks + * queued. */ int rcu_needs_cpu(u64 basemono, u64 *nextevt) { @@ -1271,183 +1267,6 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); } -/* - * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up - * after it. - */ -static void rcu_cleanup_after_idle(void) -{ -} - -/* - * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, - * is nothing. - */ -static void rcu_prepare_for_idle(void) -{ -} - -#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ - -/* - * This code is invoked when a CPU goes idle, at which point we want - * to have the CPU do everything required for RCU so that it can enter - * the energy-efficient dyntick-idle mode. - * - * The following preprocessor symbol controls this: - * - * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted - * to sleep in dyntick-idle mode with RCU callbacks pending. This - * is sized to be roughly one RCU grace period. Those energy-efficiency - * benchmarkers who might otherwise be tempted to set this to a large - * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your - * system. And if you are -that- concerned about energy efficiency, - * just power the system down and be done with it! - * - * The value below works well in practice. If future workloads require - * adjustment, they can be converted into kernel config parameters, though - * making the state machine smarter might be a better option. - */ -#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ - -static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; -module_param(rcu_idle_gp_delay, int, 0644); - -/* - * Try to advance callbacks on the current CPU, but only if it has been - * awhile since the last time we did so. Afterwards, if there are any - * callbacks ready for immediate invocation, return true. - */ -static bool __maybe_unused rcu_try_advance_all_cbs(void) -{ - bool cbs_ready = false; - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - struct rcu_node *rnp; - - /* Exit early if we advanced recently. */ - if (jiffies == rdp->last_advance_all) - return false; - rdp->last_advance_all = jiffies; - - rnp = rdp->mynode; - - /* - * Don't bother checking unless a grace period has - * completed since we last checked and there are - * callbacks not yet ready to invoke. - */ - if ((rcu_seq_completed_gp(rdp->gp_seq, - rcu_seq_current(&rnp->gp_seq)) || - unlikely(READ_ONCE(rdp->gpwrap))) && - rcu_segcblist_pend_cbs(&rdp->cblist)) - note_gp_changes(rdp); - - if (rcu_segcblist_ready_cbs(&rdp->cblist)) - cbs_ready = true; - return cbs_ready; -} - -/* - * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready - * to invoke. If the CPU has callbacks, try to advance them. Tell the - * caller about what to set the timeout. - * - * The caller must have disabled interrupts. - */ -int rcu_needs_cpu(u64 basemono, u64 *nextevt) -{ - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - unsigned long dj; - - lockdep_assert_irqs_disabled(); - - /* If no non-offloaded callbacks, RCU doesn't need the CPU. */ - if (rcu_segcblist_empty(&rdp->cblist) || - rcu_rdp_is_offloaded(rdp)) { - *nextevt = KTIME_MAX; - return 0; - } - - /* Attempt to advance callbacks. */ - if (rcu_try_advance_all_cbs()) { - /* Some ready to invoke, so initiate later invocation. */ - invoke_rcu_core(); - return 1; - } - rdp->last_accelerate = jiffies; - - /* Request timer and round. */ - dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies; - - *nextevt = basemono + dj * TICK_NSEC; - return 0; -} - -/* - * Prepare a CPU for idle from an RCU perspective. The first major task is to - * sense whether nohz mode has been enabled or disabled via sysfs. The second - * major task is to accelerate (that is, assign grace-period numbers to) any - * recently arrived callbacks. - * - * The caller must have disabled interrupts. - */ -static void rcu_prepare_for_idle(void) -{ - bool needwake; - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - struct rcu_node *rnp; - int tne; - - lockdep_assert_irqs_disabled(); - if (rcu_rdp_is_offloaded(rdp)) - return; - - /* Handle nohz enablement switches conservatively. */ - tne = READ_ONCE(tick_nohz_active); - if (tne != rdp->tick_nohz_enabled_snap) { - if (!rcu_segcblist_empty(&rdp->cblist)) - invoke_rcu_core(); /* force nohz to see update. */ - rdp->tick_nohz_enabled_snap = tne; - return; - } - if (!tne) - return; - - /* - * If we have not yet accelerated this jiffy, accelerate all - * callbacks on this CPU. - */ - if (rdp->last_accelerate == jiffies) - return; - rdp->last_accelerate = jiffies; - if (rcu_segcblist_pend_cbs(&rdp->cblist)) { - rnp = rdp->mynode; - raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ - needwake = rcu_accelerate_cbs(rnp, rdp); - raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ - if (needwake) - rcu_gp_kthread_wake(); - } -} - -/* - * Clean up for exit from idle. Attempt to advance callbacks based on - * any grace periods that elapsed while the CPU was idle, and if any - * callbacks are now ready to invoke, initiate invocation. - */ -static void rcu_cleanup_after_idle(void) -{ - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - lockdep_assert_irqs_disabled(); - if (rcu_rdp_is_offloaded(rdp)) - return; - if (rcu_try_advance_all_cbs()) - invoke_rcu_core(); -} - -#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ - /* * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the * grace-period kthread will do force_quiescent_state() processing? -- cgit v1.2.3 From bc849e9192c75833a85f2e9376a265ab31f8eec7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2021 14:30:20 -0700 Subject: rcu: Move rcu_needs_cpu() to tree.c Now that RCU_FAST_NO_HZ is no more, there is but one implementation of the rcu_needs_cpu() function. This commit therefore moves this function from kernel/rcu/tree_plugin.c to kernel/rcu/tree.c. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 18 ++++++++++++++++++ kernel/rcu/tree_plugin.h | 16 ---------------- 2 files changed, 18 insertions(+), 16 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5e98257c2910..4ac019e9b25f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1075,6 +1075,24 @@ void rcu_irq_enter_irqson(void) local_irq_restore(flags); } +/* + * Check to see if any future non-offloaded RCU-related work will need + * to be done by the current CPU, even if none need be done immediately, + * returning 1 if so. This function is part of the RCU implementation; + * it is -not- an exported member of the RCU API. This is used by + * the idle-entry code to figure out whether it is safe to disable the + * scheduler-clock interrupt. + * + * Just check whether or not this CPU has non-offloaded RCU callbacks + * queued. + */ +int rcu_needs_cpu(u64 basemono, u64 *nextevt) +{ + *nextevt = KTIME_MAX; + return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && + !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); +} + /* * If any sort of urgency was applied to the current CPU (for example, * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 19f7d578cedb..0575757a0f8f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1251,22 +1251,6 @@ static void __init rcu_spawn_boost_kthreads(void) #endif /* #else #ifdef CONFIG_RCU_BOOST */ -/* - * Check to see if any future non-offloaded RCU-related work will need - * to be done by the current CPU, even if none need be done immediately, - * returning 1 if so. This function is part of the RCU implementation; - * it is -not- an exported member of the RCU API. - * - * Just check whether or not this CPU has non-offloaded RCU callbacks - * queued. - */ -int rcu_needs_cpu(u64 basemono, u64 *nextevt) -{ - *nextevt = KTIME_MAX; - return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && - !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); -} - /* * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the * grace-period kthread will do force_quiescent_state() processing? -- cgit v1.2.3 From 2407a64f8045552203ee5cb9904ce75ce2fceef4 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 28 Sep 2021 08:21:28 +0800 Subject: rcu: in_irq() cleanup This commit replaces the obsolete and ambiguous macro in_irq() with its shiny new in_hardirq() equivalent. Signed-off-by: Changbin Du Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 2 +- kernel/rcu/tree.c | 2 +- kernel/rcu/tree_plugin.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 9be015305f9f..858f4d429946 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -85,7 +85,7 @@ static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ - (is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq()) + (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ef8d36f580fc..f0f19dc7f19e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1467,7 +1467,7 @@ static void rcu_gp_kthread_wake(void) { struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); - if ((current == t && !in_irq() && !in_serving_softirq()) || + if ((current == t && !in_hardirq() && !in_serving_softirq()) || !READ_ONCE(rcu_state.gp_flags) || !t) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 5199559fbbf0..599084c4c21f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -642,7 +642,7 @@ static void rcu_read_unlock_special(struct task_struct *t) (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled && t->rcu_blocked_node); // Need to defer quiescent state until everything is enabled. - if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) { + if (use_softirq && (in_hardirq() || (expboost && !irqs_were_disabled))) { // Using softirq, safe to awaken, and either the // wakeup is free or there is either an expedited // GP in flight or a potential need to deboost. -- cgit v1.2.3 From 17ea3718824912e773b0fd78579694b2e75ee597 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Sun, 24 Oct 2021 08:36:34 +0800 Subject: rcu: Improve tree_plugin.h comments and add code cleanups This commit cleans up some comments and code in kernel/rcu/tree_plugin.h. Signed-off-by: Zhouyi Zhou Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 599084c4c21f..f44580276fbd 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -16,7 +16,7 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp) { /* - * In order to read the offloaded state of an rdp is a safe + * In order to read the offloaded state of an rdp in a safe * and stable way and prevent from its value to be changed * under us, we must either hold the barrier mutex, the cpu * hotplug lock (read or write) or the nocb lock. Local @@ -56,7 +56,7 @@ static void __init rcu_bootup_announce_oddness(void) if (IS_ENABLED(CONFIG_PROVE_RCU)) pr_info("\tRCU lockdep checking is enabled.\n"); if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) - pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n"); + pr_info("\tRCU strict (and thus non-scalable) grace periods are enabled.\n"); if (RCU_NUM_LVLS >= 4) pr_info("\tFour(or more)-level hierarchy is enabled.\n"); if (RCU_FANOUT_LEAF != 16) @@ -88,13 +88,13 @@ static void __init rcu_bootup_announce_oddness(void) if (rcu_kick_kthreads) pr_info("\tKick kthreads if too-long grace period.\n"); if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) - pr_info("\tRCU callback double-/use-after-free debug enabled.\n"); + pr_info("\tRCU callback double-/use-after-free debug is enabled.\n"); if (gp_preinit_delay) pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay); if (gp_init_delay) pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay); if (gp_cleanup_delay) - pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay); + pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay); if (!use_softirq) pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n"); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG)) @@ -1153,7 +1153,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) /* * Create an RCU-boost kthread for the specified node if one does not * already exist. We only create this kthread for preemptible RCU. - * Returns zero if all is well, a negated errno otherwise. */ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) { @@ -1455,7 +1454,7 @@ static void rcu_cleanup_after_idle(void) * CPU unless the grace period has extended for too long. * * This code relies on the fact that all NO_HZ_FULL CPUs are also - * CONFIG_RCU_NOCB_CPU CPUs. + * RCU_NOCB_CPU CPUs. */ static bool rcu_nohz_full_cpu(void) { -- cgit v1.2.3 From c2cf0767e98eb4487444e5c7ebba491a866811ce Mon Sep 17 00:00:00 2001 From: Zqiang Date: Mon, 15 Nov 2021 13:15:46 +0800 Subject: rcu: Avoid running boost kthreads on isolated CPUs When the boost kthreads are created on systems with nohz_full CPUs, the cpus_allowed_ptr is set to housekeeping_cpumask(HK_FLAG_KTHREAD). However, when the rcu_boost_kthread_setaffinity() is called, the original affinity will be changed and these kthreads can subsequently run on nohz_full CPUs. This commit makes rcu_boost_kthread_setaffinity() restrict these boost kthreads to housekeeping CPUs. Signed-off-by: Zqiang Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f44580276fbd..87fc4609b756 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1203,8 +1203,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) if ((mask & leaf_node_cpu_bit(rnp, cpu)) && cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); + cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU)); if (cpumask_weight(cm) == 0) - cpumask_setall(cm); + cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU)); set_cpus_allowed_ptr(t, cm); free_cpumask_var(cm); } -- cgit v1.2.3 From a4382659487f84c00b5fbb61df25a9ad59396789 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Sep 2021 14:10:45 +0200 Subject: rcu: Ignore rdp.cpu_no_qs.b.exp on preemptible RCU's rcu_qs() Preemptible RCU does not use the rcu_data structure's ->cpu_no_qs.b.exp, instead using a separate ->exp_deferred_qs field to record the need for an expedited quiescent state. In fact ->cpu_no_qs.b.exp should never be set in preemptible RCU because preemptible RCU's expedited grace periods use other mechanisms to record quiescent states. This commit therefore removes the implicit rcu_qs() reference to ->cpu_no_qs.b.exp in favor of a direct reference to ->cpu_no_qs.b.norm. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 5199559fbbf0..113c19b086ac 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -277,12 +277,16 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * current task, there might be any number of other tasks blocked while * in an RCU read-side critical section. * + * Unlike non-preemptible-RCU, quiescent state reports for expedited + * grace periods are handled separately via deferred quiescent states + * and context switch events. + * * Callers to this function must disable preemption. */ static void rcu_qs(void) { RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n"); - if (__this_cpu_read(rcu_data.cpu_no_qs.s)) { + if (__this_cpu_read(rcu_data.cpu_no_qs.b.norm)) { trace_rcu_grace_period(TPS("rcu_preempt"), __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs")); -- cgit v1.2.3 From 6e16b0f7bae3817ea67f4bef4f84298e880fbf66 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Sep 2021 14:10:47 +0200 Subject: rcu: Move rcu_data.cpu_no_qs.b.exp reset to rcu_export_exp_rdp() On non-preemptible RCU, move clearing of the rcu_data structure's ->cpu_no_qs.b.exp filed to the actual expedited quiescent state report function, matching hw preemptible RCU handles the ->exp_deferred_qs field. This prepares for removing ->exp_deferred_qs in favor of ->cpu_no_qs.b.exp for both preemptible and non-preemptible RCU. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 1 + kernel/rcu/tree_plugin.h | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index f3947c49eee7..6c6eb3220385 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -256,6 +256,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, static void rcu_report_exp_rdp(struct rcu_data *rdp) { WRITE_ONCE(rdp->exp_deferred_qs, false); + WRITE_ONCE(rdp->cpu_no_qs.b.exp, false); rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 113c19b086ac..6d58b75d2782 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -849,10 +849,8 @@ static void rcu_qs(void) trace_rcu_grace_period(TPS("rcu_sched"), __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs")); __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false); - if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp)) - return; - __this_cpu_write(rcu_data.cpu_no_qs.b.exp, false); - rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); + if (__this_cpu_read(rcu_data.cpu_no_qs.b.exp)) + rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); } /* -- cgit v1.2.3 From 6120b72e25e195b6fa15b0a674479a38166c392a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Sep 2021 14:10:48 +0200 Subject: rcu: Remove rcu_data.exp_deferred_qs and convert to rcu_data.cpu no_qs.b.exp Having two fields for the same purpose with subtle differences on different RCU flavours is confusing, especially when both fields always exist on both RCU flavours. Fortunately, it is now safe for preemptible RCU to rely on the rcu_data structure's ->cpu_no_qs.b.exp field, just like non-preemptible RCU. This commit therefore removes the ad-hoc ->exp_deferred_qs field. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 - kernel/rcu/tree_exp.h | 5 ++--- kernel/rcu/tree_plugin.h | 12 ++++++------ 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 305cf6aeb408..ea46ed40f6bc 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -157,7 +157,6 @@ struct rcu_data { bool core_needs_qs; /* Core waits for quiescent state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible ->gp_seq wrap. */ - bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */ bool cpu_started; /* RCU watching this onlining CPU. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6c6eb3220385..fc2ee326a6f7 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -255,7 +255,6 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, */ static void rcu_report_exp_rdp(struct rcu_data *rdp) { - WRITE_ONCE(rdp->exp_deferred_qs, false); WRITE_ONCE(rdp->cpu_no_qs.b.exp, false); rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true); } @@ -656,7 +655,7 @@ static void rcu_exp_handler(void *unused) rcu_dynticks_curr_cpu_in_eqs()) { rcu_report_exp_rdp(rdp); } else { - rdp->exp_deferred_qs = true; + WRITE_ONCE(rdp->cpu_no_qs.b.exp, true); set_tsk_need_resched(t); set_preempt_need_resched(); } @@ -678,7 +677,7 @@ static void rcu_exp_handler(void *unused) if (depth > 0) { raw_spin_lock_irqsave_rcu_node(rnp, flags); if (rnp->expmask & rdp->grpmask) { - rdp->exp_deferred_qs = true; + WRITE_ONCE(rdp->cpu_no_qs.b.exp, true); t->rcu_read_unlock_special.b.exp_hint = true; } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 6d58b75d2782..e1a9fb96e0b9 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -260,10 +260,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * no need to check for a subsequent expedited GP. (Though we are * still in a quiescent state in any case.) */ - if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs) + if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp) rcu_report_exp_rdp(rdp); else - WARN_ON_ONCE(rdp->exp_deferred_qs); + WARN_ON_ONCE(rdp->cpu_no_qs.b.exp); } /* @@ -354,7 +354,7 @@ void rcu_note_context_switch(bool preempt) * means that we continue to block the current grace period. */ rcu_qs(); - if (rdp->exp_deferred_qs) + if (rdp->cpu_no_qs.b.exp) rcu_report_exp_rdp(rdp); rcu_tasks_qs(current, preempt); trace_rcu_utilization(TPS("End context switch")); @@ -481,7 +481,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ special = t->rcu_read_unlock_special; rdp = this_cpu_ptr(&rcu_data); - if (!special.s && !rdp->exp_deferred_qs) { + if (!special.s && !rdp->cpu_no_qs.b.exp) { local_irq_restore(flags); return; } @@ -501,7 +501,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) * tasks are handled when removing the task from the * blocked-tasks list below. */ - if (rdp->exp_deferred_qs) + if (rdp->cpu_no_qs.b.exp) rcu_report_exp_rdp(rdp); /* Clean up if blocked during RCU read-side critical section. */ @@ -584,7 +584,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { - return (__this_cpu_read(rcu_data.exp_deferred_qs) || + return (__this_cpu_read(rcu_data.cpu_no_qs.b.exp) || READ_ONCE(t->rcu_read_unlock_special.s)) && rcu_preempt_depth() == 0; } -- cgit v1.2.3 From 790da248978a0722d92d1471630c881704f7eb0d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 Sep 2021 11:09:34 -0700 Subject: rcu: Make idle entry report expedited quiescent states In non-preemptible kernels, an unfortunately timed expedited grace period can result in the rcu_exp_handler() IPI handler setting the rcu_data structure's cpu_no_qs.b.exp field just as the target CPU enters idle. There are situations in which this field will not be checked until after that CPU exits idle. The resulting grace-period latency does not qualify as "expedited". This commit therefore checks this field upon non-preemptible idle entry in the rcu_preempt_deferred_qs() function. It also qualifies the rcu_core() preempt_count() check with IS_ENABLED(CONFIG_PREEMPT_COUNT) to prevent false-positive quiescent states from count-free kernels. Reported-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- kernel/rcu/tree_plugin.h | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ef8d36f580fc..a8d1fe35f482 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2714,7 +2714,7 @@ static __latent_entropy void rcu_core(void) WARN_ON_ONCE(!rdp->beenonline); /* Report any deferred quiescent states if preemption enabled. */ - if (!(preempt_count() & PREEMPT_MASK)) { + if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) { rcu_preempt_deferred_qs(current); } else if (rcu_preempt_need_deferred_qs(current)) { set_tsk_need_resched(current); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e1a9fb96e0b9..8fb1612021a0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -927,7 +927,18 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { return false; } -static void rcu_preempt_deferred_qs(struct task_struct *t) { } + +// Except that we do need to respond to a request by an expedited grace +// period for a quiescent state from this CPU. Note that requests from +// tasks are handled when removing the task from the blocked-tasks list +// below. +static void rcu_preempt_deferred_qs(struct task_struct *t) +{ + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + + if (rdp->cpu_no_qs.b.exp) + rcu_report_exp_rdp(rdp); +} /* * Because there is no preemptible RCU, there can be no readers blocked, -- cgit v1.2.3