From 6a949b7af82db7eb1e52caaed122eab1cf63acee Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 28 Jul 2019 11:50:56 -0700 Subject: rcu: Force on tick when invoking lots of callbacks Callback invocation can run for a significant time period, and within CONFIG_NO_HZ_FULL=y kernels, this period will be devoid of scheduler-clock interrupts. In-kernel execution without such interrupts can cause all manner of malfunction, with RCU CPU stall warnings being but one result. This commit therefore forces scheduling-clock interrupts on whenever more than a few RCU callbacks are invoked. Because offloaded callback invocation can be preempted, this forcing is withdrawn on each context switch. This in turn requires that the loop invoking RCU callbacks reiterate the forcing periodically. [ paulmck: Apply Joel Fernandes TICK_DEP_MASK_RCU->TICK_DEP_BIT_RCU fix. ] [ paulmck: Remove NO_HZ_FULL check per Frederic Weisbecker feedback. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 81105141b6a8..238f93b4b0a4 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2151,6 +2151,7 @@ static void rcu_do_batch(struct rcu_data *rdp) rcu_nocb_unlock_irqrestore(rdp, flags); /* Invoke callbacks. */ + tick_dep_set_task(current, TICK_DEP_BIT_RCU); rhp = rcu_cblist_dequeue(&rcl); for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) { debug_rcu_head_unqueue(rhp); @@ -2217,6 +2218,7 @@ static void rcu_do_batch(struct rcu_data *rdp) /* Re-invoke RCU core processing if there are callbacks remaining. */ if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist)) invoke_rcu_core(); + tick_dep_clear_task(current, TICK_DEP_BIT_RCU); } /* -- cgit v1.2.3 From 366237e7b0833faa2d8da7a8d7d7da8c3ca802e5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 10 Jul 2019 08:01:01 -0700 Subject: stop_machine: Provide RCU quiescent state in multi_cpu_stop() When multi_cpu_stop() loops waiting for other tasks, it can trigger an RCU CPU stall warning. This can be misleading because what is instead needed is information on whatever task is blocking multi_cpu_stop(). This commit therefore inserts an RCU quiescent state into the multi_cpu_stop() function's waitloop. Signed-off-by: Paul E. McKenney --- include/linux/rcutree.h | 1 + kernel/rcu/tree.c | 2 +- kernel/stop_machine.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree.c') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 18b1ed9864b0..c5147de885ec 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,6 +37,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); +void rcu_momentary_dyntick_idle(void); unsigned long get_state_synchronize_rcu(void); void cond_synchronize_rcu(unsigned long oldstate); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 238f93b4b0a4..a5c296d202ae 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -364,7 +364,7 @@ bool rcu_eqs_special_set(int cpu) * * The caller must have disabled interrupts and must not be idle. */ -static void __maybe_unused rcu_momentary_dyntick_idle(void) +void rcu_momentary_dyntick_idle(void) { int special; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index c7031a22aa7b..34c4f117d8c7 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -233,6 +233,7 @@ static int multi_cpu_stop(void *data) */ touch_nmi_watchdog(); } + rcu_momentary_dyntick_idle(); } while (curstate != MULTI_STOP_EXIT); local_irq_restore(flags); -- cgit v1.2.3 From 96926686deab853bcacf887501f4ed958e38666b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 2 Aug 2019 15:12:47 -0700 Subject: rcu: Make CPU-hotplug removal operations enable tick CPU-hotplug removal operations run the multi_cpu_stop() function, which relies on the scheduler to gain control from whatever is running on the various online CPUs, including any nohz_full CPUs running long loops in kernel-mode code. Lack of the scheduler-clock interrupt on such CPUs can delay multi_cpu_stop() for several minutes and can also result in RCU CPU stall warnings. This commit therefore causes CPU-hotplug removal operations to enable the scheduler-clock interrupt on all online CPUs. [ paulmck: Apply Joel Fernandes TICK_DEP_MASK_RCU->TICK_DEP_BIT_RCU fix. ] [ paulmck: Apply simplifications suggested by Frederic Weisbecker. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a5c296d202ae..7c67ea561b36 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2101,6 +2101,9 @@ int rcutree_dead_cpu(unsigned int cpu) rcu_boost_kthread_setaffinity(rnp, -1); /* Do any needed no-CB deferred wakeups from this CPU. */ do_nocb_deferred_wakeup(per_cpu_ptr(&rcu_data, cpu)); + + // Stop-machine done, so allow nohz_full to disable tick. + tick_dep_clear(TICK_DEP_BIT_RCU); return 0; } @@ -3085,6 +3088,9 @@ int rcutree_online_cpu(unsigned int cpu) return 0; /* Too early in boot for scheduler work. */ sync_sched_exp_online_cleanup(cpu); rcutree_affinity_setting(cpu, -1); + + // Stop-machine done, so allow nohz_full to disable tick. + tick_dep_clear(TICK_DEP_BIT_RCU); return 0; } @@ -3105,6 +3111,9 @@ int rcutree_offline_cpu(unsigned int cpu) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); rcutree_affinity_setting(cpu, cpu); + + // nohz_full CPUs need the tick for stop-machine to work quickly + tick_dep_set(TICK_DEP_BIT_RCU); return 0; } -- cgit v1.2.3 From 79ba7ff5a9925f5c170f51ed7a96d1475eb6c27f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 Aug 2019 13:17:35 -0700 Subject: rcutorture: Emulate dyntick aspect of userspace nohz_full sojourn During an actual call_rcu() flood, there would be frequent trips to userspace (in-kernel call_rcu() floods must be otherwise housebroken). Userspace execution on nohz_full CPUs implies an RCU dyntick idle/not-idle transition pair, so this commit adds emulation of that pair. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 1 + kernel/rcu/rcutorture.c | 11 +++++++++++ kernel/rcu/tree.c | 1 + 3 files changed, 13 insertions(+) (limited to 'kernel/rcu/tree.c') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 9bf1dfe7781f..37b6f0c2b79d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -84,6 +84,7 @@ static inline void rcu_scheduler_starting(void) { } #endif /* #else #ifndef CONFIG_SRCU */ static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_is_watching(void) { return true; } +static inline void rcu_momentary_dyntick_idle(void) { } /* Avoid RCU read-side critical sections leaking across. */ static inline void rcu_all_qs(void) { barrier(); } diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index ab61f5c1353b..49ad88765ed2 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1761,6 +1761,11 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void) kfree(rfcp); freed++; rcu_torture_fwd_prog_cond_resched(freed); + if (tick_nohz_full_enabled()) { + local_irq_save(flags); + rcu_momentary_dyntick_idle(); + local_irq_restore(flags); + } } return freed; } @@ -1835,6 +1840,7 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries) static void rcu_torture_fwd_prog_cr(void) { unsigned long cver; + unsigned long flags; unsigned long gps; int i; long n_launders; @@ -1894,6 +1900,11 @@ static void rcu_torture_fwd_prog_cr(void) } cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr); rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs); + if (tick_nohz_full_enabled()) { + local_irq_save(flags); + rcu_momentary_dyntick_idle(); + local_irq_restore(flags); + } } stoppedat = jiffies; n_launders_cb_snap = READ_ONCE(n_launders_cb); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7c67ea561b36..66354ef776aa 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -375,6 +375,7 @@ void rcu_momentary_dyntick_idle(void) WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); rcu_preempt_deferred_qs(current); } +EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle); /** * rcu_is_cpu_rrupt_from_idle - see if interrupted from idle -- cgit v1.2.3 From 66e4c33b51bc515ca803c0948cf1525b53ffd631 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 Aug 2019 16:14:00 -0700 Subject: rcu: Force tick on for nohz_full CPUs not reaching quiescent states CPUs running for long time periods in the kernel in nohz_full mode might leave the scheduling-clock interrupt disabled for then full duration of their in-kernel execution. This can (among other things) delay grace periods. This commit therefore forces the tick back on for any nohz_full CPU that is failing to pass through a quiescent state upon return from interrupt, which the resched_cpu() will induce. Reported-by: Joel Fernandes [ paulmck: Clear ->rcu_forced_tick as reported by Joel Fernandes testing. ] [ paulmck: Apply Joel Fernandes TICK_DEP_MASK_RCU->TICK_DEP_BIT_RCU fix. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 38 +++++++++++++++++++++++++++++++------- kernel/rcu/tree.h | 1 + 2 files changed, 32 insertions(+), 7 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 66354ef776aa..9fda33864ede 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -651,6 +651,12 @@ static __always_inline void rcu_nmi_exit_common(bool irq) */ if (rdp->dynticks_nmi_nesting != 1) { trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks); + if (tick_nohz_full_cpu(rdp->cpu) && + rdp->dynticks_nmi_nesting == 2 && + rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) { + rdp->rcu_forced_tick = true; + tick_dep_set_cpu(rdp->cpu, TICK_DEP_MASK_RCU); + } WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */ rdp->dynticks_nmi_nesting - 2); return; @@ -886,6 +892,18 @@ void rcu_irq_enter_irqson(void) local_irq_restore(flags); } +/* + * If the scheduler-clock interrupt was enabled on a nohz_full CPU + * in order to get to a quiescent state, disable it. + */ +void rcu_disable_tick_upon_qs(struct rcu_data *rdp) +{ + if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) { + tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU); + rdp->rcu_forced_tick = false; + } +} + /** * rcu_is_watching - see if RCU thinks that the current CPU is not idle * @@ -1980,6 +1998,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) if (!offloaded) needwake = rcu_accelerate_cbs(rnp, rdp); + rcu_disable_tick_upon_qs(rdp); rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); /* ^^^ Released rnp->lock */ if (needwake) @@ -2265,6 +2284,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) int cpu; unsigned long flags; unsigned long mask; + struct rcu_data *rdp; struct rcu_node *rnp; rcu_for_each_leaf_node(rnp) { @@ -2289,8 +2309,11 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) for_each_leaf_node_possible_cpu(rnp, cpu) { unsigned long bit = leaf_node_cpu_bit(rnp, cpu); if ((rnp->qsmask & bit) != 0) { - if (f(per_cpu_ptr(&rcu_data, cpu))) + rdp = per_cpu_ptr(&rcu_data, cpu); + if (f(rdp)) { mask |= bit; + rcu_disable_tick_upon_qs(rdp); + } } } if (mask != 0) { @@ -2318,7 +2341,7 @@ void rcu_force_quiescent_state(void) rnp = __this_cpu_read(rcu_data.mynode); for (; rnp != NULL; rnp = rnp->parent) { ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) || - !raw_spin_trylock(&rnp->fqslock); + !raw_spin_trylock(&rnp->fqslock); if (rnp_old != NULL) raw_spin_unlock(&rnp_old->fqslock); if (ret) @@ -2851,7 +2874,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp) { if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) { rcu_barrier_trace(TPS("LastCB"), -1, - rcu_state.barrier_sequence); + rcu_state.barrier_sequence); complete(&rcu_state.barrier_completion); } else { rcu_barrier_trace(TPS("CB"), -1, rcu_state.barrier_sequence); @@ -2875,7 +2898,7 @@ static void rcu_barrier_func(void *unused) } else { debug_rcu_head_unqueue(&rdp->barrier_head); rcu_barrier_trace(TPS("IRQNQ"), -1, - rcu_state.barrier_sequence); + rcu_state.barrier_sequence); } rcu_nocb_unlock(rdp); } @@ -2902,7 +2925,7 @@ void rcu_barrier(void) /* Did someone else do our work for us? */ if (rcu_seq_done(&rcu_state.barrier_sequence, s)) { rcu_barrier_trace(TPS("EarlyExit"), -1, - rcu_state.barrier_sequence); + rcu_state.barrier_sequence); smp_mb(); /* caller's subsequent code after above check. */ mutex_unlock(&rcu_state.barrier_mutex); return; @@ -2934,11 +2957,11 @@ void rcu_barrier(void) continue; if (rcu_segcblist_n_cbs(&rdp->cblist)) { rcu_barrier_trace(TPS("OnlineQ"), cpu, - rcu_state.barrier_sequence); + rcu_state.barrier_sequence); smp_call_function_single(cpu, rcu_barrier_func, NULL, 1); } else { rcu_barrier_trace(TPS("OnlineNQ"), cpu, - rcu_state.barrier_sequence); + rcu_state.barrier_sequence); } } put_online_cpus(); @@ -3160,6 +3183,7 @@ void rcu_cpu_starting(unsigned int cpu) rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq); rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags); if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */ + rcu_disable_tick_upon_qs(rdp); /* Report QS -after- changing ->qsmaskinitnext! */ rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); } else { diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index c612f306fe89..055c31781d3a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -181,6 +181,7 @@ struct rcu_data { atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, so heavy quiescent state! */ bool rcu_urgent_qs; /* GP old need light quiescent state. */ + bool rcu_forced_tick; /* Forced tick to provide QS. */ #ifdef CONFIG_RCU_FAST_NO_HZ bool all_lazy; /* All CPU's CBs lazy at idle start? */ unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */ -- cgit v1.2.3 From b200a0489517d9e5a52e983183e890f573454ebd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 Aug 2019 13:24:49 -0700 Subject: rcu: Force nohz_full tick on upon irq enter instead of exit There is interrupt-exit code that forces on the tick for nohz_full CPUs failing to respond to the current grace period in a timely fashion. However, this code must compare ->dynticks_nmi_nesting to the value 2 in the interrupt-exit fastpath. This commit therefore moves this code to the interrupt-entry fastpath, where a lighter-weight comparison to zero may be used. Reported-by: Joel Fernandes [ paulmck: Apply Joel Fernandes TICK_DEP_MASK_RCU->TICK_DEP_BIT_RCU fix. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9fda33864ede..8dc878406c71 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -651,12 +651,6 @@ static __always_inline void rcu_nmi_exit_common(bool irq) */ if (rdp->dynticks_nmi_nesting != 1) { trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks); - if (tick_nohz_full_cpu(rdp->cpu) && - rdp->dynticks_nmi_nesting == 2 && - rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) { - rdp->rcu_forced_tick = true; - tick_dep_set_cpu(rdp->cpu, TICK_DEP_MASK_RCU); - } WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */ rdp->dynticks_nmi_nesting - 2); return; @@ -831,6 +825,11 @@ static __always_inline void rcu_nmi_enter_common(bool irq) rcu_cleanup_after_idle(); incby = 1; + } else if (tick_nohz_full_cpu(rdp->cpu) && + rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE && + rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) { + rdp->rcu_forced_tick = true; + tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU); } trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), rdp->dynticks_nmi_nesting, -- cgit v1.2.3 From 516e5ae0c94016294d3ef175454215b235d03945 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 5 Sep 2019 10:26:41 -0700 Subject: rcu: Reset CPU hints when reporting a quiescent state In some cases, tracing shows that need_heavy_qs is still set even though urgent_qs was cleared upon reporting of a quiescent state. One such case is when the softirq reports that a CPU has passed quiescent state. Commit 671a63517cf9 ("rcu: Avoid unnecessary softirq when system is idle") fixed a bug where core_needs_qs was not being cleared. In order to avoid running into similar situations with the urgent-grace-period flags, this commit causes rcu_disable_urgency_upon_qs(), previously rcu_disable_tick_upon_qs(), to clear the urgency hints, ->rcu_urgent_qs and ->rcu_need_heavy_qs. Note that it is possible for CPUs to go offline with these urgency hints still set. This is handled because rcu_disable_urgency_upon_qs() is also invoked during the online process. Because these hints can be cleared both by the corresponding CPU and by the grace-period kthread, this commit also adds a number of READ_ONCE() and WRITE_ONCE() calls. Tested overnight with rcutorture running for 60 minutes on all configurations of RCU. Signed-off-by: "Joel Fernandes (Google)" [ paulmck: Clear urgency flags in rcu_disable_urgency_upon_qs(). ] [ paulmck: Remove ->core_needs_qs from the set cleared at quiescent state. ] [ paulmck: Make rcu_disable_urgency_upon_qs static per kbuild test robot. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8dc878406c71..82caca305cae 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -827,7 +827,7 @@ static __always_inline void rcu_nmi_enter_common(bool irq) incby = 1; } else if (tick_nohz_full_cpu(rdp->cpu) && rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE && - rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) { + READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) { rdp->rcu_forced_tick = true; tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU); } @@ -892,11 +892,14 @@ void rcu_irq_enter_irqson(void) } /* - * If the scheduler-clock interrupt was enabled on a nohz_full CPU - * in order to get to a quiescent state, disable it. + * If any sort of urgency was applied to the current CPU (for example, + * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order + * to get to a quiescent state, disable it. */ -void rcu_disable_tick_upon_qs(struct rcu_data *rdp) +static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp) { + WRITE_ONCE(rdp->rcu_urgent_qs, false); + WRITE_ONCE(rdp->rcu_need_heavy_qs, false); if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) { tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU); rdp->rcu_forced_tick = false; @@ -1997,7 +2000,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) if (!offloaded) needwake = rcu_accelerate_cbs(rnp, rdp); - rcu_disable_tick_upon_qs(rdp); + rcu_disable_urgency_upon_qs(rdp); rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); /* ^^^ Released rnp->lock */ if (needwake) @@ -2311,7 +2314,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) rdp = per_cpu_ptr(&rcu_data, cpu); if (f(rdp)) { mask |= bit; - rcu_disable_tick_upon_qs(rdp); + rcu_disable_urgency_upon_qs(rdp); } } } @@ -3182,7 +3185,7 @@ void rcu_cpu_starting(unsigned int cpu) rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq); rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags); if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */ - rcu_disable_tick_upon_qs(rdp); + rcu_disable_urgency_upon_qs(rdp); /* Report QS -after- changing ->qsmaskinitnext! */ rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); } else { -- cgit v1.2.3 From ed93dfc6bc0084485ccad1ff6bd2ea81ab2c03cd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Sep 2019 14:09:56 -0700 Subject: rcu: Confine ->core_needs_qs accesses to the corresponding CPU Commit 671a63517cf9 ("rcu: Avoid unnecessary softirq when system is idle") fixed a bug that could result in an indefinite number of unnecessary invocations of the RCU_SOFTIRQ handler at the trailing edge of a scheduler-clock interrupt. However, the fix introduced off-CPU stores to ->core_needs_qs. These writes did not conflict with the on-CPU stores because the CPU's leaf rcu_node structure's ->lock was held across all such stores. However, the loads from ->core_needs_qs were not promoted to READ_ONCE() and, worse yet, the code loading from ->core_needs_qs was written assuming that it was only ever updated by the corresponding CPU. So operation has been robust, but only by luck. This situation is therefore an accident waiting to happen. This commit therefore takes a different approach. Instead of clearing ->core_needs_qs from the grace-period kthread's force-quiescent-state processing, it modifies the rcu_pending() function to suppress the rcu_sched_clock_irq() function's call to invoke_rcu_core() if there is no grace period in progress. This avoids the infinite needless RCU_SOFTIRQ handlers while still keeping all accesses to ->core_needs_qs local to the corresponding CPU. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 82caca305cae..0c8046bc5ec7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1989,7 +1989,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp) return; } mask = rdp->grpmask; - rdp->core_needs_qs = false; if ((rnp->qsmask & mask) == 0) { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } else { @@ -2819,6 +2818,7 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu); */ static int rcu_pending(void) { + bool gp_in_progress; struct rcu_data *rdp = this_cpu_ptr(&rcu_data); struct rcu_node *rnp = rdp->mynode; @@ -2834,7 +2834,8 @@ static int rcu_pending(void) return 0; /* Is the RCU core waiting for a quiescent state from this CPU? */ - if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) + gp_in_progress = rcu_gp_in_progress(); + if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress) return 1; /* Does this CPU have callbacks ready to invoke? */ @@ -2842,8 +2843,7 @@ static int rcu_pending(void) return 1; /* Has RCU gone idle with this CPU needing another grace period? */ - if (!rcu_gp_in_progress() && - rcu_segcblist_is_enabled(&rdp->cblist) && + if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) && (!IS_ENABLED(CONFIG_RCU_NOCB_CPU) || !rcu_segcblist_is_offloaded(&rdp->cblist)) && !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) -- cgit v1.2.3 From dd7dafd1ad50aa9ed7958235431f243ea131ee7d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 14 Sep 2019 03:39:22 -0700 Subject: rcu: Make kernel-mode nohz_full CPUs invoke the RCU core processing If a nohz_full CPU is idle or executing in userspace, it makes good sense to keep it out of RCU core processing. After all, the RCU grace-period kthread can see its quiescent states and all of its callbacks are offloaded, so there is nothing for RCU core processing to do. However, if a nohz_full CPU is executing in kernel space, the RCU grace-period kthread cannot do anything for it, so such a CPU must report its own quiescent states. This commit therefore makes nohz_full CPUs skip RCU core processing only if the scheduler-clock interrupt caught them in idle or in userspace. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0c8046bc5ec7..4e6ae2699d2e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -497,7 +497,7 @@ module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next module_param(rcu_kick_kthreads, bool, 0644); static void force_qs_rnp(int (*f)(struct rcu_data *rdp)); -static int rcu_pending(void); +static int rcu_pending(int user); /* * Return the number of RCU GPs completed thus far for debug & stats. @@ -2267,7 +2267,7 @@ void rcu_sched_clock_irq(int user) __this_cpu_write(rcu_data.rcu_urgent_qs, false); } rcu_flavor_sched_clock_irq(user); - if (rcu_pending()) + if (rcu_pending(user)) invoke_rcu_core(); trace_rcu_utilization(TPS("End scheduler-tick")); @@ -2816,7 +2816,7 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu); * CPU-local state are performed first. However, we must check for CPU * stalls first, else we might not get a chance. */ -static int rcu_pending(void) +static int rcu_pending(int user) { bool gp_in_progress; struct rcu_data *rdp = this_cpu_ptr(&rcu_data); @@ -2829,8 +2829,8 @@ static int rcu_pending(void) if (rcu_nocb_need_deferred_wakeup(rdp)) return 1; - /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */ - if (rcu_nohz_full_cpu()) + /* Is this a nohz_full CPU in userspace or idle? (Ignore RCU if so.) */ + if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu()) return 0; /* Is the RCU core waiting for a quiescent state from this CPU? */ -- cgit v1.2.3 From 05ef9e9eb3dade21413680f41eb0170778e8ae2b Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 15 Aug 2019 22:59:14 -0400 Subject: rcu: Ensure that ->rcu_urgent_qs is set before resched IPI The RCU-specific resched_cpu() function sends a resched IPI to the specified CPU, which can be used to force the tick on for a given nohz_full CPU. This is needed when this nohz_full CPU is looping in the kernel while blocking the current grace period. However, for the tick to actually be forced on in all cases, that CPU's rcu_data structure's ->rcu_urgent_qs flag must be set beforehand. This commit therefore causes rcu_implicit_dynticks_qs() to set this flag prior to invoking resched_cpu() on a holdout nohz_full CPU. Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 81105141b6a8..0d83b1944e19 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1073,6 +1073,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (tick_nohz_full_cpu(rdp->cpu) && time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3)) { + WRITE_ONCE(*ruqp, true); resched_cpu(rdp->cpu); WRITE_ONCE(rdp->last_fqs_resched, jiffies); } -- cgit v1.2.3