diff options
Diffstat (limited to 'kernel/rcu')
-rw-r--r-- | kernel/rcu/rcutorture.c | 213 | ||||
-rw-r--r-- | kernel/rcu/tiny.c | 8 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 46 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 16 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 316 | ||||
-rw-r--r-- | kernel/rcu/update.c | 32 |
6 files changed, 440 insertions, 191 deletions
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 948a7693748e..0bcd53adac73 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -49,11 +49,19 @@ #include <linux/trace_clock.h> #include <asm/byteorder.h> #include <linux/torture.h> +#include <linux/vmalloc.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>"); +torture_param(int, cbflood_inter_holdoff, HZ, + "Holdoff between floods (jiffies)"); +torture_param(int, cbflood_intra_holdoff, 1, + "Holdoff between bursts (jiffies)"); +torture_param(int, cbflood_n_burst, 3, "# bursts in flood, zero to disable"); +torture_param(int, cbflood_n_per_burst, 20000, + "# callbacks per burst in flood"); torture_param(int, fqs_duration, 0, "Duration of fqs bursts (us), 0 to disable"); torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); @@ -96,10 +104,12 @@ module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); static int nrealreaders; +static int ncbflooders; static struct task_struct *writer_task; static struct task_struct **fakewriter_tasks; static struct task_struct **reader_tasks; static struct task_struct *stats_task; +static struct task_struct **cbflood_task; static struct task_struct *fqs_task; static struct task_struct *boost_tasks[NR_CPUS]; static struct task_struct *stall_task; @@ -138,6 +148,7 @@ static long n_rcu_torture_boosts; static long n_rcu_torture_timers; static long n_barrier_attempts; static long n_barrier_successes; +static atomic_long_t n_cbfloods; static struct list_head rcu_torture_removed; static int rcu_torture_writer_state; @@ -182,7 +193,7 @@ static u64 notrace rcu_trace_clock_local(void) #endif /* #else #ifdef CONFIG_RCU_TRACE */ static unsigned long boost_starttime; /* jiffies of next boost test start. */ -DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ +static DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ /* and boost task create/destroy. */ static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ static bool barrier_phase; /* Test phase. */ @@ -242,7 +253,7 @@ struct rcu_torture_ops { void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); void (*cb_barrier)(void); void (*fqs)(void); - void (*stats)(char *page); + void (*stats)(void); int irq_capable; int can_boost; const char *name; @@ -525,21 +536,21 @@ static void srcu_torture_barrier(void) srcu_barrier(&srcu_ctl); } -static void srcu_torture_stats(char *page) +static void srcu_torture_stats(void) { int cpu; int idx = srcu_ctl.completed & 0x1; - page += sprintf(page, "%s%s per-CPU(idx=%d):", - torture_type, TORTURE_FLAG, idx); + pr_alert("%s%s per-CPU(idx=%d):", + torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { long c0, c1; c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; - page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1); + pr_cont(" %d(%ld,%ld)", cpu, c0, c1); } - sprintf(page, "\n"); + pr_cont("\n"); } static void srcu_torture_synchronize_expedited(void) @@ -707,6 +718,58 @@ checkwait: stutter_wait("rcu_torture_boost"); return 0; } +static void rcu_torture_cbflood_cb(struct rcu_head *rhp) +{ +} + +/* + * RCU torture callback-flood kthread. Repeatedly induces bursts of calls + * to call_rcu() or analogous, increasing the probability of occurrence + * of callback-overflow corner cases. + */ +static int +rcu_torture_cbflood(void *arg) +{ + int err = 1; + int i; + int j; + struct rcu_head *rhp; + + if (cbflood_n_per_burst > 0 && + cbflood_inter_holdoff > 0 && + cbflood_intra_holdoff > 0 && + cur_ops->call && + cur_ops->cb_barrier) { + rhp = vmalloc(sizeof(*rhp) * + cbflood_n_burst * cbflood_n_per_burst); + err = !rhp; + } + if (err) { + VERBOSE_TOROUT_STRING("rcu_torture_cbflood disabled: Bad args or OOM"); + while (!torture_must_stop()) + schedule_timeout_interruptible(HZ); + return 0; + } + VERBOSE_TOROUT_STRING("rcu_torture_cbflood task started"); + do { + schedule_timeout_interruptible(cbflood_inter_holdoff); + atomic_long_inc(&n_cbfloods); + WARN_ON(signal_pending(current)); + for (i = 0; i < cbflood_n_burst; i++) { + for (j = 0; j < cbflood_n_per_burst; j++) { + cur_ops->call(&rhp[i * cbflood_n_per_burst + j], + rcu_torture_cbflood_cb); + } + schedule_timeout_interruptible(cbflood_intra_holdoff); + WARN_ON(signal_pending(current)); + } + cur_ops->cb_barrier(); + stutter_wait("rcu_torture_cbflood"); + } while (!torture_must_stop()); + torture_kthread_stopping("rcu_torture_cbflood"); + return 0; +} + /* * RCU torture force-quiescent-state kthread. Repeatedly induces * bursts of calls to force_quiescent_state(), increasing the probability @@ -1031,10 +1094,15 @@ rcu_torture_reader(void *arg) } /* - * Create an RCU-torture statistics message in the specified buffer. + * Print torture statistics. Caller must ensure that there is only + * one call to this function at a given time!!! This is normally + * accomplished by relying on the module system to only have one copy + * of the module loaded, and then by giving the rcu_torture_stats + * kthread full control (or the init/cleanup functions when rcu_torture_stats + * thread is not running). */ static void -rcu_torture_printk(char *page) +rcu_torture_stats_print(void) { int cpu; int i; @@ -1052,55 +1120,61 @@ rcu_torture_printk(char *page) if (pipesummary[i] != 0) break; } - page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, - "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", - rcu_torture_current, - rcu_torture_current_version, - list_empty(&rcu_torture_freelist), - atomic_read(&n_rcu_torture_alloc), - atomic_read(&n_rcu_torture_alloc_fail), - atomic_read(&n_rcu_torture_free)); - page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ", - atomic_read(&n_rcu_torture_mberror), - n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror); - page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ", - n_rcu_torture_boost_failure, - n_rcu_torture_boosts, - n_rcu_torture_timers); - page = torture_onoff_stats(page); - page += sprintf(page, "barrier: %ld/%ld:%ld", - n_barrier_successes, - n_barrier_attempts, - n_rcu_torture_barrier_error); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", + rcu_torture_current, + rcu_torture_current_version, + list_empty(&rcu_torture_freelist), + atomic_read(&n_rcu_torture_alloc), + atomic_read(&n_rcu_torture_alloc_fail), + atomic_read(&n_rcu_torture_free)); + pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ", + atomic_read(&n_rcu_torture_mberror), + n_rcu_torture_boost_ktrerror, + n_rcu_torture_boost_rterror); + pr_cont("rtbf: %ld rtb: %ld nt: %ld ", + n_rcu_torture_boost_failure, + n_rcu_torture_boosts, + n_rcu_torture_timers); + torture_onoff_stats(); + pr_cont("barrier: %ld/%ld:%ld ", + n_barrier_successes, + n_barrier_attempts, + n_rcu_torture_barrier_error); + pr_cont("cbflood: %ld\n", atomic_long_read(&n_cbfloods)); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); if (atomic_read(&n_rcu_torture_mberror) != 0 || n_rcu_torture_barrier_error != 0 || n_rcu_torture_boost_ktrerror != 0 || n_rcu_torture_boost_rterror != 0 || n_rcu_torture_boost_failure != 0 || i > 1) { - page += sprintf(page, "!!! "); + pr_cont("%s", "!!! "); atomic_inc(&n_rcu_torture_error); WARN_ON_ONCE(1); } - page += sprintf(page, "Reader Pipe: "); + pr_cont("Reader Pipe: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", pipesummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Reader Batch: "); + pr_cont(" %ld", pipesummary[i]); + pr_cont("\n"); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("Reader Batch: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", batchsummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Free-Block Circulation: "); + pr_cont(" %ld", batchsummary[i]); + pr_cont("\n"); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("Free-Block Circulation: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { - page += sprintf(page, " %d", - atomic_read(&rcu_torture_wcount[i])); + pr_cont(" %d", atomic_read(&rcu_torture_wcount[i])); } - page += sprintf(page, "\n"); + pr_cont("\n"); + if (cur_ops->stats) - cur_ops->stats(page); + cur_ops->stats(); if (rtcv_snap == rcu_torture_current_version && rcu_torture_current != NULL) { int __maybe_unused flags; @@ -1109,10 +1183,9 @@ rcu_torture_printk(char *page) rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed); - page += sprintf(page, - "??? Writer stall state %d g%lu c%lu f%#x\n", - rcu_torture_writer_state, - gpnum, completed, flags); + pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n", + rcu_torture_writer_state, + gpnum, completed, flags); show_rcu_gp_kthreads(); rcutorture_trace_dump(); } @@ -1120,30 +1193,6 @@ rcu_torture_printk(char *page) } /* - * Print torture statistics. Caller must ensure that there is only - * one call to this function at a given time!!! This is normally - * accomplished by relying on the module system to only have one copy - * of the module loaded, and then by giving the rcu_torture_stats - * kthread full control (or the init/cleanup functions when rcu_torture_stats - * thread is not running). - */ -static void -rcu_torture_stats_print(void) -{ - int size = nr_cpu_ids * 200 + 8192; - char *buf; - - buf = kmalloc(size, GFP_KERNEL); - if (!buf) { - pr_err("rcu-torture: Out of memory, need: %d", size); - return; - } - rcu_torture_printk(buf); - pr_alert("%s", buf); - kfree(buf); -} - -/* * Periodically prints torture statistics, if periodic statistics printing * was specified via the stat_interval module parameter. */ @@ -1447,6 +1496,8 @@ rcu_torture_cleanup(void) torture_stop_kthread(rcu_torture_stats, stats_task); torture_stop_kthread(rcu_torture_fqs, fqs_task); + for (i = 0; i < ncbflooders; i++) + torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]); if ((test_boost == 1 && cur_ops->can_boost) || test_boost == 2) { unregister_cpu_notifier(&rcutorture_cpu_nb); @@ -1693,6 +1744,24 @@ rcu_torture_init(void) goto unwind; if (object_debug) rcu_test_debug_objects(); + if (cbflood_n_burst > 0) { + /* Create the cbflood threads */ + ncbflooders = (num_online_cpus() + 3) / 4; + cbflood_task = kcalloc(ncbflooders, sizeof(*cbflood_task), + GFP_KERNEL); + if (!cbflood_task) { + VERBOSE_TOROUT_ERRSTRING("out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + for (i = 0; i < ncbflooders; i++) { + firsterr = torture_create_kthread(rcu_torture_cbflood, + NULL, + cbflood_task[i]); + if (firsterr) + goto unwind; + } + } rcutorture_record_test_transition(); torture_init_end(); return 0; diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index d9efcc13008c..4a55a2416e3c 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -51,7 +51,7 @@ static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; #include "tiny_plugin.h" -/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ +/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */ static void rcu_idle_enter_common(long long newval) { if (newval) { @@ -62,7 +62,7 @@ static void rcu_idle_enter_common(long long newval) } RCU_TRACE(trace_rcu_dyntick(TPS("Start"), rcu_dynticks_nesting, newval)); - if (!is_idle_task(current)) { + if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), @@ -114,7 +114,7 @@ void rcu_irq_exit(void) } EXPORT_SYMBOL_GPL(rcu_irq_exit); -/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ +/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */ static void rcu_idle_exit_common(long long oldval) { if (oldval) { @@ -123,7 +123,7 @@ static void rcu_idle_exit_common(long long oldval) return; } RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); - if (!is_idle_task(current)) { + if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1b70cb6fbe3c..be0d0a1b7129 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -79,9 +79,18 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; * the tracing userspace tools to be able to decipher the string * address to the matching string. */ -#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ +#ifdef CONFIG_TRACING +# define DEFINE_RCU_TPS(sname) \ static char sname##_varname[] = #sname; \ -static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \ +static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; +# define RCU_STATE_NAME(sname) sname##_varname +#else +# define DEFINE_RCU_TPS(sname) +# define RCU_STATE_NAME(sname) __stringify(sname) +#endif + +#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ +DEFINE_RCU_TPS(sname) \ struct rcu_state sname##_state = { \ .level = { &sname##_state.node[0] }, \ .call = cr, \ @@ -93,7 +102,7 @@ struct rcu_state sname##_state = { \ .orphan_donetail = &sname##_state.orphan_donelist, \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ - .name = sname##_varname, \ + .name = RCU_STATE_NAME(sname), \ .abbr = sabbr, \ }; \ DEFINE_PER_CPU(struct rcu_data, sname##_data) @@ -819,7 +828,7 @@ bool notrace __rcu_is_watching(void) */ bool notrace rcu_is_watching(void) { - int ret; + bool ret; preempt_disable(); ret = __rcu_is_watching(); @@ -1668,7 +1677,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) if (fqs_state == RCU_SAVE_DYNTICK) { /* Collect dyntick-idle snapshots. */ if (is_sysidle_rcu_state(rsp)) { - isidle = 1; + isidle = true; maxj = jiffies - ULONG_MAX / 4; } force_qs_rnp(rsp, dyntick_save_progress_counter, @@ -1677,14 +1686,15 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) fqs_state = RCU_FORCE_QS; } else { /* Handle dyntick-idle and offline CPUs. */ - isidle = 0; + isidle = false; force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); } /* Clear flag to prevent immediate re-entry. */ if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); - ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS; + ACCESS_ONCE(rsp->gp_flags) = + ACCESS_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS; raw_spin_unlock_irq(&rnp->lock); } return fqs_state; @@ -1786,7 +1796,7 @@ static int __noreturn rcu_gp_kthread(void *arg) if (rcu_gp_init(rsp)) break; cond_resched(); - flush_signals(current); + WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("reqwaitsig")); @@ -1832,7 +1842,7 @@ static int __noreturn rcu_gp_kthread(void *arg) } else { /* Deal with stray signal. */ cond_resched(); - flush_signals(current); + WARN_ON(signal_pending(current)); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("fqswaitsig")); @@ -1928,7 +1938,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); - wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ + rcu_gp_kthread_wake(rsp); } /* @@ -2210,8 +2220,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) /* Adjust any no-longer-needed kthreads. */ rcu_boost_kthread_setaffinity(rnp, -1); - /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ - /* Exclude any attempts to start a new grace period. */ mutex_lock(&rsp->onoff_mutex); raw_spin_lock_irqsave(&rsp->orphan_lock, flags); @@ -2449,7 +2457,7 @@ static void force_qs_rnp(struct rcu_state *rsp, for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { if ((rnp->qsmask & bit) != 0) { if ((rnp->qsmaskinit & bit) != 0) - *isidle = 0; + *isidle = false; if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) mask |= bit; } @@ -2505,9 +2513,10 @@ static void force_quiescent_state(struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp_old->lock, flags); return; /* Someone beat us to it. */ } - ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS; + ACCESS_ONCE(rsp->gp_flags) = + ACCESS_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS; raw_spin_unlock_irqrestore(&rnp_old->lock, flags); - wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ + rcu_gp_kthread_wake(rsp); } /* @@ -3442,6 +3451,7 @@ static int rcu_cpu_notify(struct notifier_block *self, case CPU_UP_PREPARE_FROZEN: rcu_prepare_cpu(cpu); rcu_prepare_kthreads(cpu); + rcu_spawn_all_nocb_kthreads(cpu); break; case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -3489,7 +3499,7 @@ static int rcu_pm_notify(struct notifier_block *self, } /* - * Spawn the kthread that handles this RCU flavor's grace periods. + * Spawn the kthreads that handle each RCU flavor's grace periods. */ static int __init rcu_spawn_gp_kthread(void) { @@ -3498,6 +3508,7 @@ static int __init rcu_spawn_gp_kthread(void) struct rcu_state *rsp; struct task_struct *t; + rcu_scheduler_fully_active = 1; for_each_rcu_flavor(rsp) { t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); BUG_ON(IS_ERR(t)); @@ -3505,8 +3516,9 @@ static int __init rcu_spawn_gp_kthread(void) raw_spin_lock_irqsave(&rnp->lock, flags); rsp->gp_kthread = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - rcu_spawn_nocb_kthreads(rsp); } + rcu_spawn_nocb_kthreads(); + rcu_spawn_boost_kthreads(); return 0; } early_initcall(rcu_spawn_gp_kthread); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6a86eb7bac45..ffedcb9d42dc 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -350,7 +350,7 @@ struct rcu_data { int nocb_p_count_lazy; /* (approximate). */ wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ struct task_struct *nocb_kthread; - bool nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ + int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ /* The following fields are used by the leader, hence own cacheline. */ struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; @@ -383,6 +383,11 @@ struct rcu_data { #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK +/* Values for nocb_defer_wakeup field in struct rcu_data. */ +#define RCU_NOGP_WAKE_NOT 0 +#define RCU_NOGP_WAKE 1 +#define RCU_NOGP_WAKE_FORCE 2 + #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) /* For jiffies_till_first_fqs and */ /* and jiffies_till_next_fqs. */ @@ -572,6 +577,7 @@ static void rcu_preempt_do_callbacks(void); static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ +static void __init rcu_spawn_boost_kthreads(void); static void rcu_prepare_kthreads(int cpu); static void rcu_cleanup_after_idle(int cpu); static void rcu_prepare_for_idle(int cpu); @@ -589,10 +595,14 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, struct rcu_data *rdp, unsigned long flags); -static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); +static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); static void do_nocb_deferred_wakeup(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); -static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); +static void rcu_spawn_all_nocb_kthreads(int cpu); +static void __init rcu_spawn_nocb_kthreads(void); +#ifdef CONFIG_RCU_NOCB_CPU +static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ static void __maybe_unused rcu_kick_nohz_cpu(int cpu); static bool init_nocb_callback_list(struct rcu_data *rdp); static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a7997e272564..59318ea32bc8 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -85,33 +85,6 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); -#ifdef CONFIG_RCU_NOCB_CPU -#ifndef CONFIG_RCU_NOCB_CPU_NONE - if (!have_rcu_nocb_mask) { - zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL); - have_rcu_nocb_mask = true; - } -#ifdef CONFIG_RCU_NOCB_CPU_ZERO - pr_info("\tOffload RCU callbacks from CPU 0\n"); - cpumask_set_cpu(0, rcu_nocb_mask); -#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ -#ifdef CONFIG_RCU_NOCB_CPU_ALL - pr_info("\tOffload RCU callbacks from all CPUs\n"); - cpumask_copy(rcu_nocb_mask, cpu_possible_mask); -#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ - if (have_rcu_nocb_mask) { - if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { - pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); - cpumask_and(rcu_nocb_mask, cpu_possible_mask, - rcu_nocb_mask); - } - cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); - pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); - if (rcu_nocb_poll) - pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); - } -#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ } #ifdef CONFIG_TREE_PREEMPT_RCU @@ -134,7 +107,7 @@ static void __init rcu_bootup_announce(void) * Return the number of RCU-preempt batches processed thus far * for debug and statistics. */ -long rcu_batches_completed_preempt(void) +static long rcu_batches_completed_preempt(void) { return rcu_preempt_state.completed; } @@ -897,7 +870,8 @@ void synchronize_rcu_expedited(void) /* Clean up and exit. */ smp_mb(); /* ensure expedited GP seen before counter increment. */ - ACCESS_ONCE(sync_rcu_preempt_exp_count)++; + ACCESS_ONCE(sync_rcu_preempt_exp_count) = + sync_rcu_preempt_exp_count + 1; unlock_mb_ret: mutex_unlock(&sync_rcu_preempt_exp_mutex); mb_ret: @@ -1462,14 +1436,13 @@ static struct smp_hotplug_thread rcu_cpu_thread_spec = { }; /* - * Spawn all kthreads -- called as soon as the scheduler is running. + * Spawn boost kthreads -- called as soon as the scheduler is running. */ -static int __init rcu_spawn_kthreads(void) +static void __init rcu_spawn_boost_kthreads(void) { struct rcu_node *rnp; int cpu; - rcu_scheduler_fully_active = 1; for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); @@ -1479,9 +1452,7 @@ static int __init rcu_spawn_kthreads(void) rcu_for_each_leaf_node(rcu_state_p, rnp) (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } - return 0; } -early_initcall(rcu_spawn_kthreads); static void rcu_prepare_kthreads(int cpu) { @@ -1519,12 +1490,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { } -static int __init rcu_scheduler_really_started(void) +static void __init rcu_spawn_boost_kthreads(void) { - rcu_scheduler_fully_active = 1; - return 0; } -early_initcall(rcu_scheduler_really_started); static void rcu_prepare_kthreads(int cpu) { @@ -1625,7 +1593,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) /* Exit early if we advanced recently. */ if (jiffies == rdtp->last_advance_all) - return 0; + return false; rdtp->last_advance_all = jiffies; for_each_rcu_flavor(rsp) { @@ -2075,7 +2043,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force) if (!ACCESS_ONCE(rdp_leader->nocb_kthread)) return; if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) { - /* Prior xchg orders against prior callback enqueue. */ + /* Prior smp_mb__after_atomic() orders against prior enqueue. */ ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false; wake_up(&rdp_leader->nocb_wq); } @@ -2104,6 +2072,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, ACCESS_ONCE(*old_rhpp) = rhp; atomic_long_add(rhcount, &rdp->nocb_q_count); atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); + smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ /* If we are not being polled and there is a kthread, awaken it ... */ t = ACCESS_ONCE(rdp->nocb_kthread); @@ -2120,16 +2089,23 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else { - rdp->nocb_defer_wakeup = true; + rdp->nocb_defer_wakeup = RCU_NOGP_WAKE; trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmptyIsDeferred")); } rdp->qlen_last_fqs_check = 0; } else if (len > rdp->qlen_last_fqs_check + qhimark) { /* ... or if many callbacks queued. */ - wake_nocb_leader(rdp, true); + if (!irqs_disabled_flags(flags)) { + wake_nocb_leader(rdp, true); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeOvf")); + } else { + rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeOvfIsDeferred")); + } rdp->qlen_last_fqs_check = LONG_MAX / 2; - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf")); } else { trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); } @@ -2150,7 +2126,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, { if (!rcu_is_nocb_cpu(rdp->cpu)) - return 0; + return false; __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags); if (__is_kfree_rcu_offset((unsigned long)rhp->func)) trace_rcu_kfree_callback(rdp->rsp->name, rhp, @@ -2161,7 +2137,18 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, trace_rcu_callback(rdp->rsp->name, rhp, -atomic_long_read(&rdp->nocb_q_count_lazy), -atomic_long_read(&rdp->nocb_q_count)); - return 1; + + /* + * If called from an extended quiescent state with interrupts + * disabled, invoke the RCU core in order to allow the idle-entry + * deferred-wakeup check to function. + */ + if (irqs_disabled_flags(flags) && + !rcu_is_watching() && + cpu_online(smp_processor_id())) + invoke_rcu_core(); + + return true; } /* @@ -2177,7 +2164,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ if (!rcu_is_nocb_cpu(smp_processor_id())) - return 0; + return false; rsp->qlen = 0; rsp->qlen_lazy = 0; @@ -2196,7 +2183,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, rsp->orphan_nxtlist = NULL; rsp->orphan_nxttail = &rsp->orphan_nxtlist; } - return 1; + return true; } /* @@ -2229,7 +2216,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); if (likely(d)) break; - flush_signals(current); + WARN_ON(signal_pending(current)); trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait")); } trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait")); @@ -2288,7 +2275,7 @@ wait_again: if (!rcu_nocb_poll) trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "WokeEmpty"); - flush_signals(current); + WARN_ON(signal_pending(current)); schedule_timeout_interruptible(1); /* Rescan in case we were a victim of memory ordering. */ @@ -2327,6 +2314,7 @@ wait_again: atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count); atomic_long_add(rdp->nocb_gp_count_lazy, &rdp->nocb_follower_count_lazy); + smp_mb__after_atomic(); /* Store *tail before wakeup. */ if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { /* * List was empty, wake up the follower. @@ -2367,7 +2355,7 @@ static void nocb_follower_wait(struct rcu_data *rdp) if (!rcu_nocb_poll) trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeEmpty"); - flush_signals(current); + WARN_ON(signal_pending(current)); schedule_timeout_interruptible(1); } } @@ -2428,15 +2416,16 @@ static int rcu_nocb_kthread(void *arg) list = next; } trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); - ACCESS_ONCE(rdp->nocb_p_count) -= c; - ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl; + ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c; + ACCESS_ONCE(rdp->nocb_p_count_lazy) = + rdp->nocb_p_count_lazy - cl; rdp->n_nocbs_invoked += c; } return 0; } /* Is a deferred wakeup of rcu_nocb_kthread() required? */ -static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) +static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) { return ACCESS_ONCE(rdp->nocb_defer_wakeup); } @@ -2444,11 +2433,79 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) /* Do a deferred wakeup of rcu_nocb_kthread(). */ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) { + int ndw; + if (!rcu_nocb_need_deferred_wakeup(rdp)) return; - ACCESS_ONCE(rdp->nocb_defer_wakeup) = false; - wake_nocb_leader(rdp, false); - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty")); + ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup); + ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT; + wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); +} + +void __init rcu_init_nohz(void) +{ + int cpu; + bool need_rcu_nocb_mask = true; + struct rcu_state *rsp; + +#ifdef CONFIG_RCU_NOCB_CPU_NONE + need_rcu_nocb_mask = false; +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ + +#if defined(CONFIG_NO_HZ_FULL) + if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask)) + need_rcu_nocb_mask = true; +#endif /* #if defined(CONFIG_NO_HZ_FULL) */ + + if (!have_rcu_nocb_mask && need_rcu_nocb_mask) { + if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) { + pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n"); + return; + } + have_rcu_nocb_mask = true; + } + if (!have_rcu_nocb_mask) + return; + +#ifdef CONFIG_RCU_NOCB_CPU_ZERO + pr_info("\tOffload RCU callbacks from CPU 0\n"); + cpumask_set_cpu(0, rcu_nocb_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ +#ifdef CONFIG_RCU_NOCB_CPU_ALL + pr_info("\tOffload RCU callbacks from all CPUs\n"); + cpumask_copy(rcu_nocb_mask, cpu_possible_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ +#if defined(CONFIG_NO_HZ_FULL) + if (tick_nohz_full_running) + cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask); +#endif /* #if defined(CONFIG_NO_HZ_FULL) */ + + if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { + pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); + cpumask_and(rcu_nocb_mask, cpu_possible_mask, + rcu_nocb_mask); + } + cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); + pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); + if (rcu_nocb_poll) + pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); + + for_each_rcu_flavor(rsp) { + for_each_cpu(cpu, rcu_nocb_mask) { + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + + /* + * If there are early callbacks, they will need + * to be moved to the nocb lists. + */ + WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] != + &rdp->nxtlist && + rdp->nxttail[RCU_NEXT_TAIL] != NULL); + init_nocb_callback_list(rdp); + } + rcu_organize_nocb_kthreads(rsp); + } } /* Initialize per-rcu_data variables for no-CBs CPUs. */ @@ -2459,15 +2516,85 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) rdp->nocb_follower_tail = &rdp->nocb_follower_head; } +/* + * If the specified CPU is a no-CBs CPU that does not already have its + * rcuo kthread for the specified RCU flavor, spawn it. If the CPUs are + * brought online out of order, this can require re-organizing the + * leader-follower relationships. + */ +static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu) +{ + struct rcu_data *rdp; + struct rcu_data *rdp_last; + struct rcu_data *rdp_old_leader; + struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu); + struct task_struct *t; + + /* + * If this isn't a no-CBs CPU or if it already has an rcuo kthread, + * then nothing to do. + */ + if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread) + return; + + /* If we didn't spawn the leader first, reorganize! */ + rdp_old_leader = rdp_spawn->nocb_leader; + if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) { + rdp_last = NULL; + rdp = rdp_old_leader; + do { + rdp->nocb_leader = rdp_spawn; + if (rdp_last && rdp != rdp_spawn) + rdp_last->nocb_next_follower = rdp; + rdp_last = rdp; + rdp = rdp->nocb_next_follower; + rdp_last->nocb_next_follower = NULL; + } while (rdp); + rdp_spawn->nocb_next_follower = rdp_old_leader; + } + + /* Spawn the kthread for this CPU and RCU flavor. */ + t = kthread_run(rcu_nocb_kthread, rdp_spawn, + "rcuo%c/%d", rsp->abbr, cpu); + BUG_ON(IS_ERR(t)); + ACCESS_ONCE(rdp_spawn->nocb_kthread) = t; +} + +/* + * If the specified CPU is a no-CBs CPU that does not already have its + * rcuo kthreads, spawn them. + */ +static void rcu_spawn_all_nocb_kthreads(int cpu) +{ + struct rcu_state *rsp; + + if (rcu_scheduler_fully_active) + for_each_rcu_flavor(rsp) + rcu_spawn_one_nocb_kthread(rsp, cpu); +} + +/* + * Once the scheduler is running, spawn rcuo kthreads for all online + * no-CBs CPUs. This assumes that the early_initcall()s happen before + * non-boot CPUs come online -- if this changes, we will need to add + * some mutual exclusion. + */ +static void __init rcu_spawn_nocb_kthreads(void) +{ + int cpu; + + for_each_online_cpu(cpu) + rcu_spawn_all_nocb_kthreads(cpu); +} + /* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */ static int rcu_nocb_leader_stride = -1; module_param(rcu_nocb_leader_stride, int, 0444); /* - * Create a kthread for each RCU flavor for each no-CBs CPU. - * Also initialize leader-follower relationships. + * Initialize leader-follower relationships for all no-CBs CPU. */ -static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) +static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp) { int cpu; int ls = rcu_nocb_leader_stride; @@ -2475,14 +2602,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) struct rcu_data *rdp; struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */ struct rcu_data *rdp_prev = NULL; - struct task_struct *t; - if (rcu_nocb_mask == NULL) + if (!have_rcu_nocb_mask) return; -#if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL) - if (tick_nohz_full_running) - cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask); -#endif /* #if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL) */ if (ls == -1) { ls = int_sqrt(nr_cpu_ids); rcu_nocb_leader_stride = ls; @@ -2505,21 +2627,15 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) rdp_prev->nocb_next_follower = rdp; } rdp_prev = rdp; - - /* Spawn the kthread for this CPU. */ - t = kthread_run(rcu_nocb_kthread, rdp, - "rcuo%c/%d", rsp->abbr, cpu); - BUG_ON(IS_ERR(t)); - ACCESS_ONCE(rdp->nocb_kthread) = t; } } /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ static bool init_nocb_callback_list(struct rcu_data *rdp) { - if (rcu_nocb_mask == NULL || - !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask)) + if (!rcu_is_nocb_cpu(rdp->cpu)) return false; + rdp->nxttail[RCU_NEXT_TAIL] = NULL; return true; } @@ -2541,21 +2657,21 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy, unsigned long flags) { - return 0; + return false; } static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, struct rcu_data *rdp, unsigned long flags) { - return 0; + return false; } static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) { } -static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) +static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) { return false; } @@ -2564,7 +2680,11 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) { } -static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) +static void rcu_spawn_all_nocb_kthreads(int cpu) +{ +} + +static void __init rcu_spawn_nocb_kthreads(void) { } @@ -2595,16 +2715,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu) #ifdef CONFIG_NO_HZ_FULL_SYSIDLE -/* - * Define RCU flavor that holds sysidle state. This needs to be the - * most active flavor of RCU. - */ -#ifdef CONFIG_PREEMPT_RCU -static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state; -#else /* #ifdef CONFIG_PREEMPT_RCU */ -static struct rcu_state *rcu_sysidle_state = &rcu_sched_state; -#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ - static int full_sysidle_state; /* Current system-idle state. */ #define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */ #define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */ @@ -2622,6 +2732,10 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq) { unsigned long j; + /* If there are no nohz_full= CPUs, no need to track this. */ + if (!tick_nohz_full_enabled()) + return; + /* Adjust nesting, check for fully idle. */ if (irq) { rdtp->dynticks_idle_nesting--; @@ -2687,6 +2801,10 @@ void rcu_sysidle_force_exit(void) */ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq) { + /* If there are no nohz_full= CPUs, no need to track this. */ + if (!tick_nohz_full_enabled()) + return; + /* Adjust nesting, check for already non-idle. */ if (irq) { rdtp->dynticks_idle_nesting++; @@ -2741,12 +2859,16 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, unsigned long j; struct rcu_dynticks *rdtp = rdp->dynticks; + /* If there are no nohz_full= CPUs, don't check system-wide idleness. */ + if (!tick_nohz_full_enabled()) + return; + /* * If some other CPU has already reported non-idle, if this is * not the flavor of RCU that tracks sysidle state, or if this * is an offline or the timekeeping CPU, nothing to do. */ - if (!*isidle || rdp->rsp != rcu_sysidle_state || + if (!*isidle || rdp->rsp != rcu_state_p || cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) return; if (rcu_gp_in_progress(rdp->rsp)) @@ -2772,7 +2894,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, */ static bool is_sysidle_rcu_state(struct rcu_state *rsp) { - return rsp == rcu_sysidle_state; + return rsp == rcu_state_p; } /* @@ -2850,7 +2972,7 @@ static void rcu_sysidle_cancel(void) static void rcu_sysidle_report(struct rcu_state *rsp, int isidle, unsigned long maxj, bool gpkt) { - if (rsp != rcu_sysidle_state) + if (rsp != rcu_state_p) return; /* Wrong flavor, ignore. */ if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) return; /* Running state machine from timekeeping CPU. */ @@ -2867,6 +2989,10 @@ static void rcu_sysidle_report(struct rcu_state *rsp, int isidle, static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, unsigned long maxj) { + /* If there are no nohz_full= CPUs, no need to track this. */ + if (!tick_nohz_full_enabled()) + return; + rcu_sysidle_report(rsp, isidle, maxj, true); } @@ -2893,7 +3019,8 @@ static void rcu_sysidle_cb(struct rcu_head *rhp) /* * Check to see if the system is fully idle, other than the timekeeping CPU. - * The caller must have disabled interrupts. + * The caller must have disabled interrupts. This is not intended to be + * called unless tick_nohz_full_enabled(). */ bool rcu_sys_is_idle(void) { @@ -2919,13 +3046,12 @@ bool rcu_sys_is_idle(void) /* Scan all the CPUs looking for nonidle CPUs. */ for_each_possible_cpu(cpu) { - rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu); + rdp = per_cpu_ptr(rcu_state_p->rda, cpu); rcu_sysidle_check_cpu(rdp, &isidle, &maxj); if (!isidle) break; } - rcu_sysidle_report(rcu_sysidle_state, - isidle, maxj, false); + rcu_sysidle_report(rcu_state_p, isidle, maxj, false); oldrss = rss; rss = ACCESS_ONCE(full_sysidle_state); } @@ -2952,7 +3078,7 @@ bool rcu_sys_is_idle(void) * provided by the memory allocator. */ if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL && - !rcu_gp_in_progress(rcu_sysidle_state) && + !rcu_gp_in_progress(rcu_state_p) && !rsh.inuse && xchg(&rsh.inuse, 1) == 0) call_rcu(&rsh.rh, rcu_sysidle_cb); return false; diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 4056d7992a6c..ea8ea7b16e11 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -137,6 +137,38 @@ int notrace debug_lockdep_rcu_enabled(void) EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); /** + * rcu_read_lock_held() - might we be in RCU read-side critical section? + * + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU + * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, + * this assumes we are in an RCU read-side critical section unless it can + * prove otherwise. This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. + * + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. + * + * Note that rcu_read_lock() and the matching rcu_read_unlock() must + * occur in the same context, for example, it is illegal to invoke + * rcu_read_unlock() in process context if the matching rcu_read_lock() + * was invoked from within an irq handler. + * + * Note that rcu_read_lock() is disallowed if the CPU is either idle or + * offline from an RCU perspective, so check for those as well. + */ +int rcu_read_lock_held(void) +{ + if (!debug_lockdep_rcu_enabled()) + return 1; + if (!rcu_is_watching()) + return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; + return lock_is_held(&rcu_lock_map); +} +EXPORT_SYMBOL_GPL(rcu_read_lock_held); + +/** * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * * Check for bottom half being disabled, which covers both the |