summaryrefslogtreecommitdiff
path: root/kernel/rcu/tree_plugin.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu/tree_plugin.h')
-rw-r--r--kernel/rcu/tree_plugin.h267
1 files changed, 132 insertions, 135 deletions
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0a571e9a0f1d..8c0ec0f5a027 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -58,38 +58,33 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
*/
static void __init rcu_bootup_announce_oddness(void)
{
-#ifdef CONFIG_RCU_TRACE
- pr_info("\tRCU debugfs-based tracing is enabled.\n");
-#endif
-#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
- pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
- CONFIG_RCU_FANOUT);
-#endif
-#ifdef CONFIG_RCU_FANOUT_EXACT
- pr_info("\tHierarchical RCU autobalancing is disabled.\n");
-#endif
-#ifdef CONFIG_RCU_FAST_NO_HZ
- pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
-#endif
-#ifdef CONFIG_PROVE_RCU
- pr_info("\tRCU lockdep checking is enabled.\n");
-#endif
-#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
- pr_info("\tRCU torture testing starts during boot.\n");
-#endif
-#if defined(CONFIG_RCU_CPU_STALL_INFO)
- pr_info("\tAdditional per-CPU info printed with stalls.\n");
-#endif
-#if NUM_RCU_LVL_4 != 0
- pr_info("\tFour-level hierarchy is enabled.\n");
-#endif
+ if (IS_ENABLED(CONFIG_RCU_TRACE))
+ pr_info("\tRCU debugfs-based tracing is enabled.\n");
+ if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) ||
+ (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32))
+ pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
+ CONFIG_RCU_FANOUT);
+ if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT))
+ pr_info("\tHierarchical RCU autobalancing is disabled.\n");
+ if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
+ pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
+ if (IS_ENABLED(CONFIG_PROVE_RCU))
+ pr_info("\tRCU lockdep checking is enabled.\n");
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE))
+ pr_info("\tRCU torture testing starts during boot.\n");
+ if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO))
+ pr_info("\tAdditional per-CPU info printed with stalls.\n");
+ if (NUM_RCU_LVL_4 != 0)
+ pr_info("\tFour-level hierarchy is enabled.\n");
+ if (CONFIG_RCU_FANOUT_LEAF != 16)
+ pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
+ CONFIG_RCU_FANOUT_LEAF);
if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
if (nr_cpu_ids != NR_CPUS)
pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
-#ifdef CONFIG_RCU_BOOST
- pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
-#endif
+ if (IS_ENABLED(CONFIG_RCU_BOOST))
+ pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
}
#ifdef CONFIG_PREEMPT_RCU
@@ -180,7 +175,7 @@ static void rcu_preempt_note_context_switch(void)
* But first, note that the current CPU must still be
* on line!
*/
- WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
+ WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
@@ -233,43 +228,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
}
/*
- * Record a quiescent state for all tasks that were previously queued
- * on the specified rcu_node structure and that were blocking the current
- * RCU grace period. The caller must hold the specified rnp->lock with
- * irqs disabled, and this lock is released upon return, but irqs remain
- * disabled.
- */
-static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
- __releases(rnp->lock)
-{
- unsigned long mask;
- struct rcu_node *rnp_p;
-
- if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
- return; /* Still need more quiescent states! */
- }
-
- rnp_p = rnp->parent;
- if (rnp_p == NULL) {
- /*
- * Either there is only one rcu_node in the tree,
- * or tasks were kicked up to root rcu_node due to
- * CPUs going offline.
- */
- rcu_report_qs_rsp(&rcu_preempt_state, flags);
- return;
- }
-
- /* Report up the rest of the hierarchy. */
- mask = rnp->grpmask;
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
- raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
- rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
-}
-
-/*
* Advance a ->blkd_tasks-list pointer to the next entry, instead
* returning NULL if at the end of the list.
*/
@@ -300,7 +258,6 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
*/
void rcu_read_unlock_special(struct task_struct *t)
{
- bool empty;
bool empty_exp;
bool empty_norm;
bool empty_exp_now;
@@ -334,7 +291,13 @@ void rcu_read_unlock_special(struct task_struct *t)
}
/* Hardware IRQ handlers cannot block, complain if they get here. */
- if (WARN_ON_ONCE(in_irq() || in_serving_softirq())) {
+ if (in_irq() || in_serving_softirq()) {
+ lockdep_rcu_suspicious(__FILE__, __LINE__,
+ "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
+ pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n",
+ t->rcu_read_unlock_special.s,
+ t->rcu_read_unlock_special.b.blocked,
+ t->rcu_read_unlock_special.b.need_qs);
local_irq_restore(flags);
return;
}
@@ -356,7 +319,6 @@ void rcu_read_unlock_special(struct task_struct *t)
break;
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
}
- empty = !rcu_preempt_has_tasks(rnp);
empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
empty_exp = !rcu_preempted_readers_exp(rnp);
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -377,14 +339,6 @@ void rcu_read_unlock_special(struct task_struct *t)
#endif /* #ifdef CONFIG_RCU_BOOST */
/*
- * If this was the last task on the list, go see if we
- * need to propagate ->qsmaskinit bit clearing up the
- * rcu_node tree.
- */
- if (!empty && !rcu_preempt_has_tasks(rnp))
- rcu_cleanup_dead_rnp(rnp);
-
- /*
* If this was the last task on the current list, and if
* we aren't waiting on any CPUs, report the quiescent state.
* Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
@@ -399,7 +353,8 @@ void rcu_read_unlock_special(struct task_struct *t)
rnp->grplo,
rnp->grphi,
!!rnp->gp_tasks);
- rcu_report_unblock_qs_rnp(rnp, flags);
+ rcu_report_unblock_qs_rnp(&rcu_preempt_state,
+ rnp, flags);
} else {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
@@ -520,10 +475,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
WARN_ON_ONCE(rnp->qsmask);
}
-#ifdef CONFIG_HOTPLUG_CPU
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
/*
* Check for a quiescent state from the current CPU. When a task blocks,
* the task is recorded in the corresponding CPU's rcu_node structure,
@@ -585,7 +536,7 @@ void synchronize_rcu(void)
"Illegal synchronize_rcu() in RCU read-side critical section");
if (!rcu_scheduler_active)
return;
- if (rcu_expedited)
+ if (rcu_gp_is_expedited())
synchronize_rcu_expedited();
else
wait_rcu_gp(call_rcu);
@@ -630,9 +581,6 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
*
- * Most callers will set the "wake" flag, but the task initiating the
- * expedited grace period need not wake itself.
- *
* Caller must hold sync_rcu_preempt_exp_mutex.
*/
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
@@ -667,29 +615,85 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
/*
* Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure. If there are no such
- * tasks, report it up the rcu_node hierarchy.
+ * grace period for the specified rcu_node structure, phase 1. If there
+ * are such tasks, set the ->expmask bits up the rcu_node tree and also
+ * set the ->expmask bits on the leaf rcu_node structures to tell phase 2
+ * that work is needed here.
*
- * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
- * CPU hotplug operations.
+ * Caller must hold sync_rcu_preempt_exp_mutex.
*/
static void
-sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
{
unsigned long flags;
- int must_wait = 0;
+ unsigned long mask;
+ struct rcu_node *rnp_up;
raw_spin_lock_irqsave(&rnp->lock, flags);
smp_mb__after_unlock_lock();
+ WARN_ON_ONCE(rnp->expmask);
+ WARN_ON_ONCE(rnp->exp_tasks);
if (!rcu_preempt_has_tasks(rnp)) {
+ /* No blocked tasks, nothing to do. */
raw_spin_unlock_irqrestore(&rnp->lock, flags);
- } else {
+ return;
+ }
+ /* Call for Phase 2 and propagate ->expmask bits up the tree. */
+ rnp->expmask = 1;
+ rnp_up = rnp;
+ while (rnp_up->parent) {
+ mask = rnp_up->grpmask;
+ rnp_up = rnp_up->parent;
+ if (rnp_up->expmask & mask)
+ break;
+ raw_spin_lock(&rnp_up->lock); /* irqs already off */
+ smp_mb__after_unlock_lock();
+ rnp_up->expmask |= mask;
+ raw_spin_unlock(&rnp_up->lock); /* irqs still off */
+ }
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure, phase 2. If the
+ * leaf rcu_node structure has its ->expmask field set, check for tasks.
+ * If there are some, clear ->expmask and set ->exp_tasks accordingly,
+ * then initiate RCU priority boosting. Otherwise, clear ->expmask and
+ * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits,
+ * enabling rcu_read_unlock_special() to do the bit-clearing.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void
+sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&rnp->lock, flags);
+ smp_mb__after_unlock_lock();
+ if (!rnp->expmask) {
+ /* Phase 1 didn't do anything, so Phase 2 doesn't either. */
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ return;
+ }
+
+ /* Phase 1 is over. */
+ rnp->expmask = 0;
+
+ /*
+ * If there are still blocked tasks, set up ->exp_tasks so that
+ * rcu_read_unlock_special() will wake us and then boost them.
+ */
+ if (rcu_preempt_has_tasks(rnp)) {
rnp->exp_tasks = rnp->blkd_tasks.next;
rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
- must_wait = 1;
+ return;
}
- if (!must_wait)
- rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
+
+ /* No longer any blocked tasks, so undo bit setting. */
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ rcu_report_exp_rnp(rsp, rnp, false);
}
/**
@@ -706,7 +710,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
*/
void synchronize_rcu_expedited(void)
{
- unsigned long flags;
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_preempt_state;
unsigned long snap;
@@ -757,19 +760,16 @@ void synchronize_rcu_expedited(void)
/* force all RCU readers onto ->blkd_tasks lists. */
synchronize_sched_expedited();
- /* Initialize ->expmask for all non-leaf rcu_node structures. */
- rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
- rnp->expmask = rnp->qsmaskinit;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
- }
-
- /* Snapshot current state of ->blkd_tasks lists. */
+ /*
+ * Snapshot current state of ->blkd_tasks lists into ->expmask.
+ * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special()
+ * to start clearing them. Doing this in one phase leads to
+ * strange races between setting and clearing bits, so just say "no"!
+ */
+ rcu_for_each_leaf_node(rsp, rnp)
+ sync_rcu_preempt_exp_init1(rsp, rnp);
rcu_for_each_leaf_node(rsp, rnp)
- sync_rcu_preempt_exp_init(rsp, rnp);
- if (NUM_RCU_NODES > 1)
- sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
+ sync_rcu_preempt_exp_init2(rsp, rnp);
put_online_cpus();
@@ -859,8 +859,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
return 0;
}
-#ifdef CONFIG_HOTPLUG_CPU
-
/*
* Because there is no preemptible RCU, there can be no readers blocked.
*/
@@ -869,8 +867,6 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
return false;
}
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
/*
* Because preemptible RCU does not exist, we never have to check for
* tasks blocked within RCU read-side critical sections.
@@ -1170,7 +1166,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
* Returns zero if all is well, a negated errno otherwise.
*/
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
- struct rcu_node *rnp)
+ struct rcu_node *rnp)
{
int rnp_index = rnp - &rsp->node[0];
unsigned long flags;
@@ -1180,7 +1176,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
if (&rcu_preempt_state != rsp)
return 0;
- if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
+ if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
return 0;
rsp->boost = 1;
@@ -1273,7 +1269,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
struct task_struct *t = rnp->boost_kthread_task;
- unsigned long mask = rnp->qsmaskinit;
+ unsigned long mask = rcu_rnp_online_cpus(rnp);
cpumask_var_t cm;
int cpu;
@@ -1945,7 +1941,8 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
rhp = ACCESS_ONCE(rdp->nocb_follower_head);
/* Having no rcuo kthread but CBs after scheduler starts is bad! */
- if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) {
+ if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp &&
+ rcu_scheduler_fully_active) {
/* RCU callback enqueued before CPU first came online??? */
pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
cpu, rhp->func);
@@ -2392,18 +2389,8 @@ void __init rcu_init_nohz(void)
pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
for_each_rcu_flavor(rsp) {
- for_each_cpu(cpu, rcu_nocb_mask) {
- struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-
- /*
- * If there are early callbacks, they will need
- * to be moved to the nocb lists.
- */
- WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] !=
- &rdp->nxtlist &&
- rdp->nxttail[RCU_NEXT_TAIL] != NULL);
- init_nocb_callback_list(rdp);
- }
+ for_each_cpu(cpu, rcu_nocb_mask)
+ init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu));
rcu_organize_nocb_kthreads(rsp);
}
}
@@ -2540,6 +2527,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
if (!rcu_is_nocb_cpu(rdp->cpu))
return false;
+ /* If there are early-boot callbacks, move them to nocb lists. */
+ if (rdp->nxtlist) {
+ rdp->nocb_head = rdp->nxtlist;
+ rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL];
+ atomic_long_set(&rdp->nocb_q_count, rdp->qlen);
+ atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy);
+ rdp->nxtlist = NULL;
+ rdp->qlen = 0;
+ rdp->qlen_lazy = 0;
+ }
rdp->nxttail[RCU_NEXT_TAIL] = NULL;
return true;
}
@@ -2763,7 +2760,8 @@ static void rcu_sysidle_exit(int irq)
/*
* Check to see if the current CPU is idle. Note that usermode execution
- * does not count as idle. The caller must have disabled interrupts.
+ * does not count as idle. The caller must have disabled interrupts,
+ * and must be running on tick_do_timer_cpu.
*/
static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
unsigned long *maxj)
@@ -2784,8 +2782,8 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
if (!*isidle || rdp->rsp != rcu_state_p ||
cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
return;
- if (rcu_gp_in_progress(rdp->rsp))
- WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
+ /* Verify affinity of current kthread. */
+ WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
/* Pick up current idle and NMI-nesting counter and check. */
cur = atomic_read(&rdtp->dynticks_idle);
@@ -3068,11 +3066,10 @@ static void rcu_bind_gp_kthread(void)
return;
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
cpu = tick_do_timer_cpu;
- if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu)
+ if (cpu >= 0 && cpu < nr_cpu_ids)
set_cpus_allowed_ptr(current, cpumask_of(cpu));
#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
- if (!is_housekeeping_cpu(raw_smp_processor_id()))
- housekeeping_affine(current);
+ housekeeping_affine(current);
#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
}