summaryrefslogtreecommitdiff
path: root/kernel/rcu/tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--kernel/rcu/tree.c203
1 files changed, 155 insertions, 48 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1250e4bd4b85..f9c0ca2ccf0c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -534,8 +534,8 @@ module_param(rcu_kick_kthreads, bool, 0644);
* How long the grace period must be before we start recruiting
* quiescent-state help from rcu_note_context_switch().
*/
-static ulong jiffies_till_sched_qs = HZ / 20;
-module_param(jiffies_till_sched_qs, ulong, 0644);
+static ulong jiffies_till_sched_qs = HZ / 10;
+module_param(jiffies_till_sched_qs, ulong, 0444);
static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
struct rcu_data *rdp);
@@ -734,7 +734,7 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
int *fp = &rnp->need_future_gp[idx];
- RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_future_needs_gp() invoked with irqs enabled!!!");
+ lockdep_assert_irqs_disabled();
return READ_ONCE(*fp);
}
@@ -746,7 +746,7 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
static bool
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
- RCU_LOCKDEP_WARN(!irqs_disabled(), "cpu_needs_another_gp() invoked with irqs enabled!!!");
+ lockdep_assert_irqs_disabled();
if (rcu_gp_in_progress(rsp))
return false; /* No, a grace period is already in progress. */
if (rcu_future_needs_gp(rsp))
@@ -773,7 +773,7 @@ static void rcu_eqs_enter_common(bool user)
struct rcu_data *rdp;
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
- RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_enter_common() invoked with irqs enabled!!!");
+ lockdep_assert_irqs_disabled();
trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0);
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
!user && !is_idle_task(current)) {
@@ -837,10 +837,13 @@ static void rcu_eqs_enter(bool user)
* We crowbar the ->dynticks_nesting field to zero to allow for
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
+ *
+ * If you add or remove a call to rcu_idle_enter(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_idle_enter(void)
{
- RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_idle_enter() invoked with irqs enabled!!!");
+ lockdep_assert_irqs_disabled();
rcu_eqs_enter(false);
}
@@ -852,10 +855,13 @@ void rcu_idle_enter(void)
* is permitted between this call and rcu_user_exit(). This way the
* CPU doesn't need to maintain the tick for RCU maintenance purposes
* when the CPU runs in userspace.
+ *
+ * If you add or remove a call to rcu_user_enter(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_user_enter(void)
{
- RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_user_enter() invoked with irqs enabled!!!");
+ lockdep_assert_irqs_disabled();
rcu_eqs_enter(true);
}
#endif /* CONFIG_NO_HZ_FULL */
@@ -875,13 +881,21 @@ void rcu_user_enter(void)
* Use things like work queues to work around this limitation.
*
* You have been warned.
+ *
+ * If you add or remove a call to rcu_irq_exit(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_irq_exit(void)
{
struct rcu_dynticks *rdtp;
- RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
+ lockdep_assert_irqs_disabled();
rdtp = this_cpu_ptr(&rcu_dynticks);
+
+ /* Page faults can happen in NMI handlers, so check... */
+ if (rdtp->dynticks_nmi_nesting)
+ return;
+
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
rdtp->dynticks_nesting < 1);
if (rdtp->dynticks_nesting <= 1) {
@@ -894,6 +908,9 @@ void rcu_irq_exit(void)
/*
* Wrapper for rcu_irq_exit() where interrupts are enabled.
+ *
+ * If you add or remove a call to rcu_irq_exit_irqson(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_irq_exit_irqson(void)
{
@@ -942,7 +959,7 @@ static void rcu_eqs_exit(bool user)
struct rcu_dynticks *rdtp;
long long oldval;
- RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_exit() invoked with irqs enabled!!!");
+ lockdep_assert_irqs_disabled();
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
@@ -966,6 +983,9 @@ static void rcu_eqs_exit(bool user)
* allow for the possibility of usermode upcalls messing up our count
* of interrupt nesting level during the busy period that is just
* now starting.
+ *
+ * If you add or remove a call to rcu_idle_exit(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_idle_exit(void)
{
@@ -982,6 +1002,9 @@ void rcu_idle_exit(void)
*
* Exit RCU idle mode while entering the kernel because it can
* run a RCU read side critical section anytime.
+ *
+ * If you add or remove a call to rcu_user_exit(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_user_exit(void)
{
@@ -1007,14 +1030,22 @@ void rcu_user_exit(void)
* Use things like work queues to work around this limitation.
*
* You have been warned.
+ *
+ * If you add or remove a call to rcu_irq_enter(), be sure to test with
+ * CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_irq_enter(void)
{
struct rcu_dynticks *rdtp;
long long oldval;
- RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
+ lockdep_assert_irqs_disabled();
rdtp = this_cpu_ptr(&rcu_dynticks);
+
+ /* Page faults can happen in NMI handlers, so check... */
+ if (rdtp->dynticks_nmi_nesting)
+ return;
+
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
@@ -1027,6 +1058,9 @@ void rcu_irq_enter(void)
/*
* Wrapper for rcu_irq_enter() where interrupts are enabled.
+ *
+ * If you add or remove a call to rcu_irq_enter_irqson(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_irq_enter_irqson(void)
{
@@ -1045,6 +1079,9 @@ void rcu_irq_enter_irqson(void)
* that the CPU is active. This implementation permits nested NMIs, as
* long as the nesting level does not overflow an int. (You will probably
* run out of stack space first.)
+ *
+ * If you add or remove a call to rcu_nmi_enter(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_nmi_enter(void)
{
@@ -1077,6 +1114,9 @@ void rcu_nmi_enter(void)
* RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
* to let the RCU grace-period handling know that the CPU is back to
* being RCU-idle.
+ *
+ * If you add or remove a call to rcu_nmi_exit(), be sure to test
+ * with CONFIG_RCU_EQS_DEBUG=y.
*/
void rcu_nmi_exit(void)
{
@@ -1197,6 +1237,22 @@ static int rcu_is_cpu_rrupt_from_idle(void)
}
/*
+ * We are reporting a quiescent state on behalf of some other CPU, so
+ * it is our responsibility to check for and handle potential overflow
+ * of the rcu_node ->gpnum counter with respect to the rcu_data counters.
+ * After all, the CPU might be in deep idle state, and thus executing no
+ * code whatsoever.
+ */
+static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
+{
+ lockdep_assert_held(&rnp->lock);
+ if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, rnp->gpnum))
+ WRITE_ONCE(rdp->gpwrap, true);
+ if (ULONG_CMP_LT(rdp->rcu_iw_gpnum + ULONG_MAX / 4, rnp->gpnum))
+ rdp->rcu_iw_gpnum = rnp->gpnum + ULONG_MAX / 4;
+}
+
+/*
* Snapshot the specified CPU's dynticks counter so that we can later
* credit them with an implicit quiescent state. Return 1 if this CPU
* is in dynticks idle mode, which is an extended quiescent state.
@@ -1206,15 +1262,34 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
- if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
- rdp->mynode->gpnum))
- WRITE_ONCE(rdp->gpwrap, true);
+ rcu_gpnum_ovf(rdp->mynode, rdp);
return 1;
}
return 0;
}
/*
+ * Handler for the irq_work request posted when a grace period has
+ * gone on for too long, but not yet long enough for an RCU CPU
+ * stall warning. Set state appropriately, but just complain if
+ * there is unexpected state on entry.
+ */
+static void rcu_iw_handler(struct irq_work *iwp)
+{
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+
+ rdp = container_of(iwp, struct rcu_data, rcu_iw);
+ rnp = rdp->mynode;
+ raw_spin_lock_rcu_node(rnp);
+ if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) {
+ rdp->rcu_iw_gpnum = rnp->gpnum;
+ rdp->rcu_iw_pending = false;
+ }
+ raw_spin_unlock_rcu_node(rnp);
+}
+
+/*
* Return true if the specified CPU has passed through a quiescent
* state by virtue of being in or having passed through an dynticks
* idle state since the last call to dyntick_save_progress_counter()
@@ -1225,8 +1300,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
unsigned long jtsq;
bool *rnhqp;
bool *ruqp;
- unsigned long rjtsc;
- struct rcu_node *rnp;
+ struct rcu_node *rnp = rdp->mynode;
/*
* If the CPU passed through or entered a dynticks idle phase with
@@ -1239,34 +1313,25 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
rdp->dynticks_fqs++;
+ rcu_gpnum_ovf(rnp, rdp);
return 1;
}
- /* Compute and saturate jiffies_till_sched_qs. */
- jtsq = jiffies_till_sched_qs;
- rjtsc = rcu_jiffies_till_stall_check();
- if (jtsq > rjtsc / 2) {
- WRITE_ONCE(jiffies_till_sched_qs, rjtsc);
- jtsq = rjtsc / 2;
- } else if (jtsq < 1) {
- WRITE_ONCE(jiffies_till_sched_qs, 1);
- jtsq = 1;
- }
-
/*
* Has this CPU encountered a cond_resched_rcu_qs() since the
* beginning of the grace period? For this to be the case,
* the CPU has to have noticed the current grace period. This
* might not be the case for nohz_full CPUs looping in the kernel.
*/
- rnp = rdp->mynode;
+ jtsq = jiffies_till_sched_qs;
ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
+ rcu_gpnum_ovf(rnp, rdp);
return 1;
- } else {
+ } else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) {
/* Load rcu_qs_ctr before store to rcu_urgent_qs. */
smp_store_release(ruqp, true);
}
@@ -1275,6 +1340,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp))) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
rdp->offline_fqs++;
+ rcu_gpnum_ovf(rnp, rdp);
return 1;
}
@@ -1294,10 +1360,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
* updates are only once every few jiffies, the probability of
* lossage (and thus of slight grace-period extension) is
* quite low.
- *
- * Note that if the jiffies_till_sched_qs boot/sysfs parameter
- * is set too high, we override with half of the RCU CPU stall
- * warning delay.
*/
rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu);
if (!READ_ONCE(*rnhqp) &&
@@ -1306,15 +1368,26 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
WRITE_ONCE(*rnhqp, true);
/* Store rcu_need_heavy_qs before rcu_urgent_qs. */
smp_store_release(ruqp, true);
- rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
+ rdp->rsp->jiffies_resched += jtsq; /* Re-enable beating. */
}
/*
- * If more than halfway to RCU CPU stall-warning time, do
- * a resched_cpu() to try to loosen things up a bit.
+ * If more than halfway to RCU CPU stall-warning time, do a
+ * resched_cpu() to try to loosen things up a bit. Also check to
+ * see if the CPU is getting hammered with interrupts, but only
+ * once per grace period, just to keep the IPIs down to a dull roar.
*/
- if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2)
+ if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) {
resched_cpu(rdp->cpu);
+ if (IS_ENABLED(CONFIG_IRQ_WORK) &&
+ !rdp->rcu_iw_pending && rdp->rcu_iw_gpnum != rnp->gpnum &&
+ (rnp->ffmask & rdp->grpmask)) {
+ init_irq_work(&rdp->rcu_iw, rcu_iw_handler);
+ rdp->rcu_iw_pending = true;
+ rdp->rcu_iw_gpnum = rnp->gpnum;
+ irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
+ }
+ }
return 0;
}
@@ -1503,6 +1576,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
{
int cpu;
unsigned long flags;
+ struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
struct rcu_node *rnp = rcu_get_root(rsp);
long totqlen = 0;
@@ -1518,7 +1592,9 @@ static void print_cpu_stall(struct rcu_state *rsp)
*/
pr_err("INFO: %s self-detected stall on CPU", rsp->name);
print_cpu_stall_info_begin();
+ raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
print_cpu_stall_info(rsp, smp_processor_id());
+ raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
print_cpu_stall_info_end();
for_each_possible_cpu(cpu)
totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
@@ -1912,6 +1988,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
rdp->core_needs_qs = need_gp;
zero_cpu_stall_ticks(rdp);
WRITE_ONCE(rdp->gpwrap, false);
+ rcu_gpnum_ovf(rnp, rdp);
}
return ret;
}
@@ -3087,9 +3164,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
* read-side critical sections have completed. call_rcu_sched() assumes
* that the read-side critical sections end on enabling of preemption
* or on voluntary preemption.
- * RCU read-side critical sections are delimited by :
- * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
- * - anything that disables preemption.
+ * RCU read-side critical sections are delimited by:
+ *
+ * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
+ * - anything that disables preemption.
*
* These may be nested.
*
@@ -3114,11 +3192,12 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
* handler. This means that read-side critical sections in process
* context must not be interrupted by softirqs. This interface is to be
* used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by :
- * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
- * OR
- * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- * These may be nested.
+ * RCU read-side critical sections are delimited by:
+ *
+ * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR
+ * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
+ *
+ * These may be nested.
*
* See the description of call_rcu() for more detailed information on
* memory ordering guarantees.
@@ -3690,6 +3769,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->cpu_no_qs.b.norm = true;
rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
rdp->core_needs_qs = false;
+ rdp->rcu_iw_pending = false;
+ rdp->rcu_iw_gpnum = rnp->gpnum - 1;
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
@@ -3727,10 +3808,24 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
*/
int rcutree_online_cpu(unsigned int cpu)
{
- sync_sched_exp_online_cleanup(cpu);
- rcutree_affinity_setting(cpu, -1);
+ unsigned long flags;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+ struct rcu_state *rsp;
+
+ for_each_rcu_flavor(rsp) {
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ rnp = rdp->mynode;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rnp->ffmask |= rdp->grpmask;
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
if (IS_ENABLED(CONFIG_TREE_SRCU))
srcu_online_cpu(cpu);
+ if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
+ return 0; /* Too early in boot for scheduler work. */
+ sync_sched_exp_online_cleanup(cpu);
+ rcutree_affinity_setting(cpu, -1);
return 0;
}
@@ -3740,6 +3835,19 @@ int rcutree_online_cpu(unsigned int cpu)
*/
int rcutree_offline_cpu(unsigned int cpu)
{
+ unsigned long flags;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+ struct rcu_state *rsp;
+
+ for_each_rcu_flavor(rsp) {
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ rnp = rdp->mynode;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rnp->ffmask &= ~rdp->grpmask;
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+
rcutree_affinity_setting(cpu, cpu);
if (IS_ENABLED(CONFIG_TREE_SRCU))
srcu_offline_cpu(cpu);
@@ -4188,8 +4296,7 @@ void __init rcu_init(void)
for_each_online_cpu(cpu) {
rcutree_prepare_cpu(cpu);
rcu_cpu_starting(cpu);
- if (IS_ENABLED(CONFIG_TREE_SRCU))
- srcu_online_cpu(cpu);
+ rcutree_online_cpu(cpu);
}
}