summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/trace.txt20
-rw-r--r--kernel/rcu/tree.c24
-rw-r--r--kernel/rcu/tree.h9
-rw-r--r--kernel/rcu/tree_plugin.h55
-rw-r--r--kernel/rcu/tree_trace.c3
5 files changed, 86 insertions, 25 deletions
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index f3778f8952da..b8c3c813ea57 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -396,14 +396,14 @@ o Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node
The output of "cat rcu/rcu_sched/rcu_pending" looks as follows:
- 0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903
- 1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113
- 2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889
- 3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469
- 4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042
- 5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422
- 6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699
- 7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147
+ 0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 ndw=0
+ 1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 ndw=0
+ 2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 ndw=0
+ 3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 ndw=0
+ 4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 ndw=0
+ 5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 ndw=0
+ 6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 ndw=0
+ 7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 ndw=0
The fields are as follows:
@@ -432,6 +432,10 @@ o "gpc" is the number of times that an old grace period had
o "gps" is the number of times that a new grace period had started,
but this CPU was not yet aware of it.
+o "ndw" is the number of times that a wakeup of an rcuo
+ callback-offload kthread had to be deferred in order to avoid
+ deadlock.
+
o "nn" is the number of times that this CPU needed nothing.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index abef9c358d47..264f0284c0bd 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -369,6 +369,9 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
bool user)
{
+ struct rcu_state *rsp;
+ struct rcu_data *rdp;
+
trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
if (!user && !is_idle_task(current)) {
struct task_struct *idle __maybe_unused =
@@ -380,6 +383,10 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
+ for_each_rcu_flavor(rsp) {
+ rdp = this_cpu_ptr(rsp->rda);
+ do_nocb_deferred_wakeup(rdp);
+ }
rcu_prepare_for_idle(smp_processor_id());
/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
smp_mb__before_atomic_inc(); /* See above. */
@@ -1928,13 +1935,13 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
* Adopt the RCU callbacks from the specified rcu_state structure's
* orphanage. The caller must hold the ->orphan_lock.
*/
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
{
int i;
struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
/* No-CBs CPUs are handled specially. */
- if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+ if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
return;
/* Do the accounting first. */
@@ -2013,7 +2020,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
- rcu_adopt_orphan_cbs(rsp);
+ rcu_adopt_orphan_cbs(rsp, flags);
/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
mask = rdp->grpmask; /* rnp->grplo is constant. */
@@ -2330,6 +2337,9 @@ __rcu_process_callbacks(struct rcu_state *rsp)
/* If there are callbacks ready, invoke them. */
if (cpu_has_callbacks_ready_to_invoke(rdp))
invoke_rcu_callbacks(rsp, rdp);
+
+ /* Do any needed deferred wakeups of rcuo kthreads. */
+ do_nocb_deferred_wakeup(rdp);
}
/*
@@ -2464,7 +2474,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
if (cpu != -1)
rdp = per_cpu_ptr(rsp->rda, cpu);
- offline = !__call_rcu_nocb(rdp, head, lazy);
+ offline = !__call_rcu_nocb(rdp, head, lazy, flags);
WARN_ON_ONCE(offline);
/* _call_rcu() is illegal on offline CPU; leak the callback. */
local_irq_restore(flags);
@@ -2817,6 +2827,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
return 1;
}
+ /* Does this CPU need a deferred NOCB wakeup? */
+ if (rcu_nocb_need_deferred_wakeup(rdp)) {
+ rdp->n_rp_nocb_defer_wakeup++;
+ return 1;
+ }
+
/* nothing to do */
rdp->n_rp_need_nothing++;
return 0;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8e34d8674a4e..a87adfc2916b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -317,6 +317,7 @@ struct rcu_data {
unsigned long n_rp_cpu_needs_gp;
unsigned long n_rp_gp_completed;
unsigned long n_rp_gp_started;
+ unsigned long n_rp_nocb_defer_wakeup;
unsigned long n_rp_need_nothing;
/* 6) _rcu_barrier() and OOM callbacks. */
@@ -335,6 +336,7 @@ struct rcu_data {
int nocb_p_count_lazy; /* (approximate). */
wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
struct task_struct *nocb_kthread;
+ bool nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
/* 8) RCU CPU stall data. */
@@ -550,9 +552,12 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
static void rcu_init_one_nocb(struct rcu_node *rnp);
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
- bool lazy);
+ bool lazy, unsigned long flags);
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
- struct rcu_data *rdp);
+ struct rcu_data *rdp,
+ unsigned long flags);
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
static void rcu_kick_nohz_cpu(int cpu);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index b023e5407111..752ffaa0d681 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2104,7 +2104,8 @@ bool rcu_is_nocb_cpu(int cpu)
static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
struct rcu_head *rhp,
struct rcu_head **rhtp,
- int rhcount, int rhcount_lazy)
+ int rhcount, int rhcount_lazy,
+ unsigned long flags)
{
int len;
struct rcu_head **old_rhpp;
@@ -2125,9 +2126,16 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
}
len = atomic_long_read(&rdp->nocb_q_count);
if (old_rhpp == &rdp->nocb_head) {
- wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
+ if (!irqs_disabled_flags(flags)) {
+ wake_up(&rdp->nocb_wq); /* ... if queue was empty ... */
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("WakeEmpty"));
+ } else {
+ rdp->nocb_defer_wakeup = true;
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+ TPS("WakeEmptyIsDeferred"));
+ }
rdp->qlen_last_fqs_check = 0;
- trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
wake_up_process(t); /* ... or if many callbacks queued. */
rdp->qlen_last_fqs_check = LONG_MAX / 2;
@@ -2148,12 +2156,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
* "rcuo" kthread can find it.
*/
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
- bool lazy)
+ bool lazy, unsigned long flags)
{
if (!rcu_is_nocb_cpu(rdp->cpu))
return 0;
- __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
+ __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
if (__is_kfree_rcu_offset((unsigned long)rhp->func))
trace_rcu_kfree_callback(rdp->rsp->name, rhp,
(unsigned long)rhp->func,
@@ -2171,7 +2179,8 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
* not a no-CBs CPU.
*/
static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
- struct rcu_data *rdp)
+ struct rcu_data *rdp,
+ unsigned long flags)
{
long ql = rsp->qlen;
long qll = rsp->qlen_lazy;
@@ -2185,14 +2194,14 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
/* First, enqueue the donelist, if any. This preserves CB ordering. */
if (rsp->orphan_donelist != NULL) {
__call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
- rsp->orphan_donetail, ql, qll);
+ rsp->orphan_donetail, ql, qll, flags);
ql = qll = 0;
rsp->orphan_donelist = NULL;
rsp->orphan_donetail = &rsp->orphan_donelist;
}
if (rsp->orphan_nxtlist != NULL) {
__call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
- rsp->orphan_nxttail, ql, qll);
+ rsp->orphan_nxttail, ql, qll, flags);
ql = qll = 0;
rsp->orphan_nxtlist = NULL;
rsp->orphan_nxttail = &rsp->orphan_nxtlist;
@@ -2314,6 +2323,22 @@ static int rcu_nocb_kthread(void *arg)
return 0;
}
+/* Is a deferred rcuo-kthread wakeup pending? Samples ->nocb_defer_wakeup. */
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+{
+ return ACCESS_ONCE(rdp->nocb_defer_wakeup);
+}
+
+/* If a wakeup was deferred, clear the flag, then wake ->nocb_wq waiters. */
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+ if (!rcu_nocb_need_deferred_wakeup(rdp))
+ return;
+ ACCESS_ONCE(rdp->nocb_defer_wakeup) = false;
+ wake_up(&rdp->nocb_wq);
+ trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty"));
+}
+
/* Initialize per-rcu_data variables for no-CBs CPUs. */
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
@@ -2369,13 +2394,14 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
}
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
- bool lazy)
+ bool lazy, unsigned long flags)
{
return 0;
}
static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
- struct rcu_data *rdp)
+ struct rcu_data *rdp,
+ unsigned long flags)
{
return 0;
}
@@ -2384,6 +2410,15 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
}
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+{
+ return false;
+}
+
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+}
+
static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
{
}
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 3596797b7e46..4def475336d4 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -364,9 +364,10 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
rdp->n_rp_report_qs,
rdp->n_rp_cb_ready,
rdp->n_rp_cpu_needs_gp);
- seq_printf(m, "gpc=%ld gps=%ld nn=%ld\n",
+ seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw=%ld\n",
rdp->n_rp_gp_completed,
rdp->n_rp_gp_started,
- rdp->n_rp_need_nothing);
+ rdp->n_rp_need_nothing,
+ rdp->n_rp_nocb_defer_wakeup);
}