From 9b2e4f1880b789be1f24f9684f7a54b90310b5c0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 30 Sep 2011 12:10:22 -0700 Subject: rcu: Track idleness independent of idle tasks Earlier versions of RCU used the scheduling-clock tick to detect idleness by checking for the idle task, but handled idleness differently for CONFIG_NO_HZ=y. But there are now a number of uses of RCU read-side critical sections in the idle task, for example, for tracing. A more fine-grained detection of idleness is therefore required. This commit presses the old dyntick-idle code into full-time service, so that rcu_idle_enter(), previously known as rcu_enter_nohz(), is always invoked at the beginning of an idle loop iteration. Similarly, rcu_idle_exit(), previously known as rcu_exit_nohz(), is always invoked at the end of an idle-loop iteration. This allows the idle task to use RCU everywhere except between consecutive rcu_idle_enter() and rcu_idle_exit() calls, in turn allowing architecture maintainers to specify exactly where in the idle loop RCU may be used. Because some of the userspace upcall uses can result in what looks to RCU like half of an interrupt, it is not possible to expect that the irq_enter() and irq_exit() hooks will give exact counts. This patch therefore expands the ->dynticks_nesting counter to 64 bits and uses two separate bitfields to count process/idle transitions and interrupt entry/exit transitions. It is presumed that userspace upcalls do not happen in the idle loop or from usermode execution (though usermode might do a system call that results in an upcall). The counter is hard-reset on each process/idle transition, which avoids the interrupt entry/exit error from accumulating. Overflow is avoided by the 64-bitness of the ->dynticks_nesting counter. This commit also adds warnings if a non-idle task asks RCU to enter idle state (and these checks will need some adjustment before applying Frederic's OS-jitter patches (http://lkml.org/lkml/2011/10/7/246)). In addition, validation of ->dynticks and ->dynticks_nesting is added. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E.
McKenney Reviewed-by: Josh Triplett --- include/trace/events/rcu.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 669fbd62ec25..e5771804c507 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -246,19 +246,21 @@ TRACE_EVENT(rcu_fqs, */ TRACE_EVENT(rcu_dyntick, - TP_PROTO(char *polarity), + TP_PROTO(char *polarity, int nesting), - TP_ARGS(polarity), + TP_ARGS(polarity, nesting), TP_STRUCT__entry( __field(char *, polarity) + __field(int, nesting) ), TP_fast_assign( __entry->polarity = polarity; + __entry->nesting = nesting; ), - TP_printk("%s", __entry->polarity) + TP_printk("%s %d", __entry->polarity, __entry->nesting) ); /* @@ -443,7 +445,7 @@ TRACE_EVENT(rcu_batch_end, #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) -#define trace_rcu_dyntick(polarity) do { } while (0) +#define trace_rcu_dyntick(polarity, nesting) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) -- cgit v1.2.3 From 91afaf300269aa99a4d646969b3258b74294ac4d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 2 Oct 2011 07:44:32 -0700 Subject: rcu: Add failure tracing to rcutorture Trace the rcutorture RCU accesses and dump the trace buffer when the first failure is detected. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 8 ++++++++ include/trace/events/rcu.h | 26 ++++++++++++++++++++++++++ kernel/rcupdate.c | 10 ++++++++++ kernel/rcutorture.c | 18 ++++++++++++++++++ 4 files changed, 62 insertions(+) (limited to 'include/trace') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index cd1ad4b04c6d..8d315b013e37 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -51,6 +51,8 @@ extern int rcutorture_runnable; /* for sysctl */ #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) extern void rcutorture_record_test_transition(void); extern void rcutorture_record_progress(unsigned long vernum); +extern void do_trace_rcu_torture_read(char *rcutorturename, + struct rcu_head *rhp); #else static inline void rcutorture_record_test_transition(void) { @@ -58,6 +60,12 @@ static inline void rcutorture_record_test_transition(void) static inline void rcutorture_record_progress(unsigned long vernum) { } +#ifdef CONFIG_RCU_TRACE +extern void do_trace_rcu_torture_read(char *rcutorturename, + struct rcu_head *rhp); +#else +#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#endif #endif #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index e5771804c507..172620a92b1a 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -437,6 +437,31 @@ TRACE_EVENT(rcu_batch_end, __entry->rcuname, __entry->callbacks_invoked) ); +/* + * Tracepoint for rcutorture readers. The first argument is the name + * of the RCU flavor from rcutorture's viewpoint and the second argument + * is the callback address. 
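One detail worth noting in the hunks above is the fallback definition such as "#define trace_rcu_dyntick(polarity, nesting) do { } while (0)": when CONFIG_RCU_TRACE is off, every tracepoint collapses to a statement-safe no-op so that call sites still compile unchanged. The following is a minimal stand-alone C sketch of that pattern; the MY_TRACE symbol and the trace_nesting_change() macro are invented for illustration and are not kernel interfaces.

#include <stdio.h>

/* Flip this to 0 to compile the tracepoint away entirely. */
#define MY_TRACE 1

#if MY_TRACE
#define trace_nesting_change(polarity, oldval, newval) \
        printf("%s %llx %llx\n", (polarity), \
               (unsigned long long)(oldval), (unsigned long long)(newval))
#else
/* do { } while (0) keeps the stub statement-safe in un-braced if/else bodies. */
#define trace_nesting_change(polarity, oldval, newval) do { } while (0)
#endif

int main(void)
{
        if (1)
                trace_nesting_change("--=", 2LL, 1LL);
        else
                trace_nesting_change("++=", 1LL, 2LL);
        return 0;
}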
+ */ +TRACE_EVENT(rcu_torture_read, + + TP_PROTO(char *rcutorturename, struct rcu_head *rhp), + + TP_ARGS(rcutorturename, rhp), + + TP_STRUCT__entry( + __field(char *, rcutorturename) + __field(struct rcu_head *, rhp) + ), + + TP_fast_assign( + __entry->rcutorturename = rcutorturename; + __entry->rhp = rhp; + ), + + TP_printk("%s torture read %p", + __entry->rcutorturename, __entry->rhp) +); + #else /* #ifdef CONFIG_RCU_TRACE */ #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) @@ -452,6 +477,7 @@ TRACE_EVENT(rcu_batch_end, #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) +#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index c5b98e565aee..92e771d7b44b 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -316,3 +316,13 @@ struct debug_obj_descr rcuhead_debug_descr = { }; EXPORT_SYMBOL_GPL(rcuhead_debug_descr); #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ + +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) +void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp) +{ + trace_rcu_torture_read(rcutorturename, rhp); +} +EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); +#else +#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#endif diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 764825c2685c..df35228e743b 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -913,6 +913,18 @@ rcu_torture_fakewriter(void *arg) return 0; } +void rcutorture_trace_dump(void) +{ + static atomic_t beenhere = ATOMIC_INIT(0); + + if (atomic_read(&beenhere)) + return; + if (atomic_xchg(&beenhere, 1) != 0) + return; + do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL); + ftrace_dump(DUMP_ALL); +} + /* * RCU torture reader from timer handler. Dereferences rcu_torture_current, * incrementing the corresponding element of the pipeline array. The @@ -934,6 +946,7 @@ static void rcu_torture_timer(unsigned long unused) rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || srcu_read_lock_held(&srcu_ctl)); + do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu); if (p == NULL) { /* Leave because rcu_torture_writer is not yet underway */ cur_ops->readunlock(idx); @@ -951,6 +964,8 @@ static void rcu_torture_timer(unsigned long unused) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } + if (pipe_count > 1) + rcutorture_trace_dump(); __this_cpu_inc(rcu_torture_count[pipe_count]); completed = cur_ops->completed() - completed; if (completed > RCU_TORTURE_PIPE_LEN) { @@ -994,6 +1009,7 @@ rcu_torture_reader(void *arg) rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || srcu_read_lock_held(&srcu_ctl)); + do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu); if (p == NULL) { /* Wait for rcu_torture_writer to get underway */ cur_ops->readunlock(idx); @@ -1009,6 +1025,8 @@ rcu_torture_reader(void *arg) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } + if (pipe_count > 1) + rcutorture_trace_dump(); __this_cpu_inc(rcu_torture_count[pipe_count]); completed = cur_ops->completed() - completed; if (completed > RCU_TORTURE_PIPE_LEN) { -- cgit v1.2.3 From 4145fa7fbee3ec1e61c52825b146192885d9759f Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 31 Oct 2011 15:01:54 -0700 Subject: rcu: Deconfuse dynticks entry-exit tracing The trace_rcu_dyntick() trace event did not print both the old and the new value of the nesting level, and furthermore printed only the low-order 32 bits of it. This could result in some confusion when interpreting trace-event dumps, so this commit prints both the old and the new value, prints the full 64 bits, and also selects the process-entry/exit increment to print nicely in hexadecimal. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/trace/events/rcu.h | 15 +++++++++------ kernel/rcu.h | 7 +++++++ kernel/rcutiny.c | 28 +++++++++++++++++----------- kernel/rcutree.c | 35 ++++++++++++++++++++--------------- 4 files changed, 53 insertions(+), 32 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 172620a92b1a..c29fb2f55909 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -246,21 +246,24 @@ TRACE_EVENT(rcu_fqs, */ TRACE_EVENT(rcu_dyntick, - TP_PROTO(char *polarity, int nesting), + TP_PROTO(char *polarity, long long oldnesting, long long newnesting), - TP_ARGS(polarity, nesting), + TP_ARGS(polarity, oldnesting, newnesting), TP_STRUCT__entry( __field(char *, polarity) - __field(int, nesting) + __field(long long, oldnesting) + __field(long long, newnesting) ), TP_fast_assign( __entry->polarity = polarity; - __entry->nesting = nesting; + __entry->oldnesting = oldnesting; + __entry->newnesting = newnesting; ), - TP_printk("%s %d", __entry->polarity, __entry->nesting) + TP_printk("%s %llx %llx", __entry->polarity, + __entry->oldnesting, __entry->newnesting) ); /* @@ -470,7 +473,7 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) -#define trace_rcu_dyntick(polarity, nesting) do { } while (0) +#define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) diff --git a/kernel/rcu.h b/kernel/rcu.h index f600868d550d..aa88baab5f78 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -29,6 +29,13 @@ #define RCU_TRACE(stmt) #endif /* #else #ifdef CONFIG_RCU_TRACE */ +/* + * Process-level increment to ->dynticks_nesting field. This allows for + * architectures that use half-interrupts and half-exceptions from + * process context. + */ +#define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1) + /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally * by call_rcu() and rcu callback execution, and are therefore not part of the diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index b4e0b4981768..9b9bdf666fb5 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -53,20 +53,21 @@ static void __call_rcu(struct rcu_head *head, #include "rcutiny_plugin.h" -static long long rcu_dynticks_nesting = LLONG_MAX / 2; +static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING; /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. 
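The effect of the DYNTICK_TASK_NESTING constant introduced above can be modeled outside the kernel. The sketch below is only an approximation under the assumption of a single CPU and no real interrupts; the names mirror the kernel's, but none of this is the actual implementation. Process-level idle entry hard-resets the counter to zero, idle exit crowbars it back to the large process-level value, interrupt entry/exit moves it by one, and "idle" simply means the counter is zero, with the old and new values printed in hex the way the updated rcu_dyntick trace event does.

#include <stdio.h>
#include <limits.h>

/* Large process-level increment, far above any plausible irq nesting depth. */
#define TASK_NESTING (LLONG_MAX / 2 - 1)

static long long nesting = TASK_NESTING;        /* boot-time state: not idle */

static void show(const char *what, long long oldval)
{
        /* Old and new values, full 64 bits, like the rcu_dyntick trace event. */
        printf("%-10s %llx -> %llx\n", what,
               (unsigned long long)oldval, (unsigned long long)nesting);
}

static void task_idle_enter(void)
{
        long long oldval = nesting;

        nesting = 0;            /* hard reset: irq miscounts cannot accumulate */
        show("idle enter", oldval);
}

static void irq_enter_from_idle(void)
{
        long long oldval = nesting;

        nesting++;
        show("irq enter", oldval);
}

static void irq_exit_to_idle(void)
{
        long long oldval = nesting;

        nesting--;
        show("irq exit", oldval);
}

static void task_idle_exit(void)
{
        long long oldval = nesting;

        nesting = TASK_NESTING; /* crowbar back to the process-level value */
        show("idle exit", oldval);
}

int main(void)
{
        task_idle_enter();
        irq_enter_from_idle();
        irq_exit_to_idle();
        task_idle_exit();
        printf("idle means nesting == 0\n");
        return 0;
}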
*/ -static void rcu_idle_enter_common(void) +static void rcu_idle_enter_common(long long oldval) { if (rcu_dynticks_nesting) { - RCU_TRACE(trace_rcu_dyntick("--=", rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick("--=", + oldval, rcu_dynticks_nesting)); return; } - RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting)); if (!idle_cpu(smp_processor_id())) { WARN_ON_ONCE(1); /* must be idle task! */ RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", - rcu_dynticks_nesting)); + oldval, rcu_dynticks_nesting)); ftrace_dump(DUMP_ALL); } rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ @@ -79,10 +80,12 @@ static void rcu_idle_enter_common(void) void rcu_idle_enter(void) { unsigned long flags; + long long oldval; local_irq_save(flags); + oldval = rcu_dynticks_nesting; rcu_dynticks_nesting = 0; - rcu_idle_enter_common(); + rcu_idle_enter_common(oldval); local_irq_restore(flags); } @@ -92,11 +95,13 @@ void rcu_idle_enter(void) void rcu_irq_exit(void) { unsigned long flags; + long long oldval; local_irq_save(flags); + oldval = rcu_dynticks_nesting; rcu_dynticks_nesting--; WARN_ON_ONCE(rcu_dynticks_nesting < 0); - rcu_idle_enter_common(); + rcu_idle_enter_common(oldval); local_irq_restore(flags); } @@ -104,14 +109,15 @@ void rcu_irq_exit(void) static void rcu_idle_exit_common(long long oldval) { if (oldval) { - RCU_TRACE(trace_rcu_dyntick("++=", rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick("++=", + oldval, rcu_dynticks_nesting)); return; } - RCU_TRACE(trace_rcu_dyntick("End", oldval)); + RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); if (!idle_cpu(smp_processor_id())) { WARN_ON_ONCE(1); /* must be idle task! */ RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", - oldval)); + oldval, rcu_dynticks_nesting)); ftrace_dump(DUMP_ALL); } } @@ -127,7 +133,7 @@ void rcu_idle_exit(void) local_irq_save(flags); oldval = rcu_dynticks_nesting; WARN_ON_ONCE(oldval != 0); - rcu_dynticks_nesting = LLONG_MAX / 2; + rcu_dynticks_nesting = DYNTICK_TASK_NESTING; rcu_idle_exit_common(oldval); local_irq_restore(flags); } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 489b62a67d35..06e40dd53b23 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -196,7 +196,7 @@ void rcu_note_context_switch(int cpu) EXPORT_SYMBOL_GPL(rcu_note_context_switch); DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = LLONG_MAX / 2, + .dynticks_nesting = DYNTICK_TASK_NESTING, .dynticks = ATOMIC_INIT(1), }; @@ -348,17 +348,17 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) * we really have entered idle, and must do the appropriate accounting. * The caller must have disabled interrupts. */ -static void rcu_idle_enter_common(struct rcu_dynticks *rdtp) +static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) { if (rdtp->dynticks_nesting) { - trace_rcu_dyntick("--=", rdtp->dynticks_nesting); + trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); return; } - trace_rcu_dyntick("Start", rdtp->dynticks_nesting); + trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); if (!idle_cpu(smp_processor_id())) { WARN_ON_ONCE(1); /* must be idle task! 
*/ trace_rcu_dyntick("Error on entry: not idle task", - rdtp->dynticks_nesting); + oldval, rdtp->dynticks_nesting); ftrace_dump(DUMP_ALL); } /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ @@ -383,12 +383,14 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp) void rcu_idle_enter(void) { unsigned long flags; + long long oldval; struct rcu_dynticks *rdtp; local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); + oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting = 0; - rcu_idle_enter_common(rdtp); + rcu_idle_enter_common(rdtp, oldval); local_irq_restore(flags); } @@ -411,13 +413,15 @@ void rcu_idle_enter(void) void rcu_irq_exit(void) { unsigned long flags; + long long oldval; struct rcu_dynticks *rdtp; local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); + oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; WARN_ON_ONCE(rdtp->dynticks_nesting < 0); - rcu_idle_enter_common(rdtp); + rcu_idle_enter_common(rdtp, oldval); local_irq_restore(flags); } @@ -431,7 +435,7 @@ void rcu_irq_exit(void) static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) { if (oldval) { - trace_rcu_dyntick("++=", rdtp->dynticks_nesting); + trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); return; } smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ @@ -439,10 +443,11 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ smp_mb__after_atomic_inc(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); - trace_rcu_dyntick("End", oldval); + trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); if (!idle_cpu(smp_processor_id())) { WARN_ON_ONCE(1); /* must be idle task! */ - trace_rcu_dyntick("Error on exit: not idle task", oldval); + trace_rcu_dyntick("Error on exit: not idle task", + oldval, rdtp->dynticks_nesting); ftrace_dump(DUMP_ALL); } } @@ -453,8 +458,8 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * We crowbar the ->dynticks_nesting field to LLONG_MAX/2 to allow for - * the possibility of usermode upcalls messing up our count + * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to + * allow for the possibility of usermode upcalls messing up our count * of interrupt nesting level during the busy period that is just * now starting. 
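Alongside the nesting counter, the tree-RCU code above maintains the per-CPU ->dynticks counter, which is even while the CPU is in dyntick-idle and odd otherwise; rcu_idle_exit_common() increments it and then checks atomic_read(&rdtp->dynticks) & 0x1. Below is a rough user-space model of that even/odd convention, using C11 atomics in place of the kernel's atomic_t and explicit smp_mb__*() barriers, so the ordering guarantees are only approximated.

#include <stdatomic.h>
#include <stdio.h>

/* Even value => the CPU is in dyntick-idle, odd value => it is not. */
static atomic_long dynticks;

static void model_idle_enter(void)
{
        /* seq_cst here stands in for the kernel's explicit memory barriers */
        atomic_fetch_add(&dynticks, 1);
        if (atomic_load(&dynticks) & 0x1)
                fprintf(stderr, "error: counter should be even in idle\n");
}

static void model_idle_exit(void)
{
        atomic_fetch_add(&dynticks, 1);
        if (!(atomic_load(&dynticks) & 0x1))
                fprintf(stderr, "error: counter should be odd when non-idle\n");
}

int main(void)
{
        atomic_store(&dynticks, 1);     /* boot-time state: non-idle, so odd */
        model_idle_enter();
        printf("after idle enter: %ld (even => idle)\n", atomic_load(&dynticks));
        model_idle_exit();
        printf("after idle exit:  %ld (odd => non-idle)\n", atomic_load(&dynticks));
        return 0;
}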
*/ @@ -468,7 +473,7 @@ void rcu_idle_exit(void) rdtp = &__get_cpu_var(rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(oldval != 0); - rdtp->dynticks_nesting = LLONG_MAX / 2; + rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; rcu_idle_exit_common(rdtp, oldval); local_irq_restore(flags); } @@ -2012,7 +2017,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->nxttail[i] = &rdp->nxtlist; rdp->qlen = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); rdp->cpu = cpu; rdp->rsp = rsp; @@ -2040,7 +2045,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); WARN_ON_ONCE((atomic_read(&rdp->dynticks->dynticks) & 0x1) != 1); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ -- cgit v1.2.3 From 045fb9315a2129023d70a0eecf0942e18fca4fcd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Nov 2011 12:13:03 -0800 Subject: rcu: Update trace_rcu_dyntick() header comment This commit updates the trace_rcu_dyntick() header comment to reflect events added by commit 4b4f421. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index c29fb2f55909..7f6877a35051 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -241,8 +241,16 @@ TRACE_EVENT(rcu_fqs, /* * Tracepoint for dyntick-idle entry/exit events. These take a string - * as argument: "Start" for entering dyntick-idle mode and "End" for - * leaving it. + * as argument: "Start" for entering dyntick-idle mode, "End" for + * leaving it, "--=" for events moving towards idle, and "++=" for events + * moving away from idle. "Error on entry: not idle task" and "Error on + * exit: not idle task" indicate that a non-idle task is erroneously + * toying with the idle loop. + * + * These events also take a pair of numbers, which indicate the nesting + * depth before and after the event of interest. Note that task-related + * events use the upper bits of each number, while interrupt-related + * events use the lower bits. */ TRACE_EVENT(rcu_dyntick, -- cgit v1.2.3 From 433cdddcd9ac5558068edd7f8d4707a70f7710f5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Nov 2011 14:58:03 -0800 Subject: rcu: Add tracing for RCU_FAST_NO_HZ This commit adds trace_rcu_prep_idle(), which is invoked from rcu_prepare_for_idle() and rcu_wake_cpu() to trace attempts on the part of RCU to force CPUs into dyntick-idle mode. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- include/trace/events/rcu.h | 37 +++++++++++++++++++++++++++++++++++++ kernel/rcutree_plugin.h | 18 +++++++++++++++--- 2 files changed, 52 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 7f6877a35051..debe453c9623 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -274,6 +274,42 @@ TRACE_EVENT(rcu_dyntick, __entry->oldnesting, __entry->newnesting) ); +/* + * Tracepoint for RCU preparation for idle, the goal being to get RCU + * processing done so that the current CPU can shut off its scheduling + * clock and enter dyntick-idle mode. One way to accomplish this is + * to drain all RCU callbacks from this CPU, and the other is to have + * done everything RCU requires for the current grace period. In this + * latter case, the CPU will be awakened at the end of the current grace + * period in order to process the remainder of its callbacks. + * + * These tracepoints take a string as argument: + * + * "No callbacks": Nothing to do, no callbacks on this CPU. + * "In holdoff": Nothing to do, holding off after unsuccessful attempt. + * "Dyntick with callbacks": Callbacks remain, but RCU doesn't need CPU. + * "Begin holdoff": Attempt failed, don't retry until next jiffy. + * "More callbacks": Still more callbacks, try again to clear them out. + * "Callbacks drained": All callbacks processed, off to dyntick idle! + * "CPU awakened at GP end": + */ +TRACE_EVENT(rcu_prep_idle, + + TP_PROTO(char *reason), + + TP_ARGS(reason), + + TP_STRUCT__entry( + __field(char *, reason) + ), + + TP_fast_assign( + __entry->reason = reason; + ), + + TP_printk("%s", __entry->reason) +); + /* * Tracepoint for the registration of a single RCU callback function. * The first argument is the type of RCU, the second argument is @@ -482,6 +518,7 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) +#define trace_rcu_prep_idle(reason) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index b70ca8cc52e1..6467f5669ab7 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2031,10 +2031,13 @@ static void rcu_prepare_for_idle(int cpu) /* If no callbacks or in the holdoff period, enter dyntick-idle. */ if (!rcu_cpu_has_callbacks(cpu)) { per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; + trace_rcu_prep_idle("No callbacks"); return; } - if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) + if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { + trace_rcu_prep_idle("In holdoff"); return; + } /* Check and update the rcu_dyntick_drain sequencing. */ if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { @@ -2044,9 +2047,11 @@ static void rcu_prepare_for_idle(int cpu) /* We have hit the limit, so time to give up. */ per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; if (!rcu_pending(cpu)) { + trace_rcu_prep_idle("Dyntick with callbacks"); per_cpu(rcu_awake_at_gp_end, cpu) = 1; return; /* Nothing to do immediately. */ } + trace_rcu_prep_idle("Begin holdoff"); invoke_rcu_core(); /* Force the CPU out of dyntick-idle. 
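The reason strings listed for trace_rcu_prep_idle() correspond to the branches of rcu_prepare_for_idle(). The following is a condensed, purely illustrative sketch of that decision flow as it stands after this patch; the struct, its field names, and the simplified holdoff handling are assumptions made for the example, not the kernel's exact control flow.

#include <stdbool.h>
#include <stdio.h>

#define NEEDS_CPU_FLUSHES 5     /* maximum drain attempts before holding off */

struct cpu_state {
        bool has_callbacks;     /* any RCU callbacks queued on this CPU? */
        bool in_holdoff;        /* already gave up during this jiffy? */
        bool rcu_pending;       /* does RCU still need anything from this CPU? */
        int drain;              /* remaining drain attempts */
};

/* Return the trace string that rcu_prepare_for_idle() would emit. */
static const char *prep_idle_reason(struct cpu_state *s)
{
        if (!s->has_callbacks)
                return "No callbacks";
        if (s->in_holdoff)
                return "In holdoff";
        if (s->drain == 0)
                s->drain = NEEDS_CPU_FLUSHES;   /* first attempt: arm the counter */
        else if (--s->drain <= 0) {
                s->in_holdoff = true;
                if (!s->rcu_pending)
                        return "Dyntick with callbacks";
                return "Begin holdoff";
        }
        return "More callbacks";        /* keep trying to push callbacks through */
}

int main(void)
{
        struct cpu_state s = { .has_callbacks = true, .drain = 0 };

        for (int i = 0; i < 7; i++)
                printf("attempt %d: %s\n", i, prep_idle_reason(&s));
        return 0;
}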
*/ return; } @@ -2073,9 +2078,15 @@ static void rcu_prepare_for_idle(int cpu) c = c || per_cpu(rcu_bh_data, cpu).nxtlist; } - /* If RCU callbacks are still pending, RCU still needs this CPU. */ - if (c) + /* + * If RCU callbacks are still pending, RCU still needs this CPU. + * So try forcing the callbacks through the grace period. + */ + if (c) { + trace_rcu_prep_idle("More callbacks"); invoke_rcu_core(); + } else + trace_rcu_prep_idle("Callbacks drained"); } /* @@ -2085,6 +2096,7 @@ static void rcu_prepare_for_idle(int cpu) */ static void rcu_wake_cpu(void *unused) { + trace_rcu_prep_idle("CPU awakened at GP end"); invoke_rcu_core(); } -- cgit v1.2.3 From f535a607c13c7b674e0788ca5765779aa74a01c3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 22 Nov 2011 20:43:02 -0800 Subject: rcu: Eliminate RCU_FAST_NO_HZ grace-period hang With the new implementation of RCU_FAST_NO_HZ, it was possible to hang RCU grace periods as follows: o CPU 0 attempts to go idle, cycles several times through the rcu_prepare_for_idle() loop, then goes dyntick-idle when RCU needs nothing more from it, while still having at least on RCU callback pending. o CPU 1 goes idle with no callbacks. Both CPUs can then stay in dyntick-idle mode indefinitely, preventing the RCU grace period from ever completing, possibly hanging the system. This commit therefore prevents CPUs that have RCU callbacks from entering dyntick-idle mode. This approach also eliminates the need for the end-of-grace-period IPIs used previously. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 1 - kernel/rcutree.c | 2 -- kernel/rcutree.h | 3 -- kernel/rcutree_plugin.h | 78 ++-------------------------------------------- 4 files changed, 2 insertions(+), 82 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index debe453c9623..8dd6fcb94946 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -287,7 +287,6 @@ TRACE_EVENT(rcu_dyntick, * * "No callbacks": Nothing to do, no callbacks on this CPU. * "In holdoff": Nothing to do, holding off after unsuccessful attempt. - * "Dyntick with callbacks": Callbacks remain, but RCU doesn't need CPU. * "Begin holdoff": Attempt failed, don't retry until next jiffy. * "More callbacks": Still more callbacks, try again to clear them out. * "Callbacks drained": All callbacks processed, off to dyntick idle! diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7fb8b0e60811..13fab4a9f9fb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1086,7 +1086,6 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) * callbacks are waiting on the grace period that just now * completed. */ - rcu_schedule_wake_gp_end(); if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ @@ -1672,7 +1671,6 @@ static void rcu_process_callbacks(struct softirq_action *unused) &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); rcu_preempt_process_callbacks(); - rcu_wake_cpus_for_gp_end(); trace_rcu_utilization("End RCU core"); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index ea32405177c9..70d8a557090f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -88,7 +88,6 @@ struct rcu_dynticks { /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. 
*/ - int wake_gp_end; /* A GP ended, need to wake up CPUs. */ }; /* RCU's kthread states for tracing. */ @@ -469,7 +468,5 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg); static void rcu_cpu_kthread_setrt(int cpu, int to_rt); static void __cpuinit rcu_prepare_kthreads(int cpu); static void rcu_prepare_for_idle(int cpu); -static void rcu_wake_cpus_for_gp_end(void); -static void rcu_schedule_wake_gp_end(void); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c4daf1e19e01..3d84dbc113d6 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1964,28 +1964,11 @@ static void rcu_prepare_for_idle(int cpu) { } -/* - * CPUs are never putting themselves to sleep with callbacks pending, - * so there is no need to awaken them. - */ -static void rcu_wake_cpus_for_gp_end(void) -{ -} - -/* - * CPUs are never putting themselves to sleep with callbacks pending, - * so there is no need to schedule the act of awakening them. - */ -static void rcu_schedule_wake_gp_end(void) -{ -} - #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ #define RCU_NEEDS_CPU_FLUSHES 5 static DEFINE_PER_CPU(int, rcu_dyntick_drain); static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); -static DEFINE_PER_CPU(bool, rcu_awake_at_gp_end); /* * Allow the CPU to enter dyntick-idle mode if either: (1) There are no @@ -2032,26 +2015,16 @@ static void rcu_prepare_for_idle(int cpu) local_irq_save(flags); /* - * If there are no callbacks on this CPU or if RCU has no further - * need for this CPU at the moment, enter dyntick-idle mode. - * Also reset state so as to not prejudice later attempts. + * If there are no callbacks on this CPU, enter dyntick-idle mode. + * Also reset state to avoid prejudicing later attempts. */ if (!rcu_cpu_has_callbacks(cpu)) { per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; per_cpu(rcu_dyntick_drain, cpu) = 0; - per_cpu(rcu_awake_at_gp_end, cpu) = 0; local_irq_restore(flags); trace_rcu_prep_idle("No callbacks"); return; } - if (!rcu_pending(cpu)) { - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; - per_cpu(rcu_dyntick_drain, cpu) = 0; - per_cpu(rcu_awake_at_gp_end, cpu) = 1; - local_irq_restore(flags); - trace_rcu_prep_idle("Dyntick with callbacks"); - return; /* Nothing to do immediately. */ - } /* * If in holdoff mode, just return. We will presumably have @@ -2067,7 +2040,6 @@ static void rcu_prepare_for_idle(int cpu) if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { /* First time through, initialize the counter. */ per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; - per_cpu(rcu_awake_at_gp_end, cpu) = 0; } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { /* We have hit the limit, so time to give up. */ per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; @@ -2113,50 +2085,4 @@ static void rcu_prepare_for_idle(int cpu) } } -/* - * Wake up a CPU by invoking the RCU core. Intended for use by - * rcu_wake_cpus_for_gp_end(), which passes this function to - * smp_call_function_single(). - */ -static void rcu_wake_cpu(void *unused) -{ - trace_rcu_prep_idle("CPU awakened at GP end"); - invoke_rcu_core(); -} - -/* - * If an RCU grace period ended recently, scan the rcu_awake_at_gp_end - * per-CPU variables, and wake up any CPUs that requested a wakeup. 
- */ -static void rcu_wake_cpus_for_gp_end(void) -{ - int cpu; - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - - if (!rdtp->wake_gp_end) - return; - rdtp->wake_gp_end = 0; - for_each_online_cpu(cpu) { - if (per_cpu(rcu_awake_at_gp_end, cpu)) { - per_cpu(rcu_awake_at_gp_end, cpu) = 0; - smp_call_function_single(cpu, rcu_wake_cpu, NULL, 0); - } - } -} - -/* - * A grace period has just ended, and so we will need to awaken CPUs - * that now have work to do. But we cannot send IPIs with interrupts - * disabled, so just set a flag so that this will happen upon exit - * from RCU core processing. - */ -static void rcu_schedule_wake_gp_end(void) -{ - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - - rdtp->wake_gp_end = 1; -} - -/* @@@ need tracing as well. */ - #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -- cgit v1.2.3 From 7cb92499000e3c86dae653077b1465458a039ef6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Nov 2011 12:28:34 -0800 Subject: rcu: Permit dyntick-idle with callbacks pending The current implementation of RCU_FAST_NO_HZ prevents CPUs from entering dyntick-idle state if they have RCU callbacks pending. Unfortunately, this has the side-effect of often preventing them from entering this state, especially if at least one other CPU is not in dyntick-idle state. However, the resulting per-tick wakeup is wasteful in many cases: if the CPU has already fully responded to the current RCU grace period, there will be nothing for it to do until this grace period ends, which will frequently take several jiffies. This commit therefore permits a CPU that has done everything that the current grace period has asked of it (rcu_pending() == 0) to enter dyntick-idle mode even if it still has RCU callbacks pending. However, such a CPU posts a timer to wake it up several jiffies later (6 jiffies, based on experience with grace-period lengths). This wakeup is required to handle situations that can result in all CPUs being in dyntick-idle mode, thus failing to ever complete the current grace period. If a CPU wakes up before the timer goes off, then it cancels that timer, thus avoiding spurious wakeups. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 3 +- kernel/rcutree.c | 3 ++ kernel/rcutree.h | 2 ++ kernel/rcutree_plugin.h | 75 +++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 78 insertions(+), 5 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 8dd6fcb94946..c75418c3ccb8 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -288,9 +288,10 @@ TRACE_EVENT(rcu_dyntick, * "No callbacks": Nothing to do, no callbacks on this CPU. * "In holdoff": Nothing to do, holding off after unsuccessful attempt. * "Begin holdoff": Attempt failed, don't retry until next jiffy. + * "Dyntick with callbacks": Entering dyntick-idle despite callbacks. * "More callbacks": Still more callbacks, try again to clear them out. * "Callbacks drained": All callbacks processed, off to dyntick idle! - * "CPU awakened at GP end": + * "Timer": Timer fired to cause CPU to continue processing callbacks.
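The timer discipline described in this commit message (arm a wakeup roughly one grace period out when a CPU goes idle with callbacks still queued, and cancel it if the CPU leaves idle first) can be sketched with a plain deadline variable standing in for the kernel hrtimer. Everything below is an invented model for illustration, not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define IDLE_GP_DELAY 6                 /* jiffies; roughly one grace period */

static unsigned long jiffies;           /* stand-in for the kernel's tick counter */
static bool timer_armed;
static unsigned long timer_expires;

/* Entering dyntick-idle with callbacks still queued: post a wakeup timer. */
static void prepare_for_idle(bool callbacks_pending, bool rcu_pending)
{
        if (callbacks_pending && !rcu_pending) {
                timer_armed = true;
                timer_expires = jiffies + IDLE_GP_DELAY;
                printf("Dyntick with callbacks: wake at jiffy %lu\n", timer_expires);
        }
}

/* Exiting idle early: cancel the timer so it cannot fire spuriously. */
static void cleanup_after_idle(void)
{
        if (timer_armed) {
                timer_armed = false;
                printf("timer cancelled at jiffy %lu\n", jiffies);
        }
}

static void tick(void)
{
        jiffies++;
        if (timer_armed && jiffies >= timer_expires) {
                timer_armed = false;
                printf("Timer: forcing callback processing at jiffy %lu\n", jiffies);
        }
}

int main(void)
{
        prepare_for_idle(true, false);  /* callbacks queued, but GP needs nothing */
        for (int i = 0; i < 4; i++)
                tick();
        cleanup_after_idle();           /* woke early: cancel */

        prepare_for_idle(true, false);  /* idle again; this time let it expire */
        for (int i = 0; i < 8; i++)
                tick();
        return 0;
}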
*/ TRACE_EVENT(rcu_prep_idle, diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 69bb37287cc8..bf085d7f6a3f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -448,6 +448,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ smp_mb__after_atomic_inc(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + rcu_cleanup_after_idle(smp_processor_id()); trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); if (!is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); @@ -2057,6 +2058,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); + rcu_prepare_for_idle_init(cpu); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ /* @@ -2138,6 +2140,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, rcu_send_cbs_to_online(&rcu_bh_state); rcu_send_cbs_to_online(&rcu_sched_state); rcu_preempt_send_cbs_to_online(); + rcu_cleanup_after_idle(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 70d8a557090f..9bcfbc9d16c6 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -467,6 +467,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg); #endif /* #ifdef CONFIG_RCU_BOOST */ static void rcu_cpu_kthread_setrt(int cpu, int to_rt); static void __cpuinit rcu_prepare_kthreads(int cpu); +static void rcu_prepare_for_idle_init(int cpu); +static void rcu_cleanup_after_idle(int cpu); static void rcu_prepare_for_idle(int cpu); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 42ca5a400ae3..dbcea6b93aea 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1947,15 +1947,29 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited); * 1 if so. This function is part of the RCU implementation; it is -not- * an exported member of the RCU API. * - * Because we have preemptible RCU, just check whether this CPU needs - * any flavor of RCU. Do not chew up lots of CPU cycles with preemption - * disabled in a most-likely vain attempt to cause RCU not to need this CPU. + * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs + * any flavor of RCU. */ int rcu_needs_cpu(int cpu) { return rcu_cpu_has_callbacks(cpu); } +/* + * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. + */ +static void rcu_prepare_for_idle_init(int cpu) +{ +} + +/* + * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up + * after it. + */ +static void rcu_cleanup_after_idle(int cpu) +{ +} + /* * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y, * is nothing. @@ -1966,9 +1980,12 @@ static void rcu_prepare_for_idle(int cpu) #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -#define RCU_NEEDS_CPU_FLUSHES 5 +#define RCU_NEEDS_CPU_FLUSHES 5 /* Allow for callback self-repost. */ +#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. 
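RCU_IDLE_GP_DELAY is a count of jiffies, but hrtimers are programmed in ktime_t, so the initialization code in the next hunk converts it once via jiffies_to_usecs() and ns_to_ktime(). The following is a stand-alone approximation of that conversion, assuming a tick rate of 1000 Hz; the kernel's HZ is a build-time configuration choice.

#include <stdio.h>

#define MY_HZ 1000      /* assumed tick rate; exact only when it divides 1,000,000 */

/* Convert a delay in ticks to nanoseconds, the way an hrtimer wants it. */
static unsigned long long ticks_to_ns(unsigned int ticks)
{
        unsigned long long usec = (unsigned long long)ticks * (1000000 / MY_HZ);

        return usec * 1000;     /* mirrors ns_to_ktime(upj * (u64)1000) */
}

int main(void)
{
        printf("6 ticks = %llu ns\n", ticks_to_ns(6));
        return 0;
}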
*/ static DEFINE_PER_CPU(int, rcu_dyntick_drain); static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); +static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); +static ktime_t rcu_idle_gp_wait; /* * Allow the CPU to enter dyntick-idle mode if either: (1) There are no @@ -1988,6 +2005,47 @@ int rcu_needs_cpu(int cpu) return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; } +/* + * Timer handler used to force CPU to start pushing its remaining RCU + * callbacks in the case where it entered dyntick-idle mode with callbacks + * pending. The hander doesn't really need to do anything because the + * real work is done upon re-entry to idle, or by the next scheduling-clock + * interrupt should idle not be re-entered. + */ +static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp) +{ + trace_rcu_prep_idle("Timer"); + return HRTIMER_NORESTART; +} + +/* + * Initialize the timer used to pull CPUs out of dyntick-idle mode. + */ +static void rcu_prepare_for_idle_init(int cpu) +{ + static int firsttime = 1; + struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); + + hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtp->function = rcu_idle_gp_timer_func; + if (firsttime) { + unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); + + rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); + firsttime = 0; + } +} + +/* + * Clean up for exit from idle. Because we are exiting from idle, there + * is no longer any point to rcu_idle_gp_timer, so cancel it. This will + * do nothing if this timer is not active, so just cancel it unconditionally. + */ +static void rcu_cleanup_after_idle(int cpu) +{ + hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu)); +} + /* * Check to see if any RCU-related work can be done by the current CPU, * and if so, schedule a softirq to get it done. This function is part @@ -2040,6 +2098,15 @@ static void rcu_prepare_for_idle(int cpu) /* First time through, initialize the counter. */ per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { + /* Can we go dyntick-idle despite still having callbacks? */ + if (!rcu_pending(cpu)) { + trace_rcu_prep_idle("Dyntick with callbacks"); + per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; + hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), + rcu_idle_gp_wait, HRTIMER_MODE_REL); + return; /* Nothing more to do immediately. */ + } + /* We have hit the limit, so time to give up. */ per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; local_irq_restore(flags); -- cgit v1.2.3 From 4968c300e1fa5389fdf1f1ebd8b8e4aec9aa4a9e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Dec 2011 16:32:40 -0800 Subject: rcu: Augment rcu_batch_end tracing for idle and callback state The current rcu_batch_end event trace records only the name of the RCU flavor and the total number of callbacks that remain queued on the current CPU. This is insufficient for testing and tuning the new dyntick-idle RCU_FAST_NO_HZ code, so this commit adds idle state along with whether or not any of the callbacks that were ready to invoke at the beginning of rcu_do_batch() are still queued. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- include/trace/events/rcu.h | 38 +++++++++++++++++++++++++++++--------- kernel/rcutiny.c | 10 ++++++++-- kernel/rcutiny_plugin.h | 25 +++++++++++++++++++++++++ kernel/rcutree.c | 8 ++++++-- 4 files changed, 68 insertions(+), 13 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index c75418c3ccb8..d2d88bed891b 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -461,27 +461,46 @@ TRACE_EVENT(rcu_invoke_kfree_callback, /* * Tracepoint for exiting rcu_do_batch after RCU callbacks have been - * invoked. The first argument is the name of the RCU flavor and - * the second argument is number of callbacks actually invoked. + * invoked. The first argument is the name of the RCU flavor, + * the second argument is number of callbacks actually invoked, + * the third argument (cb) is whether or not any of the callbacks that + * were ready to invoke at the beginning of this batch are still + * queued, the fourth argument (nr) is the return value of need_resched(), + * the fifth argument (iit) is 1 if the current task is the idle task, + * and the sixth argument (risk) is the return value from + * rcu_is_callbacks_kthread(). */ TRACE_EVENT(rcu_batch_end, - TP_PROTO(char *rcuname, int callbacks_invoked), + TP_PROTO(char *rcuname, int callbacks_invoked, + bool cb, bool nr, bool iit, bool risk), - TP_ARGS(rcuname, callbacks_invoked), + TP_ARGS(rcuname, callbacks_invoked, cb, nr, iit, risk), TP_STRUCT__entry( __field(char *, rcuname) __field(int, callbacks_invoked) + __field(bool, cb) + __field(bool, nr) + __field(bool, iit) + __field(bool, risk) ), TP_fast_assign( __entry->rcuname = rcuname; __entry->callbacks_invoked = callbacks_invoked; - ), - - TP_printk("%s CBs-invoked=%d", - __entry->rcuname, __entry->callbacks_invoked) + __entry->cb = cb; + __entry->nr = nr; + __entry->iit = iit; + __entry->risk = risk; + ), + + TP_printk("%s CBs-invoked=%d idle=%c%c%c%c", + __entry->rcuname, __entry->callbacks_invoked, + __entry->cb ? 'C' : '.', + __entry->nr ? 'S' : '.', + __entry->iit ? 'I' : '.', + __entry->risk ? 'R' : '.') ); /* @@ -524,7 +543,8 @@ TRACE_EVENT(rcu_torture_read, #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) -#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) +#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ + do { } while (0) #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index e5bd94954fa3..977296dca0a4 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -259,7 +259,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* If no RCU callbacks ready to invoke, just return. 
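The new TP_printk() above condenses the four booleans into a four-character idle=CSIR field, with '.' standing for false. Below is a small self-contained sketch of that formatting; format_idle_flags() is an invented helper, not a kernel function.

#include <stdbool.h>
#include <stdio.h>

/*
 * Format the rcu_batch_end idle-state flags: callbacks still queued (C),
 * need_resched() set (S), running in the idle task (I), and running in the
 * RCU callbacks kthread (R). A '.' means the corresponding condition is false.
 */
static void format_idle_flags(char buf[5], bool cb, bool nr, bool iit, bool risk)
{
        buf[0] = cb ? 'C' : '.';
        buf[1] = nr ? 'S' : '.';
        buf[2] = iit ? 'I' : '.';
        buf[3] = risk ? 'R' : '.';
        buf[4] = '\0';
}

int main(void)
{
        char buf[5];

        format_idle_flags(buf, true, false, true, false);
        printf("rcu_sched CBs-invoked=10 idle=%s\n", buf);
        return 0;
}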
*/ if (&rcp->rcucblist == rcp->donetail) { RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); - RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); + RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, + ACCESS_ONCE(rcp->rcucblist), + need_resched(), + is_idle_task(current), + rcu_is_callbacks_kthread())); return; } @@ -288,7 +292,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++); } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); - RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); + RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(), + is_idle_task(current), + rcu_is_callbacks_kthread())); } static void rcu_process_callbacks(struct softirq_action *unused) diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 2b0484a5dc28..dfa97cbb3910 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -885,6 +885,19 @@ static void invoke_rcu_callbacks(void) wake_up(&rcu_kthread_wq); } +#ifdef CONFIG_RCU_TRACE + +/* + * Is the current CPU running the RCU-callbacks kthread? + * Caller must have preemption disabled. + */ +static bool rcu_is_callbacks_kthread(void) +{ + return rcu_kthread_task == current; +} + +#endif /* #ifdef CONFIG_RCU_TRACE */ + /* * This kthread invokes RCU callbacks whose grace periods have * elapsed. It is awakened as needed, and takes the place of the @@ -938,6 +951,18 @@ void invoke_rcu_callbacks(void) raise_softirq(RCU_SOFTIRQ); } +#ifdef CONFIG_RCU_TRACE + +/* + * There is no callback kthread, so this thread is never it. + */ +static bool rcu_is_callbacks_kthread(void) +{ + return false; +} + +#endif /* #ifdef CONFIG_RCU_TRACE */ + void rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2b2e1a996a65..6c4a6722abfd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1373,7 +1373,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* If no callbacks are ready, just return.*/ if (!cpu_has_callbacks_ready_to_invoke(rdp)) { trace_rcu_batch_start(rsp->name, 0, 0); - trace_rcu_batch_end(rsp->name, 0); + trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), + need_resched(), is_idle_task(current), + rcu_is_callbacks_kthread()); return; } @@ -1409,7 +1411,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) } local_irq_save(flags); - trace_rcu_batch_end(rsp->name, count); + trace_rcu_batch_end(rsp->name, count, !!list, need_resched(), + is_idle_task(current), + rcu_is_callbacks_kthread()); /* Update count, and requeue any remaining callbacks. */ rdp->qlen -= count; -- cgit v1.2.3