diff options
author | Ingo Molnar <mingo@kernel.org> | 2018-07-17 10:16:02 +0300 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-07-17 10:16:02 +0300 |
commit | ea73a5c6929b9c7d30b7b424414645641cb7d1d9 (patch) | |
tree | 19de3c2469c6476be82b143535a018d0053f9460 /include | |
parent | 9d3cce1e8b8561fed5f383d22a4d6949db4eadbe (diff) | |
parent | 18952651dae8efcc6d565c97f8fe5629b399cb3e (diff) | |
download | linux-ea73a5c6929b9c7d30b7b424414645641cb7d1d9.tar.xz |
Merge branch 'for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull RCU updates from Paul E. McKenney:
- An optimization and a fix for RCU expedited grace periods, with
  the fix being from Boqun Feng.
- Miscellaneous fixes, including a lockdep-annotation fix from
  Boqun Feng.
- SRCU updates.
- Updates to rcutorture and associated scripting.
- Introduce grace-period sequence numbers to the RCU-bh, RCU-preempt,
  and RCU-sched flavors, replacing the old ->gpnum and ->completed
  pair of fields. This change allows lockless code to obtain the
  complete grace-period state with a single READ_ONCE(), which is
  needed to maintain tolerable lock contention during the upcoming
  consolidation of the three RCU flavors. Note that grace-period
  sequence numbers are already used by rcu_barrier(), expedited
  RCU grace periods, and SRCU, and are thus already heavily used
  and well-tested. Joel Fernandes contributed a number of excellent
  fixes and improvements.
- Clean up some grace-period-reporting loose ends, including
  improving the handling of quiescent states from offline CPUs
  and fixing some false-positive WARN_ON_ONCE() invocations.
  (Strictly speaking, the WARN_ON_ONCE() invocations were quite
  correct, but their invariants were (harmlessly) violated by the
  earlier sloppy handling of quiescent states from offline CPUs.)
  In addition, improve grace-period forward-progress guarantees so
  as to allow removal of fail-safe checks that required otherwise
  needless lock acquisitions. Finally, add more diagnostics to
  help debug the upcoming consolidation of the RCU-bh, RCU-preempt,
  and RCU-sched flavors.
- Additional miscellaneous fixes, including those contributed by
  Byungchul Park, Mauro Carvalho Chehab, Joe Perches, Joel Fernandes,
  Steven Rostedt, Andrea Parri, and Neil Brown.
- Additional torture-test changes, including several contributed by
  Arnd Bergmann and Joel Fernandes.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/rculist.h | 19 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 20 | ||||
-rw-r--r-- | include/linux/rcutiny.h | 2 | ||||
-rw-r--r-- | include/linux/srcu.h | 17 | ||||
-rw-r--r-- | include/linux/torture.h | 4 | ||||
-rw-r--r-- | include/trace/events/rcu.h | 112 |
6 files changed, 103 insertions, 71 deletions
diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 36df6ccbc874..4786c2235b98 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -396,7 +396,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * @member: the name of the list_head within the struct. * * Continue to iterate over list of given type, continuing after - * the current position. + * the current position which must have been in the list when the RCU read + * lock was taken. + * This would typically require either that you obtained the node from a + * previous walk of the list in the same RCU read-side critical section, or + * that you held some sort of non-RCU reference (such as a reference count) + * to keep the node alive *and* in the list. + * + * This iterator is similar to list_for_each_entry_from_rcu() except + * this starts after the given position and that one starts at the given + * position. */ #define list_for_each_entry_continue_rcu(pos, head, member) \ for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ @@ -411,6 +420,14 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * * Iterate over the tail of a list starting from a given position, * which must have been in the list when the RCU read lock was taken. + * This would typically require either that you obtained the node from a + * previous walk of the list in the same RCU read-side critical section, or + * that you held some sort of non-RCU reference (such as a reference count) + * to keep the node alive *and* in the list. + * + * This iterator is similar to list_for_each_entry_continue_rcu() except + * this starts from the given position and that one starts from the position + * after the given position. 
*/ #define list_for_each_entry_from_rcu(pos, head, member) \ for (; &(pos)->member != (head); \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 65163aa0bb04..75e5b393cf44 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -64,7 +64,6 @@ void rcu_barrier_tasks(void); void __rcu_read_lock(void); void __rcu_read_unlock(void); -void rcu_read_unlock_special(struct task_struct *t); void synchronize_rcu(void); /* @@ -159,11 +158,11 @@ static inline void rcu_init_nohz(void) { } } while (0) /* - * Note a voluntary context switch for RCU-tasks benefit. This is a - * macro rather than an inline function to avoid #include hell. + * Note a quasi-voluntary context switch for RCU-tasks's benefit. + * This is a macro rather than an inline function to avoid #include hell. */ #ifdef CONFIG_TASKS_RCU -#define rcu_note_voluntary_context_switch_lite(t) \ +#define rcu_tasks_qs(t) \ do { \ if (READ_ONCE((t)->rcu_tasks_holdout)) \ WRITE_ONCE((t)->rcu_tasks_holdout, false); \ @@ -171,14 +170,14 @@ static inline void rcu_init_nohz(void) { } #define rcu_note_voluntary_context_switch(t) \ do { \ rcu_all_qs(); \ - rcu_note_voluntary_context_switch_lite(t); \ + rcu_tasks_qs(t); \ } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); void exit_tasks_rcu_start(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU */ -#define rcu_note_voluntary_context_switch_lite(t) do { } while (0) +#define rcu_tasks_qs(t) do { } while (0) #define rcu_note_voluntary_context_switch(t) rcu_all_qs() #define call_rcu_tasks call_rcu_sched #define synchronize_rcu_tasks synchronize_sched @@ -195,8 +194,8 @@ static inline void exit_tasks_rcu_finish(void) { } */ #define cond_resched_tasks_rcu_qs() \ do { \ - if (!cond_resched()) \ - rcu_note_voluntary_context_switch_lite(current); \ + rcu_tasks_qs(current); \ + cond_resched(); \ } while (0) /* @@ -567,8 +566,8 @@ static inline void 
rcu_preempt_sleep_check(void) { } * This is simply an identity function, but it documents where a pointer * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. In C11, it would map to - * kill_dependency(). It could be used as follows: - * `` + * kill_dependency(). It could be used as follows:: + * * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); @@ -579,7 +578,6 @@ static inline void rcu_preempt_sleep_check(void) { } * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); - *`` */ #define rcu_pointer_handoff(p) (p) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 7b3c82e8a625..8d9a0ea8f0b5 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -93,7 +93,7 @@ static inline void kfree_call_rcu(struct rcu_head *head, #define rcu_note_context_switch(preempt) \ do { \ rcu_sched_qs(); \ - rcu_note_voluntary_context_switch_lite(current); \ + rcu_tasks_qs(current); \ } while (0) static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 91494d7e8e41..3e72a291c401 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -195,6 +195,16 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) return retval; } +/* Used by tracing, cannot be traced and cannot invoke lockdep. */ +static inline notrace int +srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp) +{ + int retval; + + retval = __srcu_read_lock(sp); + return retval; +} + /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. * @sp: srcu_struct in which to unregister the old reader. @@ -209,6 +219,13 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __srcu_read_unlock(sp, idx); } +/* Used by tracing, cannot be traced and cannot call lockdep. 
*/ +static inline notrace void +srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp) +{ + __srcu_read_unlock(sp, idx); +} + /** * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock * diff --git a/include/linux/torture.h b/include/linux/torture.h index 66272862070b..61dfd93b6ee4 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -64,6 +64,8 @@ struct torture_random_state { long trs_count; }; #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } +#define DEFINE_TORTURE_RANDOM_PERCPU(name) \ + DEFINE_PER_CPU(struct torture_random_state, name) unsigned long torture_random(struct torture_random_state *trsp); /* Task shuffler, which causes CPUs to occasionally go idle. */ @@ -79,7 +81,7 @@ void stutter_wait(const char *title); int torture_stutter_init(int s); /* Initialization and cleanup. */ -bool torture_init_begin(char *ttype, bool v); +bool torture_init_begin(char *ttype, int v); void torture_init_end(void); bool torture_cleanup_begin(void); void torture_cleanup_end(void); diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 5936aac357ab..a8d07feff6a0 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -52,6 +52,7 @@ TRACE_EVENT(rcu_utilization, * "cpuqs": CPU passes through a quiescent state. * "cpuonl": CPU comes online. * "cpuofl": CPU goes offline. + * "cpuofl-bgp": CPU goes offline while blocking a grace period. * "reqwait": GP kthread sleeps waiting for grace-period request. * "reqwaitsig": GP kthread awakened by signal from reqwait state. * "fqswait": GP kthread waiting until time to force quiescent states. 
@@ -63,24 +64,24 @@ TRACE_EVENT(rcu_utilization, */ TRACE_EVENT(rcu_grace_period, - TP_PROTO(const char *rcuname, unsigned long gpnum, const char *gpevent), + TP_PROTO(const char *rcuname, unsigned long gp_seq, const char *gpevent), - TP_ARGS(rcuname, gpnum, gpevent), + TP_ARGS(rcuname, gp_seq, gpevent), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(const char *, gpevent) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->gpevent = gpevent; ), TP_printk("%s %lu %s", - __entry->rcuname, __entry->gpnum, __entry->gpevent) + __entry->rcuname, __entry->gp_seq, __entry->gpevent) ); /* @@ -90,8 +91,8 @@ TRACE_EVENT(rcu_grace_period, * * "Startleaf": Request a grace period based on leaf-node data. * "Prestarted": Someone beat us to the request - * "Startedleaf": Leaf-node start proved sufficient. - * "Startedleafroot": Leaf-node start proved sufficient after checking root. + * "Startedleaf": Leaf node marked for future GP. + * "Startedleafroot": All nodes from leaf to root marked for future GP. * "Startedroot": Requested a nocb grace period based on root-node data. * "NoGPkthread": The RCU grace-period kthread has not yet started. * "StartWait": Start waiting for the requested grace period. 
@@ -102,17 +103,16 @@ TRACE_EVENT(rcu_grace_period, */ TRACE_EVENT(rcu_future_grace_period, - TP_PROTO(const char *rcuname, unsigned long gpnum, unsigned long completed, - unsigned long c, u8 level, int grplo, int grphi, + TP_PROTO(const char *rcuname, unsigned long gp_seq, + unsigned long gp_seq_req, u8 level, int grplo, int grphi, const char *gpevent), - TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent), + TP_ARGS(rcuname, gp_seq, gp_seq_req, level, grplo, grphi, gpevent), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) - __field(unsigned long, completed) - __field(unsigned long, c) + __field(unsigned long, gp_seq) + __field(unsigned long, gp_seq_req) __field(u8, level) __field(int, grplo) __field(int, grphi) @@ -121,19 +121,17 @@ TRACE_EVENT(rcu_future_grace_period, TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; - __entry->completed = completed; - __entry->c = c; + __entry->gp_seq = gp_seq; + __entry->gp_seq_req = gp_seq_req; __entry->level = level; __entry->grplo = grplo; __entry->grphi = grphi; __entry->gpevent = gpevent; ), - TP_printk("%s %lu %lu %lu %u %d %d %s", - __entry->rcuname, __entry->gpnum, __entry->completed, - __entry->c, __entry->level, __entry->grplo, __entry->grphi, - __entry->gpevent) + TP_printk("%s %lu %lu %u %d %d %s", + __entry->rcuname, __entry->gp_seq, __entry->gp_seq_req, __entry->level, + __entry->grplo, __entry->grphi, __entry->gpevent) ); /* @@ -145,14 +143,14 @@ TRACE_EVENT(rcu_future_grace_period, */ TRACE_EVENT(rcu_grace_period_init, - TP_PROTO(const char *rcuname, unsigned long gpnum, u8 level, + TP_PROTO(const char *rcuname, unsigned long gp_seq, u8 level, int grplo, int grphi, unsigned long qsmask), - TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), + TP_ARGS(rcuname, gp_seq, level, grplo, grphi, qsmask), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(u8, level) __field(int, 
grplo) __field(int, grphi) @@ -161,7 +159,7 @@ TRACE_EVENT(rcu_grace_period_init, TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->level = level; __entry->grplo = grplo; __entry->grphi = grphi; @@ -169,7 +167,7 @@ TRACE_EVENT(rcu_grace_period_init, ), TP_printk("%s %lu %u %d %d %lx", - __entry->rcuname, __entry->gpnum, __entry->level, + __entry->rcuname, __entry->gp_seq, __entry->level, __entry->grplo, __entry->grphi, __entry->qsmask) ); @@ -301,24 +299,24 @@ TRACE_EVENT(rcu_nocb_wake, */ TRACE_EVENT(rcu_preempt_task, - TP_PROTO(const char *rcuname, int pid, unsigned long gpnum), + TP_PROTO(const char *rcuname, int pid, unsigned long gp_seq), - TP_ARGS(rcuname, pid, gpnum), + TP_ARGS(rcuname, pid, gp_seq), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(int, pid) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->pid = pid; ), TP_printk("%s %lu %d", - __entry->rcuname, __entry->gpnum, __entry->pid) + __entry->rcuname, __entry->gp_seq, __entry->pid) ); /* @@ -328,23 +326,23 @@ TRACE_EVENT(rcu_preempt_task, */ TRACE_EVENT(rcu_unlock_preempted_task, - TP_PROTO(const char *rcuname, unsigned long gpnum, int pid), + TP_PROTO(const char *rcuname, unsigned long gp_seq, int pid), - TP_ARGS(rcuname, gpnum, pid), + TP_ARGS(rcuname, gp_seq, pid), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(int, pid) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->pid = pid; ), - TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) + TP_printk("%s %lu %d", __entry->rcuname, __entry->gp_seq, __entry->pid) ); /* @@ -357,15 +355,15 @@ TRACE_EVENT(rcu_unlock_preempted_task, */ TRACE_EVENT(rcu_quiescent_state_report, - TP_PROTO(const char *rcuname, unsigned 
long gpnum, + TP_PROTO(const char *rcuname, unsigned long gp_seq, unsigned long mask, unsigned long qsmask, u8 level, int grplo, int grphi, int gp_tasks), - TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), + TP_ARGS(rcuname, gp_seq, mask, qsmask, level, grplo, grphi, gp_tasks), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(unsigned long, mask) __field(unsigned long, qsmask) __field(u8, level) @@ -376,7 +374,7 @@ TRACE_EVENT(rcu_quiescent_state_report, TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->mask = mask; __entry->qsmask = qsmask; __entry->level = level; @@ -386,41 +384,41 @@ TRACE_EVENT(rcu_quiescent_state_report, ), TP_printk("%s %lu %lx>%lx %u %d %d %u", - __entry->rcuname, __entry->gpnum, + __entry->rcuname, __entry->gp_seq, __entry->mask, __entry->qsmask, __entry->level, __entry->grplo, __entry->grphi, __entry->gp_tasks) ); /* * Tracepoint for quiescent states detected by force_quiescent_state(). - * These trace events include the type of RCU, the grace-period number that - * was blocked by the CPU, the CPU itself, and the type of quiescent state, - * which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, "kick" - * when kicking a CPU that has been in dyntick-idle mode for too long, or - * "rqc" if the CPU got a quiescent state via its rcu_qs_ctr. + * These trace events include the type of RCU, the grace-period number + * that was blocked by the CPU, the CPU itself, and the type of quiescent + * state, which can be "dti" for dyntick-idle mode, "kick" when kicking + * a CPU that has been in dyntick-idle mode for too long, or "rqc" if the + * CPU got a quiescent state via its rcu_qs_ctr. 
*/ TRACE_EVENT(rcu_fqs, - TP_PROTO(const char *rcuname, unsigned long gpnum, int cpu, const char *qsevent), + TP_PROTO(const char *rcuname, unsigned long gp_seq, int cpu, const char *qsevent), - TP_ARGS(rcuname, gpnum, cpu, qsevent), + TP_ARGS(rcuname, gp_seq, cpu, qsevent), TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpnum) + __field(unsigned long, gp_seq) __field(int, cpu) __field(const char *, qsevent) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpnum = gpnum; + __entry->gp_seq = gp_seq; __entry->cpu = cpu; __entry->qsevent = qsevent; ), TP_printk("%s %lu %d %s", - __entry->rcuname, __entry->gpnum, + __entry->rcuname, __entry->gp_seq, __entry->cpu, __entry->qsevent) ); @@ -753,23 +751,23 @@ TRACE_EVENT(rcu_barrier, #else /* #ifdef CONFIG_RCU_TRACE */ -#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) -#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \ +#define trace_rcu_grace_period(rcuname, gp_seq, gpevent) do { } while (0) +#define trace_rcu_future_grace_period(rcuname, gp_seq, gp_seq_req, \ level, grplo, grphi, event) \ do { } while (0) -#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ +#define trace_rcu_grace_period_init(rcuname, gp_seq, level, grplo, grphi, \ qsmask) do { } while (0) #define trace_rcu_exp_grace_period(rcuname, gqseq, gpevent) \ do { } while (0) #define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \ do { } while (0) #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0) -#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) -#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) -#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ +#define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0) +#define trace_rcu_unlock_preempted_task(rcuname, gp_seq, pid) do { } while (0) +#define trace_rcu_quiescent_state_report(rcuname, 
gp_seq, mask, qsmask, level, \ grplo, grphi, gp_tasks) do { } \ while (0) -#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) +#define trace_rcu_fqs(rcuname, gp_seq, cpu, qsevent) do { } while (0) #define trace_rcu_dyntick(polarity, oldnesting, newnesting, dyntick) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \ |