summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2018-07-17 10:16:02 +0300
committerIngo Molnar <mingo@kernel.org>2018-07-17 10:16:02 +0300
commitea73a5c6929b9c7d30b7b424414645641cb7d1d9 (patch)
tree19de3c2469c6476be82b143535a018d0053f9460 /include
parent9d3cce1e8b8561fed5f383d22a4d6949db4eadbe (diff)
parent18952651dae8efcc6d565c97f8fe5629b399cb3e (diff)
downloadlinux-ea73a5c6929b9c7d30b7b424414645641cb7d1d9.tar.xz
Merge branch 'for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull RCU updates from Paul E. McKenney: - An optimization and a fix for RCU expedited grace periods, with the fix being from Boqun Feng. - Miscellaneous fixes, including a lockdep-annotation fix from Boqun Feng. - SRCU updates. - Updates to rcutorture and associated scripting. - Introduce grace-period sequence numbers to the RCU-bh, RCU-preempt, and RCU-sched flavors, replacing the old ->gpnum and ->completed pair of fields. This change allows lockless code to obtain the complete grace-period state with a single READ_ONCE(), which is needed to maintain tolerable lock contention during the upcoming consolidation of the three RCU flavors. Note that grace-period sequence numbers are already used by rcu_barrier(), expedited RCU grace periods, and SRCU, and are thus already heavily used and well-tested. Joel Fernandes contributed a number of excellent fixes and improvements. - Clean up some grace-period-reporting loose ends, including improving the handling of quiescent states from offline CPUs and fixing some false-positive WARN_ON_ONCE() invocations. (Strictly speaking, the WARN_ON_ONCE() invocations were quite correct, but their invariants were (harmlessly) violated by the earlier sloppy handling of quiescent states from offline CPUs.) In addition, improve grace-period forward-progress guarantees so as to allow removal of fail-safe checks that required otherwise needless lock acquisitions. Finally, add more diagnostics to help debug the upcoming consolidation of the RCU-bh, RCU-preempt, and RCU-sched flavors. - Additional miscellaneous fixes, including those contributed by Byungchul Park, Mauro Carvalho Chehab, Joe Perches, Joel Fernandes, Steven Rostedt, Andrea Parri, and Neil Brown. - Additional torture-test changes, including several contributed by Arnd Bergmann and Joel Fernandes. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/rculist.h19
-rw-r--r--include/linux/rcupdate.h20
-rw-r--r--include/linux/rcutiny.h2
-rw-r--r--include/linux/srcu.h17
-rw-r--r--include/linux/torture.h4
-rw-r--r--include/trace/events/rcu.h112
6 files changed, 103 insertions, 71 deletions
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 36df6ccbc874..4786c2235b98 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -396,7 +396,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
* @member: the name of the list_head within the struct.
*
* Continue to iterate over list of given type, continuing after
- * the current position.
+ * the current position which must have been in the list when the RCU read
+ * lock was taken.
+ * This would typically require either that you obtained the node from a
+ * previous walk of the list in the same RCU read-side critical section, or
+ * that you held some sort of non-RCU reference (such as a reference count)
+ * to keep the node alive *and* in the list.
+ *
+ * This iterator is similar to list_for_each_entry_from_rcu() except
+ * this starts after the given position and that one starts at the given
+ * position.
*/
#define list_for_each_entry_continue_rcu(pos, head, member) \
for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \
@@ -411,6 +420,14 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
*
* Iterate over the tail of a list starting from a given position,
* which must have been in the list when the RCU read lock was taken.
+ * This would typically require either that you obtained the node from a
+ * previous walk of the list in the same RCU read-side critical section, or
+ * that you held some sort of non-RCU reference (such as a reference count)
+ * to keep the node alive *and* in the list.
+ *
+ * This iterator is similar to list_for_each_entry_continue_rcu() except
+ * this starts from the given position and that one starts from the position
+ * after the given position.
*/
#define list_for_each_entry_from_rcu(pos, head, member) \
for (; &(pos)->member != (head); \
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 65163aa0bb04..75e5b393cf44 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -64,7 +64,6 @@ void rcu_barrier_tasks(void);
void __rcu_read_lock(void);
void __rcu_read_unlock(void);
-void rcu_read_unlock_special(struct task_struct *t);
void synchronize_rcu(void);
/*
@@ -159,11 +158,11 @@ static inline void rcu_init_nohz(void) { }
} while (0)
/*
- * Note a voluntary context switch for RCU-tasks benefit. This is a
- * macro rather than an inline function to avoid #include hell.
+ * Note a quasi-voluntary context switch for RCU-tasks's benefit.
+ * This is a macro rather than an inline function to avoid #include hell.
*/
#ifdef CONFIG_TASKS_RCU
-#define rcu_note_voluntary_context_switch_lite(t) \
+#define rcu_tasks_qs(t) \
do { \
if (READ_ONCE((t)->rcu_tasks_holdout)) \
WRITE_ONCE((t)->rcu_tasks_holdout, false); \
@@ -171,14 +170,14 @@ static inline void rcu_init_nohz(void) { }
#define rcu_note_voluntary_context_switch(t) \
do { \
rcu_all_qs(); \
- rcu_note_voluntary_context_switch_lite(t); \
+ rcu_tasks_qs(t); \
} while (0)
void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
void synchronize_rcu_tasks(void);
void exit_tasks_rcu_start(void);
void exit_tasks_rcu_finish(void);
#else /* #ifdef CONFIG_TASKS_RCU */
-#define rcu_note_voluntary_context_switch_lite(t) do { } while (0)
+#define rcu_tasks_qs(t) do { } while (0)
#define rcu_note_voluntary_context_switch(t) rcu_all_qs()
#define call_rcu_tasks call_rcu_sched
#define synchronize_rcu_tasks synchronize_sched
@@ -195,8 +194,8 @@ static inline void exit_tasks_rcu_finish(void) { }
*/
#define cond_resched_tasks_rcu_qs() \
do { \
- if (!cond_resched()) \
- rcu_note_voluntary_context_switch_lite(current); \
+ rcu_tasks_qs(current); \
+ cond_resched(); \
} while (0)
/*
@@ -567,8 +566,8 @@ static inline void rcu_preempt_sleep_check(void) { }
* This is simply an identity function, but it documents where a pointer
* is handed off from RCU to some other synchronization mechanism, for
* example, reference counting or locking. In C11, it would map to
- * kill_dependency(). It could be used as follows:
- * ``
+ * kill_dependency(). It could be used as follows::
+ *
* rcu_read_lock();
* p = rcu_dereference(gp);
* long_lived = is_long_lived(p);
@@ -579,7 +578,6 @@ static inline void rcu_preempt_sleep_check(void) { }
* p = rcu_pointer_handoff(p);
* }
* rcu_read_unlock();
- *``
*/
#define rcu_pointer_handoff(p) (p)
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7b3c82e8a625..8d9a0ea8f0b5 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -93,7 +93,7 @@ static inline void kfree_call_rcu(struct rcu_head *head,
#define rcu_note_context_switch(preempt) \
do { \
rcu_sched_qs(); \
- rcu_note_voluntary_context_switch_lite(current); \
+ rcu_tasks_qs(current); \
} while (0)
static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 91494d7e8e41..3e72a291c401 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -195,6 +195,16 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
return retval;
}
+/* Used by tracing, cannot be traced and cannot invoke lockdep. */
+static inline notrace int
+srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp)
+{
+ int retval;
+
+ retval = __srcu_read_lock(sp);
+ return retval;
+}
+
/**
* srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
* @sp: srcu_struct in which to unregister the old reader.
@@ -209,6 +219,13 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
__srcu_read_unlock(sp, idx);
}
+/* Used by tracing, cannot be traced and cannot call lockdep. */
+static inline notrace void
+srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp)
+{
+ __srcu_read_unlock(sp, idx);
+}
+
/**
* smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
*
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 66272862070b..61dfd93b6ee4 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -64,6 +64,8 @@ struct torture_random_state {
long trs_count;
};
#define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 }
+#define DEFINE_TORTURE_RANDOM_PERCPU(name) \
+ DEFINE_PER_CPU(struct torture_random_state, name)
unsigned long torture_random(struct torture_random_state *trsp);
/* Task shuffler, which causes CPUs to occasionally go idle. */
@@ -79,7 +81,7 @@ void stutter_wait(const char *title);
int torture_stutter_init(int s);
/* Initialization and cleanup. */
-bool torture_init_begin(char *ttype, bool v);
+bool torture_init_begin(char *ttype, int v);
void torture_init_end(void);
bool torture_cleanup_begin(void);
void torture_cleanup_end(void);
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 5936aac357ab..a8d07feff6a0 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -52,6 +52,7 @@ TRACE_EVENT(rcu_utilization,
* "cpuqs": CPU passes through a quiescent state.
* "cpuonl": CPU comes online.
* "cpuofl": CPU goes offline.
+ * "cpuofl-bgp": CPU goes offline while blocking a grace period.
* "reqwait": GP kthread sleeps waiting for grace-period request.
* "reqwaitsig": GP kthread awakened by signal from reqwait state.
* "fqswait": GP kthread waiting until time to force quiescent states.
@@ -63,24 +64,24 @@ TRACE_EVENT(rcu_utilization,
*/
TRACE_EVENT(rcu_grace_period,
- TP_PROTO(const char *rcuname, unsigned long gpnum, const char *gpevent),
+ TP_PROTO(const char *rcuname, unsigned long gp_seq, const char *gpevent),
- TP_ARGS(rcuname, gpnum, gpevent),
+ TP_ARGS(rcuname, gp_seq, gpevent),
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gpnum)
+ __field(unsigned long, gp_seq)
__field(const char *, gpevent)
),
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gpnum = gpnum;
+ __entry->gp_seq = gp_seq;
__entry->gpevent = gpevent;
),
TP_printk("%s %lu %s",
- __entry->rcuname, __entry->gpnum, __entry->gpevent)
+ __entry->rcuname, __entry->gp_seq, __entry->gpevent)
);
/*
@@ -90,8 +91,8 @@ TRACE_EVENT(rcu_grace_period,
*
* "Startleaf": Request a grace period based on leaf-node data.
* "Prestarted": Someone beat us to the request
- * "Startedleaf": Leaf-node start proved sufficient.
- * "Startedleafroot": Leaf-node start proved sufficient after checking root.
+ * "Startedleaf": Leaf node marked for future GP.
+ * "Startedleafroot": All nodes from leaf to root marked for future GP.
* "Startedroot": Requested a nocb grace period based on root-node data.
* "NoGPkthread": The RCU grace-period kthread has not yet started.
* "StartWait": Start waiting for the requested grace period.
@@ -102,17 +103,16 @@ TRACE_EVENT(rcu_grace_period,
*/
TRACE_EVENT(rcu_future_grace_period,
- TP_PROTO(const char *rcuname, unsigned long gpnum, unsigned long completed,
- unsigned long c, u8 level, int grplo, int grphi,
+ TP_PROTO(const char *rcuname, unsigned long gp_seq,
+ unsigned long gp_seq_req, u8 level, int grplo, int grphi,
const char *gpevent),
- TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent),
+ TP_ARGS(rcuname, gp_seq, gp_seq_req, level, grplo, grphi, gpevent),
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gpnum)
- __field(unsigned long, completed)
- __field(unsigned long, c)
+ __field(unsigned long, gp_seq)
+ __field(unsigned long, gp_seq_req)
__field(u8, level)
__field(int, grplo)
__field(int, grphi)
@@ -121,19 +121,17 @@ TRACE_EVENT(rcu_future_grace_period,
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gpnum = gpnum;
- __entry->completed = completed;
- __entry->c = c;
+ __entry->gp_seq = gp_seq;
+ __entry->gp_seq_req = gp_seq_req;
__entry->level = level;
__entry->grplo = grplo;
__entry->grphi = grphi;
__entry->gpevent = gpevent;
),
- TP_printk("%s %lu %lu %lu %u %d %d %s",
- __entry->rcuname, __entry->gpnum, __entry->completed,
- __entry->c, __entry->level, __entry->grplo, __entry->grphi,
- __entry->gpevent)
+ TP_printk("%s %lu %lu %u %d %d %s",
+ __entry->rcuname, __entry->gp_seq, __entry->gp_seq_req, __entry->level,
+ __entry->grplo, __entry->grphi, __entry->gpevent)
);
/*
@@ -145,14 +143,14 @@ TRACE_EVENT(rcu_future_grace_period,
*/
TRACE_EVENT(rcu_grace_period_init,
- TP_PROTO(const char *rcuname, unsigned long gpnum, u8 level,
+ TP_PROTO(const char *rcuname, unsigned long gp_seq, u8 level,
int grplo, int grphi, unsigned long qsmask),
- TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask),
+ TP_ARGS(rcuname, gp_seq, level, grplo, grphi, qsmask),
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gpnum)
+ __field(unsigned long, gp_seq)
__field(u8, level)
__field(int, grplo)
__field(int, grphi)
@@ -161,7 +159,7 @@ TRACE_EVENT(rcu_grace_period_init,
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gpnum = gpnum;
+ __entry->gp_seq = gp_seq;
__entry->level = level;
__entry->grplo = grplo;
__entry->grphi = grphi;
@@ -169,7 +167,7 @@ TRACE_EVENT(rcu_grace_period_init,
),
TP_printk("%s %lu %u %d %d %lx",
- __entry->rcuname, __entry->gpnum, __entry->level,
+ __entry->rcuname, __entry->gp_seq, __entry->level,
__entry->grplo, __entry->grphi, __entry->qsmask)
);
@@ -301,24 +299,24 @@ TRACE_EVENT(rcu_nocb_wake,
*/
TRACE_EVENT(rcu_preempt_task,
- TP_PROTO(const char *rcuname, int pid, unsigned long gpnum),
+ TP_PROTO(const char *rcuname, int pid, unsigned long gp_seq),
- TP_ARGS(rcuname, pid, gpnum),
+ TP_ARGS(rcuname, pid, gp_seq),
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gpnum)
+ __field(unsigned long, gp_seq)
__field(int, pid)
),
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gpnum = gpnum;
+ __entry->gp_seq = gp_seq;
__entry->pid = pid;
),
TP_printk("%s %lu %d",
- __entry->rcuname, __entry->gpnum, __entry->pid)
+ __entry->rcuname, __entry->gp_seq, __entry->pid)
);
/*
@@ -328,23 +326,23 @@ TRACE_EVENT(rcu_preempt_task,
*/
TRACE_EVENT(rcu_unlock_preempted_task,
- TP_PROTO(const char *rcuname, unsigned long gpnum, int pid),
+ TP_PROTO(const char *rcuname, unsigned long gp_seq, int pid),
- TP_ARGS(rcuname, gpnum, pid),
+ TP_ARGS(rcuname, gp_seq, pid),
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gpnum)
+ __field(unsigned long, gp_seq)
__field(int, pid)
),
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gpnum = gpnum;
+ __entry->gp_seq = gp_seq;
__entry->pid = pid;
),
- TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid)
+ TP_printk("%s %lu %d", __entry->rcuname, __entry->gp_seq, __entry->pid)
);
/*
@@ -357,15 +355,15 @@ TRACE_EVENT(rcu_unlock_preempted_task,
*/
TRACE_EVENT(rcu_quiescent_state_report,
- TP_PROTO(const char *rcuname, unsigned long gpnum,
+ TP_PROTO(const char *rcuname, unsigned long gp_seq,
unsigned long mask, unsigned long qsmask,
u8 level, int grplo, int grphi, int gp_tasks),
- TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks),
+ TP_ARGS(rcuname, gp_seq, mask, qsmask, level, grplo, grphi, gp_tasks),
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gpnum)
+ __field(unsigned long, gp_seq)
__field(unsigned long, mask)
__field(unsigned long, qsmask)
__field(u8, level)
@@ -376,7 +374,7 @@ TRACE_EVENT(rcu_quiescent_state_report,
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gpnum = gpnum;
+ __entry->gp_seq = gp_seq;
__entry->mask = mask;
__entry->qsmask = qsmask;
__entry->level = level;
@@ -386,41 +384,41 @@ TRACE_EVENT(rcu_quiescent_state_report,
),
TP_printk("%s %lu %lx>%lx %u %d %d %u",
- __entry->rcuname, __entry->gpnum,
+ __entry->rcuname, __entry->gp_seq,
__entry->mask, __entry->qsmask, __entry->level,
__entry->grplo, __entry->grphi, __entry->gp_tasks)
);
/*
* Tracepoint for quiescent states detected by force_quiescent_state().
- * These trace events include the type of RCU, the grace-period number that
- * was blocked by the CPU, the CPU itself, and the type of quiescent state,
- * which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, "kick"
- * when kicking a CPU that has been in dyntick-idle mode for too long, or
- * "rqc" if the CPU got a quiescent state via its rcu_qs_ctr.
+ * These trace events include the type of RCU, the grace-period number
+ * that was blocked by the CPU, the CPU itself, and the type of quiescent
+ * state, which can be "dti" for dyntick-idle mode, "kick" when kicking
+ * a CPU that has been in dyntick-idle mode for too long, or "rqc" if the
+ * CPU got a quiescent state via its rcu_qs_ctr.
*/
TRACE_EVENT(rcu_fqs,
- TP_PROTO(const char *rcuname, unsigned long gpnum, int cpu, const char *qsevent),
+ TP_PROTO(const char *rcuname, unsigned long gp_seq, int cpu, const char *qsevent),
- TP_ARGS(rcuname, gpnum, cpu, qsevent),
+ TP_ARGS(rcuname, gp_seq, cpu, qsevent),
TP_STRUCT__entry(
__field(const char *, rcuname)
- __field(unsigned long, gpnum)
+ __field(unsigned long, gp_seq)
__field(int, cpu)
__field(const char *, qsevent)
),
TP_fast_assign(
__entry->rcuname = rcuname;
- __entry->gpnum = gpnum;
+ __entry->gp_seq = gp_seq;
__entry->cpu = cpu;
__entry->qsevent = qsevent;
),
TP_printk("%s %lu %d %s",
- __entry->rcuname, __entry->gpnum,
+ __entry->rcuname, __entry->gp_seq,
__entry->cpu, __entry->qsevent)
);
@@ -753,23 +751,23 @@ TRACE_EVENT(rcu_barrier,
#else /* #ifdef CONFIG_RCU_TRACE */
-#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
-#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
+#define trace_rcu_grace_period(rcuname, gp_seq, gpevent) do { } while (0)
+#define trace_rcu_future_grace_period(rcuname, gp_seq, gp_seq_req, \
level, grplo, grphi, event) \
do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+#define trace_rcu_grace_period_init(rcuname, gp_seq, level, grplo, grphi, \
qsmask) do { } while (0)
#define trace_rcu_exp_grace_period(rcuname, gqseq, gpevent) \
do { } while (0)
#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \
do { } while (0)
#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
-#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
-#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
-#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
+#define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0)
+#define trace_rcu_unlock_preempted_task(rcuname, gp_seq, pid) do { } while (0)
+#define trace_rcu_quiescent_state_report(rcuname, gp_seq, mask, qsmask, level, \
grplo, grphi, gp_tasks) do { } \
while (0)
-#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
+#define trace_rcu_fqs(rcuname, gp_seq, cpu, qsevent) do { } while (0)
#define trace_rcu_dyntick(polarity, oldnesting, newnesting, dyntick) do { } while (0)
#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0)
#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \