summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoqun Feng <boqun.feng@gmail.com>2026-01-04 13:52:53 +0300
committerBoqun Feng <boqun.feng@gmail.com>2026-01-04 13:52:53 +0300
commit60908279164a1dc651b7f4685cfbfe5161a4a797 (patch)
tree5f8ebdb1d737f6119ec600731464b4c135b95586
parent8f0b4cce4481fb22653697cced8d0d04027cb1e8 (diff)
parent760f05bc830d86667c07af6c80dc58d599061a67 (diff)
downloadlinux-60908279164a1dc651b7f4685cfbfe5161a4a797.tar.xz
Merge branch 'rcu-tasks-trace.20260101a'
* rcu-tasks-trace.20260101a: rcutorture: Test rcu_tasks_trace_expedite_current() srcu: Create an rcu_tasks_trace_expedite_current() function checkpatch: Deprecate rcu_read_{,un}lock_trace() rcu: Update Requirements.rst for RCU Tasks Trace rcu: Add noinstr-fast rcu_read_{,un}lock_tasks_trace() APIs rcu: Move rcu_tasks_trace_srcu_struct out of #ifdef CONFIG_TASKS_RCU_GENERIC rcu: Clean up after the SRCU-fastification of RCU Tasks Trace context_tracking: Remove rcu_task_trace_heavyweight_{enter,exit}() rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.rst12
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt15
-rw-r--r--include/linux/rcupdate.h31
-rw-r--r--include/linux/rcupdate_trace.h166
-rw-r--r--include/linux/sched.h6
-rw-r--r--init/init_task.c3
-rw-r--r--kernel/context_tracking.c20
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/rcu/Kconfig43
-rw-r--r--kernel/rcu/rcu.h9
-rw-r--r--kernel/rcu/rcuscale.c7
-rw-r--r--kernel/rcu/rcutorture.c3
-rw-r--r--kernel/rcu/tasks.h708
-rwxr-xr-xscripts/checkpatch.pl4
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE011
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE021
16 files changed, 191 insertions, 841 deletions
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index ba417a08b93d..b5cdbba3ec2e 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -2780,12 +2780,12 @@ Tasks Trace RCU
~~~~~~~~~~~~~~~
Some forms of tracing need to sleep in readers, but cannot tolerate
-SRCU's read-side overhead, which includes a full memory barrier in both
-srcu_read_lock() and srcu_read_unlock(). This need is handled by a
-Tasks Trace RCU that uses scheduler locking and IPIs to synchronize with
-readers. Real-time systems that cannot tolerate IPIs may build their
-kernels with ``CONFIG_TASKS_TRACE_RCU_READ_MB=y``, which avoids the IPIs at
-the expense of adding full memory barriers to the read-side primitives.
+SRCU's read-side overhead, which includes a full memory barrier in
+both srcu_read_lock() and srcu_read_unlock(). This need is handled by
+a Tasks Trace RCU API implemented as thin wrappers around SRCU-fast,
+which avoids the read-side memory barriers, at least for architectures
+that apply noinstr to kernel entry/exit code (or that build with
+``CONFIG_TASKS_TRACE_RCU_NO_MB=y``).
The tasks-trace-RCU API is also reasonably compact,
consisting of rcu_read_lock_trace(), rcu_read_unlock_trace(),
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a8d0afde7f85..1b8e5cadbecb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6249,13 +6249,6 @@ Kernel parameters
dynamically) adjusted. This parameter is intended
for use in testing.
- rcupdate.rcu_task_ipi_delay= [KNL]
- Set time in jiffies during which RCU tasks will
- avoid sending IPIs, starting with the beginning
- of a given grace period. Setting a large
- number avoids disturbing real-time workloads,
- but lengthens grace periods.
-
rcupdate.rcu_task_lazy_lim= [KNL]
Number of callbacks on a given CPU that will
cancel laziness on that CPU. Use -1 to disable
@@ -6299,14 +6292,6 @@ Kernel parameters
of zero will disable batching. Batching is
always disabled for synchronize_rcu_tasks().
- rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
- Set timeout in milliseconds RCU Tasks
- Trace asynchronous callback batching for
- call_rcu_tasks_trace(). A negative value
- will take the default. A value of zero will
- disable batching. Batching is always disabled
- for synchronize_rcu_tasks_trace().
-
rcupdate.rcu_self_test= [KNL]
Run the RCU early boot self tests
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index c5b30054cd01..bd5a420cf09a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -175,36 +175,7 @@ void rcu_tasks_torture_stats_print(char *tt, char *tf);
# define synchronize_rcu_tasks synchronize_rcu
# endif
-# ifdef CONFIG_TASKS_TRACE_RCU
-// Bits for ->trc_reader_special.b.need_qs field.
-#define TRC_NEED_QS 0x1 // Task needs a quiescent state.
-#define TRC_NEED_QS_CHECKED 0x2 // Task has been checked for needing quiescent state.
-
-u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new);
-void rcu_tasks_trace_qs_blkd(struct task_struct *t);
-
-# define rcu_tasks_trace_qs(t) \
- do { \
- int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \
- \
- if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \
- likely(!___rttq_nesting)) { \
- rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \
- } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \
- !READ_ONCE((t)->trc_reader_special.b.blocked)) { \
- rcu_tasks_trace_qs_blkd(t); \
- } \
- } while (0)
-void rcu_tasks_trace_torture_stats_print(char *tt, char *tf);
-# else
-# define rcu_tasks_trace_qs(t) do { } while (0)
-# endif
-
-#define rcu_tasks_qs(t, preempt) \
-do { \
- rcu_tasks_classic_qs((t), (preempt)); \
- rcu_tasks_trace_qs(t); \
-} while (0)
+#define rcu_tasks_qs(t, preempt) rcu_tasks_classic_qs((t), (preempt))
# ifdef CONFIG_TASKS_RUDE_RCU
void synchronize_rcu_tasks_rude(void);
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index e6c44eb428ab..cee89e51e45c 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -12,27 +12,74 @@
#include <linux/rcupdate.h>
#include <linux/cleanup.h>
-extern struct lockdep_map rcu_trace_lock_map;
+#ifdef CONFIG_TASKS_TRACE_RCU
+extern struct srcu_struct rcu_tasks_trace_srcu_struct;
+#endif // #ifdef CONFIG_TASKS_TRACE_RCU
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
static inline int rcu_read_lock_trace_held(void)
{
- return lock_is_held(&rcu_trace_lock_map);
+ return srcu_read_lock_held(&rcu_tasks_trace_srcu_struct);
}
-#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
static inline int rcu_read_lock_trace_held(void)
{
return 1;
}
-#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#endif // #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
#ifdef CONFIG_TASKS_TRACE_RCU
-void rcu_read_unlock_trace_special(struct task_struct *t);
+/**
+ * rcu_read_lock_tasks_trace - mark beginning of RCU-trace read-side critical section
+ *
+ * When synchronize_rcu_tasks_trace() is invoked by one task, then that
+ * task is guaranteed to block until all other tasks exit their read-side
+ * critical sections.  Similarly, if call_rcu_tasks_trace() is invoked on one
+ * task while other tasks are within RCU read-side critical sections,
+ * invocation of the corresponding RCU callback is deferred until after
+ * all the other tasks exit their critical sections.
+ *
+ * For more details, please see the documentation for
+ * srcu_read_lock_fast(). For a description of how implicit RCU
+ * readers provide the needed ordering for architectures defining the
+ * ARCH_WANTS_NO_INSTR Kconfig option (and thus promising never to trace
+ * code where RCU is not watching), please see the __srcu_read_lock_fast()
+ * (non-kerneldoc) header comment.  Otherwise, the smp_mb() below provides
+ * the needed ordering.
+ */
+static inline struct srcu_ctr __percpu *rcu_read_lock_tasks_trace(void)
+{
+ struct srcu_ctr __percpu *ret = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
+
+ rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
+ if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
+ smp_mb(); // Provide ordering on noinstr-incomplete architectures.
+ return ret;
+}
+
+/**
+ * rcu_read_unlock_tasks_trace - mark end of RCU-trace read-side critical section
+ * @scp: return value from corresponding rcu_read_lock_tasks_trace().
+ *
+ * Pairs with the preceding call to rcu_read_lock_tasks_trace() that
+ * returned the value passed in via scp.
+ *
+ * For more details, please see the documentation for rcu_read_unlock().
+ * For memory-ordering information, please see the header comment for the
+ * rcu_read_lock_tasks_trace() function.
+ */
+static inline void rcu_read_unlock_tasks_trace(struct srcu_ctr __percpu *scp)
+{
+ if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
+ smp_mb(); // Provide ordering on noinstr-incomplete architectures.
+ __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
+ srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map);
+}
/**
* rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
@@ -50,12 +97,15 @@ static inline void rcu_read_lock_trace(void)
{
struct task_struct *t = current;
- WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
- barrier();
- if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
- t->trc_reader_special.b.need_mb)
- smp_mb(); // Pairs with update-side barriers
- rcu_lock_acquire(&rcu_trace_lock_map);
+ rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
+ if (t->trc_reader_nesting++) {
+ // In case we interrupted a Tasks Trace RCU reader.
+ return;
+ }
+ barrier(); // nesting before scp to protect against interrupt handler.
+ t->trc_reader_scp = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
+ if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
+ smp_mb(); // Placeholder for more selective ordering
}
/**
@@ -69,26 +119,88 @@ static inline void rcu_read_lock_trace(void)
*/
static inline void rcu_read_unlock_trace(void)
{
- int nesting;
+ struct srcu_ctr __percpu *scp;
struct task_struct *t = current;
- rcu_lock_release(&rcu_trace_lock_map);
- nesting = READ_ONCE(t->trc_reader_nesting) - 1;
- barrier(); // Critical section before disabling.
- // Disable IPI-based setting of .need_qs.
- WRITE_ONCE(t->trc_reader_nesting, INT_MIN + nesting);
- if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) {
- WRITE_ONCE(t->trc_reader_nesting, nesting);
- return; // We assume shallow reader nesting.
+ scp = t->trc_reader_scp;
+ barrier(); // scp before nesting to protect against interrupt handler.
+ if (!--t->trc_reader_nesting) {
+ if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
+ smp_mb(); // Placeholder for more selective ordering
+ __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
}
- WARN_ON_ONCE(nesting != 0);
- rcu_read_unlock_trace_special(t);
+ srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map);
}
-void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
-void synchronize_rcu_tasks_trace(void);
-void rcu_barrier_tasks_trace(void);
-struct task_struct *get_rcu_tasks_trace_gp_kthread(void);
+/**
+ * call_rcu_tasks_trace() - Queue a callback for a trace task-based grace period
+ * @rhp: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a trace rcu-tasks
+ * grace period elapses, in other words after all currently executing
+ * trace rcu-tasks read-side critical sections have completed. These
+ * read-side critical sections are delimited by calls to rcu_read_lock_trace()
+ * and rcu_read_unlock_trace().
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
+ */
+static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func)
+{
+ call_srcu(&rcu_tasks_trace_srcu_struct, rhp, func);
+}
+
+/**
+ * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period
+ *
+ * Control will return to the caller some time after a trace rcu-tasks
+ * grace period has elapsed, in other words after all currently executing
+ * trace rcu-tasks read-side critical sections have elapsed. These read-side
+ * critical sections are delimited by calls to rcu_read_lock_trace()
+ * and rcu_read_unlock_trace().
+ *
+ * This is a very specialized primitive, intended only for a few uses in
+ * tracing and other situations requiring manipulation of function preambles
+ * and profiling hooks. The synchronize_rcu_tasks_trace() function is not
+ * (yet) intended for heavy use from multiple CPUs.
+ *
+ * See the description of synchronize_rcu() for more detailed information
+ * on memory ordering guarantees.
+ */
+static inline void synchronize_rcu_tasks_trace(void)
+{
+ synchronize_srcu(&rcu_tasks_trace_srcu_struct);
+}
+
+/**
+ * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks.
+ *
+ * Note that rcu_barrier_tasks_trace() is not obligated to actually wait,
+ * for example, if there are no pending callbacks.
+ */
+static inline void rcu_barrier_tasks_trace(void)
+{
+ srcu_barrier(&rcu_tasks_trace_srcu_struct);
+}
+
+/**
+ * rcu_tasks_trace_expedite_current - Expedite the current Tasks Trace RCU grace period
+ *
+ * Cause the current Tasks Trace RCU grace period to become expedited.
+ * The grace period following the current one might also be expedited.
+ * If there is no current grace period, one might be created. If the
+ * current grace period is currently sleeping, that sleep will complete
+ * before expediting will take effect.
+ */
+static inline void rcu_tasks_trace_expedite_current(void)
+{
+ srcu_expedite_current(&rcu_tasks_trace_srcu_struct);
+}
+
+// Placeholders to enable stepwise transition.
+void __init rcu_tasks_trace_suppress_unused(void);
+
#else
/*
* The BPF JIT forms these addresses even when it doesn't call these
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d395f2810fac..56156643ccac 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -945,11 +945,7 @@ struct task_struct {
#ifdef CONFIG_TASKS_TRACE_RCU
int trc_reader_nesting;
- int trc_ipi_to_cpu;
- union rcu_special trc_reader_special;
- struct list_head trc_holdout_list;
- struct list_head trc_blkd_node;
- int trc_blkd_cpu;
+ struct srcu_ctr __percpu *trc_reader_scp;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
struct sched_info sched_info;
diff --git a/init/init_task.c b/init/init_task.c
index 49b13d7c3985..db92c404d59a 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -195,9 +195,6 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
#endif
#ifdef CONFIG_TASKS_TRACE_RCU
.trc_reader_nesting = 0,
- .trc_reader_special.s = 0,
- .trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
- .trc_blkd_node = LIST_HEAD_INIT(init_task.trc_blkd_node),
#endif
#ifdef CONFIG_CPUSETS
.mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq,
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index fb5be6e9b423..a743e7ffa6c0 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -54,24 +54,6 @@ static __always_inline void rcu_task_enter(void)
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}
-/* Turn on heavyweight RCU tasks trace readers on kernel exit. */
-static __always_inline void rcu_task_trace_heavyweight_enter(void)
-{
-#ifdef CONFIG_TASKS_TRACE_RCU
- if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
- current->trc_reader_special.b.need_mb = true;
-#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
-}
-
-/* Turn off heavyweight RCU tasks trace readers on kernel entry. */
-static __always_inline void rcu_task_trace_heavyweight_exit(void)
-{
-#ifdef CONFIG_TASKS_TRACE_RCU
- if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
- current->trc_reader_special.b.need_mb = false;
-#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
-}
-
/*
* Record entry into an extended quiescent state. This is only to be
* called when not already in an extended quiescent state, that is,
@@ -85,7 +67,6 @@ static noinstr void ct_kernel_exit_state(int offset)
* critical sections, and we also must force ordering with the
* next idle sojourn.
*/
- rcu_task_trace_heavyweight_enter(); // Before CT state update!
// RCU is still watching. Better not be in extended quiescent state!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !rcu_is_watching_curr_cpu());
(void)ct_state_inc(offset);
@@ -108,7 +89,6 @@ static noinstr void ct_kernel_enter_state(int offset)
*/
seq = ct_state_inc(offset);
// RCU is now watching. Better not be in an extended quiescent state!
- rcu_task_trace_heavyweight_exit(); // After CT state update!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & CT_RCU_WATCHING));
}
diff --git a/kernel/fork.c b/kernel/fork.c
index b1f3915d5f8e..d7ed107cbb47 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1828,9 +1828,6 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
p->trc_reader_nesting = 0;
- p->trc_reader_special.s = 0;
- INIT_LIST_HEAD(&p->trc_holdout_list);
- INIT_LIST_HEAD(&p->trc_blkd_node);
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 4d9b21f69eaa..762299291e09 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -82,7 +82,7 @@ config NEED_SRCU_NMI_SAFE
def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
config TASKS_RCU_GENERIC
- def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
+ def_bool TASKS_RCU || TASKS_RUDE_RCU
help
This option enables generic infrastructure code supporting
task-based RCU implementations. Not for manual selection.
@@ -142,6 +142,29 @@ config TASKS_TRACE_RCU
default n
select IRQ_WORK
+config TASKS_TRACE_RCU_NO_MB
+ bool "Override RCU Tasks Trace inclusion of read-side memory barriers"
+ depends on RCU_EXPERT && TASKS_TRACE_RCU
+ default ARCH_WANTS_NO_INSTR
+ help
+ This option prevents the use of read-side memory barriers in
+ rcu_read_lock_tasks_trace() and rcu_read_unlock_tasks_trace()
+ even in kernels built with CONFIG_ARCH_WANTS_NO_INSTR=n, that is,
+ in kernels that do not have noinstr set up in entry/exit code.
+ By setting this option, you are promising to carefully review
+ use of ftrace, BPF, and friends to ensure that no tracing
+ operation is attached to a function that runs in that portion
+ of the entry/exit code that RCU does not watch, that is,
+ where rcu_is_watching() returns false. Alternatively, you
+ might choose to never remove traces except by rebooting.
+
+ Those wishing to disable read-side memory barriers for an entire
+ architecture can select this Kconfig option, hence the polarity.
+
+ Say Y here if you need speed and will review use of tracing.
+ Say N here for certain esoteric testing of RCU itself.
+ Take the default if you are unsure.
+
config RCU_STALL_COMMON
def_bool TREE_RCU
help
@@ -313,24 +336,6 @@ config RCU_NOCB_CPU_CB_BOOST
Say Y here if you want to set RT priority for offloading kthreads.
Say N here if you are building a !PREEMPT_RT kernel and are unsure.
-config TASKS_TRACE_RCU_READ_MB
- bool "Tasks Trace RCU readers use memory barriers in user and idle"
- depends on RCU_EXPERT && TASKS_TRACE_RCU
- default PREEMPT_RT || NR_CPUS < 8
- help
- Use this option to further reduce the number of IPIs sent
- to CPUs executing in userspace or idle during tasks trace
- RCU grace periods. Given that a reasonable setting of
- the rcupdate.rcu_task_ipi_delay kernel boot parameter
- eliminates such IPIs for many workloads, proper setting
- of this Kconfig option is important mostly for aggressive
- real-time installations and for battery-powered devices,
- hence the default chosen above.
-
- Say Y here if you hate IPIs.
- Say N here if you hate read-side memory barriers.
- Take the default if you are unsure.
-
config RCU_LAZY
bool "RCU callback lazy invocation functionality"
depends on RCU_NOCB_CPU
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 9cf01832a6c3..dc5d614b372c 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -544,10 +544,6 @@ struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
void rcu_tasks_rude_get_gp_data(int *flags, unsigned long *gp_seq);
#endif // # ifdef CONFIG_TASKS_RUDE_RCU
-#ifdef CONFIG_TASKS_TRACE_RCU
-void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq);
-#endif
-
#ifdef CONFIG_TASKS_RCU_GENERIC
void tasks_cblist_init_generic(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
@@ -673,11 +669,6 @@ void show_rcu_tasks_rude_gp_kthread(void);
#else
static inline void show_rcu_tasks_rude_gp_kthread(void) {}
#endif
-#if !defined(CONFIG_TINY_RCU) && defined(CONFIG_TASKS_TRACE_RCU)
-void show_rcu_tasks_trace_gp_kthread(void);
-#else
-static inline void show_rcu_tasks_trace_gp_kthread(void) {}
-#endif
#ifdef CONFIG_TINY_RCU
static inline bool rcu_cpu_beenfullyonline(int cpu) { return true; }
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 7484d8ad5767..1c50f89fbd6f 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -400,11 +400,6 @@ static void tasks_trace_scale_read_unlock(int idx)
rcu_read_unlock_trace();
}
-static void rcu_tasks_trace_scale_stats(void)
-{
- rcu_tasks_trace_torture_stats_print(scale_type, SCALE_FLAG);
-}
-
static struct rcu_scale_ops tasks_tracing_ops = {
.ptype = RCU_TASKS_FLAVOR,
.init = rcu_sync_scale_init,
@@ -416,8 +411,6 @@ static struct rcu_scale_ops tasks_tracing_ops = {
.gp_barrier = rcu_barrier_tasks_trace,
.sync = synchronize_rcu_tasks_trace,
.exp_sync = synchronize_rcu_tasks_trace,
- .rso_gp_kthread = get_rcu_tasks_trace_gp_kthread,
- .stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_trace_scale_stats,
.name = "tasks-tracing"
};
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 07e51974b06b..d00b043823ae 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1178,10 +1178,9 @@ static struct rcu_torture_ops tasks_tracing_ops = {
.deferred_free = rcu_tasks_tracing_torture_deferred_free,
.sync = synchronize_rcu_tasks_trace,
.exp_sync = synchronize_rcu_tasks_trace,
+ .exp_current = rcu_tasks_trace_expedite_current,
.call = call_rcu_tasks_trace,
.cb_barrier = rcu_barrier_tasks_trace,
- .gp_kthread_dbg = show_rcu_tasks_trace_gp_kthread,
- .get_gp_data = rcu_tasks_trace_get_gp_data,
.cbflood_max = 50000,
.irq_capable = 1,
.slow_gps = 1,
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 2dc044fd126e..76f952196a29 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -161,11 +161,6 @@ static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);
#endif
-/* Avoid IPIing CPUs early in the grace period. */
-#define RCU_TASK_IPI_DELAY (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) ? HZ / 2 : 0)
-static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
-module_param(rcu_task_ipi_delay, int, 0644);
-
/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
#define RCU_TASK_BOOT_STALL_TIMEOUT (HZ * 30)
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
@@ -718,7 +713,6 @@ static void __init rcu_tasks_bootup_oddness(void)
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
-
/* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
{
@@ -801,9 +795,7 @@ static void rcu_tasks_torture_stats_print_generic(struct rcu_tasks *rtp, char *t
#endif // #ifndef CONFIG_TINY_RCU
-static void exit_tasks_rcu_finish_trace(struct task_struct *t);
-
-#if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU)
+#if defined(CONFIG_TASKS_RCU)
////////////////////////////////////////////////////////////////////////
//
@@ -898,7 +890,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
rtp->postgp_func(rtp);
}
-#endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */
+#endif /* #if defined(CONFIG_TASKS_RCU) */
#ifdef CONFIG_TASKS_RCU
@@ -1322,13 +1314,11 @@ void exit_tasks_rcu_finish(void)
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
list_del_init(&t->rcu_tasks_exit_list);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
-
- exit_tasks_rcu_finish_trace(t);
}
#else /* #ifdef CONFIG_TASKS_RCU */
void exit_tasks_rcu_start(void) { }
-void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
+void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_RUDE_RCU
@@ -1449,682 +1439,11 @@ EXPORT_SYMBOL_GPL(rcu_tasks_rude_get_gp_data);
#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
-////////////////////////////////////////////////////////////////////////
-//
-// Tracing variant of Tasks RCU. This variant is designed to be used
-// to protect tracing hooks, including those of BPF. This variant
-// therefore:
-//
-// 1. Has explicit read-side markers to allow finite grace periods
-// in the face of in-kernel loops for PREEMPT=n builds.
-//
-// 2. Protects code in the idle loop, exception entry/exit, and
-// CPU-hotplug code paths, similar to the capabilities of SRCU.
-//
-// 3. Avoids expensive read-side instructions, having overhead similar
-// to that of Preemptible RCU.
-//
-// There are of course downsides. For example, the grace-period code
-// can send IPIs to CPUs, even when those CPUs are in the idle loop or
-// in nohz_full userspace. If needed, these downsides can be at least
-// partially remedied.
-//
-// Perhaps most important, this variant of RCU does not affect the vanilla
-// flavors, rcu_preempt and rcu_sched. The fact that RCU Tasks Trace
-// readers can operate from idle, offline, and exception entry/exit in no
-// way allows rcu_preempt and rcu_sched readers to also do so.
-//
-// The implementation uses rcu_tasks_wait_gp(), which relies on function
-// pointers in the rcu_tasks structure. The rcu_spawn_tasks_trace_kthread()
-// function sets these function pointers up so that rcu_tasks_wait_gp()
-// invokes these functions in this order:
-//
-// rcu_tasks_trace_pregp_step():
-// Disables CPU hotplug, adds all currently executing tasks to the
-// holdout list, then checks the state of all tasks that blocked
-// or were preempted within their current RCU Tasks Trace read-side
-// critical section, adding them to the holdout list if appropriate.
-// Finally, this function re-enables CPU hotplug.
-// The ->pertask_func() pointer is NULL, so there is no per-task processing.
-// rcu_tasks_trace_postscan():
-// Invokes synchronize_rcu() to wait for late-stage exiting tasks
-// to finish exiting.
-// check_all_holdout_tasks_trace(), repeatedly until holdout list is empty:
-// Scans the holdout list, attempting to identify a quiescent state
-// for each task on the list. If there is a quiescent state, the
-// corresponding task is removed from the holdout list. Once this
-// list is empty, the grace period has completed.
-// rcu_tasks_trace_postgp():
-// Provides the needed full memory barrier and does debug checks.
-//
-// The exit_tasks_rcu_finish_trace() synchronizes with exiting tasks.
-//
-// Pre-grace-period update-side code is ordered before the grace period
-// via the ->cbs_lock and barriers in rcu_tasks_kthread(). Pre-grace-period
-// read-side code is ordered before the grace period by atomic operations
-// on .b.need_qs flag of each task involved in this process, or by scheduler
-// context-switch ordering (for locked-down non-running readers).
-
-// The lockdep state must be outside of #ifdef to be useful.
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-static struct lock_class_key rcu_lock_trace_key;
-struct lockdep_map rcu_trace_lock_map =
- STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_trace", &rcu_lock_trace_key);
-EXPORT_SYMBOL_GPL(rcu_trace_lock_map);
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-#ifdef CONFIG_TASKS_TRACE_RCU
-
-// Record outstanding IPIs to each CPU. No point in sending two...
-static DEFINE_PER_CPU(bool, trc_ipi_to_cpu);
-
-// The number of detections of task quiescent state relying on
-// heavyweight readers executing explicit memory barriers.
-static unsigned long n_heavy_reader_attempts;
-static unsigned long n_heavy_reader_updates;
-static unsigned long n_heavy_reader_ofl_updates;
-static unsigned long n_trc_holdouts;
-
-void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
-DEFINE_RCU_TASKS(rcu_tasks_trace, rcu_tasks_wait_gp, call_rcu_tasks_trace,
- "RCU Tasks Trace");
-
-/* Load from ->trc_reader_special.b.need_qs with proper ordering. */
-static u8 rcu_ld_need_qs(struct task_struct *t)
-{
- smp_mb(); // Enforce full grace-period ordering.
- return smp_load_acquire(&t->trc_reader_special.b.need_qs);
-}
-
-/* Store to ->trc_reader_special.b.need_qs with proper ordering. */
-static void rcu_st_need_qs(struct task_struct *t, u8 v)
-{
- smp_store_release(&t->trc_reader_special.b.need_qs, v);
- smp_mb(); // Enforce full grace-period ordering.
-}
-
-/*
- * Do a cmpxchg() on ->trc_reader_special.b.need_qs, allowing for
- * the four-byte operand-size restriction of some platforms.
- *
- * Returns the old value, which is often ignored.
- */
-u8 rcu_trc_cmpxchg_need_qs(struct task_struct *t, u8 old, u8 new)
-{
- return cmpxchg(&t->trc_reader_special.b.need_qs, old, new);
-}
-EXPORT_SYMBOL_GPL(rcu_trc_cmpxchg_need_qs);
-
-/*
- * If we are the last reader, signal the grace-period kthread.
- * Also remove from the per-CPU list of blocked tasks.
- */
-void rcu_read_unlock_trace_special(struct task_struct *t)
-{
- unsigned long flags;
- struct rcu_tasks_percpu *rtpcp;
- union rcu_special trs;
-
- // Open-coded full-word version of rcu_ld_need_qs().
- smp_mb(); // Enforce full grace-period ordering.
- trs = smp_load_acquire(&t->trc_reader_special);
-
- if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && t->trc_reader_special.b.need_mb)
- smp_mb(); // Pairs with update-side barriers.
- // Update .need_qs before ->trc_reader_nesting for irq/NMI handlers.
- if (trs.b.need_qs == (TRC_NEED_QS_CHECKED | TRC_NEED_QS)) {
- u8 result = rcu_trc_cmpxchg_need_qs(t, TRC_NEED_QS_CHECKED | TRC_NEED_QS,
- TRC_NEED_QS_CHECKED);
-
- WARN_ONCE(result != trs.b.need_qs, "%s: result = %d", __func__, result);
- }
- if (trs.b.blocked) {
- rtpcp = per_cpu_ptr(rcu_tasks_trace.rtpcpu, t->trc_blkd_cpu);
- raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
- list_del_init(&t->trc_blkd_node);
- WRITE_ONCE(t->trc_reader_special.b.blocked, false);
- raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
- }
- WRITE_ONCE(t->trc_reader_nesting, 0);
-}
-EXPORT_SYMBOL_GPL(rcu_read_unlock_trace_special);
-
-/* Add a newly blocked reader task to its CPU's list. */
-void rcu_tasks_trace_qs_blkd(struct task_struct *t)
-{
- unsigned long flags;
- struct rcu_tasks_percpu *rtpcp;
-
- local_irq_save(flags);
- rtpcp = this_cpu_ptr(rcu_tasks_trace.rtpcpu);
- raw_spin_lock_rcu_node(rtpcp); // irqs already disabled
- t->trc_blkd_cpu = smp_processor_id();
- if (!rtpcp->rtp_blkd_tasks.next)
- INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks);
- list_add(&t->trc_blkd_node, &rtpcp->rtp_blkd_tasks);
- WRITE_ONCE(t->trc_reader_special.b.blocked, true);
- raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
-}
-EXPORT_SYMBOL_GPL(rcu_tasks_trace_qs_blkd);
-
-/* Add a task to the holdout list, if it is not already on the list. */
-static void trc_add_holdout(struct task_struct *t, struct list_head *bhp)
-{
- if (list_empty(&t->trc_holdout_list)) {
- get_task_struct(t);
- list_add(&t->trc_holdout_list, bhp);
- n_trc_holdouts++;
- }
-}
-
-/* Remove a task from the holdout list, if it is in fact present. */
-static void trc_del_holdout(struct task_struct *t)
-{
- if (!list_empty(&t->trc_holdout_list)) {
- list_del_init(&t->trc_holdout_list);
- put_task_struct(t);
- n_trc_holdouts--;
- }
-}
-
-/* IPI handler to check task state. */
-static void trc_read_check_handler(void *t_in)
-{
- int nesting;
- struct task_struct *t = current;
- struct task_struct *texp = t_in;
-
- // If the task is no longer running on this CPU, leave.
- if (unlikely(texp != t))
- goto reset_ipi; // Already on holdout list, so will check later.
-
- // If the task is not in a read-side critical section, and
- // if this is the last reader, awaken the grace-period kthread.
- nesting = READ_ONCE(t->trc_reader_nesting);
- if (likely(!nesting)) {
- rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS_CHECKED);
- goto reset_ipi;
- }
- // If we are racing with an rcu_read_unlock_trace(), try again later.
- if (unlikely(nesting < 0))
- goto reset_ipi;
-
- // Get here if the task is in a read-side critical section.
- // Set its state so that it will update state for the grace-period
- // kthread upon exit from that critical section.
- rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS | TRC_NEED_QS_CHECKED);
-
-reset_ipi:
- // Allow future IPIs to be sent on CPU and for task.
- // Also order this IPI handler against any later manipulations of
- // the intended task.
- smp_store_release(per_cpu_ptr(&trc_ipi_to_cpu, smp_processor_id()), false); // ^^^
- smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^
-}
-
-/* Callback function for scheduler to check locked-down task. */
-static int trc_inspect_reader(struct task_struct *t, void *bhp_in)
-{
- struct list_head *bhp = bhp_in;
- int cpu = task_cpu(t);
- int nesting;
- bool ofl = cpu_is_offline(cpu);
-
- if (task_curr(t) && !ofl) {
- // If no chance of heavyweight readers, do it the hard way.
- if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
- return -EINVAL;
-
- // If heavyweight readers are enabled on the remote task,
- // we can inspect its state despite its currently running.
- // However, we cannot safely change its state.
- n_heavy_reader_attempts++;
- // Check for "running" idle tasks on offline CPUs.
- if (!rcu_watching_zero_in_eqs(cpu, &t->trc_reader_nesting))
- return -EINVAL; // No quiescent state, do it the hard way.
- n_heavy_reader_updates++;
- nesting = 0;
- } else {
- // The task is not running, so C-language access is safe.
- nesting = t->trc_reader_nesting;
- WARN_ON_ONCE(ofl && task_curr(t) && (t != idle_task(task_cpu(t))));
- if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && ofl)
- n_heavy_reader_ofl_updates++;
- }
-
- // If not exiting a read-side critical section, mark as checked
- // so that the grace-period kthread will remove it from the
- // holdout list.
- if (!nesting) {
- rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS_CHECKED);
- return 0; // In QS, so done.
- }
- if (nesting < 0)
- return -EINVAL; // Reader transitioning, try again later.
-
- // The task is in a read-side critical section, so set up its
- // state so that it will update state upon exit from that critical
- // section.
- if (!rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS | TRC_NEED_QS_CHECKED))
- trc_add_holdout(t, bhp);
- return 0;
-}
-
-/* Attempt to extract the state for the specified task. */
-static void trc_wait_for_one_reader(struct task_struct *t,
- struct list_head *bhp)
-{
- int cpu;
-
- // If a previous IPI is still in flight, let it complete.
- if (smp_load_acquire(&t->trc_ipi_to_cpu) != -1) // Order IPI
- return;
-
- // The current task had better be in a quiescent state.
- if (t == current) {
- rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS_CHECKED);
- WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
- return;
- }
-
- // Attempt to nail down the task for inspection.
- get_task_struct(t);
- if (!task_call_func(t, trc_inspect_reader, bhp)) {
- put_task_struct(t);
- return;
- }
- put_task_struct(t);
-
- // If this task is not yet on the holdout list, then we are in
- // an RCU read-side critical section. Otherwise, the invocation of
- // trc_add_holdout() that added it to the list did the necessary
- // get_task_struct(). Either way, the task cannot be freed out
- // from under this code.
-
- // If currently running, send an IPI, either way, add to list.
- trc_add_holdout(t, bhp);
- if (task_curr(t) &&
- time_after(jiffies + 1, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) {
- // The task is currently running, so try IPIing it.
- cpu = task_cpu(t);
-
- // If there is already an IPI outstanding, let it happen.
- if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0)
- return;
-
- per_cpu(trc_ipi_to_cpu, cpu) = true;
- t->trc_ipi_to_cpu = cpu;
- rcu_tasks_trace.n_ipis++;
- if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) {
- // Just in case there is some other reason for
- // failure than the target CPU being offline.
- WARN_ONCE(1, "%s(): smp_call_function_single() failed for CPU: %d\n",
- __func__, cpu);
- rcu_tasks_trace.n_ipis_fails++;
- per_cpu(trc_ipi_to_cpu, cpu) = false;
- t->trc_ipi_to_cpu = -1;
- }
- }
-}
-
-/*
- * Initialize for first-round processing for the specified task.
- * Return false if task is NULL or already taken care of, true otherwise.
- */
-static bool rcu_tasks_trace_pertask_prep(struct task_struct *t, bool notself)
-{
- // During early boot when there is only the one boot CPU, there
- // is no idle task for the other CPUs. Also, the grace-period
- // kthread is always in a quiescent state. In addition, just return
- // if this task is already on the list.
- if (unlikely(t == NULL) || (t == current && notself) || !list_empty(&t->trc_holdout_list))
- return false;
-
- rcu_st_need_qs(t, 0);
- t->trc_ipi_to_cpu = -1;
- return true;
-}
-
-/* Do first-round processing for the specified task. */
-static void rcu_tasks_trace_pertask(struct task_struct *t, struct list_head *hop)
-{
- if (rcu_tasks_trace_pertask_prep(t, true))
- trc_wait_for_one_reader(t, hop);
-}
-
-/* Initialize for a new RCU-tasks-trace grace period. */
-static void rcu_tasks_trace_pregp_step(struct list_head *hop)
-{
- LIST_HEAD(blkd_tasks);
- int cpu;
- unsigned long flags;
- struct rcu_tasks_percpu *rtpcp;
- struct task_struct *t;
-
- // There shouldn't be any old IPIs, but...
- for_each_possible_cpu(cpu)
- WARN_ON_ONCE(per_cpu(trc_ipi_to_cpu, cpu));
-
- // Disable CPU hotplug across the CPU scan for the benefit of
- // any IPIs that might be needed. This also waits for all readers
- // in CPU-hotplug code paths.
- cpus_read_lock();
-
- // These rcu_tasks_trace_pertask_prep() calls are serialized to
- // allow safe access to the hop list.
- for_each_online_cpu(cpu) {
- rcu_read_lock();
- // Note that cpu_curr_snapshot() picks up the target
- // CPU's current task while its runqueue is locked with
- // an smp_mb__after_spinlock(). This ensures that either
- // the grace-period kthread will see that task's read-side
- // critical section or the task will see the updater's pre-GP
- // accesses. The trailing smp_mb() in cpu_curr_snapshot()
- // does not currently play a role other than simplify
- // that function's ordering semantics. If these simplified
- // ordering semantics continue to be redundant, that smp_mb()
- // might be removed.
- t = cpu_curr_snapshot(cpu);
- if (rcu_tasks_trace_pertask_prep(t, true))
- trc_add_holdout(t, hop);
- rcu_read_unlock();
- cond_resched_tasks_rcu_qs();
- }
-
- // Only after all running tasks have been accounted for is it
- // safe to take care of the tasks that have blocked within their
- // current RCU tasks trace read-side critical section.
- for_each_possible_cpu(cpu) {
- rtpcp = per_cpu_ptr(rcu_tasks_trace.rtpcpu, cpu);
- raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
- list_splice_init(&rtpcp->rtp_blkd_tasks, &blkd_tasks);
- while (!list_empty(&blkd_tasks)) {
- rcu_read_lock();
- t = list_first_entry(&blkd_tasks, struct task_struct, trc_blkd_node);
- list_del_init(&t->trc_blkd_node);
- list_add(&t->trc_blkd_node, &rtpcp->rtp_blkd_tasks);
- raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
- rcu_tasks_trace_pertask(t, hop);
- rcu_read_unlock();
- raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
- }
- raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
- cond_resched_tasks_rcu_qs();
- }
-
- // Re-enable CPU hotplug now that the holdout list is populated.
- cpus_read_unlock();
-}
-
-/*
- * Do intermediate processing between task and holdout scans.
- */
-static void rcu_tasks_trace_postscan(struct list_head *hop)
-{
- // Wait for late-stage exiting tasks to finish exiting.
- // These might have passed the call to exit_tasks_rcu_finish().
-
- // If you remove the following line, update rcu_trace_implies_rcu_gp()!!!
- synchronize_rcu();
- // Any tasks that exit after this point will set
- // TRC_NEED_QS_CHECKED in ->trc_reader_special.b.need_qs.
-}
-
-/* Communicate task state back to the RCU tasks trace stall warning request. */
-struct trc_stall_chk_rdr {
- int nesting;
- int ipi_to_cpu;
- u8 needqs;
-};
-
-static int trc_check_slow_task(struct task_struct *t, void *arg)
-{
- struct trc_stall_chk_rdr *trc_rdrp = arg;
-
- if (task_curr(t) && cpu_online(task_cpu(t)))
- return false; // It is running, so decline to inspect it.
- trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting);
- trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu);
- trc_rdrp->needqs = rcu_ld_need_qs(t);
- return true;
-}
-
-/* Show the state of a task stalling the current RCU tasks trace GP. */
-static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
-{
- int cpu;
- struct trc_stall_chk_rdr trc_rdr;
- bool is_idle_tsk = is_idle_task(t);
-
- if (*firstreport) {
- pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
- *firstreport = false;
- }
- cpu = task_cpu(t);
- if (!task_call_func(t, trc_check_slow_task, &trc_rdr))
- pr_alert("P%d: %c%c\n",
- t->pid,
- ".I"[t->trc_ipi_to_cpu >= 0],
- ".i"[is_idle_tsk]);
- else
- pr_alert("P%d: %c%c%c%c nesting: %d%c%c cpu: %d%s\n",
- t->pid,
- ".I"[trc_rdr.ipi_to_cpu >= 0],
- ".i"[is_idle_tsk],
- ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
- ".B"[!!data_race(t->trc_reader_special.b.blocked)],
- trc_rdr.nesting,
- " !CN"[trc_rdr.needqs & 0x3],
- " ?"[trc_rdr.needqs > 0x3],
- cpu, cpu_online(cpu) ? "" : "(offline)");
- sched_show_task(t);
-}
-
-/* List stalled IPIs for RCU tasks trace. */
-static void show_stalled_ipi_trace(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- if (per_cpu(trc_ipi_to_cpu, cpu))
- pr_alert("\tIPI outstanding to CPU %d\n", cpu);
-}
-
-/* Do one scan of the holdout list. */
-static void check_all_holdout_tasks_trace(struct list_head *hop,
- bool needreport, bool *firstreport)
-{
- struct task_struct *g, *t;
-
- // Disable CPU hotplug across the holdout list scan for IPIs.
- cpus_read_lock();
-
- list_for_each_entry_safe(t, g, hop, trc_holdout_list) {
- // If safe and needed, try to check the current task.
- if (READ_ONCE(t->trc_ipi_to_cpu) == -1 &&
- !(rcu_ld_need_qs(t) & TRC_NEED_QS_CHECKED))
- trc_wait_for_one_reader(t, hop);
-
- // If check succeeded, remove this task from the list.
- if (smp_load_acquire(&t->trc_ipi_to_cpu) == -1 &&
- rcu_ld_need_qs(t) == TRC_NEED_QS_CHECKED)
- trc_del_holdout(t);
- else if (needreport)
- show_stalled_task_trace(t, firstreport);
- cond_resched_tasks_rcu_qs();
- }
-
- // Re-enable CPU hotplug now that the holdout list scan has completed.
- cpus_read_unlock();
-
- if (needreport) {
- if (*firstreport)
- pr_err("INFO: rcu_tasks_trace detected stalls? (Late IPI?)\n");
- show_stalled_ipi_trace();
- }
-}
-
-static void rcu_tasks_trace_empty_fn(void *unused)
-{
-}
-
-/* Wait for grace period to complete and provide ordering. */
-static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
-{
- int cpu;
-
- // Wait for any lingering IPI handlers to complete. Note that
- // if a CPU has gone offline or transitioned to userspace in the
- // meantime, all IPI handlers should have been drained beforehand.
- // Yes, this assumes that CPUs process IPIs in order. If that ever
- // changes, there will need to be a recheck and/or timed wait.
- for_each_online_cpu(cpu)
- if (WARN_ON_ONCE(smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu))))
- smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1);
-
- smp_mb(); // Caller's code must be ordered after wakeup.
- // Pairs with pretty much every ordering primitive.
-}
-
-/* Report any needed quiescent state for this exiting task. */
-static void exit_tasks_rcu_finish_trace(struct task_struct *t)
-{
- union rcu_special trs = READ_ONCE(t->trc_reader_special);
-
- rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS_CHECKED);
- WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
- if (WARN_ON_ONCE(rcu_ld_need_qs(t) & TRC_NEED_QS || trs.b.blocked))
- rcu_read_unlock_trace_special(t);
- else
- WRITE_ONCE(t->trc_reader_nesting, 0);
-}
-
-/**
- * call_rcu_tasks_trace() - Queue a callback trace task-based grace period
- * @rhp: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a trace rcu-tasks
- * grace period elapses, in other words after all currently executing
- * trace rcu-tasks read-side critical sections have completed. These
- * read-side critical sections are delimited by calls to rcu_read_lock_trace()
- * and rcu_read_unlock_trace().
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func)
-{
- call_rcu_tasks_generic(rhp, func, &rcu_tasks_trace);
-}
-EXPORT_SYMBOL_GPL(call_rcu_tasks_trace);
-
-/**
- * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period
- *
- * Control will return to the caller some time after a trace rcu-tasks
- * grace period has elapsed, in other words after all currently executing
- * trace rcu-tasks read-side critical sections have elapsed. These read-side
- * critical sections are delimited by calls to rcu_read_lock_trace()
- * and rcu_read_unlock_trace().
- *
- * This is a very specialized primitive, intended only for a few uses in
- * tracing and other situations requiring manipulation of function preambles
- * and profiling hooks. The synchronize_rcu_tasks_trace() function is not
- * (yet) intended for heavy use from multiple CPUs.
- *
- * See the description of synchronize_rcu() for more detailed information
- * on memory ordering guarantees.
- */
-void synchronize_rcu_tasks_trace(void)
-{
- RCU_LOCKDEP_WARN(lock_is_held(&rcu_trace_lock_map), "Illegal synchronize_rcu_tasks_trace() in RCU Tasks Trace read-side critical section");
- synchronize_rcu_tasks_generic(&rcu_tasks_trace);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace);
-
-/**
- * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks.
- *
- * Although the current implementation is guaranteed to wait, it is not
- * obligated to, for example, if there are no pending callbacks.
- */
-void rcu_barrier_tasks_trace(void)
-{
- rcu_barrier_tasks_generic(&rcu_tasks_trace);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
-
-int rcu_tasks_trace_lazy_ms = -1;
-module_param(rcu_tasks_trace_lazy_ms, int, 0444);
-
-static int __init rcu_spawn_tasks_trace_kthread(void)
-{
- if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
- rcu_tasks_trace.gp_sleep = HZ / 10;
- rcu_tasks_trace.init_fract = HZ / 10;
- } else {
- rcu_tasks_trace.gp_sleep = HZ / 200;
- if (rcu_tasks_trace.gp_sleep <= 0)
- rcu_tasks_trace.gp_sleep = 1;
- rcu_tasks_trace.init_fract = HZ / 200;
- if (rcu_tasks_trace.init_fract <= 0)
- rcu_tasks_trace.init_fract = 1;
- }
- if (rcu_tasks_trace_lazy_ms >= 0)
- rcu_tasks_trace.lazy_jiffies = msecs_to_jiffies(rcu_tasks_trace_lazy_ms);
- rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step;
- rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan;
- rcu_tasks_trace.holdouts_func = check_all_holdout_tasks_trace;
- rcu_tasks_trace.postgp_func = rcu_tasks_trace_postgp;
- rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace);
- return 0;
-}
-
-#if !defined(CONFIG_TINY_RCU)
-void show_rcu_tasks_trace_gp_kthread(void)
-{
- char buf[64];
-
- snprintf(buf, sizeof(buf), "N%lu h:%lu/%lu/%lu",
- data_race(n_trc_holdouts),
- data_race(n_heavy_reader_ofl_updates),
- data_race(n_heavy_reader_updates),
- data_race(n_heavy_reader_attempts));
- show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf);
-}
-EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread);
-
-void rcu_tasks_trace_torture_stats_print(char *tt, char *tf)
-{
- rcu_tasks_torture_stats_print_generic(&rcu_tasks_trace, tt, tf, "");
-}
-EXPORT_SYMBOL_GPL(rcu_tasks_trace_torture_stats_print);
-#endif // !defined(CONFIG_TINY_RCU)
-
-struct task_struct *get_rcu_tasks_trace_gp_kthread(void)
-{
- return rcu_tasks_trace.kthread_ptr;
-}
-EXPORT_SYMBOL_GPL(get_rcu_tasks_trace_gp_kthread);
-
-void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq)
-{
- *flags = 0;
- *gp_seq = rcu_seq_current(&rcu_tasks_trace.tasks_gp_seq);
-}
-EXPORT_SYMBOL_GPL(rcu_tasks_trace_get_gp_data);
-
-#else /* #ifdef CONFIG_TASKS_TRACE_RCU */
-static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
-#endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
-
#ifndef CONFIG_TINY_RCU
void show_rcu_tasks_gp_kthreads(void)
{
show_rcu_tasks_classic_gp_kthread();
show_rcu_tasks_rude_gp_kthread();
- show_rcu_tasks_trace_gp_kthread();
}
#endif /* #ifndef CONFIG_TINY_RCU */
@@ -2251,10 +1570,6 @@ void __init tasks_cblist_init_generic(void)
#ifdef CONFIG_TASKS_RUDE_RCU
cblist_init_generic(&rcu_tasks_rude);
#endif
-
-#ifdef CONFIG_TASKS_TRACE_RCU
- cblist_init_generic(&rcu_tasks_trace);
-#endif
}
static int __init rcu_init_tasks_generic(void)
@@ -2267,10 +1582,6 @@ static int __init rcu_init_tasks_generic(void)
rcu_spawn_tasks_rude_kthread();
#endif
-#ifdef CONFIG_TASKS_TRACE_RCU
- rcu_spawn_tasks_trace_kthread();
-#endif
-
// Run the self-tests.
rcu_tasks_initiate_self_tests();
@@ -2281,3 +1592,16 @@ core_initcall(rcu_init_tasks_generic);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
static inline void rcu_tasks_bootup_oddness(void) {}
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
+
+#ifdef CONFIG_TASKS_TRACE_RCU
+
+////////////////////////////////////////////////////////////////////////
+//
+// Tracing variant of Tasks RCU. This variant is designed to be used
+// to protect tracing hooks, including those of BPF. This variant
+// is implemented via a straightforward mapping onto SRCU-fast.
+
+DEFINE_SRCU_FAST(rcu_tasks_trace_srcu_struct);
+EXPORT_SYMBOL_GPL(rcu_tasks_trace_srcu_struct);
+
+#endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index c0250244cf7a..362a8d1cd327 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -863,7 +863,9 @@ our %deprecated_apis = (
#These should be enough to drive away new IDR users
"DEFINE_IDR" => "DEFINE_XARRAY",
"idr_init" => "xa_init",
- "idr_init_base" => "xa_init_flags"
+ "idr_init_base" => "xa_init_flags",
+ "rcu_read_lock_trace" => "rcu_read_lock_tasks_trace",
+ "rcu_read_unlock_trace" => "rcu_read_unlock_tasks_trace",
);
#Create a search pattern for all these strings to speed up a loop below
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index 85b407467454..18efab346381 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -10,5 +10,4 @@ CONFIG_PROVE_LOCKING=n
#CHECK#CONFIG_PROVE_RCU=n
CONFIG_FORCE_TASKS_TRACE_RCU=y
#CHECK#CONFIG_TASKS_TRACE_RCU=y
-CONFIG_TASKS_TRACE_RCU_READ_MB=y
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index 9003c56cd764..8da390e82829 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -9,6 +9,5 @@ CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
CONFIG_FORCE_TASKS_TRACE_RCU=y
#CHECK#CONFIG_TASKS_TRACE_RCU=y
-CONFIG_TASKS_TRACE_RCU_READ_MB=n
CONFIG_RCU_EXPERT=y
CONFIG_DEBUG_OBJECTS=y