author		Paul E. McKenney <paulmck@kernel.org>	2022-09-16 00:29:07 +0300
committer	Paul E. McKenney <paulmck@kernel.org>	2022-10-21 00:39:18 +0300
commit		2e83b879fb91dafe995967b46a1d38a5b0889242 (patch)
tree		1a8870ca5c7cf6031d08b98b6c221dde54bc5d30 /kernel/rcu
parent		5d0f5953b60f5f7a278085b55ddc73e2932f4c33 (diff)
download	linux-2e83b879fb91dafe995967b46a1d38a5b0889242.tar.xz
srcu: Create an srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe()
On strict load-store architectures, the use of this_cpu_inc() by
srcu_read_lock() and srcu_read_unlock() is not NMI-safe in TREE SRCU.
To see this suppose that an NMI arrives in the middle of srcu_read_lock(),
just after it has read ->srcu_lock_count, but before it has written
the incremented value back to memory. If that NMI handler also does
srcu_read_lock() and srcu_read_unlock() on that same srcu_struct structure,
then upon return from that NMI handler, the interrupted srcu_read_lock()
will overwrite the NMI handler's update to ->srcu_lock_count, but
leave unchanged the NMI handler's update by srcu_read_unlock() to
->srcu_unlock_count.
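To make the lost update concrete, here is a hypothetical sketch (not part of this patch) of what the counter increment decomposes into on a strict load-store architecture; the per-CPU variables below are stand-ins for ->srcu_lock_count[idx] and ->srcu_unlock_count[idx]:

	#include <linux/percpu.h>

	/* Simplified stand-ins for the real SRCU per-CPU counters. */
	DEFINE_PER_CPU(unsigned long, fake_lock_count);
	DEFINE_PER_CPU(unsigned long, fake_unlock_count);

	static void task_level_lock_path(void)
	{
		unsigned long tmp;

		/* this_cpu_inc() is a separate load, add, and store here. */
		tmp = __this_cpu_read(fake_lock_count);		/* reads, say, 5 */

		/*
		 * NMI arrives here.  Its handler does srcu_read_lock() and
		 * srcu_read_unlock(), so fake_lock_count becomes 6 and
		 * fake_unlock_count is incremented as well.
		 */

		__this_cpu_write(fake_lock_count, tmp + 1);	/* stores 6, wiping out
								 * the NMI handler's
								 * increment */
	}

The net effect: two srcu_read_lock() calls but only one recorded in the lock counter, while the unlock counter did record the NMI handler's srcu_read_unlock(), so the counters can balance while the task-level reader is still inside its critical section.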
This can result in a too-short SRCU grace period, which can in turn
result in arbitrary memory corruption.
If the NMI handler instead interrupts the srcu_read_unlock(), this
can result in eternal SRCU grace periods, which is not much better.
This commit therefore creates a pair of new srcu_read_lock_nmisafe()
and srcu_read_unlock_nmisafe() functions, which allow SRCU readers in
both NMI handlers and in process and IRQ context. It is bad practice
to mix the existing and the new _nmisafe() primitives on the same
srcu_struct structure. Use one set or the other, not both.
Just to underline that "bad practice" point, using srcu_read_lock() at
process level and srcu_read_lock_nmisafe() in your NMI handler will not,
repeat NOT, work. If you do not immediately understand why this is the
case, please review the earlier paragraphs in this commit log.
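For illustration only (not from this patch), here is a hedged sketch of correct usage in which every reader of a given srcu_struct, both the process-level reader and the NMI handler, uses the _nmisafe() variants; my_srcu, my_data, and the helper functions are hypothetical names:

	#include <linux/srcu.h>

	struct my_data { int val; };			/* hypothetical payload */
	void consume_data(struct my_data *p);		/* hypothetical helpers */
	void record_sample(struct my_data *p);

	DEFINE_SRCU(my_srcu);				/* hypothetical srcu_struct */
	static struct my_data __rcu *my_data_ptr;	/* hypothetical protected pointer */

	/* Process-context reader: must also use the _nmisafe() flavor,
	 * because an NMI-context reader exists for this srcu_struct. */
	static void process_level_reader(void)
	{
		struct my_data *p;
		int idx;

		idx = srcu_read_lock_nmisafe(&my_srcu);
		p = srcu_dereference(my_data_ptr, &my_srcu);
		if (p)
			consume_data(p);
		srcu_read_unlock_nmisafe(&my_srcu, idx);
	}

	/* NMI-context reader, for example from a perf or watchdog NMI. */
	static void nmi_level_reader(void)
	{
		struct my_data *p;
		int idx;

		idx = srcu_read_lock_nmisafe(&my_srcu);
		p = srcu_dereference(my_data_ptr, &my_srcu);
		if (p)
			record_sample(p);
		srcu_read_unlock_nmisafe(&my_srcu, idx);
	}

Swapping the process-level calls back to plain srcu_read_lock()/srcu_read_unlock() would reintroduce the lost-update problem described above.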
[ paulmck: Apply kernel test robot feedback. ]
[ paulmck: Apply feedback from Randy Dunlap. ]
[ paulmck: Apply feedback from John Ogness. ]
[ paulmck: Apply feedback from Frederic Weisbecker. ]
Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
Diffstat (limited to 'kernel/rcu')
-rw-r--r--	kernel/rcu/Kconfig	3
-rw-r--r--	kernel/rcu/rcutorture.c	11
-rw-r--r--	kernel/rcu/srcutree.c	43
3 files changed, 51 insertions(+), 6 deletions(-)
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index d471d22a5e21..f53ad63b2bc6 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -72,6 +72,9 @@ config TREE_SRCU
 	help
 	  This option selects the full-fledged version of SRCU.
 
+config NEED_SRCU_NMI_SAFE
+	def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
+
 config TASKS_RCU_GENERIC
 	def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
 	select SRCU
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 503c2aa845a4..b4c74ce10225 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -615,10 +615,14 @@ static struct rcu_torture_ops rcu_busted_ops = {
 DEFINE_STATIC_SRCU(srcu_ctl);
 static struct srcu_struct srcu_ctld;
 static struct srcu_struct *srcu_ctlp = &srcu_ctl;
+static struct rcu_torture_ops srcud_ops;
 
 static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
 {
-	return srcu_read_lock(srcu_ctlp);
+	if (cur_ops == &srcud_ops)
+		return srcu_read_lock_nmisafe(srcu_ctlp);
+	else
+		return srcu_read_lock(srcu_ctlp);
 }
 
 static void
@@ -642,7 +646,10 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
 
 static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
 {
-	srcu_read_unlock(srcu_ctlp, idx);
+	if (cur_ops == &srcud_ops)
+		srcu_read_unlock_nmisafe(srcu_ctlp, idx);
+	else
+		srcu_read_unlock(srcu_ctlp, idx);
 }
 
 static int torture_srcu_read_lock_held(void)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 25e9458da6a2..32a94b254d29 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -654,6 +654,41 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
+#ifdef CONFIG_NEED_SRCU_NMI_SAFE
+
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct, but in an NMI-safe manner using RMW atomics.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+	int idx;
+	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+	atomic_long_inc(&sdp->srcu_lock_count[idx]);
+	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
+	return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
+
+/*
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct.  Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ */
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+	smp_mb__before_atomic(); /* C */  /* Avoid leaking the critical section. */
+	atomic_long_inc(&sdp->srcu_unlock_count[idx]);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
+
+#endif // CONFIG_NEED_SRCU_NMI_SAFE
+
 /*
  * Start an SRCU grace period.
  */
@@ -1090,7 +1125,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 	int ss_state;
 
 	check_init_srcu_struct(ssp);
-	idx = srcu_read_lock(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp);
 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
 	if (ss_state < SRCU_SIZE_WAIT_CALL)
 		sdp = per_cpu_ptr(ssp->sda, 0);
@@ -1123,7 +1158,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
 	else if (needexp)
 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
-	srcu_read_unlock(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx);
 	return s;
 }
 
@@ -1427,13 +1462,13 @@ void srcu_barrier(struct srcu_struct *ssp)
 
 	/* Initial count prevents reaching zero until all CBs are posted. */
 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
-	idx = srcu_read_lock(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp);
 	if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
 		srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
 	else
 		for_each_possible_cpu(cpu)
 			srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
-	srcu_read_unlock(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx);
 
 	/* Remove the initial count, at which point reaching zero can happen. */
 	if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
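Not shown in this kernel/rcu-limited diffstat is the include/linux/srcu.h side of the change. As a rough, non-verbatim sketch of the idea only: the new wrapper presumably selects the RMW-atomic implementation where CONFIG_NEED_SRCU_NMI_SAFE is set and otherwise falls back to the ordinary fast path, along these lines (lockdep annotations omitted):

	/* Sketch only, not the verbatim header code. */
	static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
	{
		int retval;

		if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
			retval = __srcu_read_lock_nmisafe(ssp);	/* atomic_long_inc() path */
		else
			retval = __srcu_read_lock(ssp);		/* this_cpu_inc() is already NMI-safe here */
		return retval;
	}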