From 2f1f043e7bea3fbf4c1869df2f7a0312bc8ca2bf Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 12 Jan 2023 22:59:53 -0800 Subject: locking/lockdep: Introduce lock_sync() Currently, functions like synchronize_srcu() do not have lockdep annotations resembling those of other write-side locking primitives. Such annotations might look as follows: lock_acquire(); lock_release(); Such annotations would tell lockdep that synchronize_srcu() acts like an empty critical section that waits for other (read-side) critical sections to finish. This would definitely catch some deadlock, but as pointed out by Paul Mckenney [1], this could also introduce false positives because of irq-safe/unsafe detection. Of course, there are tricks could help with this: might_sleep(); // Existing statement in __synchronize_srcu(). if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { local_irq_disable(); lock_acquire(); lock_release(); local_irq_enable(); } But it would be better for lockdep to provide a separate annonation for functions like synchronize_srcu(), so that people won't need to repeat the ugly tricks above. Therefore introduce lock_sync(), which is simply an lock+unlock pair with no irq safe/unsafe deadlock check. This works because the to-be-annontated functions do not create real critical sections, and there is therefore no way that irq can create extra dependencies. [1]: https://lore.kernel.org/lkml/20180412021233.ewncg5jjuzjw3x62@tardis/ Signed-off-by: Boqun Feng Acked-by: Waiman Long Signed-off-by: Paul E. McKenney [ boqun: Fix typos reported by Davidlohr Bueso and Paul E. Mckenney ] Acked-by: Peter Zijlstra (Intel) Signed-off-by: Boqun Feng --- include/linux/lockdep.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1023f349af71..14d9dbedc6c1 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -268,6 +268,10 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, extern void lock_release(struct lockdep_map *lock, unsigned long ip); +extern void lock_sync(struct lockdep_map *lock, unsigned int subclass, + int read, int check, struct lockdep_map *nest_lock, + unsigned long ip); + /* lock_is_held_type() returns */ #define LOCK_STATE_UNKNOWN -1 #define LOCK_STATE_NOT_HELD 0 @@ -554,6 +558,7 @@ do { \ #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_) #define lock_map_release(l) lock_release(l, _THIS_IP_) +#define lock_map_sync(l) lock_sync(l, 0, 0, 1, NULL, _THIS_IP_) #ifdef CONFIG_PROVE_LOCKING # define might_lock(lock) \ -- cgit v1.2.3 From f0f44752f5f61ee4e3bd88ae033fdb888320aafe Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 12 Jan 2023 22:59:54 -0800 Subject: rcu: Annotate SRCU's update-side lockdep dependencies Although all flavors of RCU readers are annotated correctly with lockdep as recursive read locks, they do not set the lock_acquire 'check' parameter. This means that RCU read locks are not added to the lockdep dependency graph, which in turn means that lockdep cannot detect RCU-based deadlocks. This is not a problem for RCU flavors having atomic read-side critical sections because context-based annotations can catch these deadlocks, see for example the RCU_LOCKDEP_WARN() statement in synchronize_rcu(). But context-based annotations are not helpful for sleepable RCU, especially given that it is perfectly legal to do synchronize_srcu(&srcu1) within an srcu_read_lock(&srcu2). However, we can detect SRCU-based by: (1) Making srcu_read_lock() a 'check'ed recursive read lock and (2) Making synchronize_srcu() a empty write lock critical section. Even better, with the newly introduced lock_sync(), we can avoid false positives about irq-unsafe/safe. This commit therefore makes it so. Note that NMI-safe SRCU read side critical sections are currently not annotated, but might be annotated in the future. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney [ boqun: Add comments for annotation per Waiman's suggestion ] [ boqun: Fix comment warning reported by Stephen Rothwell ] Acked-by: Peter Zijlstra (Intel) Signed-off-by: Boqun Feng --- include/linux/srcu.h | 34 ++++++++++++++++++++++++++++++++-- kernel/rcu/srcutiny.c | 2 ++ kernel/rcu/srcutree.c | 2 ++ 3 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 74796cd7e7a9..41c4b26fb1c1 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -102,6 +102,32 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) return lock_is_held(&ssp->dep_map); } +/* + * Annotations provide deadlock detection for SRCU. + * + * Similar to other lockdep annotations, except there is an additional + * srcu_lock_sync(), which is basically an empty *write*-side critical section, + * see lock_sync() for more information. + */ + +/* Annotates a srcu_read_lock() */ +static inline void srcu_lock_acquire(struct lockdep_map *map) +{ + lock_map_acquire_read(map); +} + +/* Annotates a srcu_read_lock() */ +static inline void srcu_lock_release(struct lockdep_map *map) +{ + lock_map_release(map); +} + +/* Annotates a synchronize_srcu() */ +static inline void srcu_lock_sync(struct lockdep_map *map) +{ + lock_map_sync(map); +} + #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) @@ -109,6 +135,10 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) return 1; } +#define srcu_lock_acquire(m) do { } while (0) +#define srcu_lock_release(m) do { } while (0) +#define srcu_lock_sync(m) do { } while (0) + #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #define SRCU_NMI_UNKNOWN 0x0 @@ -182,7 +212,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); - rcu_lock_acquire(&(ssp)->dep_map); + srcu_lock_acquire(&(ssp)->dep_map); return retval; } @@ -254,7 +284,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) { WARN_ON_ONCE(idx & ~0x1); srcu_check_nmi_safety(ssp, false); - rcu_lock_release(&(ssp)->dep_map); + srcu_lock_release(&(ssp)->dep_map); __srcu_read_unlock(ssp, idx); } diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index b12fb0cec44d..336af24e0fe3 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -197,6 +197,8 @@ void synchronize_srcu(struct srcu_struct *ssp) { struct rcu_synchronize rs; + srcu_lock_sync(&ssp->dep_map); + RCU_LOCKDEP_WARN(lockdep_is_held(ssp) || lock_is_held(&rcu_bh_lock_map) || lock_is_held(&rcu_lock_map) || diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index ab4ee58af84b..c541b82646b6 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1307,6 +1307,8 @@ static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) { struct rcu_synchronize rcu; + srcu_lock_sync(&ssp->dep_map); + RCU_LOCKDEP_WARN(lockdep_is_held(ssp) || lock_is_held(&rcu_bh_lock_map) || lock_is_held(&rcu_lock_map) || -- cgit v1.2.3 From 0471db447cb7de56bbe2fedd9256b4d2b8ef642a Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 13 Jan 2023 15:57:22 -0800 Subject: locking/lockdep: Improve the deadlock scenario print for sync and read lock Lock scenario print is always a weak spot of lockdep splats. Improvement can be made if we rework the dependency search and the error printing. However without touching the graph search, we can improve a little for the circular deadlock case, since we have the to-be-added lock dependency, and know whether these two locks are read/write/sync. In order to know whether a held_lock is sync or not, a bit was "stolen" from ->references, which reduce our limit for the same lock class nesting from 2^12 to 2^11, and it should still be good enough. Besides, since we now have bit in held_lock for sync, we don't need the "hardirqoffs being 1" trick, and also we can avoid the __lock_release() if we jump out of __lock_acquire() before the held_lock stored. With these changes, a deadlock case evolved with read lock and sync gets a better print-out from: [...] Possible unsafe locking scenario: [...] [...] CPU0 CPU1 [...] ---- ---- [...] lock(srcuA); [...] lock(srcuB); [...] lock(srcuA); [...] lock(srcuB); to [...] Possible unsafe locking scenario: [...] [...] CPU0 CPU1 [...] ---- ---- [...] rlock(srcuA); [...] lock(srcuB); [...] lock(srcuA); [...] sync(srcuB); Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Signed-off-by: Boqun Feng --- include/linux/lockdep.h | 3 ++- kernel/locking/lockdep.c | 48 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 14d9dbedc6c1..b32256e9e944 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -134,7 +134,8 @@ struct held_lock { unsigned int read:2; /* see lock_acquire() comment */ unsigned int check:1; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; - unsigned int references:12; /* 32 bits */ + unsigned int sync:1; + unsigned int references:11; /* 32 bits */ unsigned int pin_count; }; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 3ee3b278789d..dcd1d5bfc1e0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1881,6 +1881,8 @@ print_circular_lock_scenario(struct held_lock *src, struct lock_class *source = hlock_class(src); struct lock_class *target = hlock_class(tgt); struct lock_class *parent = prt->class; + int src_read = src->read; + int tgt_read = tgt->read; /* * A direct locking problem where unsafe_class lock is taken @@ -1908,7 +1910,10 @@ print_circular_lock_scenario(struct held_lock *src, printk(" Possible unsafe locking scenario:\n\n"); printk(" CPU0 CPU1\n"); printk(" ---- ----\n"); - printk(" lock("); + if (tgt_read != 0) + printk(" rlock("); + else + printk(" lock("); __print_lock_name(target); printk(KERN_CONT ");\n"); printk(" lock("); @@ -1917,7 +1922,12 @@ print_circular_lock_scenario(struct held_lock *src, printk(" lock("); __print_lock_name(target); printk(KERN_CONT ");\n"); - printk(" lock("); + if (src_read != 0) + printk(" rlock("); + else if (src->sync) + printk(" sync("); + else + printk(" lock("); __print_lock_name(source); printk(KERN_CONT ");\n"); printk("\n *** DEADLOCK ***\n\n"); @@ -4531,7 +4541,13 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) return 0; } } - if (!hlock->hardirqs_off) { + + /* + * For lock_sync(), don't mark the ENABLED usage, since lock_sync() + * creates no critical section and no extra dependency can be introduced + * by interrupts + */ + if (!hlock->hardirqs_off && !hlock->sync) { if (hlock->read) { if (!mark_lock(curr, hlock, LOCK_ENABLED_HARDIRQ_READ)) @@ -4910,7 +4926,7 @@ static int __lock_is_held(const struct lockdep_map *lock, int read); static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, int hardirqs_off, struct lockdep_map *nest_lock, unsigned long ip, - int references, int pin_count) + int references, int pin_count, int sync) { struct task_struct *curr = current; struct lock_class *class = NULL; @@ -4961,7 +4977,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, class_idx = class - lock_classes; - if (depth) { /* we're holding locks */ + if (depth && !sync) { + /* we're holding locks and the new held lock is not a sync */ hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { if (!references) @@ -4995,6 +5012,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->trylock = trylock; hlock->read = read; hlock->check = check; + hlock->sync = !!sync; hlock->hardirqs_off = !!hardirqs_off; hlock->references = references; #ifdef CONFIG_LOCK_STAT @@ -5056,6 +5074,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (!validate_chain(curr, hlock, chain_head, chain_key)) return 0; + /* For lock_sync(), we are done here since no actual critical section */ + if (hlock->sync) + return 1; + curr->curr_chain_key = chain_key; curr->lockdep_depth++; check_chain_key(curr); @@ -5197,7 +5219,7 @@ static int reacquire_held_locks(struct task_struct *curr, unsigned int depth, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references, hlock->pin_count)) { + hlock->references, hlock->pin_count, 0)) { case 0: return 1; case 1: @@ -5667,7 +5689,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, lockdep_recursion_inc(); __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip, 0, 0); + irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 0); lockdep_recursion_finish(); raw_local_irq_restore(flags); } @@ -5700,11 +5722,6 @@ EXPORT_SYMBOL_GPL(lock_release); * APIs are used to wait for one or multiple critical sections (on other CPUs * or threads), and it means that calling these APIs inside these critical * sections is potential deadlock. - * - * This annotation acts as an acquire+release annotation pair with hardirqoff - * being 1. Since there's no critical section, no interrupt can create extra - * dependencies "inside" the annotation, hardirqoff == 1 allows us to avoid - * false positives. */ void lock_sync(struct lockdep_map *lock, unsigned subclass, int read, int check, struct lockdep_map *nest_lock, unsigned long ip) @@ -5718,10 +5735,9 @@ void lock_sync(struct lockdep_map *lock, unsigned subclass, int read, check_flags(flags); lockdep_recursion_inc(); - __lock_acquire(lock, subclass, 0, read, check, 1, nest_lock, ip, 0, 0); - - if (__lock_release(lock, ip)) - check_chain_key(current); + __lock_acquire(lock, subclass, 0, read, check, + irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 1); + check_chain_key(current); lockdep_recursion_finish(); raw_local_irq_restore(flags); } -- cgit v1.2.3 From 3e67cb8a3c6251c86e5d058d8ee4e1909bc25af0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 08:06:56 -0700 Subject: srcu: Add whitespace to __SRCU_STRUCT_INIT() & __DEFINE_SRCU() This is a whitespace-only commit with no change in functionality. Its purpose is to prepare for later commits that: (1) Cause statically allocated srcu_struct structures to rely on compile-time initialization and (2) Move fields from the srcu_struct structure to a new srcu_usage structure. Cc: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 558057b517b7..488d0e5d1ba3 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -108,13 +108,13 @@ struct srcu_struct { #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 -#define __SRCU_STRUCT_INIT(name, pcpu_name) \ -{ \ - .sda = &pcpu_name, \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ - .srcu_gp_seq_needed = -1UL, \ - .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ - __SRCU_DEP_MAP_INIT(name) \ +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ +{ \ + .sda = &pcpu_name, \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .srcu_gp_seq_needed = -1UL, \ + .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ + __SRCU_DEP_MAP_INIT(name) \ } /* @@ -137,15 +137,15 @@ struct srcu_struct { * See include/linux/percpu-defs.h for the rules on per-CPU variables. */ #ifdef MODULE -# define __DEFINE_SRCU(name, is_static) \ - is_static struct srcu_struct name; \ - extern struct srcu_struct * const __srcu_struct_##name; \ - struct srcu_struct * const __srcu_struct_##name \ +# define __DEFINE_SRCU(name, is_static) \ + is_static struct srcu_struct name; \ + extern struct srcu_struct * const __srcu_struct_##name; \ + struct srcu_struct * const __srcu_struct_##name \ __section("___srcu_struct_ptrs") = &name #else -# define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \ - is_static struct srcu_struct name = \ +# define __DEFINE_SRCU(name, is_static) \ + static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \ + is_static struct srcu_struct name = \ __SRCU_STRUCT_INIT(name, name##_srcu_data) #endif #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) -- cgit v1.2.3 From f4d01a259374ef358cd6b00a96b4dfc0fb05a844 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 13:28:04 -0700 Subject: srcu: Use static init for statically allocated in-module srcu_struct Further shrinking the srcu_struct structure is eased by requiring that in-module srcu_struct structures rely more heavily on static initialization. In particular, this preserves the property that a module-load-time srcu_struct initialization can fail only due to memory-allocation failure of the per-CPU srcu_data structures. It might also slightly improve robustness by keeping the number of memory allocations that must succeed down percpu_alloc() call. This is in preparation for splitting an srcu_usage structure out of the srcu_struct structure. [ paulmck: Fold in qiang1.zhang@intel.com feedback. ] Cc: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 19 ++++++++++++++----- kernel/rcu/srcutree.c | 19 +++++++++++++------ 2 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 488d0e5d1ba3..3ce6deee1dbe 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -108,15 +108,24 @@ struct srcu_struct { #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 -#define __SRCU_STRUCT_INIT(name, pcpu_name) \ -{ \ - .sda = &pcpu_name, \ +#define __SRCU_STRUCT_INIT_COMMON(name) \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = -1UL, \ .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ - __SRCU_DEP_MAP_INIT(name) \ + __SRCU_DEP_MAP_INIT(name) + +#define __SRCU_STRUCT_INIT_MODULE(name) \ +{ \ + __SRCU_STRUCT_INIT_COMMON(name) \ } +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ +{ \ + .sda = &pcpu_name, \ + __SRCU_STRUCT_INIT_COMMON(name) \ +} + + /* * Define and initialize a srcu struct at build time. * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. @@ -138,7 +147,7 @@ struct srcu_struct { */ #ifdef MODULE # define __DEFINE_SRCU(name, is_static) \ - is_static struct srcu_struct name; \ + is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name); \ extern struct srcu_struct * const __srcu_struct_##name; \ struct srcu_struct * const __srcu_struct_##name \ __section("___srcu_struct_ptrs") = &name diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index ab4ee58af84b..7e6e7dfb1a87 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1873,13 +1873,14 @@ void __init srcu_init(void) static int srcu_module_coming(struct module *mod) { int i; + struct srcu_struct *ssp; struct srcu_struct **sspp = mod->srcu_struct_ptrs; - int ret; for (i = 0; i < mod->num_srcu_structs; i++) { - ret = init_srcu_struct(*(sspp++)); - if (WARN_ON_ONCE(ret)) - return ret; + ssp = *(sspp++); + ssp->sda = alloc_percpu(struct srcu_data); + if (WARN_ON_ONCE(!ssp->sda)) + return -ENOMEM; } return 0; } @@ -1888,10 +1889,16 @@ static int srcu_module_coming(struct module *mod) static void srcu_module_going(struct module *mod) { int i; + struct srcu_struct *ssp; struct srcu_struct **sspp = mod->srcu_struct_ptrs; - for (i = 0; i < mod->num_srcu_structs; i++) - cleanup_srcu_struct(*(sspp++)); + for (i = 0; i < mod->num_srcu_structs; i++) { + ssp = *(sspp++); + if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed)) && + !WARN_ON_ONCE(!ssp->sda_is_static)) + cleanup_srcu_struct(ssp); + free_percpu(ssp->sda); + } } /* Handle one module, either coming or going. */ -- cgit v1.2.3 From 95433f7263011e0e6e83caef85d98896dd99cab7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 16 Mar 2023 17:58:51 -0700 Subject: srcu: Begin offloading srcu_struct fields to srcu_update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current srcu_struct structure is on the order of 200 bytes in size (depending on architecture and .config), which is much better than the old-style 26K bytes, but still all too inconvenient when one is trying to achieve good cache locality on a fastpath involving SRCU readers. However, only a few fields in srcu_struct are used by SRCU readers. The remaining fields could be offloaded to a new srcu_update structure, thus shrinking the srcu_struct structure down to a few tens of bytes. This commit begins this noble quest, a quest that is complicated by open-coded initialization of the srcu_struct within the srcu_notifier_head structure. This complication is addressed by updating the srcu_notifier_head structure's open coding, given that there does not appear to be a straightforward way of abstracting that initialization. This commit moves only the ->node pointer to srcu_update. Later commits will move additional fields. [ paulmck: Fold in qiang1.zhang@intel.com's memory-leak fix. ] Link: https://lore.kernel.org/all/20230320055751.4120251-1-qiang1.zhang@intel.com/ Suggested-by: Christoph Hellwig Cc: "Rafael J. Wysocki" Cc: "Michał Mirosław" Cc: Dmitry Osipenko Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Acked-by: Rafael J. Wysocki Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/notifier.h | 5 ++++- include/linux/srcutiny.h | 6 +++--- include/linux/srcutree.h | 27 ++++++++++++++++++--------- kernel/rcu/rcu.h | 6 ++++-- kernel/rcu/srcutree.c | 28 +++++++++++++++++++--------- 5 files changed, 48 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index aef88c2d1173..2aba75145144 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -73,6 +73,9 @@ struct raw_notifier_head { struct srcu_notifier_head { struct mutex mutex; +#ifdef CONFIG_TREE_SRCU + struct srcu_usage srcuu; +#endif struct srcu_struct srcu; struct notifier_block __rcu *head; }; @@ -107,7 +110,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ .head = NULL, \ - .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ + .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ } #define ATOMIC_NOTIFIER_HEAD(name) \ diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 5aa5e0faf6a1..ebd72491af99 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -31,7 +31,7 @@ struct srcu_struct { void srcu_drive_gp(struct work_struct *wp); -#define __SRCU_STRUCT_INIT(name, __ignored) \ +#define __SRCU_STRUCT_INIT(name, __ignored, ___ignored) \ { \ .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ .srcu_cb_tail = &name.srcu_cb_head, \ @@ -44,9 +44,9 @@ void srcu_drive_gp(struct work_struct *wp); * Tree SRCU, which needs some per-CPU data. */ #define DEFINE_SRCU(name) \ - struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) + struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name) #define DEFINE_STATIC_SRCU(name) \ - static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) + static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name) void synchronize_srcu(struct srcu_struct *ssp); diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 3ce6deee1dbe..276f325f1296 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -57,11 +57,17 @@ struct srcu_node { int grphi; /* Biggest CPU for node. */ }; +/* + * Per-SRCU-domain structure, update-side data linked from srcu_struct. + */ +struct srcu_usage { + struct srcu_node *node; /* Combining tree. */ +}; + /* * Per-SRCU-domain structure, similar in function to rcu_state. */ struct srcu_struct { - struct srcu_node *node; /* Combining tree. */ struct srcu_node *level[RCU_NUM_LVLS + 1]; /* First node at each level. */ int srcu_size_state; /* Small-to-big transition state. */ @@ -90,6 +96,7 @@ struct srcu_struct { unsigned long reschedule_count; struct delayed_work work; struct lockdep_map dep_map; + struct srcu_usage *srcu_sup; /* Update-side data. */ }; /* Values for size state variable (->srcu_size_state). */ @@ -108,24 +115,24 @@ struct srcu_struct { #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 -#define __SRCU_STRUCT_INIT_COMMON(name) \ +#define __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = -1UL, \ .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ + .srcu_sup = &usage_name, \ __SRCU_DEP_MAP_INIT(name) -#define __SRCU_STRUCT_INIT_MODULE(name) \ +#define __SRCU_STRUCT_INIT_MODULE(name, usage_name) \ { \ - __SRCU_STRUCT_INIT_COMMON(name) \ + __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ } -#define __SRCU_STRUCT_INIT(name, pcpu_name) \ +#define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name) \ { \ .sda = &pcpu_name, \ - __SRCU_STRUCT_INIT_COMMON(name) \ + __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ } - /* * Define and initialize a srcu struct at build time. * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. @@ -147,15 +154,17 @@ struct srcu_struct { */ #ifdef MODULE # define __DEFINE_SRCU(name, is_static) \ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name); \ + static struct srcu_usage name##_srcu_usage; \ + is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage); \ extern struct srcu_struct * const __srcu_struct_##name; \ struct srcu_struct * const __srcu_struct_##name \ __section("___srcu_struct_ptrs") = &name #else # define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \ + static struct srcu_usage name##_srcu_usage; \ is_static struct srcu_struct name = \ - __SRCU_STRUCT_INIT(name, name##_srcu_data) + __SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data) #endif #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 115616ac3bfa..8d18d4bf0e29 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -341,11 +341,13 @@ extern void rcu_init_geometry(void); * specified state structure (for SRCU) or the only rcu_state structure * (for RCU). */ -#define srcu_for_each_node_breadth_first(sp, rnp) \ +#define _rcu_for_each_node_breadth_first(sp, rnp) \ for ((rnp) = &(sp)->node[0]; \ (rnp) < &(sp)->node[rcu_num_nodes]; (rnp)++) #define rcu_for_each_node_breadth_first(rnp) \ - srcu_for_each_node_breadth_first(&rcu_state, rnp) + _rcu_for_each_node_breadth_first(&rcu_state, rnp) +#define srcu_for_each_node_breadth_first(ssp, rnp) \ + _rcu_for_each_node_breadth_first(ssp->srcu_sup, rnp) /* * Scan the leaves of the rcu_node hierarchy for the rcu_state structure. diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 7e6e7dfb1a87..049e20dbec76 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -173,12 +173,12 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) /* Initialize geometry if it has not already been initialized. */ rcu_init_geometry(); - ssp->node = kcalloc(rcu_num_nodes, sizeof(*ssp->node), gfp_flags); - if (!ssp->node) + ssp->srcu_sup->node = kcalloc(rcu_num_nodes, sizeof(*ssp->srcu_sup->node), gfp_flags); + if (!ssp->srcu_sup->node) return false; /* Work out the overall tree geometry. */ - ssp->level[0] = &ssp->node[0]; + ssp->level[0] = &ssp->srcu_sup->node[0]; for (i = 1; i < rcu_num_lvls; i++) ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1]; rcu_init_levelspread(levelspread, num_rcu_lvl); @@ -195,7 +195,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ; snp->grplo = -1; snp->grphi = -1; - if (snp == &ssp->node[0]) { + if (snp == &ssp->srcu_sup->node[0]) { /* Root node, special case. */ snp->srcu_parent = NULL; continue; @@ -236,8 +236,12 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) */ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) { + if (!is_static) + ssp->srcu_sup = kzalloc(sizeof(*ssp->srcu_sup), GFP_KERNEL); + if (!ssp->srcu_sup) + return -ENOMEM; ssp->srcu_size_state = SRCU_SIZE_SMALL; - ssp->node = NULL; + ssp->srcu_sup->node = NULL; mutex_init(&ssp->srcu_cb_mutex); mutex_init(&ssp->srcu_gp_mutex); ssp->srcu_idx = 0; @@ -249,8 +253,11 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->sda_is_static = is_static; if (!is_static) ssp->sda = alloc_percpu(struct srcu_data); - if (!ssp->sda) + if (!ssp->sda) { + if (!is_static) + kfree(ssp->srcu_sup); return -ENOMEM; + } init_srcu_struct_data(ssp); ssp->srcu_gp_seq_needed_exp = 0; ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); @@ -259,6 +266,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) if (!ssp->sda_is_static) { free_percpu(ssp->sda); ssp->sda = NULL; + kfree(ssp->srcu_sup); return -ENOMEM; } } else { @@ -656,13 +664,15 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed); return; /* Caller forgot to stop doing call_srcu()? */ } + kfree(ssp->srcu_sup->node); + ssp->srcu_sup->node = NULL; + ssp->srcu_size_state = SRCU_SIZE_SMALL; if (!ssp->sda_is_static) { free_percpu(ssp->sda); ssp->sda = NULL; + kfree(ssp->srcu_sup); + ssp->srcu_sup = NULL; } - kfree(ssp->node); - ssp->node = NULL; - ssp->srcu_size_state = SRCU_SIZE_SMALL; } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); -- cgit v1.2.3 From 208f41b1312443401353bec0c1939e2bfc28adce Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 14:43:08 -0700 Subject: srcu: Move ->level from srcu_struct to srcu_usage This commit moves the ->level[] array from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 4 ++-- kernel/rcu/srcutree.c | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 276f325f1296..c7373fe5c14b 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -62,14 +62,14 @@ struct srcu_node { */ struct srcu_usage { struct srcu_node *node; /* Combining tree. */ + struct srcu_node *level[RCU_NUM_LVLS + 1]; + /* First node at each level. */ }; /* * Per-SRCU-domain structure, similar in function to rcu_state. */ struct srcu_struct { - struct srcu_node *level[RCU_NUM_LVLS + 1]; - /* First node at each level. */ int srcu_size_state; /* Small-to-big transition state. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ spinlock_t __private lock; /* Protect counters and size state. */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 049e20dbec76..acb0862faafa 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -178,9 +178,9 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) return false; /* Work out the overall tree geometry. */ - ssp->level[0] = &ssp->srcu_sup->node[0]; + ssp->srcu_sup->level[0] = &ssp->srcu_sup->node[0]; for (i = 1; i < rcu_num_lvls; i++) - ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1]; + ssp->srcu_sup->level[i] = ssp->srcu_sup->level[i - 1] + num_rcu_lvl[i - 1]; rcu_init_levelspread(levelspread, num_rcu_lvl); /* Each pass through this loop initializes one srcu_node structure. */ @@ -202,10 +202,10 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) } /* Non-root node. */ - if (snp == ssp->level[level + 1]) + if (snp == ssp->srcu_sup->level[level + 1]) level++; - snp->srcu_parent = ssp->level[level - 1] + - (snp - ssp->level[level]) / + snp->srcu_parent = ssp->srcu_sup->level[level - 1] + + (snp - ssp->srcu_sup->level[level]) / levelspread[level - 1]; } @@ -214,7 +214,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) * leaves of the srcu_node tree. */ level = rcu_num_lvls - 1; - snp_first = ssp->level[level]; + snp_first = ssp->srcu_sup->level[level]; for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(ssp->sda, cpu); sdp->mynode = &snp_first[cpu / levelspread[level]]; @@ -889,7 +889,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_irq_rcu_node(snp); cbs = false; - last_lvl = snp >= ssp->level[rcu_num_lvls - 1]; + last_lvl = snp >= ssp->srcu_sup->level[rcu_num_lvls - 1]; if (last_lvl) cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq; snp->srcu_have_cbs[idx] = gpseq; -- cgit v1.2.3 From a0d8cbd3821369dc9478cabd605417afb9eb24dc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 17:16:30 -0700 Subject: srcu: Move ->srcu_size_state from srcu_struct to srcu_usage This commit moves the ->srcu_size_state field from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 2 +- kernel/rcu/srcutree.c | 37 +++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index c7373fe5c14b..443d27a214ef 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -64,13 +64,13 @@ struct srcu_usage { struct srcu_node *node; /* Combining tree. */ struct srcu_node *level[RCU_NUM_LVLS + 1]; /* First node at each level. */ + int srcu_size_state; /* Small-to-big transition state. */ }; /* * Per-SRCU-domain structure, similar in function to rcu_state. */ struct srcu_struct { - int srcu_size_state; /* Small-to-big transition state. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ spinlock_t __private lock; /* Protect counters and size state. */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index acb0862faafa..8428a184d506 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -225,7 +225,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) } sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); } - smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_WAIT_BARRIER); + smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_WAIT_BARRIER); return true; } @@ -240,7 +240,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->srcu_sup = kzalloc(sizeof(*ssp->srcu_sup), GFP_KERNEL); if (!ssp->srcu_sup) return -ENOMEM; - ssp->srcu_size_state = SRCU_SIZE_SMALL; + ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; ssp->srcu_sup->node = NULL; mutex_init(&ssp->srcu_cb_mutex); mutex_init(&ssp->srcu_gp_mutex); @@ -261,7 +261,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) init_srcu_struct_data(ssp); ssp->srcu_gp_seq_needed_exp = 0; ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - if (READ_ONCE(ssp->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { + if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) { if (!ssp->sda_is_static) { free_percpu(ssp->sda); @@ -270,7 +270,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) return -ENOMEM; } } else { - WRITE_ONCE(ssp->srcu_size_state, SRCU_SIZE_BIG); + WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); } } smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */ @@ -315,7 +315,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); static void __srcu_transition_to_big(struct srcu_struct *ssp) { lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); - smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_ALLOC); + smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC); } /* @@ -326,10 +326,10 @@ static void srcu_transition_to_big(struct srcu_struct *ssp) unsigned long flags; /* Double-checked locking on ->srcu_size-state. */ - if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) + if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) return; spin_lock_irqsave_rcu_node(ssp, flags); - if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) { + if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) { spin_unlock_irqrestore_rcu_node(ssp, flags); return; } @@ -345,7 +345,7 @@ static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) { unsigned long j; - if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_size_state) + if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_sup->srcu_size_state) return; j = jiffies; if (ssp->srcu_size_jiffies != j) { @@ -666,7 +666,7 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) } kfree(ssp->srcu_sup->node); ssp->srcu_sup->node = NULL; - ssp->srcu_size_state = SRCU_SIZE_SMALL; + ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; if (!ssp->sda_is_static) { free_percpu(ssp->sda); ssp->sda = NULL; @@ -770,7 +770,7 @@ static void srcu_gp_start(struct srcu_struct *ssp) struct srcu_data *sdp; int state; - if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) + if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); else sdp = this_cpu_ptr(ssp->sda); @@ -880,7 +880,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) /* A new grace period can start at this point. But only one. */ /* Initiate callback invocation as needed. */ - ss_state = smp_load_acquire(&ssp->srcu_size_state); + ss_state = smp_load_acquire(&ssp->srcu_sup->srcu_size_state); if (ss_state < SRCU_SIZE_WAIT_BARRIER) { srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()), cbdelay); @@ -940,7 +940,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) if (ss_state == SRCU_SIZE_ALLOC) init_srcu_struct_nodes(ssp, GFP_KERNEL); else - smp_store_release(&ssp->srcu_size_state, ss_state + 1); + smp_store_release(&ssp->srcu_sup->srcu_size_state, ss_state + 1); } } @@ -1002,7 +1002,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, unsigned long snp_seq; /* Ensure that snp node tree is fully initialized before traversing it */ - if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) + if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) snp_leaf = NULL; else snp_leaf = sdp->mynode; @@ -1209,7 +1209,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, * sequence number cannot wrap around in the meantime. */ idx = __srcu_read_lock_nmisafe(ssp); - ss_state = smp_load_acquire(&ssp->srcu_size_state); + ss_state = smp_load_acquire(&ssp->srcu_sup->srcu_size_state); if (ss_state < SRCU_SIZE_WAIT_CALL) sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); else @@ -1546,7 +1546,7 @@ void srcu_barrier(struct srcu_struct *ssp) atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); idx = __srcu_read_lock_nmisafe(ssp); - if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) + if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, get_boot_cpu_id())); else for_each_possible_cpu(cpu) @@ -1784,7 +1784,7 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) int cpu; int idx; unsigned long s0 = 0, s1 = 0; - int ss_state = READ_ONCE(ssp->srcu_size_state); + int ss_state = READ_ONCE(ssp->srcu_sup->srcu_size_state); int ss_state_idx = ss_state; idx = ssp->srcu_idx & 0x1; @@ -1871,8 +1871,9 @@ void __init srcu_init(void) ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, work.work.entry); list_del_init(&ssp->work.work.entry); - if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && ssp->srcu_size_state == SRCU_SIZE_SMALL) - ssp->srcu_size_state = SRCU_SIZE_ALLOC; + if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && + ssp->srcu_sup->srcu_size_state == SRCU_SIZE_SMALL) + ssp->srcu_sup->srcu_size_state = SRCU_SIZE_ALLOC; queue_work(rcu_gp_wq, &ssp->work.work); } } -- cgit v1.2.3 From 574dc1a7efe490dffe5c1ce0285306feec16a880 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 17:22:27 -0700 Subject: srcu: Move ->srcu_cb_mutex from srcu_struct to srcu_usage This commit moves the ->srcu_cb_mutex field from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 2 +- kernel/rcu/srcutree.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 443d27a214ef..231de66ceb15 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -65,13 +65,13 @@ struct srcu_usage { struct srcu_node *level[RCU_NUM_LVLS + 1]; /* First node at each level. */ int srcu_size_state; /* Small-to-big transition state. */ + struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ }; /* * Per-SRCU-domain structure, similar in function to rcu_state. */ struct srcu_struct { - struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ spinlock_t __private lock; /* Protect counters and size state. */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned int srcu_idx; /* Current rdr array element. */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 8428a184d506..1814f3bfc219 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -242,7 +242,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) return -ENOMEM; ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; ssp->srcu_sup->node = NULL; - mutex_init(&ssp->srcu_cb_mutex); + mutex_init(&ssp->srcu_sup->srcu_cb_mutex); mutex_init(&ssp->srcu_gp_mutex); ssp->srcu_idx = 0; ssp->srcu_gp_seq = 0; @@ -861,7 +861,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) int ss_state; /* Prevent more than one additional grace period. */ - mutex_lock(&ssp->srcu_cb_mutex); + mutex_lock(&ssp->srcu_sup->srcu_cb_mutex); /* End the current grace period. */ spin_lock_irq_rcu_node(ssp); @@ -921,7 +921,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) } /* Callback initiation done, allow grace periods after next. */ - mutex_unlock(&ssp->srcu_cb_mutex); + mutex_unlock(&ssp->srcu_sup->srcu_cb_mutex); /* Start a new grace period if needed. */ spin_lock_irq_rcu_node(ssp); -- cgit v1.2.3 From b3fb11f7e9c3c64dd86403409a070c996d8ac081 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 18:29:38 -0700 Subject: srcu: Move ->lock from srcu_struct to srcu_usage This commit moves the ->lock field from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 11 ++++++---- kernel/rcu/srcutree.c | 56 ++++++++++++++++++++++++------------------------ 2 files changed, 35 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 231de66ceb15..694d87b81917 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -66,13 +66,13 @@ struct srcu_usage { /* First node at each level. */ int srcu_size_state; /* Small-to-big transition state. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ + spinlock_t __private lock; /* Protect counters and size state. */ }; /* * Per-SRCU-domain structure, similar in function to rcu_state. */ struct srcu_struct { - spinlock_t __private lock; /* Protect counters and size state. */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned int srcu_idx; /* Current rdr array element. */ unsigned long srcu_gp_seq; /* Grace-period seq #. */ @@ -116,7 +116,6 @@ struct srcu_struct { #define SRCU_STATE_SCAN2 2 #define __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = -1UL, \ .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ .srcu_sup = &usage_name, \ @@ -154,7 +153,9 @@ struct srcu_struct { */ #ifdef MODULE # define __DEFINE_SRCU(name, is_static) \ - static struct srcu_usage name##_srcu_usage; \ + static struct srcu_usage name##_srcu_usage = { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + }; \ is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage); \ extern struct srcu_struct * const __srcu_struct_##name; \ struct srcu_struct * const __srcu_struct_##name \ @@ -162,7 +163,9 @@ struct srcu_struct { #else # define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \ - static struct srcu_usage name##_srcu_usage; \ + static struct srcu_usage name##_srcu_usage = { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + }; \ is_static struct srcu_struct name = \ __SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data) #endif diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index c2a024a60f1a..c42248cf18f6 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -103,7 +103,7 @@ do { \ #define spin_trylock_irqsave_rcu_node(p, flags) \ ({ \ - bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ + bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ \ if (___locked) \ smp_mb__after_unlock_lock(); \ @@ -241,7 +241,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) if (!ssp->srcu_sup) return -ENOMEM; if (!is_static) - spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); + spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; ssp->srcu_sup->node = NULL; mutex_init(&ssp->srcu_sup->srcu_cb_mutex); @@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); */ static void __srcu_transition_to_big(struct srcu_struct *ssp) { - lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); + lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC); } @@ -328,13 +328,13 @@ static void srcu_transition_to_big(struct srcu_struct *ssp) /* Double-checked locking on ->srcu_size-state. */ if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) return; - spin_lock_irqsave_rcu_node(ssp, flags); + spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) { - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } __srcu_transition_to_big(ssp); - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -369,9 +369,9 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon if (spin_trylock_irqsave_rcu_node(sdp, *flags)) return; - spin_lock_irqsave_rcu_node(ssp, *flags); + spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); spin_lock_irqsave_check_contention(ssp); - spin_unlock_irqrestore_rcu_node(ssp, *flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); spin_lock_irqsave_rcu_node(sdp, *flags); } @@ -383,9 +383,9 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon */ static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) { - if (spin_trylock_irqsave_rcu_node(ssp, *flags)) + if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) return; - spin_lock_irqsave_rcu_node(ssp, *flags); + spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); spin_lock_irqsave_check_contention(ssp); } @@ -404,13 +404,13 @@ static void check_init_srcu_struct(struct srcu_struct *ssp) /* The smp_load_acquire() pairs with the smp_store_release(). */ if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. */ - spin_lock_irqsave_rcu_node(ssp, flags); + spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) { - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } init_srcu_struct_fields(ssp, true); - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -774,7 +774,7 @@ static void srcu_gp_start(struct srcu_struct *ssp) sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); else sdp = this_cpu_ptr(ssp->sda); - lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); + lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ rcu_segcblist_advance(&sdp->srcu_cblist, @@ -864,7 +864,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) mutex_lock(&ssp->srcu_sup->srcu_cb_mutex); /* End the current grace period. */ - spin_lock_irq_rcu_node(ssp); + spin_lock_irq_rcu_node(ssp->srcu_sup); idx = rcu_seq_state(ssp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) @@ -875,7 +875,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) gpseq = rcu_seq_current(&ssp->srcu_gp_seq); if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq); - spin_unlock_irq_rcu_node(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup); mutex_unlock(&ssp->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ @@ -924,15 +924,15 @@ static void srcu_gp_end(struct srcu_struct *ssp) mutex_unlock(&ssp->srcu_sup->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq_rcu_node(ssp); + spin_lock_irq_rcu_node(ssp->srcu_sup); gpseq = rcu_seq_current(&ssp->srcu_gp_seq); if (!rcu_seq_state(gpseq) && ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) { srcu_gp_start(ssp); - spin_unlock_irq_rcu_node(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup); srcu_reschedule(ssp, 0); } else { - spin_unlock_irq_rcu_node(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup); } /* Transition to big if needed. */ @@ -975,7 +975,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp spin_lock_irqsave_ssp_contention(ssp, &flags); if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s); - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -1064,7 +1064,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, else if (list_empty(&ssp->work.work.entry)) list_add(&ssp->work.work.entry, &srcu_boot_list); } - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -1599,17 +1599,17 @@ static void srcu_advance_state(struct srcu_struct *ssp) */ idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - spin_lock_irq_rcu_node(ssp); + spin_lock_irq_rcu_node(ssp->srcu_sup); if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq)); - spin_unlock_irq_rcu_node(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup); mutex_unlock(&ssp->srcu_gp_mutex); return; } idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) srcu_gp_start(ssp); - spin_unlock_irq_rcu_node(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup); if (idx != SRCU_STATE_IDLE) { mutex_unlock(&ssp->srcu_gp_mutex); return; /* Someone else started the grace period. */ @@ -1623,10 +1623,10 @@ static void srcu_advance_state(struct srcu_struct *ssp) return; /* readers present, retry later. */ } srcu_flip(ssp); - spin_lock_irq_rcu_node(ssp); + spin_lock_irq_rcu_node(ssp->srcu_sup); rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2); ssp->srcu_n_exp_nodelay = 0; - spin_unlock_irq_rcu_node(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup); } if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { @@ -1710,7 +1710,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) { bool pushgp = true; - spin_lock_irq_rcu_node(ssp); + spin_lock_irq_rcu_node(ssp->srcu_sup); if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ @@ -1720,7 +1720,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) /* Outstanding request and no GP. Start one. */ srcu_gp_start(ssp); } - spin_unlock_irq_rcu_node(ssp); + spin_unlock_irq_rcu_node(ssp->srcu_sup); if (pushgp) queue_delayed_work(rcu_gp_wq, &ssp->work, delay); -- cgit v1.2.3 From e3a6ab25cfa0fcdcb31c346b9871a566d440980d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 19:13:16 -0700 Subject: srcu: Move ->srcu_gp_mutex from srcu_struct to srcu_usage This commit moves the ->srcu_gp_mutex field from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 2 +- kernel/rcu/srcutree.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 694d87b81917..d04e3da6181c 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -67,13 +67,13 @@ struct srcu_usage { int srcu_size_state; /* Small-to-big transition state. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ spinlock_t __private lock; /* Protect counters and size state. */ + struct mutex srcu_gp_mutex; /* Serialize GP work. */ }; /* * Per-SRCU-domain structure, similar in function to rcu_state. */ struct srcu_struct { - struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned int srcu_idx; /* Current rdr array element. */ unsigned long srcu_gp_seq; /* Grace-period seq #. */ unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index c42248cf18f6..a36066798de7 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -245,7 +245,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; ssp->srcu_sup->node = NULL; mutex_init(&ssp->srcu_sup->srcu_cb_mutex); - mutex_init(&ssp->srcu_gp_mutex); + mutex_init(&ssp->srcu_sup->srcu_gp_mutex); ssp->srcu_idx = 0; ssp->srcu_gp_seq = 0; ssp->srcu_barrier_seq = 0; @@ -876,7 +876,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq); spin_unlock_irq_rcu_node(ssp->srcu_sup); - mutex_unlock(&ssp->srcu_gp_mutex); + mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ /* Initiate callback invocation as needed. */ @@ -1585,7 +1585,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) { int idx; - mutex_lock(&ssp->srcu_gp_mutex); + mutex_lock(&ssp->srcu_sup->srcu_gp_mutex); /* * Because readers might be delayed for an extended period after @@ -1603,7 +1603,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq)); spin_unlock_irq_rcu_node(ssp->srcu_sup); - mutex_unlock(&ssp->srcu_gp_mutex); + mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; } idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); @@ -1611,7 +1611,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) srcu_gp_start(ssp); spin_unlock_irq_rcu_node(ssp->srcu_sup); if (idx != SRCU_STATE_IDLE) { - mutex_unlock(&ssp->srcu_gp_mutex); + mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; /* Someone else started the grace period. */ } } @@ -1619,7 +1619,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { idx = 1 ^ (ssp->srcu_idx & 1); if (!try_check_zero(ssp, idx, 1)) { - mutex_unlock(&ssp->srcu_gp_mutex); + mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; /* readers present, retry later. */ } srcu_flip(ssp); @@ -1637,7 +1637,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) */ idx = 1 ^ (ssp->srcu_idx & 1); if (!try_check_zero(ssp, idx, 2)) { - mutex_unlock(&ssp->srcu_gp_mutex); + mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; /* readers present, retry later. */ } ssp->srcu_n_exp_nodelay = 0; -- cgit v1.2.3 From 03200b5ca3b4d4edf634dc052bf3b8eb8dc8bbbc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 19:30:50 -0700 Subject: srcu: Move grace-period fields from srcu_struct to srcu_usage This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed, ->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 25 ++++----- kernel/rcu/srcutree.c | 128 +++++++++++++++++++++++------------------------ 2 files changed, 77 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index d04e3da6181c..372e35b0e8b6 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -68,6 +68,11 @@ struct srcu_usage { struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ spinlock_t __private lock; /* Protect counters and size state. */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ + unsigned long srcu_gp_seq; /* Grace-period seq #. */ + unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ + unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ + unsigned long srcu_gp_start; /* Last GP start timestamp (jiffies) */ + unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */ }; /* @@ -75,11 +80,6 @@ struct srcu_usage { */ struct srcu_struct { unsigned int srcu_idx; /* Current rdr array element. */ - unsigned long srcu_gp_seq; /* Grace-period seq #. */ - unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ - unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ - unsigned long srcu_gp_start; /* Last GP start timestamp (jiffies) */ - unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */ unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */ unsigned long srcu_n_lock_retries; /* Contention events in current interval. */ unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. */ @@ -115,8 +115,13 @@ struct srcu_struct { #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 -#define __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ +#define __SRCU_USAGE_INIT(name) \ +{ \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = -1UL, \ +} + +#define __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ .srcu_sup = &usage_name, \ __SRCU_DEP_MAP_INIT(name) @@ -153,9 +158,7 @@ struct srcu_struct { */ #ifdef MODULE # define __DEFINE_SRCU(name, is_static) \ - static struct srcu_usage name##_srcu_usage = { \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ - }; \ + static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage); \ is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage); \ extern struct srcu_struct * const __srcu_struct_##name; \ struct srcu_struct * const __srcu_struct_##name \ @@ -163,9 +166,7 @@ struct srcu_struct { #else # define __DEFINE_SRCU(name, is_static) \ static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \ - static struct srcu_usage name##_srcu_usage = { \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ - }; \ + static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage); \ is_static struct srcu_struct name = \ __SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data) #endif diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index a36066798de7..340eb685cf64 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -135,8 +135,8 @@ static void init_srcu_struct_data(struct srcu_struct *ssp) spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; - sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq; - sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq; + sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq; + sdp->srcu_gp_seq_needed_exp = ssp->srcu_sup->srcu_gp_seq; sdp->mynode = NULL; sdp->cpu = cpu; INIT_WORK(&sdp->work, srcu_invoke_callbacks); @@ -247,7 +247,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) mutex_init(&ssp->srcu_sup->srcu_cb_mutex); mutex_init(&ssp->srcu_sup->srcu_gp_mutex); ssp->srcu_idx = 0; - ssp->srcu_gp_seq = 0; + ssp->srcu_sup->srcu_gp_seq = 0; ssp->srcu_barrier_seq = 0; mutex_init(&ssp->srcu_barrier_mutex); atomic_set(&ssp->srcu_barrier_cpu_cnt, 0); @@ -261,8 +261,8 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) return -ENOMEM; } init_srcu_struct_data(ssp); - ssp->srcu_gp_seq_needed_exp = 0; - ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + ssp->srcu_sup->srcu_gp_seq_needed_exp = 0; + ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns(); if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) { if (!ssp->sda_is_static) { @@ -275,7 +275,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); } } - smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */ + smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, 0); /* Init done. */ return 0; } @@ -402,10 +402,10 @@ static void check_init_srcu_struct(struct srcu_struct *ssp) unsigned long flags; /* The smp_load_acquire() pairs with the smp_store_release(). */ - if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/ + if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. */ spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); - if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) { + if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) { spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } @@ -616,11 +616,11 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp) unsigned long j; unsigned long jbase = SRCU_INTERVAL; - if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_sup->srcu_gp_seq), READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp))) jbase = 0; - if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) { + if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq))) { j = jiffies - 1; - gpstart = READ_ONCE(ssp->srcu_gp_start); + gpstart = READ_ONCE(ssp->srcu_sup->srcu_gp_start); if (time_after(j, gpstart)) jbase += j - gpstart; if (!jbase) { @@ -656,12 +656,12 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) return; /* Forgot srcu_barrier(), so just leak it! */ } - if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || - WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) || + if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) != SRCU_STATE_IDLE) || + WARN_ON(rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq) != ssp->srcu_sup->srcu_gp_seq_needed) || WARN_ON(srcu_readers_active(ssp))) { pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n", - __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)), - rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed); + __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)), + rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ssp->srcu_sup->srcu_gp_seq_needed); return; /* Caller forgot to stop doing call_srcu()? */ } kfree(ssp->srcu_sup->node); @@ -775,18 +775,18 @@ static void srcu_gp_start(struct srcu_struct *ssp) else sdp = this_cpu_ptr(ssp->sda); lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); - WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)); spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&ssp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq)); spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ - WRITE_ONCE(ssp->srcu_gp_start, jiffies); + WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies); WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0); smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ - rcu_seq_start(&ssp->srcu_gp_seq); - state = rcu_seq_state(ssp->srcu_gp_seq); + rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq); + state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq); WARN_ON_ONCE(state != SRCU_STATE_SCAN1); } @@ -865,16 +865,16 @@ static void srcu_gp_end(struct srcu_struct *ssp) /* End the current grace period. */ spin_lock_irq_rcu_node(ssp->srcu_sup); - idx = rcu_seq_state(ssp->srcu_gp_seq); + idx = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); - if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_sup->srcu_gp_seq), READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp))) cbdelay = 0; - WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns()); - rcu_seq_end(&ssp->srcu_gp_seq); - gpseq = rcu_seq_current(&ssp->srcu_gp_seq); - if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) - WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq); + WRITE_ONCE(ssp->srcu_sup->srcu_last_gp_end, ktime_get_mono_fast_ns()); + rcu_seq_end(&ssp->srcu_sup->srcu_gp_seq); + gpseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq); + if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, gpseq)) + WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, gpseq); spin_unlock_irq_rcu_node(ssp->srcu_sup); mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ @@ -925,9 +925,9 @@ static void srcu_gp_end(struct srcu_struct *ssp) /* Start a new grace period if needed. */ spin_lock_irq_rcu_node(ssp->srcu_sup); - gpseq = rcu_seq_current(&ssp->srcu_gp_seq); + gpseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq); if (!rcu_seq_state(gpseq) && - ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) { + ULONG_CMP_LT(gpseq, ssp->srcu_sup->srcu_gp_seq_needed)) { srcu_gp_start(ssp); spin_unlock_irq_rcu_node(ssp->srcu_sup); srcu_reschedule(ssp, 0); @@ -960,7 +960,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp if (snp) for (; snp != NULL; snp = snp->srcu_parent) { sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp); - if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) || + if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) || (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s))) return; spin_lock_irqsave_rcu_node(snp, flags); @@ -973,8 +973,8 @@ static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp spin_unlock_irqrestore_rcu_node(snp, flags); } spin_lock_irqsave_ssp_contention(ssp, &flags); - if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) - WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s); + if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s)) + WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s); spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } @@ -1010,7 +1010,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, if (snp_leaf) /* Each pass through the loop does one level of the srcu_node tree. */ for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) { - if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) && snp != snp_leaf) + if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) && snp != snp_leaf) return; /* GP already done and CBs recorded. */ spin_lock_irqsave_rcu_node(snp, flags); snp_seq = snp->srcu_have_cbs[idx]; @@ -1037,20 +1037,20 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, /* Top of tree, must ensure the grace period will be started. */ spin_lock_irqsave_ssp_contention(ssp, &flags); - if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) { + if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load * acquire setting up for initialization. */ - smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/ + smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, s); /*^^^*/ } - if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) - WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s); + if (!do_norm && ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s)) + WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s); /* If grace period not already in progress, start it. */ - if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) && - rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) { - WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) && + rcu_seq_state(ssp->srcu_sup->srcu_gp_seq) == SRCU_STATE_IDLE) { + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)); srcu_gp_start(ssp); // And how can that list_add() in the "else" clause @@ -1164,18 +1164,18 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp) /* First, see if enough time has passed since the last GP. */ t = ktime_get_mono_fast_ns(); - tlast = READ_ONCE(ssp->srcu_last_gp_end); + tlast = READ_ONCE(ssp->srcu_sup->srcu_last_gp_end); if (exp_holdoff == 0 || time_in_range_open(t, tlast, tlast + exp_holdoff)) return false; /* Too soon after last GP. */ /* Next, check for probable idleness. */ - curseq = rcu_seq_current(&ssp->srcu_gp_seq); + curseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq); smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */ - if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed))) + if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed))) return false; /* Grace period in progress, so not idle. */ smp_mb(); /* Order ->srcu_gp_seq with prior access. */ - if (curseq != rcu_seq_current(&ssp->srcu_gp_seq)) + if (curseq != rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)) return false; /* GP # changed, so not idle. */ return true; /* With reasonable probability, idle! */ } @@ -1218,8 +1218,8 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, if (rhp) rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_gp_seq)); - s = rcu_seq_snap(&ssp->srcu_gp_seq); + rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); + s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { sdp->srcu_gp_seq_needed = s; @@ -1430,7 +1430,7 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) // Any prior manipulation of SRCU-protected data must happen // before the load from ->srcu_gp_seq. smp_mb(); - return rcu_seq_snap(&ssp->srcu_gp_seq); + return rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq); } EXPORT_SYMBOL_GPL(get_state_synchronize_srcu); @@ -1477,7 +1477,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); */ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) { - if (!rcu_seq_done(&ssp->srcu_gp_seq, cookie)) + if (!rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie)) return false; // Ensure that the end of the SRCU grace period happens before // any subsequent code that the caller might execute. @@ -1597,16 +1597,16 @@ static void srcu_advance_state(struct srcu_struct *ssp) * The load-acquire ensures that we see the accesses performed * by the prior grace period. */ - idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */ + idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { spin_lock_irq_rcu_node(ssp->srcu_sup); - if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { - WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq)); + if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { + WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)); spin_unlock_irq_rcu_node(ssp->srcu_sup); mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; } - idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); + idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) srcu_gp_start(ssp); spin_unlock_irq_rcu_node(ssp->srcu_sup); @@ -1616,7 +1616,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) } } - if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { + if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) { idx = 1 ^ (ssp->srcu_idx & 1); if (!try_check_zero(ssp, idx, 1)) { mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); @@ -1624,12 +1624,12 @@ static void srcu_advance_state(struct srcu_struct *ssp) } srcu_flip(ssp); spin_lock_irq_rcu_node(ssp->srcu_sup); - rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2); + rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2); ssp->srcu_n_exp_nodelay = 0; spin_unlock_irq_rcu_node(ssp->srcu_sup); } - if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { + if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) { /* * SRCU read-side critical sections are normally short, @@ -1666,7 +1666,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) rcu_cblist_init(&ready_cbs); spin_lock_irq_rcu_node(sdp); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { spin_unlock_irq_rcu_node(sdp); @@ -1694,7 +1694,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) spin_lock_irq_rcu_node(sdp); rcu_segcblist_add_len(&sdp->srcu_cblist, -len); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&ssp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq)); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); spin_unlock_irq_rcu_node(sdp); @@ -1711,12 +1711,12 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) bool pushgp = true; spin_lock_irq_rcu_node(ssp->srcu_sup); - if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { - if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) { + if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { + if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ pushgp = false; } - } else if (!rcu_seq_state(ssp->srcu_gp_seq)) { + } else if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)) { /* Outstanding request and no GP. Start one. */ srcu_gp_start(ssp); } @@ -1762,7 +1762,7 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type, if (test_type != SRCU_FLAVOR) return; *flags = 0; - *gp_seq = rcu_seq_current(&ssp->srcu_gp_seq); + *gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq); } EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); @@ -1791,7 +1791,7 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name)) ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1; pr_alert("%s%s Tree SRCU g%ld state %d (%s)", - tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), ss_state, + tt, tf, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ss_state, srcu_size_state_name[ss_state_idx]); if (!ssp->sda) { // Called after cleanup_srcu_struct(), perhaps. @@ -1905,7 +1905,7 @@ static void srcu_module_going(struct module *mod) for (i = 0; i < mod->num_srcu_structs; i++) { ssp = *(sspp++); - if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed)) && + if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed)) && !WARN_ON_ONCE(!ssp->sda_is_static)) cleanup_srcu_struct(ssp); free_percpu(ssp->sda); -- cgit v1.2.3 From 3b46679c623c2766f4c56fd3f9ce8edbb38c5d20 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 20:01:02 -0700 Subject: srcu: Move heuristics fields from srcu_struct to srcu_usage This commit moves the ->srcu_size_jiffies, ->srcu_n_lock_retries, and ->srcu_n_exp_nodelay fields from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 6 +++--- kernel/rcu/srcutree.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 372e35b0e8b6..3023492d8d89 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -73,6 +73,9 @@ struct srcu_usage { unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ unsigned long srcu_gp_start; /* Last GP start timestamp (jiffies) */ unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */ + unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */ + unsigned long srcu_n_lock_retries; /* Contention events in current interval. */ + unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. */ }; /* @@ -80,9 +83,6 @@ struct srcu_usage { */ struct srcu_struct { unsigned int srcu_idx; /* Current rdr array element. */ - unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */ - unsigned long srcu_n_lock_retries; /* Contention events in current interval. */ - unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. */ struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ bool sda_is_static; /* May ->sda be passed to free_percpu()? */ unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 340eb685cf64..291fb520bce0 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -348,11 +348,11 @@ static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_sup->srcu_size_state) return; j = jiffies; - if (ssp->srcu_size_jiffies != j) { - ssp->srcu_size_jiffies = j; - ssp->srcu_n_lock_retries = 0; + if (ssp->srcu_sup->srcu_size_jiffies != j) { + ssp->srcu_sup->srcu_size_jiffies = j; + ssp->srcu_sup->srcu_n_lock_retries = 0; } - if (++ssp->srcu_n_lock_retries <= small_contention_lim) + if (++ssp->srcu_sup->srcu_n_lock_retries <= small_contention_lim) return; __srcu_transition_to_big(ssp); } @@ -624,8 +624,8 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp) if (time_after(j, gpstart)) jbase += j - gpstart; if (!jbase) { - WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); - if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase) + WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay) + 1); + if (READ_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase) jbase = 1; } } @@ -783,7 +783,7 @@ static void srcu_gp_start(struct srcu_struct *ssp) rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq)); spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies); - WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0); + WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0); smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq); state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq); @@ -1625,7 +1625,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) srcu_flip(ssp); spin_lock_irq_rcu_node(ssp->srcu_sup); rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2); - ssp->srcu_n_exp_nodelay = 0; + ssp->srcu_sup->srcu_n_exp_nodelay = 0; spin_unlock_irq_rcu_node(ssp->srcu_sup); } @@ -1640,7 +1640,7 @@ static void srcu_advance_state(struct srcu_struct *ssp) mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); return; /* readers present, retry later. */ } - ssp->srcu_n_exp_nodelay = 0; + ssp->srcu_sup->srcu_n_exp_nodelay = 0; srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */ } } -- cgit v1.2.3 From 660349ac79cb22bb64c44b026d879069783e97d5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 20:22:58 -0700 Subject: srcu: Move ->sda_is_static from srcu_struct to srcu_usage This commit moves the ->sda_is_static field from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 2 +- kernel/rcu/srcutree.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 3023492d8d89..d3534ecb806e 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -76,6 +76,7 @@ struct srcu_usage { unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */ unsigned long srcu_n_lock_retries; /* Contention events in current interval. */ unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. */ + bool sda_is_static; /* May ->sda be passed to free_percpu()? */ }; /* @@ -84,7 +85,6 @@ struct srcu_usage { struct srcu_struct { unsigned int srcu_idx; /* Current rdr array element. */ struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ - bool sda_is_static; /* May ->sda be passed to free_percpu()? */ unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */ struct completion srcu_barrier_completion; diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 291fb520bce0..20f2373f7e25 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -252,7 +252,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) mutex_init(&ssp->srcu_barrier_mutex); atomic_set(&ssp->srcu_barrier_cpu_cnt, 0); INIT_DELAYED_WORK(&ssp->work, process_srcu); - ssp->sda_is_static = is_static; + ssp->srcu_sup->sda_is_static = is_static; if (!is_static) ssp->sda = alloc_percpu(struct srcu_data); if (!ssp->sda) { @@ -265,7 +265,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns(); if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) { - if (!ssp->sda_is_static) { + if (!ssp->srcu_sup->sda_is_static) { free_percpu(ssp->sda); ssp->sda = NULL; kfree(ssp->srcu_sup); @@ -667,7 +667,7 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) kfree(ssp->srcu_sup->node); ssp->srcu_sup->node = NULL; ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; - if (!ssp->sda_is_static) { + if (!ssp->srcu_sup->sda_is_static) { free_percpu(ssp->sda); ssp->sda = NULL; kfree(ssp->srcu_sup); @@ -1906,7 +1906,7 @@ static void srcu_module_going(struct module *mod) for (i = 0; i < mod->num_srcu_structs; i++) { ssp = *(sspp++); if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed)) && - !WARN_ON_ONCE(!ssp->sda_is_static)) + !WARN_ON_ONCE(!ssp->srcu_sup->sda_is_static)) cleanup_srcu_struct(ssp); free_percpu(ssp->sda); } -- cgit v1.2.3 From d20162e0bfc222183a7c94cd00e74b6bbf1a605b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 21:08:18 -0700 Subject: srcu: Move srcu_barrier() fields from srcu_struct to srcu_usage This commit moves the ->srcu_barrier_seq, ->srcu_barrier_mutex, ->srcu_barrier_completion, and ->srcu_barrier_cpu_cnt fields from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 14 +++++++------- kernel/rcu/srcutree.c | 38 +++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index d3534ecb806e..d544ec1c0c8e 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -77,6 +77,13 @@ struct srcu_usage { unsigned long srcu_n_lock_retries; /* Contention events in current interval. */ unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. */ bool sda_is_static; /* May ->sda be passed to free_percpu()? */ + unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ + struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */ + struct completion srcu_barrier_completion; + /* Awaken barrier rq at end. */ + atomic_t srcu_barrier_cpu_cnt; /* # CPUs not yet posting a */ + /* callback for the barrier */ + /* operation. */ }; /* @@ -85,13 +92,6 @@ struct srcu_usage { struct srcu_struct { unsigned int srcu_idx; /* Current rdr array element. */ struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ - unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ - struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */ - struct completion srcu_barrier_completion; - /* Awaken barrier rq at end. */ - atomic_t srcu_barrier_cpu_cnt; /* # CPUs not yet posting a */ - /* callback for the barrier */ - /* operation. */ unsigned long reschedule_jiffies; unsigned long reschedule_count; struct delayed_work work; diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 20f2373f7e25..97d1fe9a160c 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -248,9 +248,9 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) mutex_init(&ssp->srcu_sup->srcu_gp_mutex); ssp->srcu_idx = 0; ssp->srcu_sup->srcu_gp_seq = 0; - ssp->srcu_barrier_seq = 0; - mutex_init(&ssp->srcu_barrier_mutex); - atomic_set(&ssp->srcu_barrier_cpu_cnt, 0); + ssp->srcu_sup->srcu_barrier_seq = 0; + mutex_init(&ssp->srcu_sup->srcu_barrier_mutex); + atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0); INIT_DELAYED_WORK(&ssp->work, process_srcu); ssp->srcu_sup->sda_is_static = is_static; if (!is_static) @@ -1496,8 +1496,8 @@ static void srcu_barrier_cb(struct rcu_head *rhp) sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); ssp = sdp->ssp; - if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) - complete(&ssp->srcu_barrier_completion); + if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_sup->srcu_barrier_completion); } /* @@ -1511,13 +1511,13 @@ static void srcu_barrier_cb(struct rcu_head *rhp) static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) { spin_lock_irq_rcu_node(sdp); - atomic_inc(&ssp->srcu_barrier_cpu_cnt); + atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); if (!rcu_segcblist_entrain(&sdp->srcu_cblist, &sdp->srcu_barrier_head)) { debug_rcu_head_unqueue(&sdp->srcu_barrier_head); - atomic_dec(&ssp->srcu_barrier_cpu_cnt); + atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt); } spin_unlock_irq_rcu_node(sdp); } @@ -1530,20 +1530,20 @@ void srcu_barrier(struct srcu_struct *ssp) { int cpu; int idx; - unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq); + unsigned long s = rcu_seq_snap(&ssp->srcu_sup->srcu_barrier_seq); check_init_srcu_struct(ssp); - mutex_lock(&ssp->srcu_barrier_mutex); - if (rcu_seq_done(&ssp->srcu_barrier_seq, s)) { + mutex_lock(&ssp->srcu_sup->srcu_barrier_mutex); + if (rcu_seq_done(&ssp->srcu_sup->srcu_barrier_seq, s)) { smp_mb(); /* Force ordering following return. */ - mutex_unlock(&ssp->srcu_barrier_mutex); + mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex); return; /* Someone else did our work for us. */ } - rcu_seq_start(&ssp->srcu_barrier_seq); - init_completion(&ssp->srcu_barrier_completion); + rcu_seq_start(&ssp->srcu_sup->srcu_barrier_seq); + init_completion(&ssp->srcu_sup->srcu_barrier_completion); /* Initial count prevents reaching zero until all CBs are posted. */ - atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); + atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 1); idx = __srcu_read_lock_nmisafe(ssp); if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) @@ -1554,12 +1554,12 @@ void srcu_barrier(struct srcu_struct *ssp) __srcu_read_unlock_nmisafe(ssp, idx); /* Remove the initial count, at which point reaching zero can happen. */ - if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) - complete(&ssp->srcu_barrier_completion); - wait_for_completion(&ssp->srcu_barrier_completion); + if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_sup->srcu_barrier_completion); + wait_for_completion(&ssp->srcu_sup->srcu_barrier_completion); - rcu_seq_end(&ssp->srcu_barrier_seq); - mutex_unlock(&ssp->srcu_barrier_mutex); + rcu_seq_end(&ssp->srcu_sup->srcu_barrier_seq); + mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex); } EXPORT_SYMBOL_GPL(srcu_barrier); -- cgit v1.2.3 From fd1b3f8e097b7fbbab8ac4a802b24fc23c703dcf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Mar 2023 21:30:32 -0700 Subject: srcu: Move work-scheduling fields from srcu_struct to srcu_usage This commit moves the ->reschedule_jiffies, ->reschedule_count, and ->work fields from the srcu_struct structure to the srcu_usage structure to reduce the size of the former in order to improve cache locality. However, this means that the container_of() calls cannot get a pointer to the srcu_struct because they are no longer in the srcu_struct. This issue is addressed by adding a ->srcu_ssp field in the srcu_usage structure that references the corresponding srcu_struct structure. And given the presence of the sup pointer to the srcu_usage structure, replace some ssp->srcu_usage-> instances with sup->. [ paulmck Apply feedback from kernel test robot. ] Link: https://lore.kernel.org/oe-kbuild-all/202303191400.iO5BOqka-lkp@intel.com/ Suggested-by: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Tested-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 9 +++++---- kernel/rcu/srcutree.c | 41 ++++++++++++++++++++++------------------- 2 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index d544ec1c0c8e..cd0cdd8142c5 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -84,6 +84,10 @@ struct srcu_usage { atomic_t srcu_barrier_cpu_cnt; /* # CPUs not yet posting a */ /* callback for the barrier */ /* operation. */ + unsigned long reschedule_jiffies; + unsigned long reschedule_count; + struct delayed_work work; + struct srcu_struct *srcu_ssp; }; /* @@ -92,9 +96,6 @@ struct srcu_usage { struct srcu_struct { unsigned int srcu_idx; /* Current rdr array element. */ struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ - unsigned long reschedule_jiffies; - unsigned long reschedule_count; - struct delayed_work work; struct lockdep_map dep_map; struct srcu_usage *srcu_sup; /* Update-side data. */ }; @@ -119,10 +120,10 @@ struct srcu_struct { { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = -1UL, \ + .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ } #define __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ - .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ .srcu_sup = &usage_name, \ __SRCU_DEP_MAP_INIT(name) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 97d1fe9a160c..169a6513b739 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -251,7 +251,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) ssp->srcu_sup->srcu_barrier_seq = 0; mutex_init(&ssp->srcu_sup->srcu_barrier_mutex); atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0); - INIT_DELAYED_WORK(&ssp->work, process_srcu); + INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu); ssp->srcu_sup->sda_is_static = is_static; if (!is_static) ssp->sda = alloc_percpu(struct srcu_data); @@ -275,6 +275,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); } } + ssp->srcu_sup->srcu_ssp = ssp; smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, 0); /* Init done. */ return 0; } @@ -647,7 +648,7 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) return; /* Just leak it! */ if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ - flush_delayed_work(&ssp->work); + flush_delayed_work(&ssp->srcu_sup->work); for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); @@ -1059,10 +1060,10 @@ static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, // can only be executed during early boot when there is only // the one boot CPU running with interrupts still disabled. if (likely(srcu_init_done)) - queue_delayed_work(rcu_gp_wq, &ssp->work, + queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, !!srcu_get_delay(ssp)); - else if (list_empty(&ssp->work.work.entry)) - list_add(&ssp->work.work.entry, &srcu_boot_list); + else if (list_empty(&ssp->srcu_sup->work.work.entry)) + list_add(&ssp->srcu_sup->work.work.entry, &srcu_boot_list); } spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } @@ -1723,7 +1724,7 @@ static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) spin_unlock_irq_rcu_node(ssp->srcu_sup); if (pushgp) - queue_delayed_work(rcu_gp_wq, &ssp->work, delay); + queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay); } /* @@ -1734,22 +1735,24 @@ static void process_srcu(struct work_struct *work) unsigned long curdelay; unsigned long j; struct srcu_struct *ssp; + struct srcu_usage *sup; - ssp = container_of(work, struct srcu_struct, work.work); + sup = container_of(work, struct srcu_usage, work.work); + ssp = sup->srcu_ssp; srcu_advance_state(ssp); curdelay = srcu_get_delay(ssp); if (curdelay) { - WRITE_ONCE(ssp->reschedule_count, 0); + WRITE_ONCE(sup->reschedule_count, 0); } else { j = jiffies; - if (READ_ONCE(ssp->reschedule_jiffies) == j) { - WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1); - if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay) + if (READ_ONCE(sup->reschedule_jiffies) == j) { + WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1); + if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay) curdelay = 1; } else { - WRITE_ONCE(ssp->reschedule_count, 1); - WRITE_ONCE(ssp->reschedule_jiffies, j); + WRITE_ONCE(sup->reschedule_count, 1); + WRITE_ONCE(sup->reschedule_jiffies, j); } } srcu_reschedule(ssp, curdelay); @@ -1848,7 +1851,7 @@ early_initcall(srcu_bootup_announce); void __init srcu_init(void) { - struct srcu_struct *ssp; + struct srcu_usage *sup; /* Decide on srcu_struct-size strategy. */ if (SRCU_SIZING_IS(SRCU_SIZING_AUTO)) { @@ -1868,13 +1871,13 @@ void __init srcu_init(void) */ srcu_init_done = true; while (!list_empty(&srcu_boot_list)) { - ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, + sup = list_first_entry(&srcu_boot_list, struct srcu_usage, work.work.entry); - list_del_init(&ssp->work.work.entry); + list_del_init(&sup->work.work.entry); if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && - ssp->srcu_sup->srcu_size_state == SRCU_SIZE_SMALL) - ssp->srcu_sup->srcu_size_state = SRCU_SIZE_ALLOC; - queue_work(rcu_gp_wq, &ssp->work.work); + sup->srcu_size_state == SRCU_SIZE_SMALL) + sup->srcu_size_state = SRCU_SIZE_ALLOC; + queue_work(rcu_gp_wq, &sup->work.work); } } -- cgit v1.2.3 From e15a19306004b3d7b6a5fe269e4e7cb7934aa3fe Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Wed, 4 Jan 2023 12:29:01 -0800 Subject: srcu: Add comments for srcu_size_state The SRCU_SIZE_* names are not self-explanatory, so this commit therefore adds comments to the definitions. Signed-off-by: Pingfan Liu Cc: Lai Jiangshan Cc: "Paul E. McKenney" Cc: Frederic Weisbecker Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: "Zhang, Qiang1" To: rcu@vger.kernel.org Reviewed-by: Paul E. McKenney Reviewed-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) --- include/linux/srcutree.h | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 558057b517b7..a6910805f9c5 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -92,16 +92,29 @@ struct srcu_struct { struct lockdep_map dep_map; }; -/* Values for size state variable (->srcu_size_state). */ -#define SRCU_SIZE_SMALL 0 -#define SRCU_SIZE_ALLOC 1 -#define SRCU_SIZE_WAIT_BARRIER 2 -#define SRCU_SIZE_WAIT_CALL 3 -#define SRCU_SIZE_WAIT_CBS1 4 -#define SRCU_SIZE_WAIT_CBS2 5 -#define SRCU_SIZE_WAIT_CBS3 6 -#define SRCU_SIZE_WAIT_CBS4 7 -#define SRCU_SIZE_BIG 8 +// Values for size state variable (->srcu_size_state). Once the state +// has been set to SRCU_SIZE_ALLOC, the grace-period code advances through +// this state machine one step per grace period until the SRCU_SIZE_BIG state +// is reached. Otherwise, the state machine remains in the SRCU_SIZE_SMALL +// state indefinitely. +#define SRCU_SIZE_SMALL 0 // No srcu_node combining tree, ->node == NULL +#define SRCU_SIZE_ALLOC 1 // An srcu_node tree is being allocated, initialized, + // and then referenced by ->node. It will not be used. +#define SRCU_SIZE_WAIT_BARRIER 2 // The srcu_node tree starts being used by everything + // except call_srcu(), especially by srcu_barrier(). + // By the end of this state, all CPUs and threads + // are aware of this tree's existence. +#define SRCU_SIZE_WAIT_CALL 3 // The srcu_node tree starts being used by call_srcu(). + // By the end of this state, all of the call_srcu() + // invocations that were running on a non-boot CPU + // and using the boot CPU's callback queue will have + // completed. +#define SRCU_SIZE_WAIT_CBS1 4 // Don't trust the ->srcu_have_cbs[] grace-period +#define SRCU_SIZE_WAIT_CBS2 5 // sequence elements or the ->srcu_data_have_cbs[] +#define SRCU_SIZE_WAIT_CBS3 6 // CPU-bitmask elements until all four elements of +#define SRCU_SIZE_WAIT_CBS4 7 // each array have been initialized. +#define SRCU_SIZE_BIG 8 // The srcu_node combining tree is fully initialized + // and all aspects of it are being put to use. /* Values for state variable (bottom bits of ->srcu_gp_seq). */ #define SRCU_STATE_IDLE 0 -- cgit v1.2.3 From 58d7668242647e661a20efe065519abd6454287e Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 24 Jan 2023 17:31:26 +0000 Subject: tick/nohz: Fix cpu_is_hotpluggable() by checking with nohz subsystem For CONFIG_NO_HZ_FULL systems, the tick_do_timer_cpu cannot be offlined. However, cpu_is_hotpluggable() still returns true for those CPUs. This causes torture tests that do offlining to end up trying to offline this CPU causing test failures. Such failure happens on all architectures. Fix the repeated error messages thrown by this (even if the hotplug errors are harmless) by asking the opinion of the nohz subsystem on whether the CPU can be hotplugged. [ Apply Frederic Weisbecker feedback on refactoring tick_nohz_cpu_down(). ] For drivers/base/ portion: Acked-by: Greg Kroah-Hartman Acked-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: "Paul E. McKenney" Cc: Zhouyi Zhou Cc: Will Deacon Cc: Marc Zyngier Cc: rcu Cc: stable@vger.kernel.org Fixes: 2987557f52b9 ("driver-core/cpu: Expose hotpluggability to the rest of the kernel") Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) --- drivers/base/cpu.c | 3 ++- include/linux/tick.h | 2 ++ kernel/time/tick-sched.c | 11 ++++++++--- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 182c6122f815..c1815b9dae68 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -487,7 +487,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = { bool cpu_is_hotpluggable(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); - return dev && container_of(dev, struct cpu, dev)->hotpluggable; + return dev && container_of(dev, struct cpu, dev)->hotpluggable + && tick_nohz_cpu_hotpluggable(cpu); } EXPORT_SYMBOL_GPL(cpu_is_hotpluggable); diff --git a/include/linux/tick.h b/include/linux/tick.h index bfd571f18cfd..9459fef5b857 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -216,6 +216,7 @@ extern void tick_nohz_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit); extern void tick_nohz_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit); +extern bool tick_nohz_cpu_hotpluggable(unsigned int cpu); /* * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases @@ -280,6 +281,7 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } +static inline bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { return true; } static inline void tick_dep_set(enum tick_dep_bits bit) { } static inline void tick_dep_clear(enum tick_dep_bits bit) { } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b0e3c9205946..68d81a4283c8 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -527,7 +527,7 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask) tick_nohz_full_running = true; } -static int tick_nohz_cpu_down(unsigned int cpu) +bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { /* * The tick_do_timer_cpu CPU handles housekeeping duty (unbound @@ -535,8 +535,13 @@ static int tick_nohz_cpu_down(unsigned int cpu) * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && tick_do_timer_cpu == cpu) - return -EBUSY; - return 0; + return false; + return true; +} + +static int tick_nohz_cpu_down(unsigned int cpu) +{ + return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY; } void __init tick_nohz_init(void) -- cgit v1.2.3