From 8203d6d0ee784cfb2ebf89053f7fe399abc867d7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 2 Aug 2015 13:53:17 -0700 Subject: rcu: Use single-stage IPI algorithm for RCU expedited grace period The current preemptible-RCU expedited grace-period algorithm invokes synchronize_sched_expedited() to enqueue all tasks currently running in a preemptible-RCU read-side critical section, then waits for all the ->blkd_tasks lists to drain. This works, but results in both an IPI and a double context switch even on CPUs that do not happen to be running in a preemptible RCU read-side critical section. This commit implements a new algorithm that causes less OS jitter. This new algorithm IPIs all online CPUs that are not idle (from an RCU perspective), but refrains from self-IPIs. If a CPU receiving this IPI is not in a preemptible RCU read-side critical section (or is just now exiting one), it pushes quiescence up the rcu_node tree, otherwise, it sets a flag that will be handled by the upcoming outermost rcu_read_unlock(), which will then push quiescence up the tree. The expedited grace period must of course wait on any pre-existing blocked readers, and newly blocked readers must be queued carefully based on the state of both the normal and the expedited grace periods. This new queueing approach also avoids the need to update boost state, courtesy of the fact that blocked tasks are no longer ever migrated to the root rcu_node structure. Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9daa387c..7fa8c4d372e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1330,10 +1330,12 @@ struct sched_dl_entity { union rcu_special { struct { - bool blocked; - bool need_qs; - } b; - short s; + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + u8 pad; /* Otherwise the compiler can store garbage here. */ + } b; /* Bits. */ + u32 s; /* Set of bits. */ }; struct rcu_node; -- cgit v1.2.3 From b6a4ae766e3133a4db73fabc81e522d1601cb623 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 29 Jul 2015 13:29:38 +0800 Subject: rcu: Use rcu_callback_t in call_rcu*() and friends As we now have rcu_callback_t typedefs as the type of rcu callbacks, we should use it in call_rcu*() and friends as the type of parameters. This could save us a few lines of code and make it clear which function requires an rcu callbacks rather than other callbacks as its argument. Besides, this can also help cscope to generate a better database for code reading. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 10 +++++----- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 2 +- kernel/rcu/rcutorture.c | 4 ++-- kernel/rcu/srcu.c | 2 +- kernel/rcu/tiny.c | 8 ++++---- kernel/rcu/tree.c | 8 ++++---- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/update.c | 2 +- 10 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 581abf848566..d63bb77dab35 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -160,7 +160,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename, * more than one CPU). */ void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); #else /* #ifdef CONFIG_PREEMPT_RCU */ @@ -191,7 +191,7 @@ void call_rcu(struct rcu_head *head, * memory ordering guarantees. */ void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); /** * call_rcu_sched() - Queue an RCU for invocation after sched grace period. @@ -213,7 +213,7 @@ void call_rcu_bh(struct rcu_head *head, * memory ordering guarantees. */ void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); + rcu_callback_t func); void synchronize_sched(void); @@ -274,7 +274,7 @@ do { \ * See the description of call_rcu() for more detailed information on * memory ordering guarantees. */ -void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); +void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); void rcu_barrier_tasks(void); @@ -1065,7 +1065,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define __kfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ - kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ + kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \ } while (0) /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ff968b7af3a4..c8a0722f77ea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -83,7 +83,7 @@ static inline void synchronize_sched_expedited(void) } static inline void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) + rcu_callback_t func) { call_rcu(head, func); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5abec82f325e..60d15a080d7c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -48,7 +48,7 @@ void synchronize_rcu_bh(void); void synchronize_sched_expedited(void); void synchronize_rcu_expedited(void); -void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); /** * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 77192953dee5..51c8e7f02f48 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -448,7 +448,7 @@ static void synchronize_rcu_busted(void) } static void -call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +call_rcu_busted(struct rcu_head *head, rcu_callback_t func) { /* This is a deliberate bug for testing purposes only! */ func(head); @@ -523,7 +523,7 @@ static void srcu_torture_synchronize(void) } static void srcu_torture_call(struct rcu_head *head, - void (*func)(struct rcu_head *head)) + rcu_callback_t func) { call_srcu(srcu_ctlp, head, func); } diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index d3fcb2ec8536..9e6122540d28 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -387,7 +387,7 @@ static void srcu_flip(struct srcu_struct *sp) * srcu_struct structure. */ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, - void (*func)(struct rcu_head *head)) + rcu_callback_t func) { unsigned long flags; diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index d0471056d0af..944b1b491ed8 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -44,7 +44,7 @@ struct rcu_ctrlblk; static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); static void rcu_process_callbacks(struct softirq_action *unused); static void __call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu), + rcu_callback_t func, struct rcu_ctrlblk *rcp); #include "tiny_plugin.h" @@ -203,7 +203,7 @@ EXPORT_SYMBOL_GPL(synchronize_sched); * Helper function for call_rcu() and call_rcu_bh(). */ static void __call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu), + rcu_callback_t func, struct rcu_ctrlblk *rcp) { unsigned long flags; @@ -229,7 +229,7 @@ static void __call_rcu(struct rcu_head *head, * period. But since we have but one CPU, that would be after any * quiescent state. */ -void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_sched_ctrlblk); } @@ -239,7 +239,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); * Post an RCU bottom-half callback to be invoked after any subsequent * quiescent state. */ -void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_bh_ctrlblk); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 775d36cc0050..b9d9e0249e2f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3017,7 +3017,7 @@ static void rcu_leak_callback(struct rcu_head *rhp) * is expected to specify a CPU. */ static void -__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), +__call_rcu(struct rcu_head *head, rcu_callback_t func, struct rcu_state *rsp, int cpu, bool lazy) { unsigned long flags; @@ -3088,7 +3088,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), /* * Queue an RCU-sched callback for invocation after a grace period. */ -void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_sched_state, -1, 0); } @@ -3097,7 +3097,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); /* * Queue an RCU callback for invocation after a quicker grace period. */ -void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_bh_state, -1, 0); } @@ -3111,7 +3111,7 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); * function may only be called from __kfree_rcu(). */ void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) + rcu_callback_t func) { __call_rcu(head, func, rcu_state_p, -1, 1); } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 2e991f8361e4..ad11529375cc 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -584,7 +584,7 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_preempt_check_callbacks(void); -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +void call_rcu(struct rcu_head *head, rcu_callback_t func); static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index b2bf3963a0ae..06116ae6dfd7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -500,7 +500,7 @@ static void rcu_preempt_do_callbacks(void) /* * Queue a preemptible-RCU callback for invocation after a grace period. */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, rcu_state_p, -1, 0); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 7a0b3bc7c5ed..5f748c5a40f0 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -534,7 +534,7 @@ static void rcu_spawn_tasks_kthread(void); * Post an RCU-tasks callback. First call must be from process context * after the scheduler if fully operational. */ -void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) +void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; bool needwake; -- cgit v1.2.3 From bb73c52bad3666997ed2ec83c0c80c3f8ef55008 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 30 Jul 2015 16:55:38 -0700 Subject: rcu: Don't disable preemption for Tiny and Tree RCU readers Because preempt_disable() maps to barrier() for non-debug builds, it forces the compiler to spill and reload registers. Because Tree RCU and Tiny RCU now only appear in CONFIG_PREEMPT=n builds, these barrier() instances generate needless extra code for each instance of rcu_read_lock() and rcu_read_unlock(). This extra code slows down Tree RCU and bloats Tiny RCU. This commit therefore removes the preempt_disable() and preempt_enable() from the non-preemptible implementations of __rcu_read_lock() and __rcu_read_unlock(), respectively. However, for debug purposes, preempt_disable() and preempt_enable() are still invoked if CONFIG_PREEMPT_COUNT=y, because this allows detection of sleeping inside atomic sections in non-preemptible kernels. However, Tiny and Tree RCU operates by coalescing all RCU read-side critical sections on a given CPU that lie between successive quiescent states. It is therefore necessary to compensate for removing barriers from __rcu_read_lock() and __rcu_read_unlock() by adding them to a couple of the RCU functions invoked during quiescent states, namely to rcu_all_qs() and rcu_note_context_switch(). However, note that the latter is more paranoia than necessity, at least until link-time optimizations become more aggressive. This is based on an earlier patch by Paul E. McKenney, fixing a bug encountered in kernels built with CONFIG_PREEMPT=n and CONFIG_PREEMPT_COUNT=y. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++++-- include/linux/rcutiny.h | 1 + kernel/rcu/tree.c | 9 +++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d63bb77dab35..6c3ceceb6148 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -297,12 +297,14 @@ void synchronize_rcu(void); static inline void __rcu_read_lock(void) { - preempt_disable(); + if (IS_ENABLED(CONFIG_PREEMPT_COUNT)) + preempt_disable(); } static inline void __rcu_read_unlock(void) { - preempt_enable(); + if (IS_ENABLED(CONFIG_PREEMPT_COUNT)) + preempt_enable(); } static inline void synchronize_rcu(void) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c8a0722f77ea..4c1aaf9cce7b 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -216,6 +216,7 @@ static inline bool rcu_is_watching(void) static inline void rcu_all_qs(void) { + barrier(); /* Avoid RCU read-side critical sections leaking across. */ } #endif /* __LINUX_RCUTINY_H */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b9d9e0249e2f..93c0f23c3e45 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -337,12 +337,14 @@ static void rcu_momentary_dyntick_idle(void) */ void rcu_note_context_switch(void) { + barrier(); /* Avoid RCU read-side critical sections leaking down. */ trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(); rcu_preempt_note_context_switch(); if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) rcu_momentary_dyntick_idle(); trace_rcu_utilization(TPS("End context switch")); + barrier(); /* Avoid RCU read-side critical sections leaking up. */ } EXPORT_SYMBOL_GPL(rcu_note_context_switch); @@ -353,12 +355,19 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); * RCU flavors in desperate need of a quiescent state, which will normally * be none of them). Either way, do a lightweight quiescent state for * all RCU flavors. + * + * The barrier() calls are redundant in the common case when this is + * called externally, but just in case this is called from within this + * file. + * */ void rcu_all_qs(void) { + barrier(); /* Avoid RCU read-side critical sections leaking down. */ if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) rcu_momentary_dyntick_idle(); this_cpu_inc(rcu_qs_ctr); + barrier(); /* Avoid RCU read-side critical sections leaking up. */ } EXPORT_SYMBOL_GPL(rcu_all_qs); -- cgit v1.2.3 From 49f5903b473c5f63f3b57856d1bd4593db0a2eef Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Sep 2015 00:42:57 -0700 Subject: rcu: Move preemption disabling out of __srcu_read_lock() Currently, __srcu_read_lock() cannot be invoked from restricted environments because it contains calls to preempt_disable() and preempt_enable(), both of which can invoke lockdep, which is a bad idea in some restricted execution modes. This commit therefore moves the preempt_disable() and preempt_enable() from __srcu_read_lock() to srcu_read_lock(). It also inserts the preempt_disable() and preempt_enable() around the call to __srcu_read_lock() in do_exit(). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/srcu.h | 5 ++++- kernel/exit.c | 2 ++ kernel/rcu/srcu.c | 2 -- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index bdeb4567b71e..f5f80c5643ac 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -215,8 +215,11 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { - int retval = __srcu_read_lock(sp); + int retval; + preempt_disable(); + retval = __srcu_read_lock(sp); + preempt_enable(); rcu_lock_acquire(&(sp)->dep_map); return retval; } diff --git a/kernel/exit.c b/kernel/exit.c index ea95ee1b5ef7..0e93b63bbc59 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -761,7 +761,9 @@ void do_exit(long code) */ flush_ptrace_hw_breakpoint(tsk); + TASKS_RCU(preempt_disable()); TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu)); + TASKS_RCU(preempt_enable()); exit_notify(tsk, group_dead); proc_exit_connector(tsk); #ifdef CONFIG_NUMA diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 9e6122540d28..a63a1ea5a41b 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -298,11 +298,9 @@ int __srcu_read_lock(struct srcu_struct *sp) int idx; idx = READ_ONCE(sp->completed) & 0x1; - preempt_disable(); __this_cpu_inc(sp->per_cpu_ref->c[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ __this_cpu_inc(sp->per_cpu_ref->seq[idx]); - preempt_enable(); return idx; } EXPORT_SYMBOL_GPL(__srcu_read_lock); -- cgit v1.2.3 From c3ac7cf1847a4e68c909984f60d36adef2088e35 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Sep 2015 16:29:02 -0700 Subject: rcu: Add rcu_pointer_handoff() This commit adds an rcu_pointer_handoff() that is intended to mark situations where a structure's protection transitions from RCU to some other mechanism (locking, reference counting, whatever). These markings should allow external tools to more easily spot bugs involving leaking pointers out of RCU read-side critical sections. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6c3ceceb6148..587eb057e2fa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -812,6 +812,28 @@ static inline void rcu_preempt_sleep_check(void) */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) +/** + * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism + * @p: The pointer to hand off + * + * This is simply an identity function, but it documents where a pointer + * is handed off from RCU to some other synchronization mechanism, for + * example, reference counting or locking. In C11, it would map to + * kill_dependency(). It could be used as follows: + * + * rcu_read_lock(); + * p = rcu_dereference(gp); + * long_lived = is_long_lived(p); + * if (long_lived) { + * if (!atomic_inc_not_zero(p->refcnt)) + * long_lived = false; + * else + * p = rcu_pointer_handoff(p); + * } + * rcu_read_unlock(); + */ +#define rcu_pointer_handoff(p) (p) + /** * rcu_read_lock() - mark the beginning of an RCU read-side critical section * -- cgit v1.2.3 From 8db70b132dd57696cfc7560203a72e90c51bfdda Mon Sep 17 00:00:00 2001 From: Patrick Marlier Date: Fri, 11 Sep 2015 15:50:35 -0700 Subject: rculist: Make list_entry_rcu() use lockless_dereference() The current list_entry_rcu() implementation copies the pointer to a stack variable, then invokes rcu_dereference_raw() on it. This results in an additional store-load pair. Now, most compilers will emit normal store and load instructions, which might seem to be of negligible overhead, but this results in a load-hit-store situation that can cause surprisingly long pipeline stalls, even on modern microprocessors. The problem is that it takes time for the store to get the store buffer updated, which can delay the subsequent load, which immediately follows. This commit therefore switches to the lockless_dereference() primitive, which does not expect the __rcu annotations (that are anyway not present in the list_head structure) and which, like rcu_dereference_raw(), does not check for an enclosing RCU read-side critical section. Most importantly, it does not copy the pointer, thus avoiding the load-hit-store overhead. Signed-off-by: Patrick Marlier [ paulmck: Switched to lockless_dereference() to suppress sparse warnings. ] Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rculist.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 17c6b1f84a77..5ed540986019 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -247,10 +247,7 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ -({ \ - typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ - container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ -}) + container_of(lockless_dereference(ptr), type, member) /** * Where are list_empty_rcu() and list_first_entry_rcu()? -- cgit v1.2.3 From e62e3f620ba8d437f4998441fc11cf3dc9d466d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Sep 2015 12:23:01 -0700 Subject: rcu: Remove deprecated rcu_lockdep_assert() The old rcu_lockdep_assert() was retained to ease handling of incoming patches, but any use will result in deprecated warnings. However, its replacement, RCU_LOCKDEP_WARN(), is now upstream. It is therefore time to remove rcu_lockdep_assert(), which this commit does. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 587eb057e2fa..a0189ba67fde 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -537,28 +537,8 @@ static inline int rcu_read_lock_sched_held(void) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -/* Deprecate rcu_lockdep_assert(): Use RCU_LOCKDEP_WARN() instead. */ -static inline void __attribute((deprecated)) deprecate_rcu_lockdep_assert(void) -{ -} - #ifdef CONFIG_PROVE_RCU -/** - * rcu_lockdep_assert - emit lockdep splat if specified condition not met - * @c: condition to check - * @s: informative message - */ -#define rcu_lockdep_assert(c, s) \ - do { \ - static bool __section(.data.unlikely) __warned; \ - deprecate_rcu_lockdep_assert(); \ - if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ - __warned = true; \ - lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ - } \ - } while (0) - /** * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check @@ -596,7 +576,6 @@ static inline void rcu_preempt_sleep_check(void) #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c, s) deprecate_rcu_lockdep_assert() #define RCU_LOCKDEP_WARN(c, s) do { } while (0) #define rcu_sleep_check() do { } while (0) -- cgit v1.2.3 From 7f5f873c6a0772970d5fee1f364231207051ecd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 18 Sep 2015 08:45:22 -0700 Subject: rculist: Use WRITE_ONCE() when deleting from reader-visible list The various RCU list-deletion macros (list_del_rcu(), hlist_del_init_rcu(), hlist_del_rcu(), hlist_bl_del_init_rcu(), hlist_bl_del_rcu(), hlist_nulls_del_init_rcu(), and hlist_nulls_del_rcu()) do plain stores into the ->next pointer of the preceding list elemment. Unfortunately, the compiler is within its rights to (for example) use byte-at-a-time writes to update the pointer, which would fatally confuse concurrent readers. This patch therefore adds the needed WRITE_ONCE() macros. KernelThreadSanitizer (KTSAN) reported the __hlist_del() issue, which is a problem when __hlist_del() is invoked by hlist_del_rcu(). Reported-by: Dmitry Vyukov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/list.h | 5 +++-- include/linux/list_bl.h | 5 +++-- include/linux/list_nulls.h | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 3e3e64a61002..993395a2e55c 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -87,7 +87,7 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) static inline void __list_del(struct list_head * prev, struct list_head * next) { next->prev = prev; - prev->next = next; + WRITE_ONCE(prev->next, next); } /** @@ -615,7 +615,8 @@ static inline void __hlist_del(struct hlist_node *n) { struct hlist_node *next = n->next; struct hlist_node **pprev = n->pprev; - *pprev = next; + + WRITE_ONCE(*pprev, next); if (next) next->pprev = pprev; } diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 2eb88556c5c5..8132214e8efd 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -93,9 +93,10 @@ static inline void __hlist_bl_del(struct hlist_bl_node *n) LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); /* pprev may be `first`, so be careful not to lose the lock bit */ - *pprev = (struct hlist_bl_node *) + WRITE_ONCE(*pprev, + (struct hlist_bl_node *) ((unsigned long)next | - ((unsigned long)*pprev & LIST_BL_LOCKMASK)); + ((unsigned long)*pprev & LIST_BL_LOCKMASK))); if (next) next->pprev = pprev; } diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index f266661d2666..444d2b1313bd 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -76,7 +76,8 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) { struct hlist_nulls_node *next = n->next; struct hlist_nulls_node **pprev = n->pprev; - *pprev = next; + + WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) next->pprev = pprev; } -- cgit v1.2.3 From cc44ca848f5e517aeca9f5eabbe13609a3f71450 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:44 +0200 Subject: rcu: Create rcu_sync infrastructure The rcu_sync infrastructure can be thought of as infrastructure to be used to implement reader-writer primitives having extremely lightweight readers during times when there are no writers. The first use is in the percpu_rwsem used by the VFS subsystem. This infrastructure is functionally equivalent to struct rcu_sync_struct { atomic_t counter; }; /* Check possibility of fast-path read-side operations. */ static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss) { return atomic_read(&rss->counter) == 0; } /* Tell readers to use slowpaths. */ static inline void rcu_sync_enter(struct rcu_sync_struct *rss) { atomic_inc(&rss->counter); synchronize_sched(); } /* Allow readers to once again use fastpaths. */ static inline void rcu_sync_exit(struct rcu_sync_struct *rss) { synchronize_sched(); atomic_dec(&rss->counter); } The main difference is that it records the state and only calls synchronize_sched() if required. At least some of the calls to synchronize_sched() will be optimized away when rcu_sync_enter() and rcu_sync_exit() are invoked repeatedly in quick succession. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 94 +++++++++++++++++++++++++ kernel/rcu/Makefile | 2 +- kernel/rcu/sync.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 include/linux/rcu_sync.h create mode 100644 kernel/rcu/sync.c (limited to 'include') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h new file mode 100644 index 000000000000..cb044df2e21c --- /dev/null +++ b/include/linux/rcu_sync.h @@ -0,0 +1,94 @@ +/* + * RCU-based infrastructure for lightweight reader-writer locking + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (c) 2015, Red Hat, Inc. + * + * Author: Oleg Nesterov + */ + +#ifndef _LINUX_RCU_SYNC_H_ +#define _LINUX_RCU_SYNC_H_ + +#include +#include + +/* Structure to mediate between updaters and fastpath-using readers. */ +struct rcu_sync { + int gp_state; + int gp_count; + wait_queue_head_t gp_wait; + + int cb_state; + struct rcu_head cb_head; + + void (*sync)(void); + void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +}; + +#define ___RCU_SYNC_INIT(name) \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0 + +#define __RCU_SCHED_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_sched, \ + .call = call_rcu_sched, \ +} + +#define __RCU_BH_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu_bh, \ + .call = call_rcu_bh, \ +} + +#define __RCU_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu, \ + .call = call_rcu, \ +} + +#define DEFINE_RCU_SCHED_SYNC(name) \ + struct rcu_sync name = __RCU_SCHED_SYNC_INIT(name) + +#define DEFINE_RCU_BH_SYNC(name) \ + struct rcu_sync name = __RCU_BH_SYNC_INIT(name) + +#define DEFINE_RCU_SYNC(name) \ + struct rcu_sync name = __RCU_SYNC_INIT(name) + +/** + * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * Returns true if readers are permitted to use their fastpaths. + * Must be invoked within an RCU read-side critical section whose + * flavor matches that of the rcu_sync struture. + */ +static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) +{ + return !rsp->gp_state; /* GP_IDLE */ +} + +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + +extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); +extern void rcu_sync_enter(struct rcu_sync *); +extern void rcu_sync_exit(struct rcu_sync *); + +#endif /* _LINUX_RCU_SYNC_H_ */ diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 50a808424b06..61a16569ffbf 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -1,4 +1,4 @@ -obj-y += update.o +obj-y += update.o sync.o obj-$(CONFIG_SRCU) += srcu.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += tree.o diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c new file mode 100644 index 000000000000..0a11df43be23 --- /dev/null +++ b/kernel/rcu/sync.c @@ -0,0 +1,175 @@ +/* + * RCU-based infrastructure for lightweight reader-writer locking + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (c) 2015, Red Hat, Inc. + * + * Author: Oleg Nesterov + */ + +#include +#include + +enum { GP_IDLE = 0, GP_PENDING, GP_PASSED }; +enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; + +#define rss_lock gp_wait.lock + +/** + * rcu_sync_init() - Initialize an rcu_sync structure + * @rsp: Pointer to rcu_sync structure to be initialized + * @type: Flavor of RCU with which to synchronize rcu_sync structure + */ +void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) +{ + memset(rsp, 0, sizeof(*rsp)); + init_waitqueue_head(&rsp->gp_wait); + + switch (type) { + case RCU_SYNC: + rsp->sync = synchronize_rcu; + rsp->call = call_rcu; + break; + + case RCU_SCHED_SYNC: + rsp->sync = synchronize_sched; + rsp->call = call_rcu_sched; + break; + + case RCU_BH_SYNC: + rsp->sync = synchronize_rcu_bh; + rsp->call = call_rcu_bh; + break; + } +} + +/** + * rcu_sync_enter() - Force readers onto slowpath + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * This function is used by updaters who need readers to make use of + * a slowpath during the update. After this function returns, all + * subsequent calls to rcu_sync_is_idle() will return false, which + * tells readers to stay off their fastpaths. A later call to + * rcu_sync_exit() re-enables reader slowpaths. + * + * When called in isolation, rcu_sync_enter() must wait for a grace + * period, however, closely spaced calls to rcu_sync_enter() can + * optimize away the grace-period wait via a state machine implemented + * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func(). + */ +void rcu_sync_enter(struct rcu_sync *rsp) +{ + bool need_wait, need_sync; + + spin_lock_irq(&rsp->rss_lock); + need_wait = rsp->gp_count++; + need_sync = rsp->gp_state == GP_IDLE; + if (need_sync) + rsp->gp_state = GP_PENDING; + spin_unlock_irq(&rsp->rss_lock); + + BUG_ON(need_wait && need_sync); + + if (need_sync) { + rsp->sync(); + rsp->gp_state = GP_PASSED; + wake_up_all(&rsp->gp_wait); + } else if (need_wait) { + wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED); + } else { + /* + * Possible when there's a pending CB from a rcu_sync_exit(). + * Nobody has yet been allowed the 'fast' path and thus we can + * avoid doing any sync(). The callback will get 'dropped'. + */ + BUG_ON(rsp->gp_state != GP_PASSED); + } +} + +/** + * rcu_sync_func() - Callback function managing reader access to fastpath + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * This function is passed to one of the call_rcu() functions by + * rcu_sync_exit(), so that it is invoked after a grace period following the + * that invocation of rcu_sync_exit(). It takes action based on events that + * have taken place in the meantime, so that closely spaced rcu_sync_enter() + * and rcu_sync_exit() pairs need not wait for a grace period. + * + * If another rcu_sync_enter() is invoked before the grace period + * ended, reset state to allow the next rcu_sync_exit() to let the + * readers back onto their fastpaths (after a grace period). If both + * another rcu_sync_enter() and its matching rcu_sync_exit() are invoked + * before the grace period ended, re-invoke call_rcu() on behalf of that + * rcu_sync_exit(). Otherwise, set all state back to idle so that readers + * can again use their fastpaths. + */ +static void rcu_sync_func(struct rcu_head *rcu) +{ + struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head); + unsigned long flags; + + BUG_ON(rsp->gp_state != GP_PASSED); + BUG_ON(rsp->cb_state == CB_IDLE); + + spin_lock_irqsave(&rsp->rss_lock, flags); + if (rsp->gp_count) { + /* + * A new rcu_sync_begin() has happened; drop the callback. + */ + rsp->cb_state = CB_IDLE; + } else if (rsp->cb_state == CB_REPLAY) { + /* + * A new rcu_sync_exit() has happened; requeue the callback + * to catch a later GP. + */ + rsp->cb_state = CB_PENDING; + rsp->call(&rsp->cb_head, rcu_sync_func); + } else { + /* + * We're at least a GP after rcu_sync_exit(); eveybody will now + * have observed the write side critical section. Let 'em rip!. + */ + rsp->cb_state = CB_IDLE; + rsp->gp_state = GP_IDLE; + } + spin_unlock_irqrestore(&rsp->rss_lock, flags); +} + +/** + * rcu_sync_exit() - Allow readers back onto fast patch after grace period + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * This function is used by updaters who have completed, and can therefore + * now allow readers to make use of their fastpaths after a grace period + * has elapsed. After this grace period has completed, all subsequent + * calls to rcu_sync_is_idle() will return true, which tells readers that + * they can once again use their fastpaths. + */ +void rcu_sync_exit(struct rcu_sync *rsp) +{ + spin_lock_irq(&rsp->rss_lock); + if (!--rsp->gp_count) { + if (rsp->cb_state == CB_IDLE) { + rsp->cb_state = CB_PENDING; + rsp->call(&rsp->cb_head, rcu_sync_func); + } else if (rsp->cb_state == CB_PENDING) { + rsp->cb_state = CB_REPLAY; + } + } + spin_unlock_irq(&rsp->rss_lock); +} -- cgit v1.2.3 From 82e8c565be8a72957570d7da8dd9b441db7bb648 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:47 +0200 Subject: rcu_sync: Simplify rcu_sync using new rcu_sync_ops structure This commit adds the new struct rcu_sync_ops which holds sync/call methods, and turns the function pointers in rcu_sync_struct into an array of struct rcu_sync_ops. This simplifies the "init" helpers by collapsing a switch statement and explicit multiple definitions into a simple assignment and a helper macro, respectively. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 60 +++++++++++++++++++----------------------------- kernel/rcu/sync.c | 42 +++++++++++++++++---------------- 2 files changed, 45 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index cb044df2e21c..c6d2272c4459 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -26,6 +26,8 @@ #include #include +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + /* Structure to mediate between updaters and fastpath-using readers. */ struct rcu_sync { int gp_state; @@ -35,43 +37,9 @@ struct rcu_sync { int cb_state; struct rcu_head cb_head; - void (*sync)(void); - void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); + enum rcu_sync_type gp_type; }; -#define ___RCU_SYNC_INIT(name) \ - .gp_state = 0, \ - .gp_count = 0, \ - .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ - .cb_state = 0 - -#define __RCU_SCHED_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_sched, \ - .call = call_rcu_sched, \ -} - -#define __RCU_BH_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu_bh, \ - .call = call_rcu_bh, \ -} - -#define __RCU_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu, \ - .call = call_rcu, \ -} - -#define DEFINE_RCU_SCHED_SYNC(name) \ - struct rcu_sync name = __RCU_SCHED_SYNC_INIT(name) - -#define DEFINE_RCU_BH_SYNC(name) \ - struct rcu_sync name = __RCU_BH_SYNC_INIT(name) - -#define DEFINE_RCU_SYNC(name) \ - struct rcu_sync name = __RCU_SYNC_INIT(name) - /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization @@ -85,10 +53,28 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) return !rsp->gp_state; /* GP_IDLE */ } -enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; - extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); +#define __RCU_SYNC_INITIALIZER(name, type) { \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0, \ + .gp_type = type, \ + } + +#define __DEFINE_RCU_SYNC(name, type) \ + struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type) + +#define DEFINE_RCU_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_SYNC) + +#define DEFINE_RCU_SCHED_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC) + +#define DEFINE_RCU_BH_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_BH_SYNC) + #endif /* _LINUX_RCU_SYNC_H_ */ diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 0a11df43be23..5a9aa4c394f1 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -23,6 +23,24 @@ #include #include +static const struct { + void (*sync)(void); + void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +} gp_ops[] = { + [RCU_SYNC] = { + .sync = synchronize_rcu, + .call = call_rcu, + }, + [RCU_SCHED_SYNC] = { + .sync = synchronize_sched, + .call = call_rcu_sched, + }, + [RCU_BH_SYNC] = { + .sync = synchronize_rcu_bh, + .call = call_rcu_bh, + }, +}; + enum { GP_IDLE = 0, GP_PENDING, GP_PASSED }; enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; @@ -37,23 +55,7 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) { memset(rsp, 0, sizeof(*rsp)); init_waitqueue_head(&rsp->gp_wait); - - switch (type) { - case RCU_SYNC: - rsp->sync = synchronize_rcu; - rsp->call = call_rcu; - break; - - case RCU_SCHED_SYNC: - rsp->sync = synchronize_sched; - rsp->call = call_rcu_sched; - break; - - case RCU_BH_SYNC: - rsp->sync = synchronize_rcu_bh; - rsp->call = call_rcu_bh; - break; - } + rsp->gp_type = type; } /** @@ -85,7 +87,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) BUG_ON(need_wait && need_sync); if (need_sync) { - rsp->sync(); + gp_ops[rsp->gp_type].sync(); rsp->gp_state = GP_PASSED; wake_up_all(&rsp->gp_wait); } else if (need_wait) { @@ -138,7 +140,7 @@ static void rcu_sync_func(struct rcu_head *rcu) * to catch a later GP. */ rsp->cb_state = CB_PENDING; - rsp->call(&rsp->cb_head, rcu_sync_func); + gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func); } else { /* * We're at least a GP after rcu_sync_exit(); eveybody will now @@ -166,7 +168,7 @@ void rcu_sync_exit(struct rcu_sync *rsp) if (!--rsp->gp_count) { if (rsp->cb_state == CB_IDLE) { rsp->cb_state = CB_PENDING; - rsp->call(&rsp->cb_head, rcu_sync_func); + gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func); } else if (rsp->cb_state == CB_PENDING) { rsp->cb_state = CB_REPLAY; } -- cgit v1.2.3 From 3a518b76af7bb411efe6dd090fbf098e29accb2e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:50 +0200 Subject: rcu_sync: Add CONFIG_PROVE_RCU checks This commit validates that the caller of rcu_sync_is_idle() holds the corresponding type of RCU read-side lock, but only in kernels built with CONFIG_PROVE_RCU=y. This validation is carried out via a new rcu_sync_ops->held() method that is checked within rcu_sync_is_idle(). Note that although this does add code to the fast path, it only does so in kernels built with CONFIG_PROVE_RCU=y. Suggested-by: "Paul E. McKenney" Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 6 ++++++ kernel/rcu/sync.c | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index c6d2272c4459..1f2d4fc30b04 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -40,6 +40,8 @@ struct rcu_sync { enum rcu_sync_type gp_type; }; +extern bool __rcu_sync_is_idle(struct rcu_sync *); + /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization @@ -50,7 +52,11 @@ struct rcu_sync { */ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { +#ifdef CONFIG_PROVE_RCU + return __rcu_sync_is_idle(rsp); +#else return !rsp->gp_state; /* GP_IDLE */ +#endif } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 5a9aa4c394f1..01c9807a7f73 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -23,21 +23,33 @@ #include #include +#ifdef CONFIG_PROVE_RCU +#define __INIT_HELD(func) .held = func, +#else +#define __INIT_HELD(func) +#endif + static const struct { void (*sync)(void); void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +#ifdef CONFIG_PROVE_RCU + int (*held)(void); +#endif } gp_ops[] = { [RCU_SYNC] = { .sync = synchronize_rcu, .call = call_rcu, + __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { .sync = synchronize_sched, .call = call_rcu_sched, + __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { .sync = synchronize_rcu_bh, .call = call_rcu_bh, + __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -46,6 +58,14 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; #define rss_lock gp_wait.lock +#ifdef CONFIG_PROVE_RCU +bool __rcu_sync_is_idle(struct rcu_sync *rsp) +{ + WARN_ON(!gp_ops[rsp->gp_type].held()); + return rsp->gp_state == GP_IDLE; +} +#endif + /** * rcu_sync_init() - Initialize an rcu_sync structure * @rsp: Pointer to rcu_sync structure to be initialized -- cgit v1.2.3 From 07899a6e5f56136028c44a57ad0451e797365ac3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:52 +0200 Subject: rcu_sync: Introduce rcu_sync_dtor() This commit allows rcu_sync structures to be safely deallocated, The trick is to add a new ->wait field to the gp_ops array. This field is a pointer to the rcu_barrier() function corresponding to the flavor of RCU in question. This allows a new rcu_sync_dtor() to wait for any outstanding callbacks before freeing the rcu_sync structure. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 1 + kernel/rcu/sync.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 1f2d4fc30b04..8069d6468bc4 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -62,6 +62,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); +extern void rcu_sync_dtor(struct rcu_sync *); #define __RCU_SYNC_INITIALIZER(name, type) { \ .gp_state = 0, \ diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 01c9807a7f73..1e353f0a2b66 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -32,6 +32,7 @@ static const struct { void (*sync)(void); void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); + void (*wait)(void); #ifdef CONFIG_PROVE_RCU int (*held)(void); #endif @@ -39,16 +40,19 @@ static const struct { [RCU_SYNC] = { .sync = synchronize_rcu, .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { .sync = synchronize_sched, .call = call_rcu_sched, + .wait = rcu_barrier_sched, __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { .sync = synchronize_rcu_bh, .call = call_rcu_bh, + .wait = rcu_barrier_bh, __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -195,3 +199,25 @@ void rcu_sync_exit(struct rcu_sync *rsp) } spin_unlock_irq(&rsp->rss_lock); } + +/** + * rcu_sync_dtor() - Clean up an rcu_sync structure + * @rsp: Pointer to rcu_sync structure to be cleaned up + */ +void rcu_sync_dtor(struct rcu_sync *rsp) +{ + int cb_state; + + BUG_ON(rsp->gp_count); + + spin_lock_irq(&rsp->rss_lock); + if (rsp->cb_state == CB_REPLAY) + rsp->cb_state = CB_PENDING; + cb_state = rsp->cb_state; + spin_unlock_irq(&rsp->rss_lock); + + if (cb_state != CB_IDLE) { + gp_ops[rsp->gp_type].wait(); + BUG_ON(rsp->cb_state != CB_IDLE); + } +} -- cgit v1.2.3 From 001dac627ff37433d5528ffb0d897cd19c2b1e43 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:57 +0200 Subject: locking/percpu-rwsem: Make use of the rcu_sync infrastructure Currently down_write/up_write calls synchronize_sched_expedited() twice, which is evil. Change this code to rely on rcu-sync primitives. This avoids the _expedited "big hammer", and this can be faster in the contended case or even in the case when a single thread does down_write/up_write in a loop. Of course, a single down_write() will take more time, but otoh it will be much more friendly to the whole system. To simplify the review this patch doesn't update the comments, fixed by the next change. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/percpu-rwsem.h | 3 ++- kernel/locking/percpu-rwsem.c | 18 +++++++----------- 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 834c4e52cb2d..c2fa3ecb0dce 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -5,11 +5,12 @@ #include #include #include +#include #include struct percpu_rw_semaphore { + struct rcu_sync rss; unsigned int __percpu *fast_read_ctr; - atomic_t write_ctr; struct rw_semaphore rw_sem; atomic_t slow_read_ctr; wait_queue_head_t write_waitq; diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 9529a30ec57b..183a71151ac0 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -17,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ __init_rwsem(&brw->rw_sem, name, rwsem_key); - atomic_set(&brw->write_ctr, 0); + rcu_sync_init(&brw->rss, RCU_SCHED_SYNC); atomic_set(&brw->slow_read_ctr, 0); init_waitqueue_head(&brw->write_waitq); return 0; @@ -33,6 +33,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) if (!brw->fast_read_ctr) return; + rcu_sync_dtor(&brw->rss); free_percpu(brw->fast_read_ctr); brw->fast_read_ctr = NULL; /* catch use after free bugs */ } @@ -62,13 +63,12 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) */ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) { - bool success = false; + bool success; preempt_disable(); - if (likely(!atomic_read(&brw->write_ctr))) { + success = rcu_sync_is_idle(&brw->rss); + if (likely(success)) __this_cpu_add(*brw->fast_read_ctr, val); - success = true; - } preempt_enable(); return success; @@ -149,8 +149,6 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) */ void percpu_down_write(struct percpu_rw_semaphore *brw) { - /* tell update_fast_ctr() there is a pending writer */ - atomic_inc(&brw->write_ctr); /* * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read * so that update_fast_ctr() can't succeed. @@ -162,7 +160,7 @@ void percpu_down_write(struct percpu_rw_semaphore *brw) * fast-path, it executes a full memory barrier before we return. * See R_W case in the comment above update_fast_ctr(). */ - synchronize_sched_expedited(); + rcu_sync_enter(&brw->rss); /* exclude other writers, and block the new readers completely */ down_write(&brw->rw_sem); @@ -183,8 +181,6 @@ void percpu_up_write(struct percpu_rw_semaphore *brw) * Insert the barrier before the next fast-path in down_read, * see W_R case in the comment above update_fast_ctr(). */ - synchronize_sched_expedited(); - /* the last writer unblocks update_fast_ctr() */ - atomic_dec(&brw->write_ctr); + rcu_sync_exit(&brw->rss); } EXPORT_SYMBOL_GPL(percpu_up_write); -- cgit v1.2.3 From 4bace7344d6dbd7a1b0b801abf24ea9878064317 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Sep 2015 17:59:18 +0200 Subject: rcu_sync: Cleanup the CONFIG_PROVE_RCU checks 1. Rename __rcu_sync_is_idle() to rcu_sync_lockdep_assert() and change it to use rcu_lockdep_assert(). 2. Change rcu_sync_is_idle() to return rsp->gp_state == GP_IDLE unconditonally, this way we can remove the same check from rcu_sync_lockdep_assert() and clearly isolate the debugging code. Note: rcu_sync_enter()->wait_event(gp_state == GP_PASSED) needs another CONFIG_PROVE_RCU check, the same as is done in ->sync(); but this needs some simple preparations in the core RCU code to avoid the code duplication. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 7 +++---- kernel/rcu/sync.c | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 8069d6468bc4..a63a33e6196e 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -40,7 +40,7 @@ struct rcu_sync { enum rcu_sync_type gp_type; }; -extern bool __rcu_sync_is_idle(struct rcu_sync *); +extern void rcu_sync_lockdep_assert(struct rcu_sync *); /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? @@ -53,10 +53,9 @@ extern bool __rcu_sync_is_idle(struct rcu_sync *); static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { #ifdef CONFIG_PROVE_RCU - return __rcu_sync_is_idle(rsp); -#else - return !rsp->gp_state; /* GP_IDLE */ + rcu_sync_lockdep_assert(rsp); #endif + return !rsp->gp_state; /* GP_IDLE */ } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 1e353f0a2b66..be922c9f3d37 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -63,10 +63,10 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; #define rss_lock gp_wait.lock #ifdef CONFIG_PROVE_RCU -bool __rcu_sync_is_idle(struct rcu_sync *rsp) +void rcu_sync_lockdep_assert(struct rcu_sync *rsp) { - WARN_ON(!gp_ops[rsp->gp_type].held()); - return rsp->gp_state == GP_IDLE; + RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(), + "suspicious rcu_sync_is_idle() usage"); } #endif -- cgit v1.2.3 From 02ef3c4a2aae65a1632b27770bfea3f83ca06772 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Aug 2015 15:14:30 -0700 Subject: cpu: Remove try_get_online_cpus() Now that synchronize_sched_expedited() no longer uses it, there are no users of try_get_online_cpus() in mainline. This commit therefore removes it. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner --- include/linux/cpu.h | 2 -- kernel/cpu.c | 13 ------------- 2 files changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 23c30bdcca86..d2ca8c38f9c4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -228,7 +228,6 @@ extern struct bus_type cpu_subsys; extern void cpu_hotplug_begin(void); extern void cpu_hotplug_done(void); extern void get_online_cpus(void); -extern bool try_get_online_cpus(void); extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); @@ -246,7 +245,6 @@ int cpu_down(unsigned int cpu); static inline void cpu_hotplug_begin(void) {} static inline void cpu_hotplug_done(void) {} #define get_online_cpus() do { } while (0) -#define try_get_online_cpus() true #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) diff --git a/kernel/cpu.c b/kernel/cpu.c index 82cf9dff4295..14a9cdf8abe9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -102,19 +102,6 @@ void get_online_cpus(void) } EXPORT_SYMBOL_GPL(get_online_cpus); -bool try_get_online_cpus(void) -{ - if (cpu_hotplug.active_writer == current) - return true; - if (!mutex_trylock(&cpu_hotplug.lock)) - return false; - cpuhp_lock_acquire_tryread(); - atomic_inc(&cpu_hotplug.refcount); - mutex_unlock(&cpu_hotplug.lock); - return true; -} -EXPORT_SYMBOL_GPL(try_get_online_cpus); - void put_online_cpus(void) { int refcount; -- cgit v1.2.3