From 1c97be677f72b3c338312aecd36d8fff20322f32 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 20 Sep 2015 22:02:17 -0700 Subject: list: Use WRITE_ONCE() when adding to lists and hlists Code that does lockless emptiness testing of non-RCU lists is relying on the list-addition code to write the list head's ->next pointer atomically. This commit therefore adds WRITE_ONCE() to list-addition pointer stores that could affect the head's ->next pointer. Reported-by: Dmitry Vyukov Signed-off-by: Paul E. McKenney --- include/linux/list.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 993395a2e55c..d7e31fe398b3 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -42,7 +42,7 @@ static inline void __list_add(struct list_head *new, next->prev = new; new->next = next; new->prev = prev; - prev->next = new; + WRITE_ONCE(prev->next, new); } #else extern void __list_add(struct list_head *new, @@ -642,7 +642,7 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) n->next = first; if (first) first->pprev = &n->next; - h->first = n; + WRITE_ONCE(h->first, n); n->pprev = &h->first; } @@ -653,14 +653,14 @@ static inline void hlist_add_before(struct hlist_node *n, n->pprev = next->pprev; n->next = next; next->pprev = &n->next; - *(n->pprev) = n; + WRITE_ONCE(*(n->pprev), n); } static inline void hlist_add_behind(struct hlist_node *n, struct hlist_node *prev) { n->next = prev->next; - prev->next = n; + WRITE_ONCE(prev->next, n); n->pprev = &prev->next; if (n->next) -- cgit v1.2.3 From 1658d35ead5d8dd76f2b2d6ad0e32c08d123faa2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 20 Sep 2015 17:03:16 -0700 Subject: list: Use READ_ONCE() when testing for empty lists Most of the list-empty-check macros (list_empty(), hlist_empty(), hlist_bl_empty(), and hlist_nulls_empty()) use an unadorned load to check the list header. Given that these macros are sometimes invoked without the protection of a lock, this is not sufficient. This commit therefore adds READ_ONCE() calls to them. This commit does not touch llist_empty() because it already has the needed ACCESS_ONCE(). Reported-by: Dmitry Vyukov Signed-off-by: Paul E.
McKenney --- include/linux/list.h | 4 ++-- include/linux/list_bl.h | 2 +- include/linux/list_nulls.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index d7e31fe398b3..06c2d887a918 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -186,7 +186,7 @@ static inline int list_is_last(const struct list_head *list, */ static inline int list_empty(const struct list_head *head) { - return head->next == head; + return READ_ONCE(head->next) == head; } /** @@ -608,7 +608,7 @@ static inline int hlist_unhashed(const struct hlist_node *h) static inline int hlist_empty(const struct hlist_head *h) { - return !h->first; + return !READ_ONCE(h->first); } static inline void __hlist_del(struct hlist_node *n) diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 8132214e8efd..ee7229a6c06a 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -70,7 +70,7 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h, static inline int hlist_bl_empty(const struct hlist_bl_head *h) { - return !((unsigned long)h->first & ~LIST_BL_LOCKMASK); + return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK); } static inline void hlist_bl_add_head(struct hlist_bl_node *n, diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 444d2b1313bd..b01fe1009084 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -57,7 +57,7 @@ static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) { - return is_a_nulls(h->first); + return is_a_nulls(READ_ONCE(h->first)); } static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, -- cgit v1.2.3 From 5a9be7c628c5273f84abacebf7faf2488376e0f0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 Nov 2015 15:44:06 -0800 Subject: rcu: Add rcu_normal kernel parameter to suppress expediting Although expedited grace periods can be quite useful, and although their OS jitter has been greatly reduced, they can still pose problems for extreme real-time workloads. This commit therefore adds a rcu_normal kernel boot parameter (which can also be manipulated via sysfs) to suppress expedited grace periods, that is, to treat requests for expedited grace periods as if they were requests for normal grace periods. If both rcu_expedited and rcu_normal are specified, rcu_normal wins. This means that if you are relying on expedited grace periods to speed up boot, you will want to specify rcu_expedited on the kernel command line, and then specify rcu_normal via sysfs once boot completes. Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 19 ++++++++++++++----- include/linux/rcupdate.h | 6 ++++++ kernel/ksysfs.c | 22 ++++++++++++++++++++-- kernel/rcu/srcu.c | 2 +- kernel/rcu/tree.c | 6 ++++++ kernel/rcu/tree_plugin.h | 6 ++++++ kernel/rcu/update.c | 12 ++++++++++++ 7 files changed, 65 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 742f69d18fc8..7673943d3085 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3296,6 +3296,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rcutorture.verbose= [KNL] Enable additional printk() statements. + rcupdate.rcu_cpu_stall_suppress= [KNL] + Suppress RCU CPU stall warning messages. 
+ + rcupdate.rcu_cpu_stall_timeout= [KNL] + Set timeout for RCU CPU stall warning messages. + rcupdate.rcu_expedited= [KNL] Use expedited grace-period primitives, for example, synchronize_rcu_expedited() instead @@ -3303,11 +3309,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. but can increase CPU utilization, degrade real-time latency, and degrade energy efficiency. - rcupdate.rcu_cpu_stall_suppress= [KNL] - Suppress RCU CPU stall warning messages. - - rcupdate.rcu_cpu_stall_timeout= [KNL] - Set timeout for RCU CPU stall warning messages. + rcupdate.rcu_normal= [KNL] + Use only normal grace-period primitives, + for example, synchronize_rcu() instead of + synchronize_rcu_expedited(). This improves + real-time latency, CPU utilization, and energy + efficiency, but can expose users to increased + grace-period latency. This parameter overrides + rcupdate.rcu_expedited. rcupdate.rcu_task_stall_timeout= [KNL] Set timeout in jiffies for RCU task stall warning diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a0189ba67fde..98d9f30c02d4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -49,9 +49,14 @@ #include extern int rcu_expedited; /* for sysctl */ +extern int rcu_normal; /* also for sysctl */ #ifdef CONFIG_TINY_RCU /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ +static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */ +{ + return true; +} static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ { return false; @@ -65,6 +70,7 @@ static inline void rcu_unexpedite_gp(void) { } #else /* #ifdef CONFIG_TINY_RCU */ +bool rcu_gp_is_normal(void); /* Internal RCU use. */ bool rcu_gp_is_expedited(void); /* Internal RCU use. */ void rcu_expedite_gp(void); void rcu_unexpedite_gp(void); diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index e83b26464061..b4e2fa52d8bc 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -20,7 +20,7 @@ #include #include -#include /* rcu_expedited */ +#include /* rcu_expedited and rcu_normal */ #define KERNEL_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) @@ -148,7 +148,7 @@ int rcu_expedited; static ssize_t rcu_expedited_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", rcu_expedited); + return sprintf(buf, "%d\n", READ_ONCE(rcu_expedited)); } static ssize_t rcu_expedited_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -161,6 +161,23 @@ static ssize_t rcu_expedited_store(struct kobject *kobj, } KERNEL_ATTR_RW(rcu_expedited); +int rcu_normal; +static ssize_t rcu_normal_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", READ_ONCE(rcu_normal)); +} +static ssize_t rcu_normal_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + if (kstrtoint(buf, 0, &rcu_normal)) + return -EINVAL; + + return count; +} +KERNEL_ATTR_RW(rcu_normal); + /* * Make /sys/kernel/notes give the raw contents of our kernel .notes section. 
*/ @@ -203,6 +220,7 @@ static struct attribute * kernel_attrs[] = { &vmcoreinfo_attr.attr, #endif &rcu_expedited_attr.attr, + &rcu_normal_attr.attr, NULL }; diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index a63a1ea5a41b..9b9cdd549caa 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -489,7 +489,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) */ void synchronize_srcu(struct srcu_struct *sp) { - __synchronize_srcu(sp, rcu_gp_is_expedited() + __synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal()) ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT : SYNCHRONIZE_SRCU_TRYCOUNT); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 6a652d1f3d7f..489992997c06 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3841,6 +3841,12 @@ void synchronize_sched_expedited(void) if (rcu_blocking_is_gp()) return; + /* If expedited grace periods are prohibited, fall back to normal. */ + if (rcu_gp_is_normal()) { + wait_rcu_gp(call_rcu_sched); + return; + } + /* Take a snapshot of the sequence number. */ s = rcu_exp_gp_seq_snap(rsp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 57ba873d2f18..d45df3781551 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -746,6 +746,12 @@ void synchronize_rcu_expedited(void) struct rcu_state *rsp = rcu_state_p; unsigned long s; + /* If expedited grace periods are prohibited, fall back to normal. */ + if (rcu_gp_is_normal()) { + wait_rcu_gp(call_rcu); + return; + } + s = rcu_exp_gp_seq_snap(rsp); rnp_unlock = exp_funnel_lock(rsp, s); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 5f748c5a40f0..8fccda3a794d 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -61,6 +61,7 @@ MODULE_ALIAS("rcupdate"); #define MODULE_PARAM_PREFIX "rcupdate." module_param(rcu_expedited, int, 0); +module_param(rcu_normal, int, 0); #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT) /** @@ -113,6 +114,17 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held); #ifndef CONFIG_TINY_RCU +/* + * Should expedited grace-period primitives always fall back to their + * non-expedited counterparts? Intended for use within RCU. Note + * that if the user specifies both rcu_expedited and rcu_normal, then + * rcu_normal wins. + */ +bool rcu_gp_is_normal(void) +{ + return READ_ONCE(rcu_normal); +} + static atomic_t rcu_expedited_nesting = ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); -- cgit v1.2.3 From 46a5d164db53ba6066b11889abb7fa6bddbe5cf7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Oct 2015 09:10:48 -0700 Subject: rcu: Stop disabling interrupts in scheduler fastpaths We need the scheduler's fastpaths to be, well, fast, and unnecessarily disabling and re-enabling interrupts is not necessarily consistent with this goal. Especially given that there are regions of the scheduler that already have interrupts disabled. This commit therefore moves the call to rcu_note_context_switch() to one of the interrupts-disabled regions of the scheduler, and removes the now-redundant disabling and re-enabling of interrupts from rcu_note_context_switch() and the functions it calls. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney [ paulmck: Shift rcu_note_context_switch() to avoid deadlock, as suggested by Peter Zijlstra. 
] --- include/linux/rcutree.h | 2 +- kernel/rcu/tree.c | 27 ++++++++++++--------------- kernel/rcu/tree_plugin.h | 14 ++++++-------- kernel/sched/core.c | 6 ++++-- 4 files changed, 23 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 60d15a080d7c..9d3eda39bcd2 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,7 +37,7 @@ void rcu_cpu_stall_reset(void); /* * Note a virtualization-based context switch. This is simply a * wrapper around rcu_note_context_switch(), which allows TINY_RCU - * to save a few bytes. + * to save a few bytes. The caller must have disabled interrupts. */ static inline void rcu_virt_note_context_switch(int cpu) { diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ed3bc0578cc5..93941d3434ad 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -242,8 +242,6 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) */ void rcu_sched_qs(void) { - unsigned long flags; - if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) return; trace_rcu_grace_period(TPS("rcu_sched"), @@ -252,13 +250,9 @@ void rcu_sched_qs(void) __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false); if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) return; - local_irq_save(flags); - if (__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) { - __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false); - rcu_report_exp_rdp(&rcu_sched_state, - this_cpu_ptr(&rcu_sched_data), true); - } - local_irq_restore(flags); + __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false); + rcu_report_exp_rdp(&rcu_sched_state, + this_cpu_ptr(&rcu_sched_data), true); } void rcu_bh_qs(void) @@ -295,17 +289,16 @@ EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr); * We inform the RCU core by emulating a zero-duration dyntick-idle * period, which we in turn do by incrementing the ->dynticks counter * by two. + * + * The caller must have disabled interrupts. */ static void rcu_momentary_dyntick_idle(void) { - unsigned long flags; struct rcu_data *rdp; struct rcu_dynticks *rdtp; int resched_mask; struct rcu_state *rsp; - local_irq_save(flags); - /* * Yes, we can lose flag-setting operations. This is OK, because * the flag will be set again after some delay. @@ -335,13 +328,12 @@ static void rcu_momentary_dyntick_idle(void) smp_mb__after_atomic(); /* Later stuff after QS. */ break; } - local_irq_restore(flags); } /* * Note a context switch. This is a quiescent state for RCU-sched, * and requires special handling for preemptible RCU. - * The caller must have disabled preemption. + * The caller must have disabled interrupts. */ void rcu_note_context_switch(void) { @@ -371,9 +363,14 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); */ void rcu_all_qs(void) { + unsigned long flags; + barrier(); /* Avoid RCU read-side critical sections leaking down. */ - if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) + if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) { + local_irq_save(flags); rcu_momentary_dyntick_idle(); + local_irq_restore(flags); + } this_cpu_inc(rcu_qs_ctr); barrier(); /* Avoid RCU read-side critical sections leaking up. */ } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 8e9d4a4d0326..e6da888cc908 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -146,8 +146,8 @@ static void __init rcu_bootup_announce(void) * the corresponding expedited grace period will also be the end of the * normal grace period. 
*/ -static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, - unsigned long flags) __releases(rnp->lock) +static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) + __releases(rnp->lock) /* But leaves rrupts disabled. */ { int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) + (rnp->exp_tasks ? RCU_EXP_TASKS : 0) + @@ -235,7 +235,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, rnp->gp_tasks = &t->rcu_node_entry; if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) rnp->exp_tasks = &t->rcu_node_entry; - raw_spin_unlock(&rnp->lock); + raw_spin_unlock(&rnp->lock); /* rrupts remain disabled. */ /* * Report the quiescent state for the expedited GP. This expedited @@ -250,7 +250,6 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, } else { WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs); } - local_irq_restore(flags); } /* @@ -285,12 +284,11 @@ static void rcu_preempt_qs(void) * predating the current grace period drain, in other words, until * rnp->gp_tasks becomes NULL. * - * Caller must disable preemption. + * Caller must disable interrupts. */ static void rcu_preempt_note_context_switch(void) { struct task_struct *t = current; - unsigned long flags; struct rcu_data *rdp; struct rcu_node *rnp; @@ -300,7 +298,7 @@ static void rcu_preempt_note_context_switch(void) /* Possibly blocking in an RCU read-side critical section. */ rdp = this_cpu_ptr(rcu_state_p->rda); rnp = rdp->mynode; - raw_spin_lock_irqsave_rcu_node(rnp, flags); + raw_spin_lock_rcu_node(rnp); t->rcu_read_unlock_special.b.blocked = true; t->rcu_blocked_node = rnp; @@ -316,7 +314,7 @@ static void rcu_preempt_note_context_switch(void) (rnp->qsmask & rdp->grpmask) ? rnp->gpnum : rnp->gpnum + 1); - rcu_preempt_ctxt_queue(rnp, rdp, flags); + rcu_preempt_ctxt_queue(rnp, rdp); } else if (t->rcu_read_lock_nesting < 0 && t->rcu_read_unlock_special.s) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4d568ac9319e..ec72de234feb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3085,7 +3085,6 @@ static void __sched notrace __schedule(bool preempt) cpu = smp_processor_id(); rq = cpu_rq(cpu); - rcu_note_context_switch(); prev = rq->curr; /* @@ -3104,13 +3103,16 @@ static void __sched notrace __schedule(bool preempt) if (sched_feat(HRTICK)) hrtick_clear(rq); + local_irq_disable(); + rcu_note_context_switch(); + /* * Make sure that signal_pending_state()->signal_pending() below * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) * done by the caller to avoid the race with signal_wake_up(). */ smp_mb__before_spinlock(); - raw_spin_lock_irq(&rq->lock); + raw_spin_lock(&rq->lock); lockdep_pin_lock(&rq->lock); rq->clock_skip_update <<= 1; /* promote REQ to ACT */ -- cgit v1.2.3 From 7d86dccf28a3ae2f790f399fc82d4c82521fd078 Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Mon, 12 Oct 2015 18:23:51 +0300 Subject: list: Introduce generic list_splice_tail_init_rcu() list_splice_init_rcu() can be used as a stack onto which full lists are pushed, but queue-like behavior is now needed by some security policies. This requires a list_splice_tail_init_rcu(). This commit therefore supplies a list_splice_tail_init_rcu() by pulling the code common to it and to list_splice_init_rcu() into a new __list_splice_init_rcu() function. This new function is based on the existing list_splice_init_rcu() implementation. Signed-off-by: Petko Manolov Cc: Mimi Zohar Signed-off-by: Paul E.
McKenney --- include/linux/rculist.h | 69 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 5ed540986019..e99d834545b6 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -179,32 +179,31 @@ static inline void list_replace_rcu(struct list_head *old, } /** - * list_splice_init_rcu - splice an RCU-protected list into an existing list. + * __list_splice_init_rcu - join an RCU-protected list into an existing list. * @list: the RCU-protected list to splice - * @head: the place in the list to splice the first list into + * @prev: points to the last element of the existing list + * @next: points to the first element of the existing list * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... * - * @head can be RCU-read traversed concurrently with this function. + * The list pointed to by @prev and @next can be RCU-read traversed + * concurrently with this function. * * Note that this function blocks. * - * Important note: the caller must take whatever action is necessary to - * prevent any other updates to @head. In principle, it is possible - * to modify the list as soon as sync() begins execution. - * If this sort of thing becomes necessary, an alternative version - * based on call_rcu() could be created. But only if -really- - * needed -- there is no shortage of RCU API members. + * Important note: the caller must take whatever action is necessary to prevent + * any other updates to the existing list. In principle, it is possible to + * modify the list as soon as sync() begins execution. If this sort of thing + * becomes necessary, an alternative version based on call_rcu() could be + * created. But only if -really- needed -- there is no shortage of RCU API + * members. */ -static inline void list_splice_init_rcu(struct list_head *list, - struct list_head *head, - void (*sync)(void)) +static inline void __list_splice_init_rcu(struct list_head *list, + struct list_head *prev, + struct list_head *next, + void (*sync)(void)) { struct list_head *first = list->next; struct list_head *last = list->prev; - struct list_head *at = head->next; - - if (list_empty(list)) - return; /* * "first" and "last" tracking list, so initialize it. RCU readers @@ -231,10 +230,40 @@ static inline void list_splice_init_rcu(struct list_head *list, * this function. */ - last->next = at; - rcu_assign_pointer(list_next_rcu(head), first); - first->prev = head; - at->prev = last; + last->next = next; + rcu_assign_pointer(list_next_rcu(prev), first); + first->prev = prev; + next->prev = last; +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list, + * designed for stacks. + * @list: the RCU-protected list to splice + * @head: the place in the existing list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + if (!list_empty(list)) + __list_splice_init_rcu(list, head, head->next, sync); +} + +/** + * list_splice_tail_init_rcu - splice an RCU-protected list into an existing + * list, designed for queues. + * @list: the RCU-protected list to splice + * @head: the place in the existing list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... 
+ */ +static inline void list_splice_tail_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + if (!list_empty(list)) + __list_splice_init_rcu(list, head->prev, head, sync); } /** -- cgit v1.2.3 From 2f073848c3cc8aff2655ab7c46d8c0de90cf4e50 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 Oct 2015 16:56:42 -0700 Subject: list: Use WRITE_ONCE() when initializing list_head structures Code that does lockless emptiness testing of non-RCU lists is relying on INIT_LIST_HEAD() to write the list head's ->next pointer atomically, particularly when INIT_LIST_HEAD() is invoked from list_del_init(). This commit therefore adds WRITE_ONCE() to this function's pointer stores that could affect the head's ->next pointer. Reported-by: Andrey Konovalov Signed-off-by: Paul E. McKenney --- include/linux/list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 06c2d887a918..5356f4d661a7 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -24,7 +24,7 @@ static inline void INIT_LIST_HEAD(struct list_head *list) { - list->next = list; + WRITE_ONCE(list->next, list); list->prev = list; } -- cgit v1.2.3 From 79cfea0273876d9c438f3227b8f68c8c7ae31583 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 Dec 2015 13:09:52 -0800 Subject: rcu: Remove TINY_RCU bloat from pointless boot parameters The rcu_expedited, rcu_normal, and rcu_normal_after_boot kernel boot parameters are pointless in the case of TINY_RCU because in that case synchronous grace periods, both expedited and normal, are no-ops. However, these three symbols contribute several hundred bytes of bloat. This commit therefore uses CPP directives to avoid compiling this code in TINY_RCU kernels. Reported-by: kbuild test robot Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- Documentation/kernel-parameters.txt | 13 ++++++++----- include/linux/rcupdate.h | 9 ++++++++- kernel/ksysfs.c | 4 ++++ kernel/rcu/update.c | 7 ++++--- 4 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 197305bbb9b7..d8186da15ca1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3308,20 +3308,23 @@ bytes respectively. Such letter suffixes can also be entirely omitted. of synchronize_rcu(). This reduces latency, but can increase CPU utilization, degrade real-time latency, and degrade energy efficiency. + No effect on CONFIG_TINY_RCU kernels. rcupdate.rcu_normal= [KNL] Use only normal grace-period primitives, for example, synchronize_rcu() instead of synchronize_rcu_expedited(). This improves - real-time latency, CPU utilization, and energy - efficiency, but can expose users to increased - grace-period latency. This parameter overrides - rcupdate.rcu_expedited. + real-time latency, CPU utilization, and + energy efficiency, but can expose users to + increased grace-period latency. This parameter + overrides rcupdate.rcu_expedited. No effect on + CONFIG_TINY_RCU kernels. rcupdate.rcu_normal_after_boot= [KNL] Once boot has completed (that is, after rcu_end_inkernel_boot() has been invoked), use - only normal grace-period primitives. + only normal grace-period primitives. No effect + on CONFIG_TINY_RCU kernels. 
rcupdate.rcu_task_stall_timeout= [KNL] Set timeout in jiffies for RCU task stall warning diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 98d9f30c02d4..47e95b80bebd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -48,8 +48,10 @@ #include +#ifndef CONFIG_TINY_RCU extern int rcu_expedited; /* for sysctl */ extern int rcu_normal; /* also for sysctl */ +#endif /* #ifndef CONFIG_TINY_RCU */ #ifdef CONFIG_TINY_RCU /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ @@ -327,7 +329,6 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -void rcu_end_inkernel_boot(void); void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); @@ -335,6 +336,12 @@ struct notifier_block; int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); +#ifndef CONFIG_TINY_RCU +void rcu_end_inkernel_boot(void); +#else /* #ifndef CONFIG_TINY_RCU */ +static inline void rcu_end_inkernel_boot(void) { } +#endif /* #ifndef CONFIG_TINY_RCU */ + #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index b4e2fa52d8bc..152da4a48867 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -144,6 +144,7 @@ static ssize_t fscaps_show(struct kobject *kobj, } KERNEL_ATTR_RO(fscaps); +#ifndef CONFIG_TINY_RCU int rcu_expedited; static ssize_t rcu_expedited_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -177,6 +178,7 @@ static ssize_t rcu_normal_store(struct kobject *kobj, return count; } KERNEL_ATTR_RW(rcu_normal); +#endif /* #ifndef CONFIG_TINY_RCU */ /* * Make /sys/kernel/notes give the raw contents of our kernel .notes section. @@ -219,8 +221,10 @@ static struct attribute * kernel_attrs[] = { &kexec_crash_size_attr.attr, &vmcoreinfo_attr.attr, #endif +#ifndef CONFIG_TINY_RCU &rcu_expedited_attr.attr, &rcu_normal_attr.attr, +#endif NULL }; diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 12b91f5a60a6..76b94e19430b 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -60,11 +60,12 @@ MODULE_ALIAS("rcupdate"); #endif #define MODULE_PARAM_PREFIX "rcupdate." +#ifndef CONFIG_TINY_RCU module_param(rcu_expedited, int, 0); module_param(rcu_normal, int, 0); - static int rcu_normal_after_boot; module_param(rcu_normal_after_boot, int, 0); +#endif /* #ifndef CONFIG_TINY_RCU */ #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT) /** @@ -172,8 +173,6 @@ void rcu_unexpedite_gp(void) } EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); -#endif /* #ifndef CONFIG_TINY_RCU */ - /* * Inform RCU of the end of the in-kernel boot sequence. */ @@ -185,6 +184,8 @@ void rcu_end_inkernel_boot(void) WRITE_ONCE(rcu_normal, 1); } +#endif /* #ifndef CONFIG_TINY_RCU */ + #ifdef CONFIG_PREEMPT_RCU /* -- cgit v1.2.3 From 7c9906ca5e582a773fff696975e312cef58a7386 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 31 Oct 2015 00:59:01 -0700 Subject: rcu: Don't redundantly disable irqs in rcu_irq_{enter,exit}() This commit replaces a local_irq_save()/local_irq_restore() pair with a lockdep assertion that interrupts are already disabled. This should remove the corresponding overhead from the interrupt entry/exit fastpaths. This change was inspired by the fact that Iftekhar Ahmed's mutation testing showed that removing rcu_irq_enter()'s call to local_irq_restore() had no effect, which might indicate that interrupts were always enabled anyway. Signed-off-by: Paul E.
McKenney --- include/linux/rcupdate.h | 4 ++-- include/linux/rcutiny.h | 8 ++++++++ include/linux/rcutree.h | 2 ++ include/linux/tracepoint.h | 4 ++-- kernel/rcu/tree.c | 32 ++++++++++++++++++++++++++------ 5 files changed, 40 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a0189ba67fde..f2b667df1131 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -379,9 +379,9 @@ static inline void rcu_init_nohz(void) */ #define RCU_NONIDLE(a) \ do { \ - rcu_irq_enter(); \ + rcu_irq_enter_irqson(); \ do { a; } while (0); \ - rcu_irq_exit(); \ + rcu_irq_exit_irqson(); \ } while (0) /* diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4c1aaf9cce7b..64809aea661c 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -181,6 +181,14 @@ static inline void rcu_irq_enter(void) { } +static inline void rcu_irq_exit_irqson(void) +{ +} + +static inline void rcu_irq_enter_irqson(void) +{ +} + static inline void rcu_irq_exit(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9d3eda39bcd2..ad1eda9fa4da 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -97,6 +97,8 @@ void rcu_idle_enter(void); void rcu_idle_exit(void); void rcu_irq_enter(void); void rcu_irq_exit(void); +void rcu_irq_enter_irqson(void); +void rcu_irq_exit_irqson(void); void exit_rcu(void); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 696a339c592c..7834a8a8bf1e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -171,8 +171,8 @@ extern void syscall_unregfunc(void); TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond), \ - rcu_irq_enter(), \ - rcu_irq_exit()); \ + rcu_irq_enter_irqson(), \ + rcu_irq_exit_irqson()); \ } #else #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d6863bceeb45..40940b0d0310 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -732,7 +732,7 @@ void rcu_user_enter(void) * * Exit from an interrupt handler, which might possibly result in entering * idle mode, in other words, leaving the mode in which read-side critical - * sections can occur. + * sections can occur. The caller must have disabled interrupts. * * This code assumes that the idle loop never does anything that might * result in unbalanced calls to irq_enter() and irq_exit(). If your @@ -745,11 +745,10 @@ void rcu_user_enter(void) */ void rcu_irq_exit(void) { - unsigned long flags; long long oldval; struct rcu_dynticks *rdtp; - local_irq_save(flags); + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!"); rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; @@ -760,6 +759,17 @@ void rcu_irq_exit(void) else rcu_eqs_enter_common(oldval, true); rcu_sysidle_enter(1); +} + +/* + * Wrapper for rcu_irq_exit() where interrupts are enabled. + */ +void rcu_irq_exit_irqson(void) +{ + unsigned long flags; + + local_irq_save(flags); + rcu_irq_exit(); local_irq_restore(flags); } @@ -857,7 +867,7 @@ void rcu_user_exit(void) * * Enter an interrupt handler, which might possibly result in exiting * idle mode, in other words, entering the mode in which read-side critical - * sections can occur. + * sections can occur. The caller must have disabled interrupts. 
* * Note that the Linux kernel is fully capable of entering an interrupt * handler that it never exits, for example when doing upcalls to @@ -873,11 +883,10 @@ void rcu_user_exit(void) */ void rcu_irq_enter(void) { - unsigned long flags; struct rcu_dynticks *rdtp; long long oldval; - local_irq_save(flags); + RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!"); rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting++; @@ -888,6 +897,17 @@ void rcu_irq_enter(void) else rcu_eqs_exit_common(oldval, true); rcu_sysidle_exit(1); +} + +/* + * Wrapper for rcu_irq_enter() where interrupts are enabled. + */ +void rcu_irq_enter_irqson(void) +{ + unsigned long flags; + + local_irq_save(flags); + rcu_irq_enter(); local_irq_restore(flags); } -- cgit v1.2.3 From f039f0af081746933d5dec3229637a18fab791ed Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 2 Nov 2015 13:21:47 +1100 Subject: rcu: Fix comment for rcu_dereference_raw_notrace rcu_dereference_raw() indirectly calls rcu_read_lock_held(), while rcu_dereference_raw_notrace() does not, so fix the comment about the latter. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f2b667df1131..85aabcd8b564 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -741,7 +741,7 @@ static inline void rcu_preempt_sleep_check(void) * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. * - * The tracing version of rcu_dereference_raw() must not call + * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) -- cgit v1.2.3 From 69b907297f4edf13182e3fa3adc0160df077746c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Sat, 5 Dec 2015 18:14:19 -0800 Subject: list: Add lockless list traversal primitives Although list_for_each_entry_rcu() can in theory be used anywhere preemption is disabled, it can result in calls to lockdep, which cannot be used in certain constrained execution environments, such as exception handlers that do not map the entire kernel into their address spaces. This commit therefore adds list_entry_lockless() and list_for_each_entry_lockless(), which never invoke lockdep and can therefore safely be used from these constrained environments, but only as long as those environments are non-preemptible (or items are never deleted from the list). Use synchronize_sched(), call_rcu_sched(), or synchronize_sched_expedited() in updates for the needed grace periods. Of course, if items are never deleted from the list, there is no need to wait for grace periods. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul E.
McKenney --- include/linux/rculist.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 5ed540986019..1fad79861e14 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -304,6 +304,42 @@ static inline void list_splice_init_rcu(struct list_head *list, &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) +/** + * list_entry_lockless - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu(), but requires some implicit RCU + * read-side guarding. One example is running within a special + * exception-time environment where preemption is disabled and where + * lockdep cannot be invoked (in which case updaters must use RCU-sched, + * as in synchronize_sched(), call_rcu_sched(), and friends). Another + * example is when items are added to the list, but never deleted. + */ +#define list_entry_lockless(ptr, type, member) \ + container_of((typeof(ptr))lockless_dereference(ptr), type, member) + +/** + * list_for_each_entry_lockless - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu(), but requires some implicit RCU + * read-side guarding. One example is running within a special + * exception-time environment where preemption is disabled and where + * lockdep cannot be invoked (in which case updaters must use RCU-sched, + * as in synchronize_sched(), call_rcu_sched(), and friends). Another + * example is when items are added to the list, but never deleted. + */ +#define list_for_each_entry_lockless(pos, head, member) \ + for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry_lockless(pos->member.next, typeof(*pos), member)) + /** * list_for_each_entry_continue_rcu - continue iteration over list of given type * @pos: the type * to use as a loop cursor. -- cgit v1.2.3
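
As a rough illustration of how the lockless traversal primitives added by the final patch above are meant to pair with RCU-sched updates, here is a minimal sketch. The struct foo type, the foo_head list, and the foo_present()/foo_remove() helpers are hypothetical names used only for this example; only list_for_each_entry_lockless() (added by the patch), list_del_rcu(), and synchronize_sched() are existing kernel APIs.

#include <linux/list.h>
#include <linux/preempt.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/types.h>

struct foo {				/* hypothetical list element */
	int key;
	struct list_head list;
};

static LIST_HEAD(foo_head);		/* hypothetical list head */

/*
 * Reader: may run in a constrained environment (for example, a
 * non-preemptible exception handler) where lockdep must not be invoked.
 * Disabling preemption acts as an RCU-sched read-side critical section.
 */
static bool foo_present(int key)
{
	struct foo *p;
	bool found = false;

	preempt_disable();
	list_for_each_entry_lockless(p, &foo_head, list) {
		if (p->key == key) {
			found = true;
			break;
		}
	}
	preempt_enable();
	return found;
}

/*
 * Updater: because the readers rely on disabled preemption rather than
 * rcu_read_lock(), removal must wait for an RCU-sched grace period
 * before the element is freed.  The caller is assumed to hold the
 * update-side lock that serializes list modifications.
 */
static void foo_remove(struct foo *p)
{
	list_del_rcu(&p->list);
	synchronize_sched();
	kfree(p);
}

If elements are only ever added and never removed, the grace-period wait (and foo_remove() itself) can be dropped entirely, as the commit message notes.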