From 7d0ae8086b828311250c6afdf800b568ac9bd693 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Mar 2015 14:57:58 -0800 Subject: rcu: Convert ACCESS_ONCE() to READ_ONCE() and WRITE_ONCE() This commit moves from the old ACCESS_ONCE() API to the new READ_ONCE() and WRITE_ONCE() APIs. Signed-off-by: Paul E. McKenney [ paulmck: Updated to include kernel/torture.c as suggested by Jason Low. ] --- include/linux/rculist.h | 6 +++--- include/linux/rcupdate.h | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index a18b16f1dc0e..665397247e82 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -29,8 +29,8 @@ */ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) { - ACCESS_ONCE(list->next) = list; - ACCESS_ONCE(list->prev) = list; + WRITE_ONCE(list->next, list); + WRITE_ONCE(list->prev, list); } /* @@ -288,7 +288,7 @@ static inline void list_splice_init_rcu(struct list_head *list, #define list_first_or_null_rcu(ptr, type, member) \ ({ \ struct list_head *__ptr = (ptr); \ - struct list_head *__next = ACCESS_ONCE(__ptr->next); \ + struct list_head *__next = READ_ONCE(__ptr->next); \ likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \ }) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 573a5afd5ed8..87bb0eee665b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -364,8 +364,8 @@ extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ rcu_all_qs(); \ - if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ - ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ + if (READ_ONCE((t)->rcu_tasks_holdout)) \ + WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) @@ -609,7 +609,7 @@ static inline void rcu_preempt_sleep_check(void) #define __rcu_access_pointer(p, space) \ ({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \ + typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) @@ -630,7 +630,7 @@ static inline void rcu_preempt_sleep_check(void) #define __rcu_access_index(p, space) \ ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ + typeof(p) _________p1 = READ_ONCE(p); \ rcu_dereference_sparse(p, space); \ (_________p1); \ }) @@ -659,7 +659,7 @@ static inline void rcu_preempt_sleep_check(void) */ #define lockless_dereference(p) \ ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ + typeof(p) _________p1 = READ_ONCE(p); \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) @@ -702,7 +702,7 @@ static inline void rcu_preempt_sleep_check(void) * @p: The pointer to read * * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * smp_read_barrier_depends() and keep the READ_ONCE(). This is useful * when the value of this pointer is accessed, but the pointer is not * dereferenced, for example, when testing an RCU-protected pointer against * NULL. Although rcu_access_pointer() may also be used in cases where @@ -791,7 +791,7 @@ static inline void rcu_preempt_sleep_check(void) * @p: The index to read * * Return the value of the specified RCU-protected index, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). 
This is useful + * smp_read_barrier_depends() and keep the READ_ONCE(). This is useful * when the value of this index is accessed, but the index is not * dereferenced, for example, when testing an RCU-protected index against * -1. Although rcu_access_index() may also be used in cases where @@ -827,7 +827,7 @@ static inline void rcu_preempt_sleep_check(void) * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This + * both the smp_read_barrier_depends() and the READ_ONCE(). This * is useful in cases where update-side locks prevent the value of the * pointer from changing. Please note that this primitive does -not- * prevent the compiler from repeating this reference or combining it -- cgit v1.2.3 From 1ebee8017d84ec8a0ba893cf7b8be3f70ead088b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 19 Apr 2015 18:21:47 -0700 Subject: rcu: Eliminate array-index-based RCU primitives Now that rcu_access_index() and rcu_dereference_index_check() are no longer used, the commit removes them from the RCU API. This means that RCU's data dependencies now involve only pointers, give or take the occasional cast to and then back from an integer type to do pointer arithmetic. This in turn eliminates the need for a number of operations on values carrying RCU data dependencies. Signed-off-by: Paul E. McKenney Cc: linux-edac@vger.kernel.org Cc: Tony Luck Acked-by: Borislav Petkov --- include/linux/rcupdate.h | 50 ------------------------------------------------ 1 file changed, 50 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bb0eee665b..b97842ff71d2 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -628,21 +628,6 @@ static inline void rcu_preempt_sleep_check(void) ((typeof(*p) __force __kernel *)(p)); \ }) -#define __rcu_access_index(p, space) \ -({ \ - typeof(p) _________p1 = READ_ONCE(p); \ - rcu_dereference_sparse(p, space); \ - (_________p1); \ -}) -#define __rcu_dereference_index_check(p, c) \ -({ \ - /* Dependency order vs. p above. */ \ - typeof(p) _________p1 = lockless_dereference(p); \ - rcu_lockdep_assert(c, \ - "suspicious rcu_dereference_index_check() usage"); \ - (_________p1); \ -}) - /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable * @v: The value to statically initialize with. @@ -786,41 +771,6 @@ static inline void rcu_preempt_sleep_check(void) */ #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) -/** - * rcu_access_index() - fetch RCU index with no dereferencing - * @p: The index to read - * - * Return the value of the specified RCU-protected index, but omit the - * smp_read_barrier_depends() and keep the READ_ONCE(). This is useful - * when the value of this index is accessed, but the index is not - * dereferenced, for example, when testing an RCU-protected index against - * -1. Although rcu_access_index() may also be used in cases where - * update-side locks prevent the value of the index from changing, you - * should instead use rcu_dereference_index_protected() for this use case. 
- */ -#define rcu_access_index(p) __rcu_access_index((p), __rcu) - -/** - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * Similar to rcu_dereference_check(), but omits the sparse checking. - * This allows rcu_dereference_index_check() to be used on integers, - * which can then be used as array indices. Attempting to use - * rcu_dereference_check() on an integer will give compiler warnings - * because the sparse address-space mechanism relies on dereferencing - * the RCU-protected pointer. Dereferencing integers is not something - * that even gcc will put up with. - * - * Note that this function does not implicitly check for RCU read-side - * critical sections. If this function gains lots of uses, it might - * make sense to provide versions for each flavor of RCU, but it does - * not make sense as of early 2010. - */ -#define rcu_dereference_index_check(p, c) \ - __rcu_dereference_index_check((p), (c)) - /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented * @p: The pointer to read, prior to dereferencing -- cgit v1.2.3 From d956028e99b30726b0bce0ca684b40b1ad67b514 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 31 Mar 2015 09:39:41 +0100 Subject: documentation: memory-barriers: Fix smp_mb__before_spinlock() semantics Our current documentation claims that, when followed by an ACQUIRE, smp_mb__before_spinlock() orders prior loads against subsequent loads and stores, which isn't the intent. This commit therefore fixes the documentation to state that this sequence orders only prior stores against subsequent loads and stores. In addition, the original intent of smp_mb__before_spinlock() was to order only prior stores against subsequent loads; however, people have started using it as if it ordered prior stores against subsequent loads and stores. This commit therefore also updates smp_mb__before_spinlock()'s header comment to reflect this new reality. Cc: Oleg Nesterov Cc: "Paul E. McKenney" Cc: Peter Zijlstra Signed-off-by: Will Deacon Signed-off-by: Paul E. McKenney --- Documentation/memory-barriers.txt | 7 +++---- include/linux/spinlock.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f95746189b5d..1f362fd2ecb4 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1784,10 +1784,9 @@ for each construct. These operations all imply certain barriers: Memory operations issued before the ACQUIRE may be completed after the ACQUIRE operation has completed. An smp_mb__before_spinlock(), - combined with a following ACQUIRE, orders prior loads against - subsequent loads and stores and also orders prior stores against - subsequent stores. Note that this is weaker than smp_mb()! The - smp_mb__before_spinlock() primitive is free on many architectures. + combined with a following ACQUIRE, orders prior stores against + subsequent loads and stores. Note that this is weaker than smp_mb()! + The smp_mb__before_spinlock() primitive is free on many architectures. (2) RELEASE operation implication: diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3e18379dfa6f..0063b24b4f36 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -120,7 +120,7 @@ do { \ /* * Despite its name it doesn't necessarily have to be a full barrier.
* It should only guarantee that a STORE before the critical section - * can not be reordered with a LOAD inside this section. + * can not be reordered with LOADs and STOREs inside this section. * spin_lock() is the one-way barrier, this LOAD can not escape out * of the region. So the default implementation simply ensures that * a STORE can not move into the critical section, smp_wmb() should -- cgit v1.2.3 From 3382adbc1bb8c80ea512243acf6059564287620b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Mar 2015 15:41:24 -0800 Subject: rcu: Eliminate a few CONFIG_RCU_NOCB_CPU_ALL #ifdefs This commit converts several CONFIG_RCU_NOCB_CPU_ALL #ifdefs to instead use IS_ENABLED(). This change should help avoid hiding code from compiler diagnostics. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- include/linux/rcutree.h | 2 -- kernel/rcu/tree_plugin.h | 22 ++++++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bb0eee665b..5ec20bc4af76 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1153,13 +1153,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -#if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) +#ifdef CONFIG_TINY_RCU static inline int rcu_needs_cpu(unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; return 0; } -#endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */ +#endif /* #ifdef CONFIG_TINY_RCU */ #if defined(CONFIG_RCU_NOCB_CPU_ALL) static inline bool rcu_is_nocb_cpu(int cpu) { return true; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2e583a6aaca..0bd400b02430 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -31,9 +31,7 @@ #define __LINUX_RCUTREE_H void rcu_note_context_switch(void); -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *delta_jiffies); -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ void rcu_cpu_stall_reset(void); /* diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 5c0122f09ed0..0730bfcf65db 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1372,13 +1372,12 @@ static void rcu_prepare_kthreads(int cpu) * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs * any flavor of RCU. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(NULL); + return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) + ? 0 : rcu_cpu_has_callbacks(NULL); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up @@ -1485,11 +1484,15 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * * The caller must have disabled interrupts. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *dj) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { + *dj = ULONG_MAX; + return 0; + } + /* Snapshot to detect later posting of non-lazy callback. */ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; @@ -1516,7 +1519,6 @@ int rcu_needs_cpu(unsigned long *dj) } return 0; } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Prepare a CPU for idle from an RCU perspective.
The first major task @@ -1530,7 +1532,6 @@ int rcu_needs_cpu(unsigned long *dj) */ static void rcu_prepare_for_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL bool needwake; struct rcu_data *rdp; struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1538,6 +1539,9 @@ static void rcu_prepare_for_idle(void) struct rcu_state *rsp; int tne; + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) + return; + /* Handle nohz enablement switches conservatively. */ tne = READ_ONCE(tick_nohz_active); if (tne != rdtp->tick_nohz_enabled_snap) { @@ -1585,7 +1589,6 @@ static void rcu_prepare_for_idle(void) if (needwake) rcu_gp_kthread_wake(rsp); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* @@ -1595,12 +1598,11 @@ static void rcu_prepare_for_idle(void) */ static void rcu_cleanup_after_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL - if (rcu_is_nocb_cpu(smp_processor_id())) + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || + rcu_is_nocb_cpu(smp_processor_id())) return; if (rcu_try_advance_all_cbs()) invoke_rcu_core(); -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* -- cgit v1.2.3 From 5af4692a75daf08dddc93dbb4cd2a1b3d3b617af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 25 Apr 2015 12:48:29 -0700 Subject: smp: Make control dependencies work on Alpha, improve documentation The current formulation of control dependencies fails on DEC Alpha, which does not respect dependencies of any kind unless an explicit memory barrier is provided. This commit therefore creates a READ_ONCE_CTRL() that has the same overhead on non-Alpha systems, but causes Alpha to produce the needed ordering. This commit also applies READ_ONCE_CTRL() to the one known use of control dependencies. Use of READ_ONCE_CTRL() also has the beneficial effect of adding a bit of self-documentation to control dependencies. Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) --- Documentation/memory-barriers.txt | 55 +++++++++++++++++++++++---------------- include/linux/compiler.h | 16 ++++++++++++ kernel/events/ring_buffer.c | 2 +- 3 files changed, 50 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f95746189b5d..a3014bcc5b08 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -617,16 +617,16 @@ case what's actually required is: However, stores are not speculated. This means that ordering -is- provided for load-store control dependencies, as in the following example: - q = ACCESS_ONCE(a); + q = READ_ONCE_CTRL(a); if (q) { ACCESS_ONCE(b) = p; } -Control dependencies pair normally with other types of barriers. -That said, please note that ACCESS_ONCE() is not optional! Without the -ACCESS_ONCE(), might combine the load from 'a' with other loads from -'a', and the store to 'b' with other stores to 'b', with possible highly -counterintuitive effects on ordering. +Control dependencies pair normally with other types of barriers. That +said, please note that READ_ONCE_CTRL() is not optional! Without the +READ_ONCE_CTRL(), the compiler might combine the load from 'a' with +other loads from 'a', and the store to 'b' with other stores to 'b', +with possible highly counterintuitive effects on ordering. Worse yet, if the compiler is able to prove (say) that the value of variable 'a' is always non-zero, it would be well within its rights @@ -636,12 +636,15 @@ as follows: q = a; b = p; /* BUG: Compiler and CPU can both reorder!!!
*/ -So don't leave out the ACCESS_ONCE(). +Finally, the READ_ONCE_CTRL() includes an smp_read_barrier_depends() +that DEC Alpha needs in order to respect control dependencies. + +So don't leave out the READ_ONCE_CTRL(). It is tempting to try to enforce ordering on identical stores on both branches of the "if" statement as follows: - q = ACCESS_ONCE(a); + q = READ_ONCE_CTRL(a); if (q) { barrier(); ACCESS_ONCE(b) = p; @@ -655,7 +658,7 @@ branches of the "if" statement as follows: Unfortunately, current compilers will transform this as follows at high optimization levels: - q = ACCESS_ONCE(a); + q = READ_ONCE_CTRL(a); barrier(); ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */ if (q) { @@ -685,7 +688,7 @@ memory barriers, for example, smp_store_release(): In contrast, without explicit memory barriers, two-legged-if control ordering is guaranteed only when the stores differ, for example: - q = ACCESS_ONCE(a); + q = READ_ONCE_CTRL(a); if (q) { ACCESS_ONCE(b) = p; do_something(); @@ -694,14 +697,14 @@ ordering is guaranteed only when the stores differ, for example: do_something_else(); } -The initial ACCESS_ONCE() is still required to prevent the compiler from -proving the value of 'a'. +The initial READ_ONCE_CTRL() is still required to prevent the compiler +from proving the value of 'a'. In addition, you need to be careful what you do with the local variable 'q', otherwise the compiler might be able to guess the value and again remove the needed conditional. For example: - q = ACCESS_ONCE(a); + q = READ_ONCE_CTRL(a); if (q % MAX) { ACCESS_ONCE(b) = p; do_something(); @@ -714,7 +717,7 @@ If MAX is defined to be 1, then the compiler knows that (q % MAX) is equal to zero, in which case the compiler is within its rights to transform the above code into the following: - q = ACCESS_ONCE(a); + q = READ_ONCE_CTRL(a); ACCESS_ONCE(b) = p; do_something_else(); @@ -725,7 +728,7 @@ is gone, and the barrier won't bring it back. Therefore, if you are relying on this ordering, you should make sure that MAX is greater than one, perhaps as follows: - q = ACCESS_ONCE(a); + q = READ_ONCE_CTRL(a); BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */ if (q % MAX) { ACCESS_ONCE(b) = p; @@ -742,14 +745,15 @@ of the 'if' statement. You must also be careful not to rely too much on boolean short-circuit evaluation. Consider this example: - q = ACCESS_ONCE(a); + q = READ_ONCE_CTRL(a); if (a || 1 > 0) ACCESS_ONCE(b) = 1; -Because the second condition is always true, the compiler can transform -this example as following, defeating control dependency: +Because the first condition cannot fault and the second condition is +always true, the compiler can transform this example as follows, +defeating the control dependency: - q = ACCESS_ONCE(a); + q = READ_ONCE_CTRL(a); ACCESS_ONCE(b) = 1; This example underscores the need to ensure that the compiler cannot @@ -762,8 +766,8 @@ demonstrated by two related examples, with the initial values of x and y both being zero: CPU 0 CPU 1 - ===================== ===================== - r1 = ACCESS_ONCE(x); r2 = ACCESS_ONCE(y); + ======================= ======================= + r1 = READ_ONCE_CTRL(x); r2 = READ_ONCE_CTRL(y); if (r1 > 0) if (r2 > 0) ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1; @@ -783,7 +787,8 @@ But because control dependencies do -not- provide transitivity, the above assertion can fail after the combined three-CPU example completes.
If you need the three-CPU example to provide ordering, you will need smp_mb() between the loads and stores in the CPU 0 and CPU 1 code fragments, -that is, just before or just after the "if" statements. +that is, just before or just after the "if" statements. Furthermore, +the original two-CPU example is very fragile and should be avoided. These two examples are the LB and WWC litmus tests from this paper: http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf and this @@ -791,6 +796,12 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html. In summary: + (*) Control dependencies must be headed by READ_ONCE_CTRL(). + Or, as a much less preferable alternative, they may be headed + by a READ_ONCE() or an ACCESS_ONCE() read with an + smp_read_barrier_depends() interposed between that read and + the control-dependent write. + (*) Control dependencies can order prior loads against later stores. However, they do -not- guarantee any other sort of ordering: Not prior loads against later loads, nor prior stores against diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 867722591be2..5d66777914db 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -252,6 +252,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #define WRITE_ONCE(x, val) \ ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) +/** + * READ_ONCE_CTRL - Read a value heading a control dependency + * @x: The value to be read, heading the control dependency + * + * Control dependencies are tricky. See Documentation/memory-barriers.txt + * for important information on how to use them. Note that in many cases, + * use of smp_load_acquire() will be much simpler. Control dependencies + * should be avoided except on the hottest of hotpaths. + */ +#define READ_ONCE_CTRL(x) \ +({ \ + typeof(x) __val = READ_ONCE(x); \ + smp_read_barrier_depends(); /* Enforce control dependency. */ \ + __val; \ +}) + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 232f00f273cb..17fcb73c4a50 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -141,7 +141,7 @@ int perf_output_begin(struct perf_output_handle *handle, perf_output_get_handle(handle); do { - tail = ACCESS_ONCE(rb->user_page->data_tail); + tail = READ_ONCE_CTRL(rb->user_page->data_tail); offset = head = local_read(&rb->head); if (!rb->overwrite && unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) -- cgit v1.2.3 From f517700cce37ffcb36e7afae0294fd11c72ed134 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 26 Mar 2015 13:27:08 +0800 Subject: rculist: Fix another sparse warning This fixes the following sparse warnings: make C=1 CF=-D__CHECK_ENDIAN__ net/tipc/name_table.o net/tipc/name_table.c:977:17: error: incompatible types in comparison expression (different address spaces) net/tipc/name_table.c:977:17: error: incompatible types in comparison expression (different address spaces) To silence these sparse complaints, an RCU annotation should be added to the "next" pointer of the hlist_node structure through the hlist_next_rcu() macro when iterating over a hlist with hlist_for_each_entry_from_rcu(). Signed-off-by: Ying Xue Signed-off-by: Paul E.
McKenney --- include/linux/rculist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 665397247e82..17c6b1f84a77 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -549,8 +549,8 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, */ #define hlist_for_each_entry_from_rcu(pos, member) \ for (; pos; \ - pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ - typeof(*(pos)), member)) + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 51952bc633064311410b041fad38da1614f4539e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Apr 2015 11:15:30 -0700 Subject: rcu: Further shrink Tiny RCU by making empty functions static inlines The Tiny RCU counterparts to rcu_idle_enter(), rcu_idle_exit(), rcu_irq_enter(), and rcu_irq_exit() are empty functions, but each has EXPORT_SYMBOL_GPL(), which needlessly consumes extra memory, especially in kernels built with module support. This commit therefore moves these functions to static inlines in rcutiny.h, removing the need for exports. This won't affect the size of the tiniest kernels, which are likely built without module support, but might help semi-tiny kernels that do include module support. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 4 ---- include/linux/rcutiny.h | 16 ++++++++++++++++ include/linux/rcutree.h | 5 +++++ kernel/rcu/tiny.c | 33 --------------------------------- 4 files changed, 21 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bb0eee665b..1b3d7bcb3a6c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -292,10 +292,6 @@ void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); struct notifier_block; -void rcu_idle_enter(void); -void rcu_idle_exit(void); -void rcu_irq_enter(void); -void rcu_irq_exit(void); int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 937edaeb150d..3df6c1ec4e25 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -159,6 +159,22 @@ static inline void rcu_cpu_stall_reset(void) { } +static inline void rcu_idle_enter(void) +{ +} + +static inline void rcu_idle_exit(void) +{ +} + +static inline void rcu_irq_enter(void) +{ +} + +static inline void rcu_irq_exit(void) +{ +} + static inline void exit_rcu(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2e583a6aaca..f22d83f49e56 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -93,6 +93,11 @@ void rcu_force_quiescent_state(void); void rcu_bh_force_quiescent_state(void); void rcu_sched_force_quiescent_state(void); +void rcu_idle_enter(void); +void rcu_idle_exit(void); +void rcu_irq_enter(void); +void rcu_irq_exit(void); + void exit_rcu(void); void rcu_scheduler_starting(void); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 069742d61c68..a501b4ab9b1c 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -49,39 +49,6 @@ static void __call_rcu(struct rcu_head *head, #include "tiny_plugin.h" -/* - * Enter idle, which is an extended quiescent state if we have fully - * entered that mode.
- */ -void rcu_idle_enter(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_idle_enter); - -/* - * Exit an interrupt handler towards idle. - */ -void rcu_irq_exit(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_irq_exit); - -/* - * Exit idle, so that we are no longer in an extended quiescent state. - */ -void rcu_idle_exit(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_idle_exit); - -/* - * Enter an interrupt handler, moving away from idle. - */ -void rcu_irq_enter(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_irq_enter); - #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* -- cgit v1.2.3
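[ Editor's note: the sketch below is not part of any patch in this series.  It is a minimal, self-contained userspace illustration of the READ_ONCE()/WRITE_ONCE() and READ_ONCE_CTRL() pattern that the commits above introduce and document.  The macro definitions are simplified stand-ins for the kernel's <linux/compiler.h> versions: plain volatile accesses instead of __read_once_size()/__write_once_size(), and a no-op smp_read_barrier_depends() as on all non-Alpha systems.  Only the shape of the usage pattern is meant to match the kernel's. ]

/* Simplified stand-ins for the kernel's <linux/compiler.h> macros. */
#include <stdio.h>

#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))

/* No-op everywhere except DEC Alpha. */
#define smp_read_barrier_depends()	do { } while (0)

/*
 * Read a value heading a control dependency, mirroring the
 * READ_ONCE_CTRL() added to compiler.h by the patch above.
 */
#define READ_ONCE_CTRL(x) \
({ \
	typeof(x) ___val = READ_ONCE(x); \
	smp_read_barrier_depends(); /* Enforce control dependency. */ \
	___val; \
})

static int a = 1;
static int b;

int main(void)
{
	/* The load heading the control dependency ... */
	int q = READ_ONCE_CTRL(a);

	/*
	 * ... is ordered before the dependent store below: the store
	 * to 'b' cannot be speculated by the CPU, and READ_ONCE_CTRL()
	 * keeps the compiler from merging or refetching the load.
	 */
	if (q)
		WRITE_ONCE(b, 1);

	printf("q=%d b=%d\n", q, READ_ONCE(b));
	return 0;
}

Built with gcc (typeof and statement expressions are GCC extensions, as in the kernel proper), this prints "q=1 b=1".  The point of the pattern is that even on weakly ordered hardware, a concurrent observer can never see the store to 'b' precede the load from 'a'; without the *_ONCE() accessors, the compiler would be free to break that ordering as described in the memory-barriers.txt hunks above.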