From f7d71f2052555ae57b47322f2c2f6c29ff2438ae Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Fri, 19 Jun 2015 11:50:00 -0400
Subject: locking/qrwlock: Rename functions to queued_*()

To sync up with the naming convention used in qspinlock, all the
qrwlock functions were renamed to started with "queued" instead of
"queue".

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/1434729002-57724-2-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qrwlock.h | 58 +++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 6383d54bf983..55e3ee1d2415 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -36,33 +36,33 @@
 /*
  * External function declarations
  */
-extern void queue_read_lock_slowpath(struct qrwlock *lock);
-extern void queue_write_lock_slowpath(struct qrwlock *lock);
+extern void queued_read_lock_slowpath(struct qrwlock *lock);
+extern void queued_write_lock_slowpath(struct qrwlock *lock);
 
 /**
- * queue_read_can_lock- would read_trylock() succeed?
+ * queued_read_can_lock- would read_trylock() succeed?
  * @lock: Pointer to queue rwlock structure
  */
-static inline int queue_read_can_lock(struct qrwlock *lock)
+static inline int queued_read_can_lock(struct qrwlock *lock)
 {
 	return !(atomic_read(&lock->cnts) & _QW_WMASK);
 }
 
 /**
- * queue_write_can_lock- would write_trylock() succeed?
+ * queued_write_can_lock- would write_trylock() succeed?
  * @lock: Pointer to queue rwlock structure
  */
-static inline int queue_write_can_lock(struct qrwlock *lock)
+static inline int queued_write_can_lock(struct qrwlock *lock)
 {
 	return !atomic_read(&lock->cnts);
 }
 
 /**
- * queue_read_trylock - try to acquire read lock of a queue rwlock
+ * queued_read_trylock - try to acquire read lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  * Return: 1 if lock acquired, 0 if failed
  */
-static inline int queue_read_trylock(struct qrwlock *lock)
+static inline int queued_read_trylock(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -77,11 +77,11 @@ static inline int queue_read_trylock(struct qrwlock *lock)
 }
 
 /**
- * queue_write_trylock - try to acquire write lock of a queue rwlock
+ * queued_write_trylock - try to acquire write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  * Return: 1 if lock acquired, 0 if failed
  */
-static inline int queue_write_trylock(struct qrwlock *lock)
+static inline int queued_write_trylock(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -93,10 +93,10 @@ static inline int queue_write_trylock(struct qrwlock *lock)
 				     cnts, cnts | _QW_LOCKED) == cnts);
 }
 /**
- * queue_read_lock - acquire read lock of a queue rwlock
+ * queued_read_lock - acquire read lock of a queue rwlock
  * @lock: Pointer to queue rwlock structure
  */
-static inline void queue_read_lock(struct qrwlock *lock)
+static inline void queued_read_lock(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -105,27 +105,27 @@ static inline void queue_read_lock(struct qrwlock *lock)
 		return;
 
 	/* The slowpath will decrement the reader count, if necessary. */
-	queue_read_lock_slowpath(lock);
+	queued_read_lock_slowpath(lock);
 }
 
 /**
- * queue_write_lock - acquire write lock of a queue rwlock
+ * queued_write_lock - acquire write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-static inline void queue_write_lock(struct qrwlock *lock)
+static inline void queued_write_lock(struct qrwlock *lock)
 {
 	/* Optimize for the unfair lock case where the fair flag is 0. */
 	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
 		return;
 
-	queue_write_lock_slowpath(lock);
+	queued_write_lock_slowpath(lock);
 }
 
 /**
- * queue_read_unlock - release read lock of a queue rwlock
+ * queued_read_unlock - release read lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-static inline void queue_read_unlock(struct qrwlock *lock)
+static inline void queued_read_unlock(struct qrwlock *lock)
 {
 	/*
 	 * Atomically decrement the reader count
@@ -134,12 +134,12 @@ static inline void queue_read_unlock(struct qrwlock *lock)
 	atomic_sub(_QR_BIAS, &lock->cnts);
 }
 
-#ifndef queue_write_unlock
+#ifndef queued_write_unlock
 /**
- * queue_write_unlock - release write lock of a queue rwlock
+ * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-static inline void queue_write_unlock(struct qrwlock *lock)
+static inline void queued_write_unlock(struct qrwlock *lock)
 {
 	/*
 	 * If the writer field is atomic, it can be cleared directly.
@@ -154,13 +154,13 @@ static inline void queue_write_unlock(struct qrwlock *lock)
  * Remapping rwlock architecture specific functions to the corresponding
  * queue rwlock functions.
  */
-#define arch_read_can_lock(l)	queue_read_can_lock(l)
-#define arch_write_can_lock(l)	queue_write_can_lock(l)
-#define arch_read_lock(l)	queue_read_lock(l)
-#define arch_write_lock(l)	queue_write_lock(l)
-#define arch_read_trylock(l)	queue_read_trylock(l)
-#define arch_write_trylock(l)	queue_write_trylock(l)
-#define arch_read_unlock(l)	queue_read_unlock(l)
-#define arch_write_unlock(l)	queue_write_unlock(l)
+#define arch_read_can_lock(l)	queued_read_can_lock(l)
+#define arch_write_can_lock(l)	queued_write_can_lock(l)
+#define arch_read_lock(l)	queued_read_lock(l)
+#define arch_write_lock(l)	queued_write_lock(l)
+#define arch_read_trylock(l)	queued_read_trylock(l)
+#define arch_write_trylock(l)	queued_write_trylock(l)
+#define arch_read_unlock(l)	queued_read_unlock(l)
+#define arch_write_unlock(l)	queued_write_unlock(l)
 
 #endif /* __ASM_GENERIC_QRWLOCK_H */
-- 
cgit v1.2.3


From 0e06e5be70d392aa842c1455ec2d0baf62aeed48 Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Fri, 19 Jun 2015 11:50:01 -0400
Subject: locking/qrwlock: Better optimization for interrupt context readers

The qrwlock is fair in the process context, but becoming unfair when
in the interrupt context to support use cases like the tasklist_lock.

The current code isn't that well-documented on what happens when
in the interrupt context. The rspin_until_writer_unlock() will only
spin if the writer has gotten the lock. If the writer is still in the
waiting state, the increment in the reader count will cause the writer
to remain in the waiting state and the new interrupt context reader
will get the lock and return immediately. The current code, however,
does an additional read of the lock value which is not necessary as
the information has already been there in the fast path. This may
sometime cause an additional cacheline transfer when the lock is
highly contended.

This patch passes the lock value information gotten in the fast path
to the slow path to eliminate the additional read. It also documents
the action for the interrupt context readers more clearly.

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1434729002-57724-3-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qrwlock.h |  4 ++--
 kernel/locking/qrwlock.c      | 13 +++++++------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 55e3ee1d2415..deb9e8b0eb9e 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -36,7 +36,7 @@
 /*
  * External function declarations
  */
-extern void queued_read_lock_slowpath(struct qrwlock *lock);
+extern void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts);
 extern void queued_write_lock_slowpath(struct qrwlock *lock);
 
 /**
@@ -105,7 +105,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 		return;
 
 	/* The slowpath will decrement the reader count, if necessary. */
-	queued_read_lock_slowpath(lock);
+	queued_read_lock_slowpath(lock, cnts);
 }
 
 /**
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 49057d413b6e..d9c36c5f5711 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -62,20 +62,21 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 /**
  * queued_read_lock_slowpath - acquire read lock of a queue rwlock
  * @lock: Pointer to queue rwlock structure
+ * @cnts: Current qrwlock lock value
  */
-void queued_read_lock_slowpath(struct qrwlock *lock)
+void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 {
-	u32 cnts;
-
 	/*
 	 * Readers come here when they cannot get the lock without waiting
 	 */
 	if (unlikely(in_interrupt())) {
 		/*
-		 * Readers in interrupt context will spin until the lock is
-		 * available without waiting in the queue.
+		 * Readers in interrupt context will get the lock immediately
+		 * if the writer is just waiting (not holding the lock yet).
+		 * The rspin_until_writer_unlock() function returns immediately
+		 * in this case. Otherwise, they will spin until the lock
+		 * is available without waiting in the queue.
 		 */
-		cnts = smp_load_acquire((u32 *)&lock->cnts);
 		rspin_until_writer_unlock(lock, cnts);
 		return;
 	}
-- 
cgit v1.2.3


From 56d1defe0bbddaa97d6e74b51490904130fd4f1d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 15 Jul 2015 15:47:25 +0200
Subject: atomic: Prepare generic atomic implementation for logic ops

Clean up the #ifdef guards a bit to prepare for architectures to
supply their own logic ops.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-generic/atomic.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 1973ad2b13f4..92947e0a532a 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -98,14 +98,22 @@ ATOMIC_OP_RETURN(add, +)
 ATOMIC_OP_RETURN(sub, -)
 #endif
 
-#ifndef atomic_clear_mask
+#ifndef atomic_and
 ATOMIC_OP(and, &)
+#endif
+
+#ifndef atomic_clear_mask
 #define atomic_clear_mask(i, v) atomic_and(~(i), (v))
 #endif
 
-#ifndef atomic_set_mask
+#ifndef atomic_or
+#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
 #define CONFIG_ARCH_HAS_ATOMIC_OR
+#endif
 ATOMIC_OP(or, |)
+#endif
+
+#ifndef atomic_set_mask
 #define atomic_set_mask(i, v)	atomic_or((i), (v))
 #endif
 
-- 
cgit v1.2.3


From e6942b7de2dfe44ebde9bae57dadece5abca9de8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 23 Apr 2014 19:32:50 +0200
Subject: atomic: Provide atomic_{or,xor,and}

Implement atomic logic ops -- atomic_{or,xor,and}.

These will replace the atomic_{set,clear}_mask functions that are
available on some archs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/alpha/include/asm/atomic.h        |  1 -
 arch/arc/include/asm/atomic.h          |  1 -
 arch/arm/include/asm/atomic.h          |  1 -
 arch/arm64/include/asm/atomic.h        |  1 -
 arch/avr32/include/asm/atomic.h        |  2 --
 arch/blackfin/include/asm/atomic.h     |  2 --
 arch/frv/include/asm/atomic.h          |  2 --
 arch/h8300/include/asm/atomic.h        |  2 --
 arch/hexagon/include/asm/atomic.h      |  2 --
 arch/ia64/include/asm/atomic.h         |  2 --
 arch/m32r/include/asm/atomic.h         |  2 --
 arch/m68k/include/asm/atomic.h         |  2 --
 arch/metag/include/asm/atomic_lnkget.h |  2 --
 arch/mips/include/asm/atomic.h         |  2 --
 arch/mn10300/include/asm/atomic.h      |  2 --
 arch/parisc/include/asm/atomic.h       |  2 --
 arch/powerpc/include/asm/atomic.h      |  2 --
 arch/s390/include/asm/atomic.h         |  2 --
 arch/sh/include/asm/atomic-grb.h       |  2 --
 arch/sparc/include/asm/atomic_32.h     |  2 --
 arch/sparc/include/asm/atomic_64.h     |  2 --
 arch/tile/include/asm/atomic_32.h      |  2 --
 arch/tile/include/asm/atomic_64.h      |  2 --
 arch/x86/include/asm/atomic.h          |  2 --
 arch/xtensa/include/asm/atomic.h       |  2 --
 include/asm-generic/atomic.h           | 21 ++++++++++++---------
 include/asm-generic/atomic64.h         |  4 ++++
 include/linux/atomic.h                 | 13 -------------
 lib/atomic64.c                         |  3 +++
 29 files changed, 19 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 0eff853398d2..e8c956098424 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -110,7 +110,6 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)	\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
 #define atomic_andnot atomic_andnot
 #define atomic64_andnot atomic64_andnot
 
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index e90b701fc6a8..2a847821dee1 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -144,7 +144,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
 #define atomic_andnot atomic_andnot
 
 ATOMIC_OP(and, &=, and)
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index ff214bac9cb4..82b75a7cb762 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -194,7 +194,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
 #define atomic_andnot atomic_andnot
 
 ATOMIC_OP(and, &=, and)
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 2876173397b2..866a71fca9a3 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -85,7 +85,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
 #define atomic_andnot atomic_andnot
 
 ATOMIC_OP(and, and)
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 115d3005e4bc..97c9bdf83409 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -51,8 +51,6 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 	(void)__atomic_##op##_return(i, v);				\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, and)
 ATOMIC_OP(or, or)
 ATOMIC_OP(xor, eor)
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index eafa55b81a7b..2d6a7a3823c3 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -28,8 +28,6 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
 #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i)
 #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i))
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 #define atomic_or(i, v)  (void)__raw_atomic_or_asm(&(v)->counter, i)
 #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i)
 #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i)
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 74d22454d7c6..fc48bea26b40 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -192,8 +192,6 @@ static inline void atomic64_##op(long long i, atomic64_t *v)		\
 	(void)__atomic64_fetch_##op(i, &v->counter);			\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(or)
 ATOMIC_OP(and)
 ATOMIC_OP(xor)
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index f181f820be33..c4d061f09c44 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -41,8 +41,6 @@ static inline void atomic_##op(int i, atomic_t *v)		\
 ATOMIC_OP_RETURN(add, +=)
 ATOMIC_OP_RETURN(sub, -=)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &=)
 ATOMIC_OP(or,  |=)
 ATOMIC_OP(xor, ^=)
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 4efe2c7c0dd8..811d61f6422d 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -132,8 +132,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 0809ef5d6b9a..be4beeb77d57 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -69,8 +69,6 @@ ATOMIC_OP(sub, -)
 		: ia64_atomic_sub(__ia64_asr_i, v);			\
 })
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &)
 ATOMIC_OP(or, |)
 ATOMIC_OP(xor, ^)
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index 7245463c1e98..b2a13fbd5be0 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -94,8 +94,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index c30e43ea49a3..93ebd96aa494 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -77,8 +77,6 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &=, and)
 ATOMIC_OP(or, |=, or)
 ATOMIC_OP(xor, ^=, eor)
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index 930c12cb8d37..0642606de901 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -74,8 +74,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 0430ba6ab762..4c42fd9af777 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -137,8 +137,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v)		      \
 ATOMIC_OPS(add, +=, addu)
 ATOMIC_OPS(sub, -=, subu)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &=, and)
 ATOMIC_OP(or, |=, or)
 ATOMIC_OP(xor, ^=, xor)
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 03eea8158cf9..f5a63f0bda46 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -89,8 +89,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index be2c50ddebd6..2536965d00ea 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -126,8 +126,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, &=)
 ATOMIC_OP(or, |=)
 ATOMIC_OP(xor, ^=)
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 6ca89e2aca15..55f106ed12bf 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -67,8 +67,6 @@ static __inline__ int atomic_##op##_return(int a, atomic_t *v)		\
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, subf)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and, and)
 ATOMIC_OP(or, or)
 ATOMIC_OP(xor, xor)
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index b3859d8e001f..d761aeff72da 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -282,8 +282,6 @@ static inline void atomic64_##op(long i, atomic64_t *v)			\
 	__ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER);	\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC64_OP(and, AND)
 ATOMIC64_OP(or, OR)
 ATOMIC64_OP(xor, XOR)
diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h
index 4b03830d48c7..b94df40e5f2d 100644
--- a/arch/sh/include/asm/atomic-grb.h
+++ b/arch/sh/include/asm/atomic-grb.h
@@ -48,8 +48,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index e19d8880b146..7dcbebbcaec6 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -17,8 +17,6 @@
 #include <asm/barrier.h>
 #include <asm-generic/atomic64.h>
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 #define ATOMIC_INIT(i)  { (i) }
 
 int atomic_add_return(int, atomic_t *);
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index d6af27c93450..917084ace49d 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -33,8 +33,6 @@ long atomic64_##op##_return(long, atomic64_t *);
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 94237922f0dd..d320ce253d86 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -41,8 +41,6 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 	_atomic_##op((unsigned long *)&v->counter, i);			\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index d07d9fc6e2a1..096a56d6ead4 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -58,8 +58,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return oldval;
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 static inline void atomic_and(int i, atomic_t *v)
 {
 	__insn_fetchand4((void *)&v->counter, i);
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index f3a3ec040694..b3493023efda 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -191,8 +191,6 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 			: "memory");					\
 }
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 4dd2450300a6..31371f43c23b 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -145,8 +145,6 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 92947e0a532a..a41b0b8f7404 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -102,24 +102,27 @@ ATOMIC_OP_RETURN(sub, -)
 ATOMIC_OP(and, &)
 #endif
 
-#ifndef atomic_clear_mask
-#define atomic_clear_mask(i, v) atomic_and(~(i), (v))
-#endif
-
 #ifndef atomic_or
-#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
-#define CONFIG_ARCH_HAS_ATOMIC_OR
-#endif
 ATOMIC_OP(or, |)
 #endif
 
-#ifndef atomic_set_mask
-#define atomic_set_mask(i, v)	atomic_or((i), (v))
+#ifndef atomic_xor
+ATOMIC_OP(xor, ^)
 #endif
 
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_and(~mask, v);
+}
+
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 30ad9c86cebb..d48e78ccad3d 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -32,6 +32,10 @@ extern long long atomic64_##op##_return(long long a, atomic64_t *v);
 ATOMIC64_OPS(add)
 ATOMIC64_OPS(sub)
 
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
+
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 5b08a8540ecf..7d6279012a1f 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -111,19 +111,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
-#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
-static inline void atomic_or(int i, atomic_t *v)
-{
-	int old;
-	int new;
-
-	do {
-		old = atomic_read(v);
-		new = old | i;
-	} while (atomic_cmpxchg(v, old, new) != old);
-}
-#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */
-
 #include <asm-generic/atomic-long.h>
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 1298c05ef528..2886ebac6567 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -102,6 +102,9 @@ EXPORT_SYMBOL(atomic64_##op##_return);
 
 ATOMIC64_OPS(add, +=)
 ATOMIC64_OPS(sub, -=)
+ATOMIC64_OP(and, &=)
+ATOMIC64_OP(or, |=)
+ATOMIC64_OP(xor, ^=)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
-- 
cgit v1.2.3


From de9e432cb5de1bf2952919dc0b22e4bec0ed8d53 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Apr 2015 01:12:32 +0200
Subject: atomic: Collapse all atomic_{set,clear}_mask definitions

Move the now generic definitions of atomic_{set,clear}_mask() into
linux/atomic.h to avoid endless and pointless repetition.

Also, provide an atomic_andnot() wrapper for those few archs that can
implement that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arc/include/asm/atomic.h          | 10 ----------
 arch/blackfin/include/asm/atomic.h     | 10 ----------
 arch/frv/include/asm/atomic.h          | 10 ----------
 arch/h8300/include/asm/atomic.h        | 10 ----------
 arch/m32r/include/asm/atomic.h         | 11 -----------
 arch/m68k/include/asm/atomic.h         | 10 ----------
 arch/metag/include/asm/atomic_lnkget.h | 10 ----------
 arch/metag/include/asm/atomic_lock1.h  | 10 ----------
 arch/mn10300/include/asm/atomic.h      | 24 ------------------------
 arch/powerpc/kernel/misc_32.S          | 19 -------------------
 arch/s390/include/asm/atomic.h         | 10 ----------
 arch/sh/include/asm/atomic.h           | 10 ----------
 arch/x86/include/asm/atomic.h          | 10 ----------
 arch/xtensa/include/asm/atomic.h       | 10 ----------
 include/asm-generic/atomic.h           | 10 ----------
 include/linux/atomic.h                 | 25 +++++++++++++++++++++++++
 16 files changed, 25 insertions(+), 174 deletions(-)

(limited to 'include')

diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 2a847821dee1..d8a85e706fba 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -155,16 +155,6 @@ ATOMIC_OP(xor, ^=, xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 /**
  * __atomic_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 2d6a7a3823c3..1c1c42330c99 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -32,16 +32,6 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
 #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i)
 #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i)
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif
 
 #include <asm-generic/atomic.h>
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index fc48bea26b40..0da689def4cc 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -198,14 +198,4 @@ ATOMIC_OP(xor)
 
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index c4d061f09c44..702ee539f87d 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -89,14 +89,4 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return ret;
 }
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index b2a13fbd5be0..025e2a170493 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -243,15 +243,4 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-
-static __inline__ __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static __inline__ __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif	/* _ASM_M32R_ATOMIC_H */
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 93ebd96aa494..039fac120cc0 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -174,16 +174,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 	return c != 0;
 }
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index 0642606de901..21c4c268b86c 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -82,16 +82,6 @@ ATOMIC_OP(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int result, temp;
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index 7d88725a85da..f8efe380fe8b 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -76,16 +76,6 @@ ATOMIC_OP(xor, ^=)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index f5a63f0bda46..375e59140c9c 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -131,30 +131,6 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
-/**
- * atomic_clear_mask - Atomically clear bits in memory
- * @mask: Mask of the bits to be cleared
- * @v: pointer to word in memory
- *
- * Atomically clears the bits set in mask from the memory word specified.
- */
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-/**
- * atomic_set_mask - Atomically set bits in memory
- * @mask: Mask of the bits to be set
- * @v: pointer to word in memory
- *
- * Atomically sets the bits set in mask from the memory word specified.
- */
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #endif /* __KERNEL__ */
 #endif /* CONFIG_SMP */
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 7c6bb4b17b49..ed3ab509faca 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -595,25 +595,6 @@ _GLOBAL(copy_page)
 	li	r11,4
 	b	2b
 
-/*
- * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
- * void atomic_set_mask(atomic_t mask, atomic_t *addr);
- */
-_GLOBAL(atomic_clear_mask)
-10:	lwarx	r5,0,r4
-	andc	r5,r5,r3
-	PPC405_ERR77(0,r4)
-	stwcx.	r5,0,r4
-	bne-	10b
-	blr
-_GLOBAL(atomic_set_mask)
-10:	lwarx	r5,0,r4
-	or	r5,r5,r3
-	PPC405_ERR77(0,r4)
-	stwcx.	r5,0,r4
-	bne-	10b
-	blr
-
 /*
  * Extended precision shifts.
  *
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index d761aeff72da..117fa5c921c1 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -132,16 +132,6 @@ ATOMIC_OP(xor, XOR)
 
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index cee0245257e1..05b9f74ce2d5 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -25,16 +25,6 @@
 #include <asm/atomic-irq.h>
 #endif
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b3493023efda..fb52aa644aab 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -234,16 +234,6 @@ static __always_inline short int atomic_inc_short(short int *v)
 	return *v;
 }
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
 #else
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 31371f43c23b..e0be67936990 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -153,16 +153,6 @@ ATOMIC_OP(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
 /**
  * atomic_sub_and_test - subtract value from variable and test result
  * @i: integer value to subtract
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index a41b0b8f7404..d4d7e337fdcb 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -113,16 +113,6 @@ ATOMIC_OP(xor, ^)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_and(~mask, v);
-}
-
-static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	atomic_or(mask, v);
-}
-
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 7d6279012a1f..8b98b423388f 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -28,6 +28,23 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define atomic_inc_not_zero(v)		atomic_add_unless((v), 1, 0)
 #endif
 
+#ifndef atomic_andnot
+static inline void atomic_andnot(int i, atomic_t *v)
+{
+	atomic_and(~i, v);
+}
+#endif
+
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_andnot(mask, v);
+}
+
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
+
 /**
  * atomic_inc_not_zero_hint - increment if not null
  * @v: pointer of type atomic_t
@@ -115,4 +132,12 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
+
+#ifndef atomic64_andnot
+static inline void atomic64_andnot(long long i, atomic64_t *v)
+{
+	atomic64_and(~i, v);
+}
+#endif
+
 #endif /* _LINUX_ATOMIC_H */
-- 
cgit v1.2.3


From 76695af20c015206cffb84b15912be6797d0cca2 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Sun, 2 Aug 2015 17:11:04 +0200
Subject: locking, arch: use WRITE_ONCE()/READ_ONCE() in
 smp_store_release()/smp_load_acquire()

Replace ACCESS_ONCE() macro in smp_store_release() and smp_load_acquire()
with WRITE_ONCE() and READ_ONCE() on x86, arm, arm64, ia64, metag, mips,
powerpc, s390, sparc and asm-generic since ACCESS_ONCE() does not work
reliably on non-scalar types.

WRITE_ONCE() and READ_ONCE() were introduced in the following commits:

  230fa253df63 ("kernel: Provide READ_ONCE and ASSIGN_ONCE")
  43239cbe79fc ("kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)")

Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Alexander Duyck <alexander.h.duyck@redhat.com>
Cc: Andre Przywara <andre.przywara@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Link: http://lkml.kernel.org/r/1438528264-714-1-git-send-email-andreyknvl@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/barrier.h      | 4 ++--
 arch/arm64/include/asm/barrier.h    | 4 ++--
 arch/ia64/include/asm/barrier.h     | 4 ++--
 arch/metag/include/asm/barrier.h    | 4 ++--
 arch/mips/include/asm/barrier.h     | 4 ++--
 arch/powerpc/include/asm/barrier.h  | 4 ++--
 arch/s390/include/asm/barrier.h     | 4 ++--
 arch/sparc/include/asm/barrier_64.h | 4 ++--
 arch/x86/include/asm/barrier.h      | 8 ++++----
 include/asm-generic/barrier.h       | 4 ++--
 10 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 6c2327e1c732..70393574e0fa 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -67,12 +67,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 0fa47c4275cb..ef93b20bc964 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -44,12 +44,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index 843ba435e43b..df896a1c41d3 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -66,12 +66,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index 5a696e507930..172b7e5efc53 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -90,12 +90,12 @@ static inline void fence(void)
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 7ecba84656d4..752e0b86c171 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -133,12 +133,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 51ccc7232042..0eca6efc0631 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -76,12 +76,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_lwsync();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_lwsync();							\
 	___p1;								\
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index e6f8615a11eb..d48fe0162331 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -42,12 +42,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 809941e33e12..14a928601657 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -60,12 +60,12 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e51a8f803f55..d2bcfbed11c3 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -57,12 +57,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
@@ -74,12 +74,12 @@ do {									\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 55e3abc2d027..b42afada1280 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -108,12 +108,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
-- 
cgit v1.2.3


From 76b235c6bcb16062d663e2ee96db0b69f2e6bc14 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 14:45:44 +0200
Subject: jump_label: Rename JUMP_LABEL_{EN,DIS}ABLE to JUMP_LABEL_{JMP,NOP}

Since we've already stepped away from ENABLE is a JMP and DISABLE is a
NOP with the branch_default bits, and are going to make it even worse,
rename it to make it all clearer.

This way we don't mix multiple levels of logic attributes, but have a
plain 'physical' name for what the current instruction patching status
of a jump label is.

This is a first step in removing the naming confusion that has led to
a stream of avoidable bugs such as:

  a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()")

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
[ Beefed up the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/kernel/jump_label.c     |  2 +-
 arch/arm64/kernel/jump_label.c   |  2 +-
 arch/mips/kernel/jump_label.c    |  2 +-
 arch/powerpc/kernel/jump_label.c |  2 +-
 arch/s390/kernel/jump_label.c    |  2 +-
 arch/sparc/kernel/jump_label.c   |  2 +-
 arch/x86/kernel/jump_label.c     |  2 +-
 include/linux/jump_label.h       |  4 ++--
 kernel/jump_label.c              | 18 +++++++++---------
 9 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c
index e39cbf488cfe..845a5dd9c42b 100644
--- a/arch/arm/kernel/jump_label.c
+++ b/arch/arm/kernel/jump_label.c
@@ -12,7 +12,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry,
 	void *addr = (void *)entry->code;
 	unsigned int insn;
 
-	if (type == JUMP_LABEL_ENABLE)
+	if (type == JUMP_LABEL_JMP)
 		insn = arm_gen_branch(entry->code, entry->target);
 	else
 		insn = arm_gen_nop();
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 4f1fec7a46db..c2dd1ad3e648 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -28,7 +28,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 	void *addr = (void *)entry->code;
 	u32 insn;
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		insn = aarch64_insn_gen_branch_imm(entry->code,
 						   entry->target,
 						   AARCH64_INSN_BRANCH_NOLINK);
diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c
index dda800e9e731..3e586daa3a32 100644
--- a/arch/mips/kernel/jump_label.c
+++ b/arch/mips/kernel/jump_label.c
@@ -51,7 +51,7 @@ void arch_jump_label_transform(struct jump_entry *e,
 	/* Target must have the right alignment and ISA must be preserved. */
 	BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT);
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op;
 		insn.j_format.target = e->target >> J_RANGE_SHIFT;
 	} else {
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index a1ed8a8c7cb4..6472472093d0 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -17,7 +17,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 {
 	u32 *addr = (u32 *)(unsigned long)entry->code;
 
-	if (type == JUMP_LABEL_ENABLE)
+	if (type == JUMP_LABEL_JMP)
 		patch_branch(addr, entry->target, 0);
 	else
 		patch_instruction(addr, PPC_INST_NOP);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index a90299600483..a83d2248fea9 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -64,7 +64,7 @@ static void __jump_label_transform(struct jump_entry *entry,
 {
 	struct insn old, new;
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		jump_label_make_nop(entry, &old);
 		jump_label_make_branch(entry, &new);
 	} else {
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
index 48565c11e82a..59bbeff55024 100644
--- a/arch/sparc/kernel/jump_label.c
+++ b/arch/sparc/kernel/jump_label.c
@@ -16,7 +16,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 	u32 val;
 	u32 *insn = (u32 *) (unsigned long) entry->code;
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		s32 off = (s32)entry->target - (s32)entry->code;
 
 #ifdef CONFIG_SPARC64
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 26d5a55a2736..e565e0e4d216 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -45,7 +45,7 @@ static void __jump_label_transform(struct jump_entry *entry,
 	const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
 	const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		if (init) {
 			/*
 			 * Jump label is enabled for the first time.
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index f4de473f226b..6a8b4fe10ad8 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -86,8 +86,8 @@ struct static_key {
 #ifndef __ASSEMBLY__
 
 enum jump_label_type {
-	JUMP_LABEL_DISABLE = 0,
-	JUMP_LABEL_ENABLE,
+	JUMP_LABEL_NOP = 0,
+	JUMP_LABEL_JMP,
 };
 
 struct module;
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 52ebaca1b9fc..96d8945c8bf3 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -65,9 +65,9 @@ void static_key_slow_inc(struct static_key *key)
 	jump_label_lock();
 	if (atomic_read(&key->enabled) == 0) {
 		if (!jump_label_get_branch_default(key))
-			jump_label_update(key, JUMP_LABEL_ENABLE);
+			jump_label_update(key, JUMP_LABEL_JMP);
 		else
-			jump_label_update(key, JUMP_LABEL_DISABLE);
+			jump_label_update(key, JUMP_LABEL_NOP);
 	}
 	atomic_inc(&key->enabled);
 	jump_label_unlock();
@@ -88,9 +88,9 @@ static void __static_key_slow_dec(struct static_key *key,
 		schedule_delayed_work(work, rate_limit);
 	} else {
 		if (!jump_label_get_branch_default(key))
-			jump_label_update(key, JUMP_LABEL_DISABLE);
+			jump_label_update(key, JUMP_LABEL_NOP);
 		else
-			jump_label_update(key, JUMP_LABEL_ENABLE);
+			jump_label_update(key, JUMP_LABEL_JMP);
 	}
 	jump_label_unlock();
 }
@@ -184,9 +184,9 @@ static enum jump_label_type jump_label_type(struct static_key *key)
 	bool state = static_key_enabled(key);
 
 	if ((!true_branch && state) || (true_branch && !state))
-		return JUMP_LABEL_ENABLE;
+		return JUMP_LABEL_JMP;
 
-	return JUMP_LABEL_DISABLE;
+	return JUMP_LABEL_NOP;
 }
 
 void __init jump_label_init(void)
@@ -276,7 +276,7 @@ void jump_label_apply_nops(struct module *mod)
 		return;
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
-		arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
+		arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
 	}
 }
 
@@ -318,8 +318,8 @@ static int jump_label_add_module(struct module *mod)
 		jlm->next = key->next;
 		key->next = jlm;
 
-		if (jump_label_type(key) == JUMP_LABEL_ENABLE)
-			__jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
+		if (jump_label_type(key) == JUMP_LABEL_JMP)
+			__jump_label_update(key, iter, iter_stop, JUMP_LABEL_JMP);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From a1efb01feca597b2abbc89873b40ef8ec6690168 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 14:55:40 +0200
Subject: jump_label, locking/static_keys: Rename JUMP_LABEL_TYPE_* and related
 helpers to the static_key* pattern

Rename the JUMP_LABEL_TYPE_* macros to be JUMP_TYPE_* and move the
inline helpers into kernel/jump_label.c, since that's the only place
they're ever used.

Also rename the helpers where it's all about static keys.

This is the second step in removing the naming confusion that has led to
a stream of avoidable bugs such as:

  a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()")

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jump_label.h | 25 +++++--------------------
 kernel/jump_label.c        | 25 ++++++++++++++++---------
 2 files changed, 21 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 6a8b4fe10ad8..0ddb208b8449 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -101,24 +101,9 @@ static inline int static_key_count(struct static_key *key)
 
 #ifdef HAVE_JUMP_LABEL
 
-#define JUMP_LABEL_TYPE_FALSE_BRANCH	0UL
-#define JUMP_LABEL_TYPE_TRUE_BRANCH	1UL
-#define JUMP_LABEL_TYPE_MASK		1UL
-
-static
-inline struct jump_entry *jump_label_get_entries(struct static_key *key)
-{
-	return (struct jump_entry *)((unsigned long)key->entries
-						& ~JUMP_LABEL_TYPE_MASK);
-}
-
-static inline bool jump_label_get_branch_default(struct static_key *key)
-{
-	if (((unsigned long)key->entries & JUMP_LABEL_TYPE_MASK) ==
-	    JUMP_LABEL_TYPE_TRUE_BRANCH)
-		return true;
-	return false;
-}
+#define JUMP_TYPE_FALSE	0UL
+#define JUMP_TYPE_TRUE	1UL
+#define JUMP_TYPE_MASK	1UL
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
@@ -147,10 +132,10 @@ extern void jump_label_apply_nops(struct module *mod);
 
 #define STATIC_KEY_INIT_TRUE ((struct static_key)		\
 	{ .enabled = ATOMIC_INIT(1),				\
-	  .entries = (void *)JUMP_LABEL_TYPE_TRUE_BRANCH })
+	  .entries = (void *)JUMP_TYPE_TRUE })
 #define STATIC_KEY_INIT_FALSE ((struct static_key)		\
 	{ .enabled = ATOMIC_INIT(0),				\
-	  .entries = (void *)JUMP_LABEL_TYPE_FALSE_BRANCH })
+	  .entries = (void *)JUMP_TYPE_FALSE })
 
 #else  /* !HAVE_JUMP_LABEL */
 
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 96d8945c8bf3..85a2a0086c67 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -56,6 +56,11 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
 
 static void jump_label_update(struct static_key *key, int enable);
 
+static inline bool static_key_type(struct static_key *key)
+{
+	return (unsigned long)key->entries & JUMP_TYPE_MASK;
+}
+
 void static_key_slow_inc(struct static_key *key)
 {
 	STATIC_KEY_CHECK_USE();
@@ -64,7 +69,7 @@ void static_key_slow_inc(struct static_key *key)
 
 	jump_label_lock();
 	if (atomic_read(&key->enabled) == 0) {
-		if (!jump_label_get_branch_default(key))
+		if (!static_key_type(key))
 			jump_label_update(key, JUMP_LABEL_JMP);
 		else
 			jump_label_update(key, JUMP_LABEL_NOP);
@@ -87,7 +92,7 @@ static void __static_key_slow_dec(struct static_key *key,
 		atomic_inc(&key->enabled);
 		schedule_delayed_work(work, rate_limit);
 	} else {
-		if (!jump_label_get_branch_default(key))
+		if (!static_key_type(key))
 			jump_label_update(key, JUMP_LABEL_NOP);
 		else
 			jump_label_update(key, JUMP_LABEL_JMP);
@@ -178,15 +183,17 @@ static void __jump_label_update(struct static_key *key,
 	}
 }
 
-static enum jump_label_type jump_label_type(struct static_key *key)
+static inline struct jump_entry *static_key_entries(struct static_key *key)
 {
-	bool true_branch = jump_label_get_branch_default(key);
-	bool state = static_key_enabled(key);
+	return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK);
+}
 
-	if ((!true_branch && state) || (true_branch && !state))
-		return JUMP_LABEL_JMP;
+static enum jump_label_type jump_label_type(struct static_key *key)
+{
+	bool enabled = static_key_enabled(key);
+	bool type = static_key_type(key);
 
-	return JUMP_LABEL_NOP;
+	return enabled ^ type;
 }
 
 void __init jump_label_init(void)
@@ -442,7 +449,7 @@ int jump_label_text_reserved(void *start, void *end)
 static void jump_label_update(struct static_key *key, int enable)
 {
 	struct jump_entry *stop = __stop___jump_table;
-	struct jump_entry *entry = jump_label_get_entries(key);
+	struct jump_entry *entry = static_key_entries(key);
 #ifdef CONFIG_MODULES
 	struct module *mod;
 
-- 
cgit v1.2.3


From e33886b38cc82a9fc3b2d655dfc7f50467594138 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 15:03:40 +0200
Subject: locking/static_keys: Add static_key_{en,dis}able() helpers

Add two helpers to make it easier to treat the refcount as boolean.

Suggested-by: Jason Baron <jasonbaron0@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/jump_label.h | 20 ++++++++++++++++++++
 kernel/sched/core.c        |  6 ++----
 2 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0ddb208b8449..65f0ebac63cf 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -198,6 +198,26 @@ static inline bool static_key_enabled(struct static_key *key)
 	return static_key_count(key) > 0;
 }
 
+static inline void static_key_enable(struct static_key *key)
+{
+	int count = static_key_count(key);
+
+	WARN_ON_ONCE(count < 0 || count > 1);
+
+	if (!count)
+		static_key_slow_inc(key);
+}
+
+static inline void static_key_disable(struct static_key *key)
+{
+	int count = static_key_count(key);
+
+	WARN_ON_ONCE(count < 0 || count > 1);
+
+	if (count)
+		static_key_slow_dec(key);
+}
+
 #endif	/* _LINUX_JUMP_LABEL_H */
 
 #endif /* __ASSEMBLY__ */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78b4bad10081..66ae8baf42fe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -164,14 +164,12 @@ struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
 
 static void sched_feat_disable(int i)
 {
-	if (static_key_enabled(&sched_feat_keys[i]))
-		static_key_slow_dec(&sched_feat_keys[i]);
+	static_key_disable(&sched_feat_keys[i]);
 }
 
 static void sched_feat_enable(int i)
 {
-	if (!static_key_enabled(&sched_feat_keys[i]))
-		static_key_slow_inc(&sched_feat_keys[i]);
+	static_key_enable(&sched_feat_keys[i]);
 }
 #else
 static void sched_feat_disable(int i) { };
-- 
cgit v1.2.3


From 11276d5306b8e5b438a36bbff855fe792d7eaa61 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 24 Jul 2015 15:09:55 +0200
Subject: locking/static_keys: Add a new static_key interface

There are various problems and short-comings with the current
static_key interface:

 - static_key_{true,false}() read like a branch depending on the key
   value, instead of the actual likely/unlikely branch depending on
   init value.

 - static_key_{true,false}() are, as stated above, tied to the
   static_key init values STATIC_KEY_INIT_{TRUE,FALSE}.

 - we're limited to the 2 (out of 4) possible options that compile to
   a default NOP because that's what our arch_static_branch() assembly
   emits.

So provide a new static_key interface:

  DEFINE_STATIC_KEY_TRUE(name);
  DEFINE_STATIC_KEY_FALSE(name);

Which define a key of different types with an initial true/false
value.

Then allow:

   static_branch_likely()
   static_branch_unlikely()

to take a key of either type and emit the right instruction for the
case.

This means adding a second arch_static_branch_jump() assembly helper
which emits a JMP per default.

In order to determine the right instruction for the right state,
encode the branch type in the LSB of jump_entry::key.

This is the final step in removing the naming confusion that has led to
a stream of avoidable bugs such as:

  a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()")

... but it also allows new static key combinations that will give us
performance enhancements in the subsequent patches.

Tested-by: Rabin Vincent <rabin@rab.in> # arm
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> # ppc
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> # s390
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/jump_label.h     |  25 ++++--
 arch/arm64/include/asm/jump_label.h   |  18 +++-
 arch/mips/include/asm/jump_label.h    |  19 ++++-
 arch/powerpc/include/asm/jump_label.h |  19 ++++-
 arch/s390/include/asm/jump_label.h    |  19 ++++-
 arch/sparc/include/asm/jump_label.h   |  35 ++++++--
 arch/x86/include/asm/jump_label.h     |  21 ++++-
 include/linux/jump_label.h            | 149 +++++++++++++++++++++++++++++++---
 kernel/jump_label.c                   |  37 +++++++--
 9 files changed, 298 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 5f337dc5c108..34f7b6980d21 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -4,23 +4,32 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <asm/unified.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-#ifdef CONFIG_THUMB2_KERNEL
-#define JUMP_LABEL_NOP	"nop.w"
-#else
-#define JUMP_LABEL_NOP	"nop"
-#endif
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 WASM(nop) "\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection\n\t"
+		 : :  "i" (&((char *)key)[branch]) :  : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\n\t"
-		 JUMP_LABEL_NOP "\n\t"
+		 WASM(b) " %l[l_yes]\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 ".word 1b, %l[l_yes], %c0\n\t"
 		 ".popsection\n\t"
-		 : :  "i" (key) :  : l_yes);
+		 : :  "i" (&((char *)key)[branch]) :  : l_yes);
 
 	return false;
 l_yes:
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index c0e5165c2f76..1b5e0e843c3a 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -26,14 +26,28 @@
 
 #define JUMP_LABEL_NOP_SIZE		AARCH64_INSN_SIZE
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm goto("1: nop\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 ".align 3\n\t"
 		 ".quad 1b, %l[l_yes], %c0\n\t"
 		 ".popsection\n\t"
-		 :  :  "i"(key) :  : l_yes);
+		 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm goto("1: b %l[l_yes]\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 3\n\t"
+		 ".quad 1b, %l[l_yes], %c0\n\t"
+		 ".popsection\n\t"
+		 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
 
 	return false;
 l_yes:
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 608aa57799c8..e77672539e8e 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -26,14 +26,29 @@
 #define NOP_INSN "nop"
 #endif
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\t" NOP_INSN "\n\t"
 		"nop\n\t"
 		".pushsection __jump_table,  \"aw\"\n\t"
 		WORD_INSN " 1b, %l[l_yes], %0\n\t"
 		".popsection\n\t"
-		: :  "i" (key) : : l_yes);
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\tj %l[l_yes]\n\t"
+		"nop\n\t"
+		".pushsection __jump_table,  \"aw\"\n\t"
+		WORD_INSN " 1b, %l[l_yes], %0\n\t"
+		".popsection\n\t"
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index efbf9a322a23..47e155f15433 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -18,14 +18,29 @@
 #define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
 #define JUMP_LABEL_NOP_SIZE	4
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\n\t"
 		 "nop\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
 		 ".popsection \n\t"
-		 : :  "i" (key) : : l_yes);
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "b %l[l_yes]\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 69972b7957ee..7f9fd5e3f1bf 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -12,14 +12,29 @@
  * We use a brcl 0,2 instruction for jump labels at compile time so it
  * can be easily distinguished from a hotpatch generated instruction.
  */
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("0:	brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
 		".pushsection __jump_table, \"aw\"\n"
 		".balign 8\n"
 		".quad 0b, %l[label], %0\n"
 		".popsection\n"
-		: : "X" (key) : : label);
+		: : "X" (&((char *)key)[branch]) : : label);
+
+	return false;
+label:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("0:	brcl 15, %l[label]\n"
+		".pushsection __jump_table, \"aw\"\n"
+		".balign 8\n"
+		".quad 0b, %l[label], %0\n"
+		".popsection\n"
+		: : "X" (&((char *)key)[branch]) : : label);
+
 	return false;
 label:
 	return true;
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index cc9b04a2b11b..62d0354d1727 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,16 +7,33 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
-		asm_volatile_goto("1:\n\t"
-			 "nop\n\t"
-			 "nop\n\t"
-			 ".pushsection __jump_table,  \"aw\"\n\t"
-			 ".align 4\n\t"
-			 ".word 1b, %l[l_yes], %c0\n\t"
-			 ".popsection \n\t"
-			 : :  "i" (key) : : l_yes);
+	asm_volatile_goto("1:\n\t"
+		 "nop\n\t"
+		 "nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 4\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "b %l[l_yes]\n\t"
+		 "nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 4\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a4c1cf7e93f8..28d7a857f9d1 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -16,7 +16,7 @@
 # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
 #endif
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:"
 		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
@@ -24,7 +24,24 @@ static __always_inline bool arch_static_branch(struct static_key *key)
 		_ASM_ALIGN "\n\t"
 		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
 		".popsection \n\t"
-		: :  "i" (key) : : l_yes);
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:"
+		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
+		"2:\n\t"
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		".popsection \n\t"
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 65f0ebac63cf..e337a1961933 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -107,12 +107,12 @@ static inline int static_key_count(struct static_key *key)
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
-	return arch_static_branch(key);
+	return arch_static_branch(key, false);
 }
 
 static __always_inline bool static_key_true(struct static_key *key)
 {
-	return !static_key_false(key);
+	return !arch_static_branch(key, true);
 }
 
 extern struct jump_entry __start___jump_table[];
@@ -130,12 +130,12 @@ extern void static_key_slow_inc(struct static_key *key);
 extern void static_key_slow_dec(struct static_key *key);
 extern void jump_label_apply_nops(struct module *mod);
 
-#define STATIC_KEY_INIT_TRUE ((struct static_key)		\
+#define STATIC_KEY_INIT_TRUE					\
 	{ .enabled = ATOMIC_INIT(1),				\
-	  .entries = (void *)JUMP_TYPE_TRUE })
-#define STATIC_KEY_INIT_FALSE ((struct static_key)		\
+	  .entries = (void *)JUMP_TYPE_TRUE }
+#define STATIC_KEY_INIT_FALSE					\
 	{ .enabled = ATOMIC_INIT(0),				\
-	  .entries = (void *)JUMP_TYPE_FALSE })
+	  .entries = (void *)JUMP_TYPE_FALSE }
 
 #else  /* !HAVE_JUMP_LABEL */
 
@@ -183,10 +183,8 @@ static inline int jump_label_apply_nops(struct module *mod)
 	return 0;
 }
 
-#define STATIC_KEY_INIT_TRUE ((struct static_key) \
-		{ .enabled = ATOMIC_INIT(1) })
-#define STATIC_KEY_INIT_FALSE ((struct static_key) \
-		{ .enabled = ATOMIC_INIT(0) })
+#define STATIC_KEY_INIT_TRUE	{ .enabled = ATOMIC_INIT(1) }
+#define STATIC_KEY_INIT_FALSE	{ .enabled = ATOMIC_INIT(0) }
 
 #endif	/* HAVE_JUMP_LABEL */
 
@@ -218,6 +216,137 @@ static inline void static_key_disable(struct static_key *key)
 		static_key_slow_dec(key);
 }
 
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Two type wrappers around static_key, such that we can use compile time
+ * type differentiation to emit the right code.
+ *
+ * All the below code is macros in order to play type games.
+ */
+
+struct static_key_true {
+	struct static_key key;
+};
+
+struct static_key_false {
+	struct static_key key;
+};
+
+#define STATIC_KEY_TRUE_INIT  (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE,  }
+#define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, }
+
+#define DEFINE_STATIC_KEY_TRUE(name)	\
+	struct static_key_true name = STATIC_KEY_TRUE_INIT
+
+#define DEFINE_STATIC_KEY_FALSE(name)	\
+	struct static_key_false name = STATIC_KEY_FALSE_INIT
+
+#ifdef HAVE_JUMP_LABEL
+
+/*
+ * Combine the right initial value (type) with the right branch order
+ * to generate the desired result.
+ *
+ *
+ * type\branch|	likely (1)	      |	unlikely (0)
+ * -----------+-----------------------+------------------
+ *            |                       |
+ *  true (1)  |	   ...		      |	   ...
+ *            |    NOP		      |	   JMP L
+ *            |    <br-stmts>	      |	1: ...
+ *            |	L: ...		      |
+ *            |			      |
+ *            |			      |	L: <br-stmts>
+ *            |			      |	   jmp 1b
+ *            |                       |
+ * -----------+-----------------------+------------------
+ *            |                       |
+ *  false (0) |	   ...		      |	   ...
+ *            |    JMP L	      |	   NOP
+ *            |    <br-stmts>	      |	1: ...
+ *            |	L: ...		      |
+ *            |			      |
+ *            |			      |	L: <br-stmts>
+ *            |			      |	   jmp 1b
+ *            |                       |
+ * -----------+-----------------------+------------------
+ *
+ * The initial value is encoded in the LSB of static_key::entries,
+ * type: 0 = false, 1 = true.
+ *
+ * The branch type is encoded in the LSB of jump_entry::key,
+ * branch: 0 = unlikely, 1 = likely.
+ *
+ * This gives the following logic table:
+ *
+ *	enabled	type	branch	  instuction
+ * -----------------------------+-----------
+ *	0	0	0	| NOP
+ *	0	0	1	| JMP
+ *	0	1	0	| NOP
+ *	0	1	1	| JMP
+ *
+ *	1	0	0	| JMP
+ *	1	0	1	| NOP
+ *	1	1	0	| JMP
+ *	1	1	1	| NOP
+ *
+ * Which gives the following functions:
+ *
+ *   dynamic: instruction = enabled ^ branch
+ *   static:  instruction = type ^ branch
+ *
+ * See jump_label_type() / jump_label_init_type().
+ */
+
+extern bool ____wrong_branch_error(void);
+
+#define static_branch_likely(x)							\
+({										\
+	bool branch;								\
+	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
+		branch = !arch_static_branch(&(x)->key, true);			\
+	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
+		branch = !arch_static_branch_jump(&(x)->key, true);		\
+	else									\
+		branch = ____wrong_branch_error();				\
+	branch;									\
+})
+
+#define static_branch_unlikely(x)						\
+({										\
+	bool branch;								\
+	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
+		branch = arch_static_branch_jump(&(x)->key, false);		\
+	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
+		branch = arch_static_branch(&(x)->key, false);			\
+	else									\
+		branch = ____wrong_branch_error();				\
+	branch;									\
+})
+
+#else /* !HAVE_JUMP_LABEL */
+
+#define static_branch_likely(x)		likely(static_key_enabled(&(x)->key))
+#define static_branch_unlikely(x)	unlikely(static_key_enabled(&(x)->key))
+
+#endif /* HAVE_JUMP_LABEL */
+
+/*
+ * Advanced usage; refcount, branch is enabled when: count != 0
+ */
+
+#define static_branch_inc(x)		static_key_slow_inc(&(x)->key)
+#define static_branch_dec(x)		static_key_slow_dec(&(x)->key)
+
+/*
+ * Normal usage; boolean enable/disable.
+ */
+
+#define static_branch_enable(x)		static_key_enable(&(x)->key)
+#define static_branch_disable(x)	static_key_disable(&(x)->key)
+
 #endif	/* _LINUX_JUMP_LABEL_H */
 
 #endif /* __ASSEMBLY__ */
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 2e7cc1e4b4b5..8fd00d892286 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -165,16 +165,22 @@ static inline bool static_key_type(struct static_key *key)
 
 static inline struct static_key *jump_entry_key(struct jump_entry *entry)
 {
-	return (struct static_key *)((unsigned long)entry->key);
+	return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static bool jump_entry_branch(struct jump_entry *entry)
+{
+	return (unsigned long)entry->key & 1UL;
 }
 
 static enum jump_label_type jump_label_type(struct jump_entry *entry)
 {
 	struct static_key *key = jump_entry_key(entry);
 	bool enabled = static_key_enabled(key);
-	bool type = static_key_type(key);
+	bool branch = jump_entry_branch(entry);
 
-	return enabled ^ type;
+	/* See the comment in linux/jump_label.h */
+	return enabled ^ branch;
 }
 
 static void __jump_label_update(struct static_key *key,
@@ -205,7 +211,10 @@ void __init jump_label_init(void)
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
 
-		arch_jump_label_transform_static(iter, jump_label_type(iter));
+		/* rewrite NOPs */
+		if (jump_label_type(iter) == JUMP_LABEL_NOP)
+			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
+
 		iterk = jump_entry_key(iter);
 		if (iterk == key)
 			continue;
@@ -225,6 +234,16 @@ void __init jump_label_init(void)
 
 #ifdef CONFIG_MODULES
 
+static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
+{
+	struct static_key *key = jump_entry_key(entry);
+	bool type = static_key_type(key);
+	bool branch = jump_entry_branch(entry);
+
+	/* See the comment in linux/jump_label.h */
+	return type ^ branch;
+}
+
 struct static_key_mod {
 	struct static_key_mod *next;
 	struct jump_entry *entries;
@@ -276,8 +295,11 @@ void jump_label_apply_nops(struct module *mod)
 	if (iter_start == iter_stop)
 		return;
 
-	for (iter = iter_start; iter < iter_stop; iter++)
-		arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
+	for (iter = iter_start; iter < iter_stop; iter++) {
+		/* Only write NOPs for arch_branch_static(). */
+		if (jump_label_init_type(iter) == JUMP_LABEL_NOP)
+			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
+	}
 }
 
 static int jump_label_add_module(struct module *mod)
@@ -318,7 +340,8 @@ static int jump_label_add_module(struct module *mod)
 		jlm->next = key->next;
 		key->next = jlm;
 
-		if (jump_label_type(iter) == JUMP_LABEL_JMP)
+		/* Only update if we've changed from our initial state */
+		if (jump_label_type(iter) != jump_label_init_type(iter))
 			__jump_label_update(key, iter, iter_stop);
 	}
 
-- 
cgit v1.2.3


From 412758cb26704e5087ca2976ec3b28fb2bdbfad4 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Thu, 30 Jul 2015 03:59:48 +0000
Subject: jump label, locking/static_keys: Update docs

Signed-off-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: benh@kernel.crashing.org
Cc: bp@alien8.de
Cc: davem@davemloft.net
Cc: ddaney@caviumnetworks.com
Cc: heiko.carstens@de.ibm.com
Cc: linux-kernel@vger.kernel.org
Cc: liuj97@gmail.com
Cc: luto@amacapital.net
Cc: michael@ellerman.id.au
Cc: rabin@rab.in
Cc: ralf@linux-mips.org
Cc: rostedt@goodmis.org
Cc: vbabka@suse.cz
Cc: will.deacon@arm.com
Link: http://lkml.kernel.org/r/6b50f2f6423a2244f37f4b1d2d6c211b9dcdf4f8.1438227999.git.jbaron@akamai.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/static-keys.txt | 99 +++++++++++++++++++++++--------------------
 include/linux/jump_label.h    | 67 ++++++++++++++++++++---------
 2 files changed, 98 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index c4407a41b0fc..f4cb0b2d5cd7 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -1,7 +1,22 @@
 			Static Keys
 			-----------
 
-By: Jason Baron <jbaron@redhat.com>
+DEPRECATED API:
+
+The use of 'struct static_key' directly, is now DEPRECATED. In addition
+static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following:
+
+struct static_key false = STATIC_KEY_INIT_FALSE;
+struct static_key true = STATIC_KEY_INIT_TRUE;
+static_key_true()
+static_key_false()
+
+The updated API replacements are:
+
+DEFINE_STATIC_KEY_TRUE(key);
+DEFINE_STATIC_KEY_FALSE(key);
+static_key_likely()
+statick_key_unlikely()
 
 0) Abstract
 
@@ -9,22 +24,22 @@ Static keys allows the inclusion of seldom used features in
 performance-sensitive fast-path kernel code, via a GCC feature and a code
 patching technique. A quick example:
 
-	struct static_key key = STATIC_KEY_INIT_FALSE;
+	DEFINE_STATIC_KEY_FALSE(key);
 
 	...
 
-        if (static_key_false(&key))
+        if (static_branch_unlikely(&key))
                 do unlikely code
         else
                 do likely code
 
 	...
-	static_key_slow_inc();
+	static_branch_enable(&key);
 	...
-	static_key_slow_inc();
+	static_branch_disable(&key);
 	...
 
-The static_key_false() branch will be generated into the code with as little
+The static_branch_unlikely() branch will be generated into the code with as little
 impact to the likely code path as possible.
 
 
@@ -56,7 +71,7 @@ the branch site to change the branch direction.
 
 For example, if we have a simple branch that is disabled by default:
 
-	if (static_key_false(&key))
+	if (static_branch_unlikely(&key))
 		printk("I am the true branch\n");
 
 Thus, by default the 'printk' will not be emitted. And the code generated will
@@ -75,68 +90,55 @@ the basis for the static keys facility.
 
 In order to make use of this optimization you must first define a key:
 
-	struct static_key key;
-
-Which is initialized as:
-
-	struct static_key key = STATIC_KEY_INIT_TRUE;
+	DEFINE_STATIC_KEY_TRUE(key);
 
 or:
 
-	struct static_key key = STATIC_KEY_INIT_FALSE;
+	DEFINE_STATIC_KEY_FALSE(key);
+
 
-If the key is not initialized, it is default false. The 'struct static_key',
-must be a 'global'. That is, it can't be allocated on the stack or dynamically
+The key must be global, that is, it can't be allocated on the stack or dynamically
 allocated at run-time.
 
 The key is then used in code as:
 
-        if (static_key_false(&key))
+        if (static_branch_unlikely(&key))
                 do unlikely code
         else
                 do likely code
 
 Or:
 
-        if (static_key_true(&key))
+        if (static_branch_likely(&key))
                 do likely code
         else
                 do unlikely code
 
-A key that is initialized via 'STATIC_KEY_INIT_FALSE', must be used in a
-'static_key_false()' construct. Likewise, a key initialized via
-'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A
-single key can be used in many branches, but all the branches must match the
-way that the key has been initialized.
+Keys defined via DEFINE_STATIC_KEY_TRUE(), or DEFINE_STATIC_KEY_FALSE, may
+be used in either static_branch_likely() or static_branch_unlikely()
+statemnts.
 
-The branch(es) can then be switched via:
+Branch(es) can be set true via:
 
-	static_key_slow_inc(&key);
-	...
-	static_key_slow_dec(&key);
+static_branch_enable(&key);
 
-Thus, 'static_key_slow_inc()' means 'make the branch true', and
-'static_key_slow_dec()' means 'make the branch false' with appropriate
-reference counting. For example, if the key is initialized true, a
-static_key_slow_dec(), will switch the branch to false. And a subsequent
-static_key_slow_inc(), will change the branch back to true. Likewise, if the
-key is initialized false, a 'static_key_slow_inc()', will change the branch to
-true. And then a 'static_key_slow_dec()', will again make the branch false.
+or false via:
+
+static_branch_disable(&key);
 
-An example usage in the kernel is the implementation of tracepoints:
+The branch(es) can then be switched via reference counts:
 
-        static inline void trace_##name(proto)                          \
-        {                                                               \
-                if (static_key_false(&__tracepoint_##name.key))		\
-                        __DO_TRACE(&__tracepoint_##name,                \
-                                TP_PROTO(data_proto),                   \
-                                TP_ARGS(data_args),                     \
-                                TP_CONDITION(cond));                    \
-        }
+	static_branch_inc(&key);
+	...
+	static_branch_dec(&key);
 
-Tracepoints are disabled by default, and can be placed in performance critical
-pieces of the kernel. Thus, by using a static key, the tracepoints can have
-absolutely minimal impact when not in use.
+Thus, 'static_branch_inc()' means 'make the branch true', and
+'static_branch_dec()' means 'make the branch false' with appropriate
+reference counting. For example, if the key is initialized true, a
+static_branch_dec(), will switch the branch to false. And a subsequent
+static_branch_inc(), will change the branch back to true. Likewise, if the
+key is initialized false, a 'static_branch_inc()', will change the branch to
+true. And then a 'static_branch_dec()', will again make the branch false.
 
 
 4) Architecture level code patching interface, 'jump labels'
@@ -150,9 +152,12 @@ simply fall back to a traditional, load, test, and jump sequence.
 
 * #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h
 
-* __always_inline bool arch_static_branch(struct static_key *key), see:
+* __always_inline bool arch_static_branch(struct static_key *key, bool branch), see:
 					arch/x86/include/asm/jump_label.h
 
+* __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch),
+					see: arch/x86/include/asm/jump_label.h
+
 * void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type),
 					see: arch/x86/kernel/jump_label.c
 
@@ -173,7 +178,7 @@ SYSCALL_DEFINE0(getppid)
 {
         int pid;
 
-+       if (static_key_false(&key))
++       if (static_branch_unlikely(&key))
 +               printk("I am the true branch\n");
 
         rcu_read_lock();
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index e337a1961933..7f653e8f6690 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -7,17 +7,52 @@
  * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
  * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
  *
+ * DEPRECATED API:
+ *
+ * The use of 'struct static_key' directly, is now DEPRECATED. In addition
+ * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following:
+ *
+ * struct static_key false = STATIC_KEY_INIT_FALSE;
+ * struct static_key true = STATIC_KEY_INIT_TRUE;
+ * static_key_true()
+ * static_key_false()
+ *
+ * The updated API replacements are:
+ *
+ * DEFINE_STATIC_KEY_TRUE(key);
+ * DEFINE_STATIC_KEY_FALSE(key);
+ * static_key_likely()
+ * statick_key_unlikely()
+ *
  * Jump labels provide an interface to generate dynamic branches using
- * self-modifying code. Assuming toolchain and architecture support, the result
- * of a "if (static_key_false(&key))" statement is an unconditional branch (which
- * defaults to false - and the true block is placed out of line).
+ * self-modifying code. Assuming toolchain and architecture support, if we
+ * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)",
+ * an "if (static_branch_unlikely(&key))" statement is an unconditional branch
+ * (which defaults to false - and the true block is placed out of line).
+ * Similarly, we can define an initially true key via
+ * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same
+ * "if (static_branch_unlikely(&key))", in which case we will generate an
+ * unconditional branch to the out-of-line true branch. Keys that are
+ * initially true or false can be using in both static_branch_unlikely()
+ * and static_branch_likely() statements.
  *
- * However at runtime we can change the branch target using
- * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key
- * object, and for as long as there are references all branches referring to
- * that particular key will point to the (out of line) true block.
+ * At runtime we can change the branch target by setting the key
+ * to true via a call to static_branch_enable(), or false using
+ * static_branch_disable(). If the direction of the branch is switched by
+ * these calls then we run-time modify the branch target via a
+ * no-op -> jump or jump -> no-op conversion. For example, for an
+ * initially false key that is used in an "if (static_branch_unlikely(&key))"
+ * statement, setting the key to true requires us to patch in a jump
+ * to the out-of-line of true branch.
  *
- * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions
+ * In addtion to static_branch_{enable,disable}, we can also reference count
+ * the key or branch direction via static_branch_{inc,dec}. Thus,
+ * static_branch_inc() can be thought of as a 'make more true' and
+ * static_branch_dec() as a 'make more false'. The inc()/dec()
+ * interface is meant to be used exclusively from the inc()/dec() for a given
+ * key.
+ *
+ * Since this relies on modifying code, the branch modifying functions
  * must be considered absolute slow paths (machine wide synchronization etc.).
  * OTOH, since the affected branches are unconditional, their runtime overhead
  * will be absolutely minimal, esp. in the default (off) case where the total
@@ -29,20 +64,10 @@
  * cause significant performance degradation. Struct static_key_deferred and
  * static_key_slow_dec_deferred() provide for this.
  *
- * Lacking toolchain and or architecture support, jump labels fall back to a simple
- * conditional branch.
- *
- * struct static_key my_key = STATIC_KEY_INIT_TRUE;
- *
- *   if (static_key_true(&my_key)) {
- *   }
- *
- * will result in the true case being in-line and starts the key with a single
- * reference. Mixing static_key_true() and static_key_false() on the same key is not
- * allowed.
+ * Lacking toolchain and or architecture support, static keys fall back to a
+ * simple conditional branch.
  *
- * Not initializing the key (static data is initialized to 0s anyway) is the
- * same as using STATIC_KEY_INIT_FALSE.
+ * Additional babbling in: Documentation/static-keys.txt
  */
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
-- 
cgit v1.2.3


From ba33034fffc1189d95301bd865f1c799256e72a2 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 4 Aug 2015 09:55:48 +0200
Subject: locking, compiler.h: Cast away attributes in the WRITE_ONCE() magic

The kernel build bot showed a new warning triggered by commit:

  76695af20c01 ("locking, arch: use WRITE_ONCE()/READ_ONCE() in smp_store_release()/smp_load_acquire()")

because Sparse does not like WRITE_ONCE() accessing elements
from the (sparse) RCU address space:

  fs/afs/inode.c:448:9: sparse: incorrect type in initializer (different address spaces)
  fs/afs/inode.c:448:9:    expected struct afs_permits *__val
  fs/afs/inode.c:448:9:    got void [noderef] <asn:4>*<noident>

Solution is to force cast away the sparse attributes for the initializer
of the union in WRITE_ONCE().

(And as this now gets too long, also split the macro into multiple lines.)

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1438674948-38310-2-git-send-email-borntraeger@de.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index e08a6ae7c0a4..c836eb2dc44d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -252,7 +252,12 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
 
 #define WRITE_ONCE(x, val) \
-	({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+({							\
+	union { typeof(x) __val; char __c[1]; } __u =	\
+		{ .__val = (__force typeof(x)) (val) }; \
+	__write_once_size(&(x), __u.__c, sizeof(x));	\
+	__u.__val;					\
+})
 
 /**
  * READ_ONCE_CTRL - Read a value heading a control dependency
-- 
cgit v1.2.3


From 654672d4ba1a6001c365833be895f9477c4d5eab Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:37 +0100
Subject: locking/atomics: Add _{acquire|release|relaxed}() variants of some
 atomic operations

Whilst porting the generic qrwlock code over to arm64, it became
apparent that any portable locking code needs finer-grained control of
the memory-ordering guarantees provided by our atomic routines.

In particular: xchg, cmpxchg, {add,sub}_return are often used in
situations where full barrier semantics (currently the only option
available) are not required. For example, when a reader increments a
reader count to obtain a lock, checking the old value to see if a writer
was present, only acquire semantics are strictly needed.

This patch introduces three new ordering semantics for these operations:

  - *_relaxed: No ordering guarantees. This is similar to what we have
               already for the non-return atomics (e.g. atomic_add).

  - *_acquire: ACQUIRE semantics, similar to smp_load_acquire.

  - *_release: RELEASE semantics, similar to smp_store_release.

In memory-ordering speak, this means that the acquire/release semantics
are RCpc as opposed to RCsc. Consequently a RELEASE followed by an
ACQUIRE does not imply a full barrier, as already documented in
memory-barriers.txt.

Currently, all the new macros are conditionally mapped to the full-mb
variants, however if the *_relaxed version is provided by the
architecture, then the acquire/release variants are constructed by
supplementing the relaxed routine with an explicit barrier.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-2-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/atomic.h | 323 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 323 insertions(+)

(limited to 'include')

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 8b98b423388f..00a5763e850e 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -2,6 +2,329 @@
 #ifndef _LINUX_ATOMIC_H
 #define _LINUX_ATOMIC_H
 #include <asm/atomic.h>
+#include <asm/barrier.h>
+
+/*
+ * Relaxed variants of xchg, cmpxchg and some atomic operations.
+ *
+ * We support four variants:
+ *
+ * - Fully ordered: The default implementation, no suffix required.
+ * - Acquire: Provides ACQUIRE semantics, _acquire suffix.
+ * - Release: Provides RELEASE semantics, _release suffix.
+ * - Relaxed: No ordering guarantees, _relaxed suffix.
+ *
+ * For compound atomics performing both a load and a store, ACQUIRE
+ * semantics apply only to the load and RELEASE semantics only to the
+ * store portion of the operation. Note that a failed cmpxchg_acquire
+ * does -not- imply any memory ordering constraints.
+ *
+ * See Documentation/memory-barriers.txt for ACQUIRE/RELEASE definitions.
+ */
+
+#ifndef atomic_read_acquire
+#define  atomic_read_acquire(v)		smp_load_acquire(&(v)->counter)
+#endif
+
+#ifndef atomic_set_release
+#define  atomic_set_release(v, i)	smp_store_release(&(v)->counter, (i))
+#endif
+
+/*
+ * The idea here is to build acquire/release variants by adding explicit
+ * barriers on top of the relaxed variant. In the case where the relaxed
+ * variant is already fully ordered, no additional barriers are needed.
+ */
+#define __atomic_op_acquire(op, args...)				\
+({									\
+	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
+	smp_mb__after_atomic();						\
+	__ret;								\
+})
+
+#define __atomic_op_release(op, args...)				\
+({									\
+	smp_mb__before_atomic();					\
+	op##_relaxed(args);						\
+})
+
+#define __atomic_op_fence(op, args...)					\
+({									\
+	typeof(op##_relaxed(args)) __ret;				\
+	smp_mb__before_atomic();					\
+	__ret = op##_relaxed(args);					\
+	smp_mb__after_atomic();						\
+	__ret;								\
+})
+
+/* atomic_add_return_relaxed */
+#ifndef atomic_add_return_relaxed
+#define  atomic_add_return_relaxed	atomic_add_return
+#define  atomic_add_return_acquire	atomic_add_return
+#define  atomic_add_return_release	atomic_add_return
+
+#else /* atomic_add_return_relaxed */
+
+#ifndef atomic_add_return_acquire
+#define  atomic_add_return_acquire(...)					\
+	__atomic_op_acquire(atomic_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_add_return_release
+#define  atomic_add_return_release(...)					\
+	__atomic_op_release(atomic_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_add_return
+#define  atomic_add_return(...)						\
+	__atomic_op_fence(atomic_add_return, __VA_ARGS__)
+#endif
+#endif /* atomic_add_return_relaxed */
+
+/* atomic_sub_return_relaxed */
+#ifndef atomic_sub_return_relaxed
+#define  atomic_sub_return_relaxed	atomic_sub_return
+#define  atomic_sub_return_acquire	atomic_sub_return
+#define  atomic_sub_return_release	atomic_sub_return
+
+#else /* atomic_sub_return_relaxed */
+
+#ifndef atomic_sub_return_acquire
+#define  atomic_sub_return_acquire(...)					\
+	__atomic_op_acquire(atomic_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_sub_return_release
+#define  atomic_sub_return_release(...)					\
+	__atomic_op_release(atomic_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_sub_return
+#define  atomic_sub_return(...)						\
+	__atomic_op_fence(atomic_sub_return, __VA_ARGS__)
+#endif
+#endif /* atomic_sub_return_relaxed */
+
+/* atomic_xchg_relaxed */
+#ifndef atomic_xchg_relaxed
+#define  atomic_xchg_relaxed		atomic_xchg
+#define  atomic_xchg_acquire		atomic_xchg
+#define  atomic_xchg_release		atomic_xchg
+
+#else /* atomic_xchg_relaxed */
+
+#ifndef atomic_xchg_acquire
+#define  atomic_xchg_acquire(...)					\
+	__atomic_op_acquire(atomic_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_xchg_release
+#define  atomic_xchg_release(...)					\
+	__atomic_op_release(atomic_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_xchg
+#define  atomic_xchg(...)						\
+	__atomic_op_fence(atomic_xchg, __VA_ARGS__)
+#endif
+#endif /* atomic_xchg_relaxed */
+
+/* atomic_cmpxchg_relaxed */
+#ifndef atomic_cmpxchg_relaxed
+#define  atomic_cmpxchg_relaxed		atomic_cmpxchg
+#define  atomic_cmpxchg_acquire		atomic_cmpxchg
+#define  atomic_cmpxchg_release		atomic_cmpxchg
+
+#else /* atomic_cmpxchg_relaxed */
+
+#ifndef atomic_cmpxchg_acquire
+#define  atomic_cmpxchg_acquire(...)					\
+	__atomic_op_acquire(atomic_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_cmpxchg_release
+#define  atomic_cmpxchg_release(...)					\
+	__atomic_op_release(atomic_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_cmpxchg
+#define  atomic_cmpxchg(...)						\
+	__atomic_op_fence(atomic_cmpxchg, __VA_ARGS__)
+#endif
+#endif /* atomic_cmpxchg_relaxed */
+
+#ifndef atomic64_read_acquire
+#define  atomic64_read_acquire(v)	smp_load_acquire(&(v)->counter)
+#endif
+
+#ifndef atomic64_set_release
+#define  atomic64_set_release(v, i)	smp_store_release(&(v)->counter, (i))
+#endif
+
+/* atomic64_add_return_relaxed */
+#ifndef atomic64_add_return_relaxed
+#define  atomic64_add_return_relaxed	atomic64_add_return
+#define  atomic64_add_return_acquire	atomic64_add_return
+#define  atomic64_add_return_release	atomic64_add_return
+
+#else /* atomic64_add_return_relaxed */
+
+#ifndef atomic64_add_return_acquire
+#define  atomic64_add_return_acquire(...)				\
+	__atomic_op_acquire(atomic64_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_add_return_release
+#define  atomic64_add_return_release(...)				\
+	__atomic_op_release(atomic64_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_add_return
+#define  atomic64_add_return(...)					\
+	__atomic_op_fence(atomic64_add_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_add_return_relaxed */
+
+/* atomic64_sub_return_relaxed */
+#ifndef atomic64_sub_return_relaxed
+#define  atomic64_sub_return_relaxed	atomic64_sub_return
+#define  atomic64_sub_return_acquire	atomic64_sub_return
+#define  atomic64_sub_return_release	atomic64_sub_return
+
+#else /* atomic64_sub_return_relaxed */
+
+#ifndef atomic64_sub_return_acquire
+#define  atomic64_sub_return_acquire(...)				\
+	__atomic_op_acquire(atomic64_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_sub_return_release
+#define  atomic64_sub_return_release(...)				\
+	__atomic_op_release(atomic64_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_sub_return
+#define  atomic64_sub_return(...)					\
+	__atomic_op_fence(atomic64_sub_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_sub_return_relaxed */
+
+/* atomic64_xchg_relaxed */
+#ifndef atomic64_xchg_relaxed
+#define  atomic64_xchg_relaxed		atomic64_xchg
+#define  atomic64_xchg_acquire		atomic64_xchg
+#define  atomic64_xchg_release		atomic64_xchg
+
+#else /* atomic64_xchg_relaxed */
+
+#ifndef atomic64_xchg_acquire
+#define  atomic64_xchg_acquire(...)					\
+	__atomic_op_acquire(atomic64_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_xchg_release
+#define  atomic64_xchg_release(...)					\
+	__atomic_op_release(atomic64_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_xchg
+#define  atomic64_xchg(...)						\
+	__atomic_op_fence(atomic64_xchg, __VA_ARGS__)
+#endif
+#endif /* atomic64_xchg_relaxed */
+
+/* atomic64_cmpxchg_relaxed */
+#ifndef atomic64_cmpxchg_relaxed
+#define  atomic64_cmpxchg_relaxed	atomic64_cmpxchg
+#define  atomic64_cmpxchg_acquire	atomic64_cmpxchg
+#define  atomic64_cmpxchg_release	atomic64_cmpxchg
+
+#else /* atomic64_cmpxchg_relaxed */
+
+#ifndef atomic64_cmpxchg_acquire
+#define  atomic64_cmpxchg_acquire(...)					\
+	__atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_cmpxchg_release
+#define  atomic64_cmpxchg_release(...)					\
+	__atomic_op_release(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_cmpxchg
+#define  atomic64_cmpxchg(...)						\
+	__atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+#endif /* atomic64_cmpxchg_relaxed */
+
+/* cmpxchg_relaxed */
+#ifndef cmpxchg_relaxed
+#define  cmpxchg_relaxed		cmpxchg
+#define  cmpxchg_acquire		cmpxchg
+#define  cmpxchg_release		cmpxchg
+
+#else /* cmpxchg_relaxed */
+
+#ifndef cmpxchg_acquire
+#define  cmpxchg_acquire(...)						\
+	__atomic_op_acquire(cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg_release
+#define  cmpxchg_release(...)						\
+	__atomic_op_release(cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg
+#define  cmpxchg(...)							\
+	__atomic_op_fence(cmpxchg, __VA_ARGS__)
+#endif
+#endif /* cmpxchg_relaxed */
+
+/* cmpxchg64_relaxed */
+#ifndef cmpxchg64_relaxed
+#define  cmpxchg64_relaxed		cmpxchg64
+#define  cmpxchg64_acquire		cmpxchg64
+#define  cmpxchg64_release		cmpxchg64
+
+#else /* cmpxchg64_relaxed */
+
+#ifndef cmpxchg64_acquire
+#define  cmpxchg64_acquire(...)						\
+	__atomic_op_acquire(cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg64_release
+#define  cmpxchg64_release(...)						\
+	__atomic_op_release(cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg64
+#define  cmpxchg64(...)							\
+	__atomic_op_fence(cmpxchg64, __VA_ARGS__)
+#endif
+#endif /* cmpxchg64_relaxed */
+
+/* xchg_relaxed */
+#ifndef xchg_relaxed
+#define  xchg_relaxed			xchg
+#define  xchg_acquire			xchg
+#define  xchg_release			xchg
+
+#else /* xchg_relaxed */
+
+#ifndef xchg_acquire
+#define  xchg_acquire(...)		__atomic_op_acquire(xchg, __VA_ARGS__)
+#endif
+
+#ifndef xchg_release
+#define  xchg_release(...)		__atomic_op_release(xchg, __VA_ARGS__)
+#endif
+
+#ifndef xchg
+#define  xchg(...)			__atomic_op_fence(xchg, __VA_ARGS__)
+#endif
+#endif /* xchg_relaxed */
 
 /**
  * atomic_add_unless - add unless the number is already a given value
-- 
cgit v1.2.3


From 586b610e43a5ad5096640312fefa6ce931738c7d Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:38 +0100
Subject: locking, asm-generic: Rework atomic-long.h to avoid bulk code
 duplication

We can use some (admittedly ugly) macros to generate the 32-bit and
64-bit based atomic_long implementations from the same code.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-3-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-long.h | 189 ++++++++------------------------------
 1 file changed, 40 insertions(+), 149 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index b7babf0206b8..beaea541adfb 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -23,236 +23,127 @@
 typedef atomic64_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
+#define ATOMIC_LONG_PFX(x)	atomic64 ## x
 
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_read(v);
-}
-
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_set(v, i);
-}
-
-static inline void atomic_long_inc(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_inc(v);
-}
-
-static inline void atomic_long_dec(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_dec(v);
-}
-
-static inline void atomic_long_add(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_add(i, v);
-}
-
-static inline void atomic_long_sub(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_sub(i, v);
-}
-
-static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_sub_and_test(i, v);
-}
-
-static inline int atomic_long_dec_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_dec_and_test(v);
-}
-
-static inline int atomic_long_inc_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_inc_and_test(v);
-}
-
-static inline int atomic_long_add_negative(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_add_negative(i, v);
-}
-
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_return(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_sub_return(i, v);
-}
-
-static inline long atomic_long_inc_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_inc_return(v);
-}
-
-static inline long atomic_long_dec_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_dec_return(v);
-}
-
-static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_unless(v, a, u);
-}
-
-#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l))
-
-#define atomic_long_cmpxchg(l, old, new) \
-	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(atomic64_xchg((atomic64_t *)(v), (new)))
-
-#else  /*  BITS_PER_LONG == 64  */
+#else
 
 typedef atomic_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
+#define ATOMIC_LONG_PFX(x)	atomic ## x
+
+#endif
+
 static inline long atomic_long_read(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_read(v);
+	return (long)ATOMIC_LONG_PFX(_read)(v);
 }
 
 static inline void atomic_long_set(atomic_long_t *l, long i)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_set(v, i);
+	ATOMIC_LONG_PFX(_set)(v, i);
 }
 
 static inline void atomic_long_inc(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_inc(v);
+	ATOMIC_LONG_PFX(_inc)(v);
 }
 
 static inline void atomic_long_dec(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_dec(v);
+	ATOMIC_LONG_PFX(_dec)(v);
 }
 
 static inline void atomic_long_add(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_add(i, v);
+	ATOMIC_LONG_PFX(_add)(i, v);
 }
 
 static inline void atomic_long_sub(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_sub(i, v);
+	ATOMIC_LONG_PFX(_sub)(i, v);
 }
 
 static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_sub_and_test(i, v);
+	return ATOMIC_LONG_PFX(_sub_and_test)(i, v);
 }
 
 static inline int atomic_long_dec_and_test(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_dec_and_test(v);
+	return ATOMIC_LONG_PFX(_dec_and_test)(v);
 }
 
 static inline int atomic_long_inc_and_test(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_inc_and_test(v);
+	return ATOMIC_LONG_PFX(_inc_and_test)(v);
 }
 
 static inline int atomic_long_add_negative(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_add_negative(i, v);
+	return ATOMIC_LONG_PFX(_add_negative)(i, v);
 }
 
 static inline long atomic_long_add_return(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_add_return(i, v);
+	return (long)ATOMIC_LONG_PFX(_add_return)(i, v);
 }
 
 static inline long atomic_long_sub_return(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_sub_return(i, v);
+	return (long)ATOMIC_LONG_PFX(_sub_return)(i, v);
 }
 
 static inline long atomic_long_inc_return(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_inc_return(v);
+	return (long)ATOMIC_LONG_PFX(_inc_return)(v);
 }
 
 static inline long atomic_long_dec_return(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_dec_return(v);
+	return (long)ATOMIC_LONG_PFX(_dec_return)(v);
 }
 
 static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_add_unless(v, a, u);
+	return (long)ATOMIC_LONG_PFX(_add_unless)(v, a, u);
 }
 
-#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l))
-
+#define atomic_long_inc_not_zero(l) \
+	ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
 #define atomic_long_cmpxchg(l, old, new) \
-	(atomic_cmpxchg((atomic_t *)(l), (old), (new)))
+	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
 #define atomic_long_xchg(v, new) \
-	(atomic_xchg((atomic_t *)(v), (new)))
-
-#endif  /*  BITS_PER_LONG == 64  */
+	(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 
 #endif  /*  _ASM_GENERIC_ATOMIC_LONG_H  */
-- 
cgit v1.2.3


From 6d79ef2d30ee5af7315535d1e7bf6fce0008f815 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:39 +0100
Subject: locking, asm-generic: Add _{relaxed|acquire|release}() variants for
 'atomic_long_t'

This patch adds 'atomic_long_t' wrappers for the new relaxed atomic operations.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-4-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-long.h | 86 +++++++++++++++++++++++++++------------
 1 file changed, 59 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index beaea541adfb..a94cbebbc33d 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -34,19 +34,69 @@ typedef atomic_t atomic_long_t;
 
 #endif
 
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	return (long)ATOMIC_LONG_PFX(_read)(v);
+#define ATOMIC_LONG_READ_OP(mo)						\
+static inline long atomic_long_read##mo(atomic_long_t *l)		\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	return (long)ATOMIC_LONG_PFX(_read##mo)(v);			\
 }
+ATOMIC_LONG_READ_OP()
+ATOMIC_LONG_READ_OP(_acquire)
 
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+#undef ATOMIC_LONG_READ_OP
 
-	ATOMIC_LONG_PFX(_set)(v, i);
+#define ATOMIC_LONG_SET_OP(mo)						\
+static inline void atomic_long_set##mo(atomic_long_t *l, long i)	\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	ATOMIC_LONG_PFX(_set##mo)(v, i);				\
+}
+ATOMIC_LONG_SET_OP()
+ATOMIC_LONG_SET_OP(_release)
+
+#undef ATOMIC_LONG_SET_OP
+
+#define ATOMIC_LONG_ADD_SUB_OP(op, mo)					\
+static inline long							\
+atomic_long_##op##_return##mo(long i, atomic_long_t *l)			\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(i, v);		\
 }
+ATOMIC_LONG_ADD_SUB_OP(add,)
+ATOMIC_LONG_ADD_SUB_OP(add, _relaxed)
+ATOMIC_LONG_ADD_SUB_OP(add, _acquire)
+ATOMIC_LONG_ADD_SUB_OP(add, _release)
+ATOMIC_LONG_ADD_SUB_OP(sub,)
+ATOMIC_LONG_ADD_SUB_OP(sub, _relaxed)
+ATOMIC_LONG_ADD_SUB_OP(sub, _acquire)
+ATOMIC_LONG_ADD_SUB_OP(sub, _release)
+
+#undef ATOMIC_LONG_ADD_SUB_OP
+
+#define atomic_long_cmpxchg_relaxed(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (old), (new)))
+#define atomic_long_cmpxchg_acquire(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (old), (new)))
+#define atomic_long_cmpxchg_release(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (old), (new)))
+#define atomic_long_cmpxchg(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
+
+#define atomic_long_xchg_relaxed(v, new) \
+	(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
+#define atomic_long_xchg_acquire(v, new) \
+	(ATOMIC_LONG_PFX(_xchg_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
+#define atomic_long_xchg_release(v, new) \
+	(ATOMIC_LONG_PFX(_xchg_release)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
+#define atomic_long_xchg(v, new) \
+	(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 
 static inline void atomic_long_inc(atomic_long_t *l)
 {
@@ -104,20 +154,6 @@ static inline int atomic_long_add_negative(long i, atomic_long_t *l)
 	return ATOMIC_LONG_PFX(_add_negative)(i, v);
 }
 
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	return (long)ATOMIC_LONG_PFX(_add_return)(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	return (long)ATOMIC_LONG_PFX(_sub_return)(i, v);
-}
-
 static inline long atomic_long_inc_return(atomic_long_t *l)
 {
 	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
@@ -141,9 +177,5 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 
 #define atomic_long_inc_not_zero(l) \
 	ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
-#define atomic_long_cmpxchg(l, old, new) \
-	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 
 #endif  /*  _ASM_GENERIC_ATOMIC_LONG_H  */
-- 
cgit v1.2.3


From 2b2a85a4d3534b8884fcfa5bb52837f0e1c672bc Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:41 +0100
Subject: locking/qrwlock: Implement queue_write_unlock() using
 smp_store_release()

Since the following commit:

  536fa402221f ("compiler: Allow 1- and 2-byte smp_load_acquire() and smp_store_release()")

smp_store_release() supports byte accesses, so use that in writer unlock
and remove the conditional macro override.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Waiman Long <Waiman.Long@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-6-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/qrwlock.h | 10 ----------
 include/asm-generic/qrwlock.h  |  9 +--------
 2 files changed, 1 insertion(+), 18 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
index a8810bf135ab..c537cbb038a7 100644
--- a/arch/x86/include/asm/qrwlock.h
+++ b/arch/x86/include/asm/qrwlock.h
@@ -2,16 +2,6 @@
 #define _ASM_X86_QRWLOCK_H
 
 #include <asm-generic/qrwlock_types.h>
-
-#ifndef CONFIG_X86_PPRO_FENCE
-#define queued_write_unlock queued_write_unlock
-static inline void queued_write_unlock(struct qrwlock *lock)
-{
-        barrier();
-        ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;
-}
-#endif
-
 #include <asm-generic/qrwlock.h>
 
 #endif /* _ASM_X86_QRWLOCK_H */
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index deb9e8b0eb9e..eb673dde8879 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -134,21 +134,14 @@ static inline void queued_read_unlock(struct qrwlock *lock)
 	atomic_sub(_QR_BIAS, &lock->cnts);
 }
 
-#ifndef queued_write_unlock
 /**
  * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
-	/*
-	 * If the writer field is atomic, it can be cleared directly.
-	 * Otherwise, an atomic subtraction will be used to clear it.
-	 */
-	smp_mb__before_atomic();
-	atomic_sub(_QW_LOCKED, &lock->cnts);
+	smp_store_release((u8 *)&lock->cnts, 0);
 }
-#endif
 
 /*
  * Remapping rwlock architecture specific functions to the corresponding
-- 
cgit v1.2.3


From 77e430e3e45662b696dc49aa53ea0f7ac63f2574 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:42 +0100
Subject: locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics

The qrwlock implementation is slightly heavy in its use of memory
barriers, mainly through the use of _cmpxchg() and _return() atomics, which
imply full barrier semantics.

This patch modifies the qrwlock code to use the more relaxed atomic
routines so that we can reduce the unnecessary barrier overhead on
weakly-ordered architectures.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-7-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qrwlock.h | 13 ++++++-------
 kernel/locking/qrwlock.c      | 24 ++++++++++++------------
 2 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index eb673dde8879..54a8e65e18b6 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -68,7 +68,7 @@ static inline int queued_read_trylock(struct qrwlock *lock)
 
 	cnts = atomic_read(&lock->cnts);
 	if (likely(!(cnts & _QW_WMASK))) {
-		cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
+		cnts = (u32)atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 		if (likely(!(cnts & _QW_WMASK)))
 			return 1;
 		atomic_sub(_QR_BIAS, &lock->cnts);
@@ -89,8 +89,8 @@ static inline int queued_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg(&lock->cnts,
-				     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_cmpxchg_acquire(&lock->cnts,
+					     cnts, cnts | _QW_LOCKED) == cnts);
 }
 /**
  * queued_read_lock - acquire read lock of a queue rwlock
@@ -100,7 +100,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 {
 	u32 cnts;
 
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 	if (likely(!(cnts & _QW_WMASK)))
 		return;
 
@@ -115,7 +115,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
 static inline void queued_write_lock(struct qrwlock *lock)
 {
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
 		return;
 
 	queued_write_lock_slowpath(lock);
@@ -130,8 +130,7 @@ static inline void queued_read_unlock(struct qrwlock *lock)
 	/*
 	 * Atomically decrement the reader count
 	 */
-	smp_mb__before_atomic();
-	atomic_sub(_QR_BIAS, &lock->cnts);
+	(void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
 }
 
 /**
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 6a7a3b8d5ac9..f17a3e3b3550 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -55,7 +55,7 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 {
 	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
 		cpu_relax_lowlatency();
-		cnts = smp_load_acquire((u32 *)&lock->cnts);
+		cnts = atomic_read_acquire(&lock->cnts);
 	}
 }
 
@@ -74,8 +74,9 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 		 * Readers in interrupt context will get the lock immediately
 		 * if the writer is just waiting (not holding the lock yet).
 		 * The rspin_until_writer_unlock() function returns immediately
-		 * in this case. Otherwise, they will spin until the lock
-		 * is available without waiting in the queue.
+		 * in this case. Otherwise, they will spin (with ACQUIRE
+		 * semantics) until the lock is available without waiting in
+		 * the queue.
 		 */
 		rspin_until_writer_unlock(lock, cnts);
 		return;
@@ -88,12 +89,11 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 	arch_spin_lock(&lock->lock);
 
 	/*
-	 * At the head of the wait queue now, increment the reader count
-	 * and wait until the writer, if it has the lock, has gone away.
-	 * At ths stage, it is not possible for a writer to remain in the
-	 * waiting state (_QW_WAITING). So there won't be any deadlock.
+	 * The ACQUIRE semantics of the following spinning code ensure
+	 * that accesses can't leak upwards out of our subsequent critical
+	 * section in the case that the lock is currently held for write.
 	 */
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts) - _QR_BIAS;
 	rspin_until_writer_unlock(lock, cnts);
 
 	/*
@@ -116,7 +116,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 
 	/* Try to acquire the lock directly if no reader is present */
 	if (!atomic_read(&lock->cnts) &&
-	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+	    (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
 		goto unlock;
 
 	/*
@@ -127,7 +127,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 		struct __qrwlock *l = (struct __qrwlock *)lock;
 
 		if (!READ_ONCE(l->wmode) &&
-		   (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
+		   (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
 			break;
 
 		cpu_relax_lowlatency();
@@ -137,8 +137,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 	for (;;) {
 		cnts = atomic_read(&lock->cnts);
 		if ((cnts == _QW_WAITING) &&
-		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
-				    _QW_LOCKED) == _QW_WAITING))
+		    (atomic_cmpxchg_acquire(&lock->cnts, _QW_WAITING,
+					    _QW_LOCKED) == _QW_WAITING))
 			break;
 
 		cpu_relax_lowlatency();
-- 
cgit v1.2.3


From cd074aea9261784e44f292e1132830ec221802c6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 6 Aug 2015 17:54:43 +0100
Subject: locking, include/llist: Use linux/atomic.h instead of asm/cmpxchg.h

Including an asm/ header directly is best avoided, so use linux/atomic.h
instead of asm/cmpxchg.h in linux/llist.h.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: paulmck@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1438880084-18856-8-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/llist.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/llist.h b/include/linux/llist.h
index fbf10a0bc095..fd4ca0b4fe0f 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -55,8 +55,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/atomic.h>
 #include <linux/kernel.h>
-#include <asm/cmpxchg.h>
 
 struct llist_head {
 	struct llist_node *first;
-- 
cgit v1.2.3