From 3ca0ff571b092ee4d807f1168caa428d95b0173b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Aug 2016 13:36:04 +0200 Subject: locking/mutex: Rework mutex::owner The current mutex implementation has an atomic lock word and a non-atomic owner field. This disparity leads to a number of issues with the current mutex code as it means that we can have a locked mutex without an explicit owner (because the owner field has not been set, or already cleared). This leads to a number of weird corner cases, esp. between the optimistic spinning and debug code. Where the optimistic spinning code needs the owner field updated inside the lock region, the debug code is more relaxed because the whole lock is serialized by the wait_lock. Also, the spinning code itself has a few corner cases where we need to deal with a held lock without an owner field. Furthermore, it becomes even more of a problem when trying to fix starvation cases in the current code. We end up stacking special case on special case. To solve this rework the basic mutex implementation to be a single atomic word that contains the owner and uses the low bits for extra state. This matches how PI futexes and rt_mutex already work. By having the owner an integral part of the lock state a lot of the problems dissapear and we get a better option to deal with starvation cases, direct owner handoff. Changing the basic mutex does however invalidate all the arch specific mutex code; this patch leaves that unused in-place, a later patch will remove that. Tested-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Will Deacon Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mutex-debug.h | 24 ----------------------- include/linux/mutex.h | 46 +++++++++++++++++++++++++++++---------------- 2 files changed, 30 insertions(+), 40 deletions(-) delete mode 100644 include/linux/mutex-debug.h (limited to 'include') diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h deleted file mode 100644 index 4ac8b1977b73..000000000000 --- a/include/linux/mutex-debug.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __LINUX_MUTEX_DEBUG_H -#define __LINUX_MUTEX_DEBUG_H - -#include -#include -#include - -/* - * Mutexes - debugging helpers: - */ - -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ - , .magic = &lockname - -#define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ -} while (0) - -extern void mutex_destroy(struct mutex *lock); - -#endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2cb7531e7d7a..4d3bccabbea5 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * Simple, straightforward mutexes with strict semantics: @@ -48,16 +49,12 @@ * locks and tasks (and only those tasks) */ struct mutex { - /* 1: unlocked, 0: locked, negative: locked, possible waiters */ - atomic_t count; + atomic_long_t owner; spinlock_t wait_lock; - struct list_head wait_list; -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) - struct task_struct *owner; -#endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif + struct list_head wait_list; #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif @@ -66,6 +63,11 @@ struct mutex { #endif }; +static inline struct task_struct *__mutex_owner(struct mutex *lock) +{ + return 
(struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03); +} + /* * This is the control structure for tasks blocked on mutex, * which resides on the blocked task's kernel stack: @@ -79,9 +81,20 @@ struct mutex_waiter { }; #ifdef CONFIG_DEBUG_MUTEXES -# include + +#define __DEBUG_MUTEX_INITIALIZER(lockname) \ + , .magic = &lockname + +extern void mutex_destroy(struct mutex *lock); + #else + # define __DEBUG_MUTEX_INITIALIZER(lockname) + +static inline void mutex_destroy(struct mutex *lock) {} + +#endif + /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -90,14 +103,12 @@ struct mutex_waiter { * * It is not allowed to initialize an already locked mutex. */ -# define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ +#define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key); \ } while (0) -static inline void mutex_destroy(struct mutex *lock) {} -#endif #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -107,7 +118,7 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif #define __MUTEX_INITIALIZER(lockname) \ - { .count = ATOMIC_INIT(1) \ + { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ __DEBUG_MUTEX_INITIALIZER(lockname) \ @@ -127,7 +138,10 @@ extern void __mutex_init(struct mutex *lock, const char *name, */ static inline int mutex_is_locked(struct mutex *lock) { - return atomic_read(&lock->count) != 1; + /* + * XXX think about spin_is_locked + */ + return __mutex_owner(lock) != NULL; } /* -- cgit v1.2.3 From 890658b7ab48d1362a0362df842cecc73c83146f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Aug 2016 13:36:04 +0200 Subject: locking/mutex: Kill arch specific code Its all generic atomic_long_t stuff now. Tested-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/mutex.h | 9 --- arch/arc/include/asm/mutex.h | 18 ----- arch/arm/include/asm/mutex.h | 21 ------ arch/arm64/include/asm/Kbuild | 1 - arch/avr32/include/asm/mutex.h | 9 --- arch/blackfin/include/asm/Kbuild | 1 - arch/c6x/include/asm/mutex.h | 6 -- arch/cris/include/asm/mutex.h | 9 --- arch/frv/include/asm/mutex.h | 9 --- arch/h8300/include/asm/mutex.h | 9 --- arch/hexagon/include/asm/mutex.h | 8 --- arch/ia64/include/asm/mutex.h | 90 ------------------------ arch/m32r/include/asm/mutex.h | 9 --- arch/m68k/include/asm/Kbuild | 1 - arch/metag/include/asm/Kbuild | 1 - arch/microblaze/include/asm/mutex.h | 1 - arch/mips/include/asm/Kbuild | 1 - arch/mn10300/include/asm/mutex.h | 16 ----- arch/nios2/include/asm/mutex.h | 1 - arch/openrisc/include/asm/mutex.h | 27 -------- arch/parisc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/mutex.h | 132 ------------------------------------ arch/s390/include/asm/mutex.h | 9 --- arch/score/include/asm/mutex.h | 6 -- arch/sh/include/asm/mutex-llsc.h | 109 ----------------------------- arch/sh/include/asm/mutex.h | 12 ---- arch/sparc/include/asm/Kbuild | 1 - arch/tile/include/asm/Kbuild | 1 - arch/um/include/asm/Kbuild | 1 - arch/unicore32/include/asm/mutex.h | 20 ------ arch/x86/include/asm/mutex.h | 5 -- arch/x86/include/asm/mutex_32.h | 110 ------------------------------ arch/x86/include/asm/mutex_64.h | 127 ---------------------------------- arch/xtensa/include/asm/mutex.h | 9 --- include/asm-generic/mutex-dec.h | 88 ------------------------ include/asm-generic/mutex-null.h | 19 ------ include/asm-generic/mutex-xchg.h | 120 -------------------------------- include/asm-generic/mutex.h | 9 --- 38 files changed, 1026 deletions(-) delete mode 100644 arch/alpha/include/asm/mutex.h delete mode 100644 arch/arc/include/asm/mutex.h delete mode 100644 arch/arm/include/asm/mutex.h delete mode 100644 arch/avr32/include/asm/mutex.h delete mode 100644 arch/c6x/include/asm/mutex.h delete mode 100644 arch/cris/include/asm/mutex.h delete mode 100644 arch/frv/include/asm/mutex.h delete mode 100644 arch/h8300/include/asm/mutex.h delete mode 100644 arch/hexagon/include/asm/mutex.h delete mode 100644 arch/ia64/include/asm/mutex.h delete mode 100644 arch/m32r/include/asm/mutex.h delete mode 100644 arch/microblaze/include/asm/mutex.h delete mode 100644 arch/mn10300/include/asm/mutex.h delete mode 100644 arch/nios2/include/asm/mutex.h delete mode 100644 arch/openrisc/include/asm/mutex.h delete mode 100644 arch/powerpc/include/asm/mutex.h delete mode 100644 arch/s390/include/asm/mutex.h delete mode 100644 arch/score/include/asm/mutex.h delete mode 100644 arch/sh/include/asm/mutex-llsc.h delete mode 100644 arch/sh/include/asm/mutex.h delete mode 100644 arch/unicore32/include/asm/mutex.h delete mode 100644 arch/x86/include/asm/mutex.h delete mode 100644 arch/x86/include/asm/mutex_32.h delete mode 100644 arch/x86/include/asm/mutex_64.h delete mode 100644 arch/xtensa/include/asm/mutex.h delete mode 100644 include/asm-generic/mutex-dec.h delete mode 100644 include/asm-generic/mutex-null.h delete mode 100644 include/asm-generic/mutex-xchg.h delete mode 100644 include/asm-generic/mutex.h (limited to 'include') diff --git a/arch/alpha/include/asm/mutex.h b/arch/alpha/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/alpha/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull 
in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/arc/include/asm/mutex.h b/arch/arc/include/asm/mutex.h deleted file mode 100644 index a2f88ff9f506..000000000000 --- a/arch/arc/include/asm/mutex.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * xchg() based mutex fast path maintains a state of 0 or 1, as opposed to - * atomic dec based which can "count" any number of lock contenders. - * This ideally needs to be fixed in core, but for now switching to dec ver. - */ -#if defined(CONFIG_SMP) && (CONFIG_NR_CPUS > 2) -#include -#else -#include -#endif diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h deleted file mode 100644 index 87c044910fe0..000000000000 --- a/arch/arm/include/asm/mutex.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * arch/arm/include/asm/mutex.h - * - * ARM optimized mutex locking primitives - * - * Please look into asm-generic/mutex-xchg.h for a formal definition. - */ -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H -/* - * On pre-ARMv6 hardware this results in a swp-based implementation, - * which is the most efficient. For ARMv6+, we have exclusive memory - * accessors and use atomic_dec to avoid the extra xchg operations - * on the locking slowpaths. - */ -#if __LINUX_ARM_ARCH__ < 6 -#include -#else -#include -#endif -#endif /* _ASM_MUTEX_H */ diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 44e1d7f10add..b4ab238a59ec 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -24,7 +24,6 @@ generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += msi.h -generic-y += mutex.h generic-y += poll.h generic-y += preempt.h generic-y += resource.h diff --git a/arch/avr32/include/asm/mutex.h b/arch/avr32/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/avr32/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. 
(see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 91d49c0a3118..2fb67b59d188 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -24,7 +24,6 @@ generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h -generic-y += mutex.h generic-y += param.h generic-y += percpu.h generic-y += pgalloc.h diff --git a/arch/c6x/include/asm/mutex.h b/arch/c6x/include/asm/mutex.h deleted file mode 100644 index 7a7248e0462d..000000000000 --- a/arch/c6x/include/asm/mutex.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_C6X_MUTEX_H -#define _ASM_C6X_MUTEX_H - -#include - -#endif /* _ASM_C6X_MUTEX_H */ diff --git a/arch/cris/include/asm/mutex.h b/arch/cris/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/cris/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/frv/include/asm/mutex.h b/arch/frv/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/frv/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/h8300/include/asm/mutex.h b/arch/h8300/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/h8300/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/hexagon/include/asm/mutex.h b/arch/hexagon/include/asm/mutex.h deleted file mode 100644 index 58b52de1bc22..000000000000 --- a/arch/hexagon/include/asm/mutex.h +++ /dev/null @@ -1,8 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ -#include diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h deleted file mode 100644 index 28cb819e0ff9..000000000000 --- a/arch/ia64/include/asm/mutex.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * ia64 implementation of the mutex fastpath. - * - * Copyright (C) 2006 Ken Chen - * - */ - -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. 
- */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call . - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - * - * If the implementation sets it to a value of lower than 1, then the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int ret = ia64_fetchadd4_rel(count, 1); - if (unlikely(ret < 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - * - * If the architecture has no effective trylock variant, it should call the - * spinlock-based trylock variant unconditionally. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) - return 1; - return 0; -} - -#endif diff --git a/arch/m32r/include/asm/mutex.h b/arch/m32r/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/m32r/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. 
(see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index eb85bd9c6180..1f2e5d31cb24 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += mman.h -generic-y += mutex.h generic-y += percpu.h generic-y += preempt.h generic-y += resource.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 29acb89daaaa..167150c701d1 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -27,7 +27,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msgbuf.h -generic-y += mutex.h generic-y += param.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/microblaze/include/asm/mutex.h b/arch/microblaze/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c71..000000000000 --- a/arch/microblaze/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 9740066cc631..3269b742a75e 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -9,7 +9,6 @@ generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mutex.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/mn10300/include/asm/mutex.h b/arch/mn10300/include/asm/mutex.h deleted file mode 100644 index 84f5490c6fb4..000000000000 --- a/arch/mn10300/include/asm/mutex.h +++ /dev/null @@ -1,16 +0,0 @@ -/* MN10300 Mutex fastpath - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - * - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ -#include diff --git a/arch/nios2/include/asm/mutex.h b/arch/nios2/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c71..000000000000 --- a/arch/nios2/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/openrisc/include/asm/mutex.h b/arch/openrisc/include/asm/mutex.h deleted file mode 100644 index b85a0cfa9fc9..000000000000 --- a/arch/openrisc/include/asm/mutex.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * OpenRISC Linux - * - * Linux architectural port borrowing liberally from similar works of - * others. All original copyrights apply as per the original source - * declaration. - * - * OpenRISC implementation: - * Copyright (C) 2003 Matjaz Breskvar - * Copyright (C) 2010-2011 Jonas Bonn - * et al. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. 
(see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index f9b3a81aefcd..91f53c07f410 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -16,7 +16,6 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mutex.h generic-y += param.h generic-y += percpu.h generic-y += poll.h diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h deleted file mode 100644 index 078155fa1189..000000000000 --- a/arch/powerpc/include/asm/mutex.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm - */ -#ifndef _ASM_POWERPC_MUTEX_H -#define _ASM_POWERPC_MUTEX_H - -static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new) -{ - int t; - - __asm__ __volatile__ ( -"1: lwarx %0,0,%1 # mutex trylock\n\ - cmpw 0,%0,%2\n\ - bne- 2f\n" - PPC405_ERR77(0,%1) -" stwcx. %3,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - "\n\ -2:" - : "=&r" (t) - : "r" (&v->counter), "r" (old), "r" (new) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_dec_return_lock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%1 # mutex lock\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_inc_return_unlock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( - PPC_RELEASE_BARRIER -"1: lwarx %0,0,%1 # mutex unlock\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1 \n\ - bne- 1b" - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call . - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. 
- */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_inc_return_unlock(count) <= 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to 0, and return 1 (success), or if the count - * was not 1, then return 0 (failure). - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) - return 1; - return 0; -} - -#endif diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/s390/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/score/include/asm/mutex.h b/arch/score/include/asm/mutex.h deleted file mode 100644 index 10d48fe4db97..000000000000 --- a/arch/score/include/asm/mutex.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SCORE_MUTEX_H -#define _ASM_SCORE_MUTEX_H - -#include - -#endif /* _ASM_SCORE_MUTEX_H */ diff --git a/arch/sh/include/asm/mutex-llsc.h b/arch/sh/include/asm/mutex-llsc.h deleted file mode 100644 index dad29b687bd3..000000000000 --- a/arch/sh/include/asm/mutex-llsc.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * arch/sh/include/asm/mutex-llsc.h - * - * SH-4A optimized mutex locking primitives - * - * Please look into asm-generic/mutex-xchg.h for a formal definition. - */ -#ifndef __ASM_SH_MUTEX_LLSC_H -#define __ASM_SH_MUTEX_LLSC_H - -/* - * Attempting to lock a mutex on SH4A is done like in ARMv6+ architecure. - * with a bastardized atomic decrement (it is not a reliable atomic decrement - * but it satisfies the defined semantics for our purpose, while being - * smaller and faster than a real atomic decrement or atomic swap. - * The idea is to attempt decrementing the lock value only once. If once - * decremented it isn't zero, or if its store-back fails due to a dispute - * on the exclusive store, we simply bail out immediately through the slow - * path where the lock will be reattempted until it succeeds. 
- */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __done, __res; - - __asm__ __volatile__ ( - "movli.l @%2, %0 \n" - "add #-1, %0 \n" - "movco.l %0, @%2 \n" - "movt %1 \n" - : "=&z" (__res), "=&r" (__done) - : "r" (&(count)->counter) - : "t"); - - if (unlikely(!__done || __res != 0)) - fail_fn(count); -} - -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - int __done, __res; - - __asm__ __volatile__ ( - "movli.l @%2, %0 \n" - "add #-1, %0 \n" - "movco.l %0, @%2 \n" - "movt %1 \n" - : "=&z" (__res), "=&r" (__done) - : "r" (&(count)->counter) - : "t"); - - if (unlikely(!__done || __res != 0)) - __res = -1; - - return __res; -} - -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __done, __res; - - __asm__ __volatile__ ( - "movli.l @%2, %0 \n\t" - "add #1, %0 \n\t" - "movco.l %0, @%2 \n\t" - "movt %1 \n\t" - : "=&z" (__res), "=&r" (__done) - : "r" (&(count)->counter) - : "t"); - - if (unlikely(!__done || __res <= 0)) - fail_fn(count); -} - -/* - * If the unlock was done on a contended lock, or if the unlock simply fails - * then the mutex remains locked. - */ -#define __mutex_slowpath_needs_to_unlock() 1 - -/* - * For __mutex_fastpath_trylock we do an atomic decrement and check the - * result and put it in the __res variable. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __res, __orig; - - __asm__ __volatile__ ( - "1: movli.l @%2, %0 \n\t" - "dt %0 \n\t" - "movco.l %0,@%2 \n\t" - "bf 1b \n\t" - "cmp/eq #0,%0 \n\t" - "bt 2f \n\t" - "mov #0, %1 \n\t" - "bf 3f \n\t" - "2: mov #1, %1 \n\t" - "3: " - : "=&z" (__orig), "=&r" (__res) - : "r" (&count->counter) - : "t"); - - return __res; -} -#endif /* __ASM_SH_MUTEX_LLSC_H */ diff --git a/arch/sh/include/asm/mutex.h b/arch/sh/include/asm/mutex.h deleted file mode 100644 index d8e37716a4a0..000000000000 --- a/arch/sh/include/asm/mutex.h +++ /dev/null @@ -1,12 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. 
(see asm-generic/mutex-xchg.h for details) - */ -#if defined(CONFIG_CPU_SH4A) -#include -#else -#include -#endif diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index cfc918067f80..0569bfac4afb 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -15,7 +15,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += module.h -generic-y += mutex.h generic-y += preempt.h generic-y += rwsem.h generic-y += serial.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index ba35c41c71ff..2d1f5638974c 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -21,7 +21,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msgbuf.h -generic-y += mutex.h generic-y += param.h generic-y += parport.h generic-y += poll.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 904f3ebf4220..052f7f6d0551 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -17,7 +17,6 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mutex.h generic-y += param.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/unicore32/include/asm/mutex.h b/arch/unicore32/include/asm/mutex.h deleted file mode 100644 index fab7d0e8adf6..000000000000 --- a/arch/unicore32/include/asm/mutex.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * linux/arch/unicore32/include/asm/mutex.h - * - * Code specific to PKUnity SoC and UniCore ISA - * - * Copyright (C) 2001-2010 GUAN Xue-tao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * UniCore optimized mutex locking primitives - * - * Please look into asm-generic/mutex-xchg.h for a formal definition. - */ -#ifndef __UNICORE_MUTEX_H__ -#define __UNICORE_MUTEX_H__ - -# include -#endif diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h deleted file mode 100644 index 7d3a48275394..000000000000 --- a/arch/x86/include/asm/mutex.h +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef CONFIG_X86_32 -# include -#else -# include -#endif diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h deleted file mode 100644 index e9355a84fc67..000000000000 --- a/arch/x86/include/asm/mutex_32.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Assembly implementation of the mutex fastpath, based on atomic - * decrement/increment. - * - * started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - */ -#ifndef _ASM_X86_MUTEX_32_H -#define _ASM_X86_MUTEX_32_H - -#include - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if it - * wasn't 1 originally. This function MUST leave the value lower than 1 - * even when the "1" assertion wasn't true. 
- */ -#define __mutex_fastpath_lock(count, fail_fn) \ -do { \ - unsigned int dummy; \ - \ - typecheck(atomic_t *, count); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " decl (%%eax)\n" \ - " jns 1f \n" \ - " call " #fail_fn "\n" \ - "1:\n" \ - : "=a" (dummy) \ - : "a" (count) \ - : "memory", "ecx", "edx"); \ -} while (0) - - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int __mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_dec_return(count) < 0)) - return -1; - else - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * try to promote the mutex from 0 to 1. if it wasn't 0, call . - * In the failure case, this function is allowed to either set the value - * to 1, or to set it to a value lower than 1. - * - * If the implementation sets it to a value of lower than 1, the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -#define __mutex_fastpath_unlock(count, fail_fn) \ -do { \ - unsigned int dummy; \ - \ - typecheck(atomic_t *, count); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " incl (%%eax)\n" \ - " jg 1f\n" \ - " call " #fail_fn "\n" \ - "1:\n" \ - : "=a" (dummy) \ - : "a" (count) \ - : "memory", "ecx", "edx"); \ -} while (0) - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - */ -static inline int __mutex_fastpath_trylock(atomic_t *count, - int (*fail_fn)(atomic_t *)) -{ - /* cmpxchg because it never induces a false contention state. */ - if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) - return 1; - - return 0; -} - -#endif /* _ASM_X86_MUTEX_32_H */ diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h deleted file mode 100644 index d9850758464e..000000000000 --- a/arch/x86/include/asm/mutex_64.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Assembly implementation of the mutex fastpath, based on atomic - * decrement/increment. - * - * started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - */ -#ifndef _ASM_X86_MUTEX_64_H -#define _ASM_X86_MUTEX_64_H - -/** - * __mutex_fastpath_lock - decrement and call function if negative - * @v: pointer of type atomic_t - * @fail_fn: function to call if the result is negative - * - * Atomically decrements @v and calls if the result is negative. 
- */ -#ifdef CC_HAVE_ASM_GOTO -static inline void __mutex_fastpath_lock(atomic_t *v, - void (*fail_fn)(atomic_t *)) -{ - asm_volatile_goto(LOCK_PREFIX " decl %0\n" - " jns %l[exit]\n" - : : "m" (v->counter) - : "memory", "cc" - : exit); - fail_fn(v); -exit: - return; -} -#else -#define __mutex_fastpath_lock(v, fail_fn) \ -do { \ - unsigned long dummy; \ - \ - typecheck(atomic_t *, v); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " decl (%%rdi)\n" \ - " jns 1f \n" \ - " call " #fail_fn "\n" \ - "1:" \ - : "=D" (dummy) \ - : "D" (v) \ - : "rax", "rsi", "rdx", "rcx", \ - "r8", "r9", "r10", "r11", "memory"); \ -} while (0) -#endif - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int __mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_dec_return(count) < 0)) - return -1; - else - return 0; -} - -/** - * __mutex_fastpath_unlock - increment and call function if nonpositive - * @v: pointer of type atomic_t - * @fail_fn: function to call if the result is nonpositive - * - * Atomically increments @v and calls if the result is nonpositive. - */ -#ifdef CC_HAVE_ASM_GOTO -static inline void __mutex_fastpath_unlock(atomic_t *v, - void (*fail_fn)(atomic_t *)) -{ - asm_volatile_goto(LOCK_PREFIX " incl %0\n" - " jg %l[exit]\n" - : : "m" (v->counter) - : "memory", "cc" - : exit); - fail_fn(v); -exit: - return; -} -#else -#define __mutex_fastpath_unlock(v, fail_fn) \ -do { \ - unsigned long dummy; \ - \ - typecheck(atomic_t *, v); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " incl (%%rdi)\n" \ - " jg 1f\n" \ - " call " #fail_fn "\n" \ - "1:" \ - : "=D" (dummy) \ - : "D" (v) \ - : "rax", "rsi", "rdx", "rcx", \ - "r8", "r9", "r10", "r11", "memory"); \ -} while (0) -#endif - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to 0 and return 1 (success), or return 0 (failure) - * if it wasn't 1 originally. [the fallback function is never used on - * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.] - */ -static inline int __mutex_fastpath_trylock(atomic_t *count, - int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) - return 1; - - return 0; -} - -#endif /* _ASM_X86_MUTEX_64_H */ diff --git a/arch/xtensa/include/asm/mutex.h b/arch/xtensa/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/xtensa/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h deleted file mode 100644 index c54829d3de37..000000000000 --- a/include/asm-generic/mutex-dec.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * include/asm-generic/mutex-dec.h - * - * Generic implementation of the mutex fastpath, based on atomic - * decrement/increment. 
- */ -#ifndef _ASM_GENERIC_MUTEX_DEC_H -#define _ASM_GENERIC_MUTEX_DEC_H - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_dec_return_acquire(count) < 0)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_dec_return_acquire(count) < 0)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call . - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - * - * If the implementation sets it to a value of lower than 1, then the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_inc_return_release(count) <= 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - * - * If the architecture has no effective trylock variant, it should call the - * spinlock-based trylock variant unconditionally. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1)) - return 1; - return 0; -} - -#endif diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h deleted file mode 100644 index 61069ed334e2..000000000000 --- a/include/asm-generic/mutex-null.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * include/asm-generic/mutex-null.h - * - * Generic implementation of the mutex fastpath, based on NOP :-) - * - * This is used by the mutex-debugging infrastructure, but it can also - * be used by architectures that (for whatever reason) want to use the - * spinlock based slowpath. 
- */ -#ifndef _ASM_GENERIC_MUTEX_NULL_H -#define _ASM_GENERIC_MUTEX_NULL_H - -#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count) -#define __mutex_fastpath_lock_retval(count) (-1) -#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count) -#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count) -#define __mutex_slowpath_needs_to_unlock() 1 - -#endif diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h deleted file mode 100644 index 3269ec4e195f..000000000000 --- a/include/asm-generic/mutex-xchg.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * include/asm-generic/mutex-xchg.h - * - * Generic implementation of the mutex fastpath, based on xchg(). - * - * NOTE: An xchg based implementation might be less optimal than an atomic - * decrement/increment based implementation. If your architecture - * has a reasonable atomic dec/inc then you should probably use - * asm-generic/mutex-dec.h instead, or you could open-code an - * optimized version in asm/mutex.h. - */ -#ifndef _ASM_GENERIC_MUTEX_XCHG_H -#define _ASM_GENERIC_MUTEX_XCHG_H - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if it - * wasn't 1 originally. This function MUST leave the value lower than 1 - * even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_xchg(count, 0) != 1)) - /* - * We failed to acquire the lock, so mark it contended - * to ensure that any waiting tasks are woken up by the - * unlock slow path. - */ - if (likely(atomic_xchg_acquire(count, -1) != 1)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_xchg_acquire(count, 0) != 1)) - if (likely(atomic_xchg(count, -1) != 1)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * try to promote the mutex from 0 to 1. if it wasn't 0, call - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than one. - * If the implementation sets it to a value of lower than one, the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(atomic_xchg_release(count, 1) != 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 0 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: spinlock based trylock implementation - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. 
- * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - * - * If the architecture has no effective trylock variant, it should call the - * spinlock-based trylock variant unconditionally. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int prev; - - if (atomic_read(count) != 1) - return 0; - - prev = atomic_xchg_acquire(count, 0); - if (unlikely(prev < 0)) { - /* - * The lock was marked contended so we must restore that - * state. If while doing so we get back a prev value of 1 - * then we just own it. - * - * [ In the rare case of the mutex going to 1, to 0, to -1 - * and then back to 0 in this few-instructions window, - * this has the potential to trigger the slowpath for the - * owner's unlock path needlessly, but that's not a problem - * in practice. ] - */ - prev = atomic_xchg_acquire(count, prev); - if (prev < 0) - prev = 0; - } - - return prev; -} - -#endif diff --git a/include/asm-generic/mutex.h b/include/asm-generic/mutex.h deleted file mode 100644 index fe91ab502793..000000000000 --- a/include/asm-generic/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_GENERIC_MUTEX_H -#define __ASM_GENERIC_MUTEX_H -/* - * Pull in the generic implementation for the mutex fastpath, - * which is a reasonable default on many architectures. - */ - -#include -#endif /* __ASM_GENERIC_MUTEX_H */ -- cgit v1.2.3 From 0f5225b024d4bffd682aab008c35862e8fdc1865 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 7 Oct 2016 17:43:51 +0200 Subject: locking/mutex, drm: Introduce mutex_trylock_recursive() By popular DRM demand, introduce mutex_trylock_recursive() to fix up the two GEM users. Without this it is very easy for these drivers to get stuck in low-memory situations and trigger OOM. Work is in progress to remove the need for this in at least i915. Signed-off-by: Peter Zijlstra (Intel) Cc: Chris Wilson Cc: Daniel Vetter Cc: David Airlie Cc: Davidlohr Bueso Cc: Ding Tianhong Cc: Imre Deak Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Rob Clark Cc: Terry Rudd Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 15 ++++++++++++--- drivers/gpu/drm/msm/msm_gem_shrinker.c | 16 ++++++++++++---- include/linux/mutex.h | 31 +++++++++++++++++++++++++++++++ scripts/checkpatch.pl | 6 ++++++ 4 files changed, 61 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index e9bd2a81d03a..c450076d2f9b 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -227,11 +227,20 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) { - if (!mutex_trylock(&dev->struct_mutex)) + switch (mutex_trylock_recursive(&dev->struct_mutex)) { + case MUTEX_TRYLOCK_FAILED: return false; - *unlock = true; - return true; + case MUTEX_TRYLOCK_SUCCESS: + *unlock = true; + return true; + + case MUTEX_TRYLOCK_RECURSIVE: + *unlock = false; + return true; + } + + BUG(); } static unsigned long diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 6d2e885bd58e..b77bca75bb5f 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -20,13 +20,21 @@ static bool msm_gem_shrinker_lock(struct drm_device *dev, bool *unlock) { - if (!mutex_trylock(&dev->struct_mutex)) + switch (mutex_trylock_recursive(&dev->struct_mutex)) { + case MUTEX_TRYLOCK_FAILED: return false; - *unlock = true; - return true; -} + case MUTEX_TRYLOCK_SUCCESS: + *unlock = true; + return true; + + case MUTEX_TRYLOCK_RECURSIVE: + *unlock = false; + return true; + } + BUG(); +} static unsigned long msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 4d3bccabbea5..6a902f0a2148 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -189,4 +189,35 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +/* + * These values are chosen such that FAIL and SUCCESS match the + * values of the regular mutex_trylock(). + */ +enum mutex_trylock_recursive_enum { + MUTEX_TRYLOCK_FAILED = 0, + MUTEX_TRYLOCK_SUCCESS = 1, + MUTEX_TRYLOCK_RECURSIVE, +}; + +/** + * mutex_trylock_recursive - trylock variant that allows recursive locking + * @lock: mutex to be locked + * + * This function should not be used, _ever_. It is purely for hysterical GEM + * raisins, and once those are gone this will be removed. + * + * Returns: + * MUTEX_TRYLOCK_FAILED - trylock failed, + * MUTEX_TRYLOCK_SUCCESS - lock acquired, + * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. 
+ */ +static inline __deprecated __must_check enum mutex_trylock_recursive_enum +mutex_trylock_recursive(struct mutex *lock) +{ + if (unlikely(__mutex_owner(lock) == current)) + return MUTEX_TRYLOCK_RECURSIVE; + + return mutex_trylock(lock); +} + #endif /* __LINUX_MUTEX_H */ diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a8368d1c4348..23f462f64a3f 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6076,6 +6076,12 @@ sub process { } } +# check for mutex_trylock_recursive usage + if ($line =~ /mutex_trylock_recursive/) { + ERROR("LOCKING", + "recursive locking is bad, do not use this ever.\n" . $herecurr); + } + # check for lockdep_set_novalidate_class if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ || $line =~ /__lockdep_no_validate__\s*\)/ ) { -- cgit v1.2.3 From 43496d35513b25ad468bef91e51a39d61a0d8464 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 16 Nov 2016 10:36:05 +0100 Subject: locking/mutex: Don't mark mutex_trylock_recursive() as deprecated, temporarily MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until the DRM drivers are fixed to not use mutex_trylock_recursive(), allyes/modconfig builds will emit an API deprecation warning: drivers/gpu/drm/i915/i915_gem_shrinker.c: In function ‘i915_gem_shrinker_lock’: drivers/gpu/drm/i915/i915_gem_shrinker.c:230:2: warning: ‘mutex_trylock_recursive’ is deprecated [-Wdeprecated-declarations] switch (mutex_trylock_recursive(&dev->struct_mutex)) { ^ Don't pollute the kernel log until the DRM code is fixed. Hopefully the checkpatch warning is enough to keep people from using this new API, and we'll be NAK-ing new users as well. Cc: Chris Wilson Cc: Daniel Vetter Cc: David Airlie Cc: Davidlohr Bueso Cc: Ding Tianhong Cc: Imre Deak Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rob Clark Cc: Terry Rudd Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 6a902f0a2148..b97870f2debd 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -211,7 +211,7 @@ enum mutex_trylock_recursive_enum { * MUTEX_TRYLOCK_SUCCESS - lock acquired, * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. */ -static inline __deprecated __must_check enum mutex_trylock_recursive_enum +static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum mutex_trylock_recursive(struct mutex *lock) { if (unlikely(__mutex_owner(lock) == current)) -- cgit v1.2.3 From 6d0d287891a022ebba572327cbd70b5de69a63a2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 16 Nov 2016 13:23:05 +0100 Subject: locking/core: Provide common cpu_relax_yield() definition No need to duplicate the same define everywhere. Since the only user is stop-machine and the only provider is s390, we can use a default implementation of cpu_relax_yield() in sched.h. Suggested-by: Russell King Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Acked-by: Russell King Cc: Andrew Morton Cc: Catalin Marinas Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Nicholas Piggin Cc: Noam Camus Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-s390 Cc: linuxppc-dev@lists.ozlabs.org Cc: sparclinux@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1479298985-191589-1-git-send-email-borntraeger@de.ibm.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/processor.h | 1 - arch/arc/include/asm/processor.h | 3 --- arch/arm/include/asm/processor.h | 2 -- arch/arm64/include/asm/processor.h | 2 -- arch/avr32/include/asm/processor.h | 1 - arch/blackfin/include/asm/processor.h | 1 - arch/c6x/include/asm/processor.h | 1 - arch/cris/include/asm/processor.h | 1 - arch/frv/include/asm/processor.h | 1 - arch/h8300/include/asm/processor.h | 1 - arch/hexagon/include/asm/processor.h | 1 - arch/ia64/include/asm/processor.h | 1 - arch/m32r/include/asm/processor.h | 1 - arch/m68k/include/asm/processor.h | 1 - arch/metag/include/asm/processor.h | 1 - arch/microblaze/include/asm/processor.h | 1 - arch/mips/include/asm/processor.h | 1 - arch/mn10300/include/asm/processor.h | 1 - arch/nios2/include/asm/processor.h | 1 - arch/openrisc/include/asm/processor.h | 1 - arch/parisc/include/asm/processor.h | 1 - arch/powerpc/include/asm/processor.h | 2 -- arch/s390/include/asm/processor.h | 1 + arch/score/include/asm/processor.h | 1 - arch/sh/include/asm/processor.h | 1 - arch/sparc/include/asm/processor_32.h | 1 - arch/sparc/include/asm/processor_64.h | 1 - arch/tile/include/asm/processor.h | 2 -- arch/unicore32/include/asm/processor.h | 1 - arch/x86/include/asm/processor.h | 2 -- arch/x86/um/asm/processor.h | 1 - arch/xtensa/include/asm/processor.h | 1 - include/linux/sched.h | 4 ++++ 33 files changed, 5 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 31e8dbeef10b..2fec2dee3020 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -58,7 +58,6 @@ unsigned long get_wchan(struct task_struct *p); ((tsk) == current ? 
rdusp() : task_thread_info(tsk)->pcb.usp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d102a49ad8c5..6e1242da0159 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -60,15 +60,12 @@ struct task_struct; #ifndef CONFIG_EZNPS_MTM_EXT #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #else #define cpu_relax() \ __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory") -#define cpu_relax_yield() cpu_relax() - #endif #define copy_segments(tsk, mm) do { } while (0) diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 9e71c58bfa6f..c3d5fc124a05 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -82,8 +82,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #endif -#define cpu_relax_yield() cpu_relax() - #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6132f64af68d..747c65a616ed 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -149,8 +149,6 @@ static inline void cpu_relax(void) asm volatile("yield" ::: "memory"); } -#define cpu_relax_yield() cpu_relax() - /* Thread switching */ extern struct task_struct *cpu_switch_to(struct task_struct *prev, struct task_struct *next); diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h index ee62365ad0da..972adcc1e8f4 100644 --- a/arch/avr32/include/asm/processor.h +++ b/arch/avr32/include/asm/processor.h @@ -92,7 +92,6 @@ extern struct avr32_cpuinfo boot_cpu_data; #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") struct cpu_context { diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index 57acfb1acfe5..85d4af97c986 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -92,7 +92,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? 
rdusp() : (tsk)->thread.usp) #define cpu_relax() smp_mb() -#define cpu_relax_yield() cpu_relax() /* Get the Silicon Revision of the chip */ static inline uint32_t __pure bfin_revid(void) diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index 1fd22e727e44..b9eb3da7f278 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -121,7 +121,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(task) (task_pt_regs(task)->sp) #define cpu_relax() do { } while (0) -#define cpu_relax_yield() cpu_relax() extern const struct seq_operations cpuinfo_op; diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 1a578417397f..15b815df29c1 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -63,7 +63,6 @@ static inline void release_thread(struct task_struct *dead_task) #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() void default_idle(void); diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index c1e5f2a0ca31..ddaeb9cc9143 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -107,7 +107,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.frame0->sp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* data cache prefetch */ #define ARCH_HAS_PREFETCH diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 42d605369217..65132d7ae9e5 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -127,7 +127,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? 
rdusp() : (tsk)->thread.usp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define HARD_RESET_NOW() ({ \ local_irq_disable(); \ diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 5d694cccc85a..45a825402f63 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -56,7 +56,6 @@ struct thread_struct { } #define cpu_relax() __vmyield() -#define cpu_relax_yield() cpu_relax() /* * Decides where the kernel will search for a free chunk of vm space during diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 0c2c3b256f6c..03911a336406 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -547,7 +547,6 @@ ia64_eoi (void) } #define cpu_relax() ia64_hint(ia64_hint_pause) -#define cpu_relax_yield() cpu_relax() static inline int ia64_get_irr(unsigned int vector) diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 9b83a13290fc..5767367550c6 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -133,6 +133,5 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.sp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #endif /* _ASM_M32R_PROCESSOR_H */ diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index b0d044224ce5..f5f790c31bf8 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -156,6 +156,5 @@ unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) ((tsk)->thread.esp0)) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #endif diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index ee302a637f24..ec6a49076980 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -152,7 +152,6 @@ unsigned long get_wchan(struct task_struct *p); #define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() extern void setup_priv(void); diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 08ec1f725b7f..37ef196e4519 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -22,7 +22,6 @@ extern const struct seq_operations cpuinfo_op; # define cpu_relax() barrier() -# define cpu_relax_yield() cpu_relax() #define task_pt_regs(tsk) \ (((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 8ea95e77ec9d..95b8c471f572 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -389,7 +389,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* * Return_address is a replacement for __builtin_return_address(count) diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index d11397bc9429..18e17abf7664 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -69,7 +69,6 @@ extern void print_cpu_info(struct mn10300_cpuinfo *); extern void dodgy_tsc(void); #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* * User space process size: 1.75GB (default). 
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index d32c17669123..3bbbc3d798e5 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -88,7 +88,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.kregs->sp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #endif /* __ASSEMBLY__ */ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index 7f47fc7b7eae..a908e6c30a00 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -92,7 +92,6 @@ extern unsigned long thread_saved_pc(struct task_struct *t); #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #endif /* __ASSEMBLY__ */ #endif /* __ASM_OPENRISC_PROCESSOR_H */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index a4a07f4f7c20..ca40741378be 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -309,7 +309,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30]) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* * parisc_requires_coherency() is used to identify the combined VIPT/PIPT diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 5684e6872473..dac83fcb9445 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -404,8 +404,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define cpu_relax() barrier() #endif -#define cpu_relax_yield() cpu_relax() - /* Check that a certain kernel stack pointer is valid in task_struct p */ int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 9e32f25bdea3..9d3a21aedc97 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -234,6 +234,7 @@ static inline unsigned short stap(void) /* * Give up the time slice of the virtual PU. 
*/ +#define cpu_relax_yield cpu_relax_yield void cpu_relax_yield(void); #define cpu_relax() barrier() diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index a1e97c063ed4..d9a922d8711b 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -24,7 +24,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define current_text_addr() ({ __label__ _l; _l: &&_l; }) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define release_thread(thread) do {} while (0) /* diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 9454ff1ad0d9..5addd69f70ef 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -97,7 +97,6 @@ extern struct sh_cpuinfo cpu_data[]; #define cpu_sleep() __asm__ __volatile__ ("sleep" : : : "memory") #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() void default_idle(void); void stop_this_cpu(void *); diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index fc32b7311481..365d4cb267b4 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -119,7 +119,6 @@ extern struct task_struct *last_task_used_math; int do_mathemu(struct pt_regs *regs, struct task_struct *fpt); #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() extern void (*sparc_idle)(void); diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 12787dfeb11c..6448cfc8292f 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -216,7 +216,6 @@ unsigned long get_wchan(struct task_struct *task); "nop\n\t" \ ".previous" \ ::: "memory") -#define cpu_relax_yield() cpu_relax() /* Prefetch support. This is tuned for UltraSPARC-III and later. * UltraSPARC-I will treat these as nops, and UltraSPARC-II has diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index c1c228b16f7f..0bc9968b97a1 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -264,8 +264,6 @@ static inline void cpu_relax(void) barrier(); } -#define cpu_relax_yield() cpu_relax() - /* Info on this processor (see fs/proc/cpuinfo.c) */ struct seq_operations; extern const struct seq_operations cpuinfo_op; diff --git a/arch/unicore32/include/asm/processor.h b/arch/unicore32/include/asm/processor.h index eeefe7c529eb..4eaa42167667 100644 --- a/arch/unicore32/include/asm/processor.h +++ b/arch/unicore32/include/asm/processor.h @@ -71,7 +71,6 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7513c996f673..c84605bb2a15 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -588,8 +588,6 @@ static __always_inline void cpu_relax(void) rep_nop(); } -#define cpu_relax_yield() cpu_relax() - /* Stop speculative execution and prefetching of modified code. 
*/ static inline void sync_core(void) { diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index b4bd63b22b7f..c77db2288982 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -26,7 +26,6 @@ static inline void rep_nop(void) } #define cpu_relax() rep_nop() -#define cpu_relax_yield() cpu_relax() #define task_pt_regs(t) (&(t)->thread.regs) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 7d8d6bececfc..86ffcd68e496 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -206,7 +206,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1]) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* Special register access. */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 348f51b0ec92..c1aa3b02f6ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2444,6 +2444,10 @@ static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } #endif /* CONFIG_NO_HZ_COMMON */ +#ifndef cpu_relax_yield +#define cpu_relax_yield() cpu_relax() +#endif + /* * Do not use outside of architecture code which knows its limitations. * -- cgit v1.2.3 From 194a6b5b9cb6b91a5f7d86984165a3bc55188599 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 17 Nov 2016 11:46:38 -0500 Subject: sched/wake_q: Rename WAKE_Q to DEFINE_WAKE_Q Currently the wake_q data structure is defined by the WAKE_Q() macro. This macro, however, looks like a function doing something as "wake" is a verb. Even checkpatch.pl was confused as it reported warnings like WARNING: Missing a blank line after declarations #548: FILE: kernel/futex.c:3665: + int ret; + WAKE_Q(wake_q); This patch renames the WAKE_Q() macro to DEFINE_WAKE_Q() which clarifies what the macro is doing and eliminates the checkpatch.pl warnings. Signed-off-by: Waiman Long Acked-by: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1479401198-1765-1-git-send-email-longman@redhat.com [ Resolved conflict and added missing rename. ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- ipc/mqueue.c | 4 ++-- ipc/msg.c | 8 ++++---- kernel/futex.c | 8 ++++---- kernel/locking/mutex.c | 2 +- kernel/locking/rtmutex.c | 2 +- kernel/locking/rwsem-xadd.c | 10 +++++----- 7 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index c1aa3b02f6ac..dc37cbe2b13c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -989,7 +989,7 @@ enum cpu_idle_type { * already in a wake queue, the wakeup will happen soon and the second * waker can just skip it. * - * The WAKE_Q macro declares and initializes the list head. + * The DEFINE_WAKE_Q macro declares and initializes the list head. * wake_up_q() does NOT reinitialize the list; it's expected to be * called near the end of a function, where the fact that the queue is * not used again will be easy to see by inspection. 
@@ -1009,7 +1009,7 @@ struct wake_q_head { #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) -#define WAKE_Q(name) \ +#define DEFINE_WAKE_Q(name) \ struct wake_q_head name = { WAKE_Q_TAIL, &name.first } extern void wake_q_add(struct wake_q_head *head, diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 8cbd6e6894d5..7a2d8f0c8ae5 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -967,7 +967,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, struct timespec ts; struct posix_msg_tree_node *new_leaf = NULL; int ret = 0; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (u_abs_timeout) { int res = prepare_timeout(u_abs_timeout, &expires, &ts); @@ -1151,7 +1151,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, msg_ptr = wait.msg; } } else { - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); msg_ptr = msg_get(info); diff --git a/ipc/msg.c b/ipc/msg.c index e12307d0c920..32e9bd837cde 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -235,7 +235,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { struct msg_msg *msg, *t; struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); expunge_all(msq, -EIDRM, &wake_q); ss_wakeup(msq, &wake_q, true); @@ -397,7 +397,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, goto out_up; case IPC_SET: { - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (msqid64.msg_qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) { @@ -634,7 +634,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, struct msg_msg *msg; int err; struct ipc_namespace *ns; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); ns = current->nsproxy->ipc_ns; @@ -850,7 +850,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl struct msg_queue *msq; struct ipc_namespace *ns; struct msg_msg *msg, *copy = NULL; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); ns = current->nsproxy->ipc_ns; diff --git a/kernel/futex.c b/kernel/futex.c index 2c4be467fecd..9246d9f593d1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1298,7 +1298,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; u32 uninitialized_var(curval), newval; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); bool deboost; int ret = 0; @@ -1415,7 +1415,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) struct futex_q *this, *next; union futex_key key = FUTEX_KEY_INIT; int ret; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (!bitset) return -EINVAL; @@ -1469,7 +1469,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; int ret, op_ret; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); retry: ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); @@ -1708,7 +1708,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_pi_state *pi_state = NULL; struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (requeue_pi) { /* diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index a65e09a046ac..c0731685603f 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -840,7 +840,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne { struct task_struct *next = NULL; unsigned long owner, flags; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); 
mutex_release(&lock->dep_map, 1, ip); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 1ec0f48962b3..d8d7a153b711 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1382,7 +1382,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, struct wake_q_head *wqh)) { - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { rt_mutex_deadlock_account_unlock(current); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2fa2e2e64950..263e7449282a 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -225,7 +225,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); waiter.task = tsk; waiter.type = RWSEM_WAITING_FOR_READ; @@ -461,7 +461,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) bool waiting = true; /* any queued threads before us */ struct rwsem_waiter waiter; struct rw_semaphore *ret = sem; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); /* undo write bias from down_write operation, stop active locking */ count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count); @@ -495,7 +495,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) * wake any read locks that were queued ahead of us. */ if (count > RWSEM_WAITING_BIAS) { - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q); /* @@ -571,7 +571,7 @@ __visible struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); /* * If a spinner is present, it is not necessary to do the wakeup. @@ -625,7 +625,7 @@ __visible struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irqsave(&sem->wait_lock, flags); -- cgit v1.2.3 From d9345c65eb7930ac6755cf593ee7686f4029ccf4 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Wed, 2 Nov 2016 05:08:28 -0400 Subject: sched/core: Introduce the vcpu_is_preempted(cpu) interface This patch is the first step in adding support for improving lock holder preemption behaviour. vcpu_is_preempted(cpu) does the obvious thing: it tells us whether a vCPU is preempted or not. Defaults to false on architectures that don't support it. Suggested-by: Peter Zijlstra (Intel) Tested-by: Juergen Gross Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) [ Translated the changelog to English.
] Acked-by: Christian Borntraeger Acked-by: Paolo Bonzini Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: boqun.feng@gmail.com Cc: bsingharora@gmail.com Cc: dave@stgolabs.net Cc: kernellwp@gmail.com Cc: konrad.wilk@oracle.com Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: paulmck@linux.vnet.ibm.com Cc: paulus@samba.org Cc: rkrcmar@redhat.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: xen-devel-request@lists.xenproject.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1478077718-37424-2-git-send-email-xinhui.pan@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index dc37cbe2b13c..37261afbf16a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3510,6 +3510,18 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ +/* + * In order to reduce various lock holder preemption latencies provide an + * interface to see if a vCPU is currently running or not. + * + * This allows us to terminate optimistic spin loops and block, analogous to + * the native optimistic spin heuristic of testing if the lock owner task is + * running or not. + */ +#ifndef vcpu_is_preempted +# define vcpu_is_preempted(cpu) false +#endif + extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -- cgit v1.2.3 From 4ec6e863625625a54f527464ab91ce1a1cb16c42 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Wed, 2 Nov 2016 05:08:34 -0400 Subject: kvm: Introduce kvm_write_guest_offset_cached() It allows us to update some status or field of a structure partially. We can also save a kvm_read_guest_cached() call if we just update one field of the struct regardless of its current value. Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paolo Bonzini Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: boqun.feng@gmail.com Cc: borntraeger@de.ibm.com Cc: bsingharora@gmail.com Cc: dave@stgolabs.net Cc: jgross@suse.com Cc: kernellwp@gmail.com Cc: konrad.wilk@oracle.com Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: paulmck@linux.vnet.ibm.com Cc: paulus@samba.org Cc: rkrcmar@redhat.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: xen-devel-request@lists.xenproject.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1478077718-37424-8-git-send-email-xinhui.pan@linux.vnet.ibm.com [ Typo fixes.
] Signed-off-by: Ingo Molnar --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 20 ++++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 01c0b9cc3915..6f0023797b33 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -645,6 +645,8 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5c360347a1e9..2f38ce55016f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1972,30 +1972,38 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, } EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); -int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); int r; + gpa_t gpa = ghc->gpa + offset; - BUG_ON(len > ghc->len); + BUG_ON(len + offset > ghc->len); if (slots->generation != ghc->generation) kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, ghc->gpa, data, len); + return kvm_write_guest(kvm, gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; - r = __copy_to_user((void __user *)ghc->hva, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; - mark_page_dirty_in_slot(ghc->memslot, ghc->gpa >> PAGE_SHIFT); + mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT); return 0; } +EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); + +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) +{ + return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); +} EXPORT_SYMBOL_GPL(kvm_write_guest_cached); int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, -- cgit v1.2.3 From f4ec57b632fe15ed7a355099cb96ed4b647416de Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Nov 2016 12:46:26 +0100 Subject: locking/ww_mutex: Use relaxed atomics The stamp is a sequence number, we don't care about memory ordering. Signed-off-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 2bb5deb0012e..7b0066814fa0 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -120,7 +120,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, struct ww_class *ww_class) { ctx->task = current; - ctx->stamp = atomic_long_inc_return(&ww_class->stamp); + ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp); ctx->acquired = 0; #ifdef CONFIG_DEBUG_MUTEXES ctx->ww_class = ww_class; -- cgit v1.2.3
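Usage note: the vcpu_is_preempted(cpu) interface introduced in the sched/core patch above is meant to be consulted from optimistic spin loops. The fragment below is an illustrative sketch only, not code from any patch in this series; the helper name owner_worth_spinning_on() is invented for this note, and it assumes CONFIG_SMP (so struct task_struct has an ->on_cpu field) together with the vcpu_is_preempted() fallback added to <linux/sched.h> above.

/*
 * Sketch only: a predicate an optimistic spin loop could use to decide
 * whether spinning on @owner is still worthwhile.
 */
static inline bool owner_worth_spinning_on(struct task_struct *owner)
{
	/*
	 * If the owner is not running on any CPU, or its vCPU has been
	 * preempted by the hypervisor, the lock is unlikely to be released
	 * soon and the spinner is better off blocking.
	 */
	return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner));
}

A spin loop would typically evaluate such a predicate between cpu_relax() iterations, alongside its usual owner-changed and need_resched() checks, and fall back to sleeping on the wait list once the predicate returns false.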