Merge branch 'pci/msi-affinity' into next

Conflicts: drivers/nvme/host/pci.c
author: Bjorn Helgaas <bhelgaas@google.com> 2016-08-01 20:34:01 +0300
committer: Bjorn Helgaas <bhelgaas@google.com> 2016-08-01 20:34:01 +0300
commit: 9454c23852ca6d7aec89fd6fd46a046c323caac3 (patch)
tree: 794be65345027b5adea3720a43124fee338333a5 /kernel/locking/qspinlock.c
parent: a04bee8285a71cdbb9076c3dc38be1f0b9a6b4b3 (diff)
parent: 4ef33685aa0957d771e068b60a5f3ca6b47ade1c (diff)
download: linux-9454c23852ca6d7aec89fd6fd46a046c323caac3.tar.xz
1 files changed, 60 insertions, 0 deletions
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ce2f75e32ae1..5fc8c311b8fe 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -267,6 +267,66 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
+ *
+ *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourselves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more interesting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	/* any unlock is good */
+	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+done:
+	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**
author	Bjorn Helgaas <bhelgaas@google.com>	2016-08-01 20:34:01 +0300
committer	Bjorn Helgaas <bhelgaas@google.com>	2016-08-01 20:34:01 +0300
commit	9454c23852ca6d7aec89fd6fd46a046c323caac3 (patch)
tree	794be65345027b5adea3720a43124fee338333a5 /kernel/locking/qspinlock.c
parent	a04bee8285a71cdbb9076c3dc38be1f0b9a6b4b3 (diff)
parent	4ef33685aa0957d771e068b60a5f3ca6b47ade1c (diff)
download	linux-9454c23852ca6d7aec89fd6fd46a046c323caac3.tar.xz