summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/delayacct.c17
-rw-r--r--kernel/locking/lockdep.c70
-rw-r--r--kernel/locking/mcs_spinlock.h10
-rw-r--r--kernel/locking/mutex.c3
-rw-r--r--kernel/locking/qspinlock.c247
-rw-r--r--kernel/locking/qspinlock_paravirt.h49
-rw-r--r--kernel/locking/qspinlock_stat.h9
-rw-r--r--kernel/locking/rwsem-xadd.c25
-rw-r--r--kernel/stop_machine.c24
9 files changed, 198 insertions, 256 deletions
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index e2764d767f18..ca8ac2824f0b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -44,23 +44,24 @@ void __delayacct_tsk_init(struct task_struct *tsk)
{
tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
if (tsk->delays)
- spin_lock_init(&tsk->delays->lock);
+ raw_spin_lock_init(&tsk->delays->lock);
}
/*
* Finish delay accounting for a statistic using its timestamps (@start),
* accumalator (@total) and @count
*/
-static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count)
+static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total,
+ u32 *count)
{
s64 ns = ktime_get_ns() - *start;
unsigned long flags;
if (ns > 0) {
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
*total += ns;
(*count)++;
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
}
}
@@ -127,7 +128,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
- spin_lock_irqsave(&tsk->delays->lock, flags);
+ raw_spin_lock_irqsave(&tsk->delays->lock, flags);
tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
@@ -137,7 +138,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
d->freepages_count += tsk->delays->freepages_count;
- spin_unlock_irqrestore(&tsk->delays->lock, flags);
+ raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return 0;
}
@@ -147,10 +148,10 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
__u64 ret;
unsigned long flags;
- spin_lock_irqsave(&tsk->delays->lock, flags);
+ raw_spin_lock_irqsave(&tsk->delays->lock, flags);
ret = nsec_to_clock_t(tsk->delays->blkio_delay +
tsk->delays->swapin_delay);
- spin_unlock_irqrestore(&tsk->delays->lock, flags);
+ raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return ret;
}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 023386338269..edcac5de7ebc 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -561,20 +561,24 @@ static void print_lock(struct held_lock *hlock)
printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip);
}
-static void lockdep_print_held_locks(struct task_struct *curr)
+static void lockdep_print_held_locks(struct task_struct *p)
{
- int i, depth = curr->lockdep_depth;
+ int i, depth = READ_ONCE(p->lockdep_depth);
- if (!depth) {
- printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
+ if (!depth)
+ printk("no locks held by %s/%d.\n", p->comm, task_pid_nr(p));
+ else
+ printk("%d lock%s held by %s/%d:\n", depth,
+ depth > 1 ? "s" : "", p->comm, task_pid_nr(p));
+ /*
+ * It's not reliable to print a task's held locks if it's not sleeping
+ * and it's not the current task.
+ */
+ if (p->state == TASK_RUNNING && p != current)
return;
- }
- printk("%d lock%s held by %s/%d:\n",
- depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
-
for (i = 0; i < depth; i++) {
printk(" #%d: ", i);
- print_lock(curr->held_locks + i);
+ print_lock(p->held_locks + i);
}
}
@@ -4451,8 +4455,6 @@ EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
void debug_show_all_locks(void)
{
struct task_struct *g, *p;
- int count = 10;
- int unlock = 1;
if (unlikely(!debug_locks)) {
pr_warn("INFO: lockdep is turned off.\n");
@@ -4460,50 +4462,18 @@ void debug_show_all_locks(void)
}
pr_warn("\nShowing all locks held in the system:\n");
- /*
- * Here we try to get the tasklist_lock as hard as possible,
- * if not successful after 2 seconds we ignore it (but keep
- * trying). This is to enable a debug printout even if a
- * tasklist_lock-holding task deadlocks or crashes.
- */
-retry:
- if (!read_trylock(&tasklist_lock)) {
- if (count == 10)
- pr_warn("hm, tasklist_lock locked, retrying... ");
- if (count) {
- count--;
- pr_cont(" #%d", 10-count);
- mdelay(200);
- goto retry;
- }
- pr_cont(" ignoring it.\n");
- unlock = 0;
- } else {
- if (count != 10)
- pr_cont(" locked it.\n");
- }
-
- do_each_thread(g, p) {
- /*
- * It's not reliable to print a task's held locks
- * if it's not sleeping (or if it's not the current
- * task):
- */
- if (p->state == TASK_RUNNING && p != current)
+ rcu_read_lock();
+ for_each_process_thread(g, p) {
+ if (!p->lockdep_depth)
continue;
- if (p->lockdep_depth)
- lockdep_print_held_locks(p);
- if (!unlock)
- if (read_trylock(&tasklist_lock))
- unlock = 1;
+ lockdep_print_held_locks(p);
touch_nmi_watchdog();
- } while_each_thread(g, p);
+ touch_all_softlockup_watchdogs();
+ }
+ rcu_read_unlock();
pr_warn("\n");
pr_warn("=============================================\n\n");
-
- if (unlock)
- read_unlock(&tasklist_lock);
}
EXPORT_SYMBOL_GPL(debug_show_all_locks);
#endif
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index f046b7ce9dd6..5e10153b4d3c 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -23,13 +23,15 @@ struct mcs_spinlock {
#ifndef arch_mcs_spin_lock_contended
/*
- * Using smp_load_acquire() provides a memory barrier that ensures
- * subsequent operations happen after the lock is acquired.
+ * Using smp_cond_load_acquire() provides the acquire semantics
+ * required so that subsequent operations happen after the
+ * lock is acquired. Additionally, some architectures such as
+ * ARM64 would like to do spin-waiting instead of purely
+ * spinning, and smp_cond_load_acquire() provides that behavior.
*/
#define arch_mcs_spin_lock_contended(l) \
do { \
- while (!(smp_load_acquire(l))) \
- cpu_relax(); \
+ smp_cond_load_acquire(l, VAL); \
} while (0)
#endif
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2048359f33d2..f44f658ae629 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -139,8 +139,9 @@ static inline bool __mutex_trylock(struct mutex *lock)
static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
{
unsigned long curr = (unsigned long)current;
+ unsigned long zero = 0UL;
- if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
+ if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr))
return true;
return false;
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index d880296245c5..bfaeb05123ff 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -12,11 +12,11 @@
* GNU General Public License for more details.
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
- * (C) Copyright 2013-2014 Red Hat, Inc.
+ * (C) Copyright 2013-2014,2018 Red Hat, Inc.
* (C) Copyright 2015 Intel Corp.
* (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*
- * Authors: Waiman Long <waiman.long@hpe.com>
+ * Authors: Waiman Long <longman@redhat.com>
* Peter Zijlstra <peterz@infradead.org>
*/
@@ -33,6 +33,11 @@
#include <asm/qspinlock.h>
/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
+/*
* The basic principle of a queue-based spinlock can best be understood
* by studying a classic queue-based spinlock implementation called the
* MCS lock. The paper below provides a good description for this kind
@@ -77,6 +82,18 @@
#endif
/*
+ * The pending bit spinning loop count.
+ * This heuristic is used to limit the number of lockword accesses
+ * made by atomic_cond_read_relaxed when waiting for the lock to
+ * transition out of the "== _Q_PENDING_VAL" state. We don't spin
+ * indefinitely because there's no guarantee that we'll make forward
+ * progress.
+ */
+#ifndef _Q_PENDING_LOOPS
+#define _Q_PENDING_LOOPS 1
+#endif
+
+/*
* Per-CPU queue node structures; we can never have more than 4 nested
* contexts: task, softirq, hardirq, nmi.
*
@@ -114,41 +131,18 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
-/*
- * By using the whole 2nd least significant byte for the pending bit, we
- * can allow better optimization of the lock acquisition for the pending
- * bit holder.
+#if _Q_PENDING_BITS == 8
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
*
- * This internal structure is also used by the set_locked function which
- * is not restricted to _Q_PENDING_BITS == 8.
+ * *,1,* -> *,0,*
*/
-struct __qspinlock {
- union {
- atomic_t val;
-#ifdef __LITTLE_ENDIAN
- struct {
- u8 locked;
- u8 pending;
- };
- struct {
- u16 locked_pending;
- u16 tail;
- };
-#else
- struct {
- u16 tail;
- u16 locked_pending;
- };
- struct {
- u8 reserved[2];
- u8 pending;
- u8 locked;
- };
-#endif
- };
-};
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ WRITE_ONCE(lock->pending, 0);
+}
-#if _Q_PENDING_BITS == 8
/**
* clear_pending_set_locked - take ownership and clear the pending bit.
* @lock: Pointer to queued spinlock structure
@@ -159,9 +153,7 @@ struct __qspinlock {
*/
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
- WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
+ WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
}
/*
@@ -176,19 +168,28 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
*/
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
- struct __qspinlock *l = (void *)lock;
-
/*
- * Use release semantics to make sure that the MCS node is properly
- * initialized before changing the tail code.
+ * We can use relaxed semantics since the caller ensures that the
+ * MCS node is properly initialized before updating the tail.
*/
- return (u32)xchg_release(&l->tail,
+ return (u32)xchg_relaxed(&lock->tail,
tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
#else /* _Q_PENDING_BITS == 8 */
/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ atomic_andnot(_Q_PENDING_VAL, &lock->val);
+}
+
+/**
* clear_pending_set_locked - take ownership and clear the pending bit.
* @lock: Pointer to queued spinlock structure
*
@@ -216,10 +217,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
for (;;) {
new = (val & _Q_LOCKED_PENDING_MASK) | tail;
/*
- * Use release semantics to make sure that the MCS node is
- * properly initialized before changing the tail code.
+ * We can use relaxed semantics since the caller ensures that
+ * the MCS node is properly initialized before updating the
+ * tail.
*/
- old = atomic_cmpxchg_release(&lock->val, val, new);
+ old = atomic_cmpxchg_relaxed(&lock->val, val, new);
if (old == val)
break;
@@ -237,9 +239,7 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
*/
static __always_inline void set_locked(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
- WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
}
@@ -294,86 +294,83 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
struct mcs_spinlock *prev, *next, *node;
- u32 new, old, tail;
+ u32 old, tail;
int idx;
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
if (pv_enabled())
- goto queue;
+ goto pv_queue;
if (virt_spin_lock(lock))
return;
/*
- * wait for in-progress pending->locked hand-overs
+ * Wait for in-progress pending->locked hand-overs with a bounded
+ * number of spins so that we guarantee forward progress.
*
* 0,1,0 -> 0,0,1
*/
if (val == _Q_PENDING_VAL) {
- while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
- cpu_relax();
+ int cnt = _Q_PENDING_LOOPS;
+ val = atomic_cond_read_relaxed(&lock->val,
+ (VAL != _Q_PENDING_VAL) || !cnt--);
}
/*
+ * If we observe any contention; queue.
+ */
+ if (val & ~_Q_LOCKED_MASK)
+ goto queue;
+
+ /*
* trylock || pending
*
* 0,0,0 -> 0,0,1 ; trylock
* 0,0,1 -> 0,1,1 ; pending
*/
- for (;;) {
+ val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+ if (!(val & ~_Q_LOCKED_MASK)) {
/*
- * If we observe any contention; queue.
+ * We're pending, wait for the owner to go away.
+ *
+ * *,1,1 -> *,1,0
+ *
+ * this wait loop must be a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because not all
+ * clear_pending_set_locked() implementations imply full
+ * barriers.
*/
- if (val & ~_Q_LOCKED_MASK)
- goto queue;
-
- new = _Q_LOCKED_VAL;
- if (val == new)
- new |= _Q_PENDING_VAL;
+ if (val & _Q_LOCKED_MASK) {
+ atomic_cond_read_acquire(&lock->val,
+ !(VAL & _Q_LOCKED_MASK));
+ }
/*
- * Acquire semantic is required here as the function may
- * return immediately if the lock was free.
+ * take ownership and clear the pending bit.
+ *
+ * *,1,0 -> *,0,1
*/
- old = atomic_cmpxchg_acquire(&lock->val, val, new);
- if (old == val)
- break;
-
- val = old;
- }
-
- /*
- * we won the trylock
- */
- if (new == _Q_LOCKED_VAL)
+ clear_pending_set_locked(lock);
+ qstat_inc(qstat_lock_pending, true);
return;
+ }
/*
- * we're pending, wait for the owner to go away.
- *
- * *,1,1 -> *,1,0
- *
- * this wait loop must be a load-acquire such that we match the
- * store-release that clears the locked bit and create lock
- * sequentiality; this is because not all clear_pending_set_locked()
- * implementations imply full barriers.
- */
- smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
-
- /*
- * take ownership and clear the pending bit.
- *
- * *,1,0 -> *,0,1
+ * If pending was clear but there are waiters in the queue, then
+ * we need to undo our setting of pending before we queue ourselves.
*/
- clear_pending_set_locked(lock);
- return;
+ if (!(val & _Q_PENDING_MASK))
+ clear_pending(lock);
/*
* End of pending bit optimistic spinning and beginning of MCS
* queuing.
*/
queue:
+ qstat_inc(qstat_lock_slowpath, true);
+pv_queue:
node = this_cpu_ptr(&mcs_nodes[0]);
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);
@@ -400,12 +397,18 @@ queue:
goto release;
/*
+ * Ensure that the initialisation of @node is complete before we
+ * publish the updated tail via xchg_tail() and potentially link
+ * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+ */
+ smp_wmb();
+
+ /*
+ * Publish the updated tail.
* We have already touched the queueing cacheline; don't bother with
* pending stuff.
*
* p,*,* -> n,*,*
- *
- * RELEASE, such that the stores to @node must be complete.
*/
old = xchg_tail(lock, tail);
next = NULL;
@@ -417,14 +420,8 @@ queue:
if (old & _Q_TAIL_MASK) {
prev = decode_tail(old);
- /*
- * We must ensure that the stores to @node are observed before
- * the write to prev->next. The address dependency from
- * xchg_tail is not sufficient to ensure this because the read
- * component of xchg_tail is unordered with respect to the
- * initialisation of @node.
- */
- smp_store_release(&prev->next, node);
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
pv_wait_node(node, prev);
arch_mcs_spin_lock_contended(&node->locked);
@@ -453,8 +450,8 @@ queue:
*
* The PV pv_wait_head_or_lock function, if active, will acquire
* the lock and return a non-zero value. So we have to skip the
- * smp_cond_load_acquire() call. As the next PV queue head hasn't been
- * designated yet, there is no way for the locked value to become
+ * atomic_cond_read_acquire() call. As the next PV queue head hasn't
+ * been designated yet, there is no way for the locked value to become
* _Q_SLOW_VAL. So both the set_locked() and the
* atomic_cmpxchg_relaxed() calls will be safe.
*
@@ -464,44 +461,38 @@ queue:
if ((val = pv_wait_head_or_lock(lock, node)))
goto locked;
- val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
+ val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));
locked:
/*
* claim the lock:
*
* n,0,0 -> 0,0,1 : lock, uncontended
- * *,0,0 -> *,0,1 : lock, contended
+ * *,*,0 -> *,*,1 : lock, contended
*
- * If the queue head is the only one in the queue (lock value == tail),
- * clear the tail code and grab the lock. Otherwise, we only need
- * to grab the lock.
+ * If the queue head is the only one in the queue (lock value == tail)
+ * and nobody is pending, clear the tail code and grab the lock.
+ * Otherwise, we only need to grab the lock.
*/
- for (;;) {
- /* In the PV case we might already have _Q_LOCKED_VAL set */
- if ((val & _Q_TAIL_MASK) != tail) {
- set_locked(lock);
- break;
- }
- /*
- * The smp_cond_load_acquire() call above has provided the
- * necessary acquire semantics required for locking. At most
- * two iterations of this loop may be ran.
- */
- old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
- if (old == val)
- goto release; /* No contention */
- val = old;
- }
+ /*
+ * In the PV case we might already have _Q_LOCKED_VAL set.
+ *
+ * The atomic_cond_read_acquire() call above has provided the
+ * necessary acquire semantics required for locking.
+ */
+ if (((val & _Q_TAIL_MASK) == tail) &&
+ atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+ goto release; /* No contention */
+
+ /* Either somebody is queued behind us or _Q_PENDING_VAL is set */
+ set_locked(lock);
/*
* contended path; wait for next if not observed yet, release.
*/
- if (!next) {
- while (!(next = READ_ONCE(node->next)))
- cpu_relax();
- }
+ if (!next)
+ next = smp_cond_load_relaxed(&node->next, (VAL));
arch_mcs_spin_unlock_contended(&next->locked);
pv_kick_node(lock, next);
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 6ee477765e6c..5a0cf5f9008c 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -56,11 +56,6 @@ struct pv_node {
};
/*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
-/*
* Hybrid PV queued/unfair lock
*
* By replacing the regular queued_spin_trylock() with the function below,
@@ -87,8 +82,6 @@ struct pv_node {
#define queued_spin_trylock(l) pv_hybrid_queued_unfair_trylock(l)
static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
/*
* Stay in unfair lock mode as long as queued mode waiters are
* present in the MCS wait queue but the pending bit isn't set.
@@ -97,7 +90,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
int val = atomic_read(&lock->val);
if (!(val & _Q_LOCKED_PENDING_MASK) &&
- (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
+ (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
qstat_inc(qstat_pv_lock_stealing, true);
return true;
}
@@ -117,16 +110,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
#if _Q_PENDING_BITS == 8
static __always_inline void set_pending(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
- WRITE_ONCE(l->pending, 1);
-}
-
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
- struct __qspinlock *l = (void *)lock;
-
- WRITE_ONCE(l->pending, 0);
+ WRITE_ONCE(lock->pending, 1);
}
/*
@@ -136,10 +120,8 @@ static __always_inline void clear_pending(struct qspinlock *lock)
*/
static __always_inline int trylock_clear_pending(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
-
- return !READ_ONCE(l->locked) &&
- (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL,
+ return !READ_ONCE(lock->locked) &&
+ (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
_Q_LOCKED_VAL) == _Q_PENDING_VAL);
}
#else /* _Q_PENDING_BITS == 8 */
@@ -148,11 +130,6 @@ static __always_inline void set_pending(struct qspinlock *lock)
atomic_or(_Q_PENDING_VAL, &lock->val);
}
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
- atomic_andnot(_Q_PENDING_VAL, &lock->val);
-}
-
static __always_inline int trylock_clear_pending(struct qspinlock *lock)
{
int val = atomic_read(&lock->val);
@@ -384,7 +361,6 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
- struct __qspinlock *l = (void *)lock;
/*
* If the vCPU is indeed halted, advance its state to match that of
@@ -413,7 +389,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
* the hash table later on at unlock time, no atomic instruction is
* needed.
*/
- WRITE_ONCE(l->locked, _Q_SLOW_VAL);
+ WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
(void)pv_hash(lock, pn);
}
@@ -428,7 +404,6 @@ static u32
pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
- struct __qspinlock *l = (void *)lock;
struct qspinlock **lp = NULL;
int waitcnt = 0;
int loop;
@@ -443,7 +418,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
/*
* Tracking # of slowpath locking operations
*/
- qstat_inc(qstat_pv_lock_slowpath, true);
+ qstat_inc(qstat_lock_slowpath, true);
for (;; waitcnt++) {
/*
@@ -479,13 +454,13 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
*
* Matches the smp_rmb() in __pv_queued_spin_unlock().
*/
- if (xchg(&l->locked, _Q_SLOW_VAL) == 0) {
+ if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
/*
* The lock was free and now we own the lock.
* Change the lock value back to _Q_LOCKED_VAL
* and unhash the table.
*/
- WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
WRITE_ONCE(*lp, NULL);
goto gotlock;
}
@@ -493,7 +468,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
WRITE_ONCE(pn->state, vcpu_hashed);
qstat_inc(qstat_pv_wait_head, true);
qstat_inc(qstat_pv_wait_again, waitcnt);
- pv_wait(&l->locked, _Q_SLOW_VAL);
+ pv_wait(&lock->locked, _Q_SLOW_VAL);
/*
* Because of lock stealing, the queue head vCPU may not be
@@ -518,7 +493,6 @@ gotlock:
__visible void
__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
{
- struct __qspinlock *l = (void *)lock;
struct pv_node *node;
if (unlikely(locked != _Q_SLOW_VAL)) {
@@ -547,7 +521,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
* Now that we have a reference to the (likely) blocked pv_node,
* release the lock.
*/
- smp_store_release(&l->locked, 0);
+ smp_store_release(&lock->locked, 0);
/*
* At this point the memory pointed at by lock can be freed/reused,
@@ -573,7 +547,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
#ifndef __pv_queued_spin_unlock
__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
{
- struct __qspinlock *l = (void *)lock;
u8 locked;
/*
@@ -581,7 +554,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* unhash. Otherwise it would be possible to have multiple @lock
* entries, which would be BAD.
*/
- locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
+ locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
if (likely(locked == _Q_LOCKED_VAL))
return;
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 4a30ef63c607..6bd78c0740fc 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -22,13 +22,14 @@
* pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
* pv_latency_kick - average latency (ns) of vCPU kick operation
* pv_latency_wake - average latency (ns) from vCPU kick to wakeup
- * pv_lock_slowpath - # of locking operations via the slowpath
* pv_lock_stealing - # of lock stealing operations
* pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
* pv_wait_again - # of wait's after a queue head vCPU kick
* pv_wait_early - # of early vCPU wait's
* pv_wait_head - # of vCPU wait's at the queue head
* pv_wait_node - # of vCPU wait's at a non-head queue node
+ * lock_pending - # of locking operations via pending code
+ * lock_slowpath - # of locking operations via MCS lock queue
*
* Writing to the "reset_counters" file will reset all the above counter
* values.
@@ -46,13 +47,14 @@ enum qlock_stats {
qstat_pv_kick_wake,
qstat_pv_latency_kick,
qstat_pv_latency_wake,
- qstat_pv_lock_slowpath,
qstat_pv_lock_stealing,
qstat_pv_spurious_wakeup,
qstat_pv_wait_again,
qstat_pv_wait_early,
qstat_pv_wait_head,
qstat_pv_wait_node,
+ qstat_lock_pending,
+ qstat_lock_slowpath,
qstat_num, /* Total number of statistical counters */
qstat_reset_cnts = qstat_num,
};
@@ -73,12 +75,13 @@ static const char * const qstat_names[qstat_num + 1] = {
[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
[qstat_pv_latency_kick] = "pv_latency_kick",
[qstat_pv_latency_wake] = "pv_latency_wake",
- [qstat_pv_lock_slowpath] = "pv_lock_slowpath",
[qstat_pv_lock_stealing] = "pv_lock_stealing",
[qstat_pv_wait_again] = "pv_wait_again",
[qstat_pv_wait_early] = "pv_wait_early",
[qstat_pv_wait_head] = "pv_wait_head",
[qstat_pv_wait_node] = "pv_wait_node",
+ [qstat_lock_pending] = "lock_pending",
+ [qstat_lock_slowpath] = "lock_slowpath",
[qstat_reset_cnts] = "reset_counters",
};
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a90336779375..3064c50e181e 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -347,6 +347,15 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
}
}
+static inline bool owner_on_cpu(struct task_struct *owner)
+{
+ /*
+ * As lock holder preemption issue, we both skip spinning if
+ * task is not on cpu or its cpu is preempted
+ */
+ return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
+}
+
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
struct task_struct *owner;
@@ -359,17 +368,10 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
rcu_read_lock();
owner = READ_ONCE(sem->owner);
- if (!owner || !is_rwsem_owner_spinnable(owner)) {
- ret = !owner; /* !owner is spinnable */
- goto done;
+ if (owner) {
+ ret = is_rwsem_owner_spinnable(owner) &&
+ owner_on_cpu(owner);
}
-
- /*
- * As lock holder preemption issue, we both skip spinning if task is not
- * on cpu or its cpu is preempted
- */
- ret = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
-done:
rcu_read_unlock();
return ret;
}
@@ -398,8 +400,7 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
* abort spinning when need_resched or owner is not running or
* owner's cpu is preempted.
*/
- if (!owner->on_cpu || need_resched() ||
- vcpu_is_preempted(task_cpu(owner))) {
+ if (need_resched() || !owner_on_cpu(owner)) {
rcu_read_unlock();
return false;
}
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 64c0291b579c..f89014a2c238 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -37,7 +37,7 @@ struct cpu_stop_done {
struct cpu_stopper {
struct task_struct *thread;
- spinlock_t lock;
+ raw_spinlock_t lock;
bool enabled; /* is this stopper enabled? */
struct list_head works; /* list of pending works */
@@ -81,13 +81,13 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
unsigned long flags;
bool enabled;
- spin_lock_irqsave(&stopper->lock, flags);
+ raw_spin_lock_irqsave(&stopper->lock, flags);
enabled = stopper->enabled;
if (enabled)
__cpu_stop_queue_work(stopper, work, &wakeq);
else if (work->done)
cpu_stop_signal_done(work->done);
- spin_unlock_irqrestore(&stopper->lock, flags);
+ raw_spin_unlock_irqrestore(&stopper->lock, flags);
wake_up_q(&wakeq);
@@ -237,8 +237,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
DEFINE_WAKE_Q(wakeq);
int err;
retry:
- spin_lock_irq(&stopper1->lock);
- spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock_irq(&stopper1->lock);
+ raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
err = -ENOENT;
if (!stopper1->enabled || !stopper2->enabled)
@@ -261,8 +261,8 @@ retry:
__cpu_stop_queue_work(stopper1, work1, &wakeq);
__cpu_stop_queue_work(stopper2, work2, &wakeq);
unlock:
- spin_unlock(&stopper2->lock);
- spin_unlock_irq(&stopper1->lock);
+ raw_spin_unlock(&stopper2->lock);
+ raw_spin_unlock_irq(&stopper1->lock);
if (unlikely(err == -EDEADLK)) {
while (stop_cpus_in_progress)
@@ -457,9 +457,9 @@ static int cpu_stop_should_run(unsigned int cpu)
unsigned long flags;
int run;
- spin_lock_irqsave(&stopper->lock, flags);
+ raw_spin_lock_irqsave(&stopper->lock, flags);
run = !list_empty(&stopper->works);
- spin_unlock_irqrestore(&stopper->lock, flags);
+ raw_spin_unlock_irqrestore(&stopper->lock, flags);
return run;
}
@@ -470,13 +470,13 @@ static void cpu_stopper_thread(unsigned int cpu)
repeat:
work = NULL;
- spin_lock_irq(&stopper->lock);
+ raw_spin_lock_irq(&stopper->lock);
if (!list_empty(&stopper->works)) {
work = list_first_entry(&stopper->works,
struct cpu_stop_work, list);
list_del_init(&work->list);
}
- spin_unlock_irq(&stopper->lock);
+ raw_spin_unlock_irq(&stopper->lock);
if (work) {
cpu_stop_fn_t fn = work->fn;
@@ -550,7 +550,7 @@ static int __init cpu_stop_init(void)
for_each_possible_cpu(cpu) {
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
- spin_lock_init(&stopper->lock);
+ raw_spin_lock_init(&stopper->lock);
INIT_LIST_HEAD(&stopper->works);
}