diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 02:40:38 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 02:40:38 +0300 |
commit | 281422869942c19f05a08d4017c633d08d390938 (patch) | |
tree | 78e1fb9155e3e82697f96ce04e7b40e8f7c5147c /kernel/locking | |
parent | f5a8160c1e055c0fd8d16a5b3ac97c638365b0db (diff) | |
parent | b33e18f61bd18227a456016a77b1a968f5bc1d65 (diff) | |
download | linux-281422869942c19f05a08d4017c633d08d390938.tar.xz |
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU changes from Ingo Molnar:
"The main changes in this cycle were:
- Improvements to expedited grace periods (Paul E McKenney)
- Performance improvements to and locktorture tests for percpu-rwsem
(Oleg Nesterov, Paul E McKenney)
- Torture-test changes (Paul E McKenney, Davidlohr Bueso)
- Documentation updates (Paul E McKenney)
- Miscellaneous fixes (Paul E McKenney, Boqun Feng, Oleg Nesterov,
Patrick Marlier)"
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (54 commits)
fs/writeback, rcu: Don't use list_entry_rcu() for pointer offsetting in bdi_split_work_to_wbs()
rcu: Better hotplug handling for synchronize_sched_expedited()
rcu: Enable stall warnings for synchronize_rcu_expedited()
rcu: Add tasks to expedited stall-warning messages
rcu: Add online/offline info to expedited stall warning message
rcu: Consolidate expedited CPU selection
rcu: Prepare for consolidating expedited CPU selection
cpu: Remove try_get_online_cpus()
rcu: Stop excluding CPU hotplug in synchronize_sched_expedited()
rcu: Stop silencing lockdep false positive for expedited grace periods
rcu: Switch synchronize_sched_expedited() to IPI
locktorture: Fix module unwind when bad torture_type specified
torture: Forgive non-plural arguments
rcutorture: Fix unused-function warning for torturing_tasks()
rcutorture: Fix module unwind when bad torture_type specified
rcu_sync: Cleanup the CONFIG_PROVE_RCU checks
locking/percpu-rwsem: Clean up the lockdep annotations in percpu_down_read()
locking/percpu-rwsem: Fix the comments outdated by rcu_sync
locking/percpu-rwsem: Make use of the rcu_sync infrastructure
locking/percpu-rwsem: Make percpu_free_rwsem() after kzalloc() safe
...
Diffstat (limited to 'kernel/locking')
-rw-r--r-- | kernel/locking/locktorture.c | 164 | ||||
-rw-r--r-- | kernel/locking/percpu-rwsem.c | 90 |
2 files changed, 194 insertions, 60 deletions
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 32244186f1f2..8ef1919d63b2 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -17,12 +17,14 @@ * * Copyright (C) IBM Corporation, 2014 * - * Author: Paul E. McKenney <paulmck@us.ibm.com> + * Authors: Paul E. McKenney <paulmck@us.ibm.com> + * Davidlohr Bueso <dave@stgolabs.net> * Based on kernel/rcu/torture.c. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/kthread.h> +#include <linux/sched/rt.h> #include <linux/spinlock.h> #include <linux/rwlock.h> #include <linux/mutex.h> @@ -34,6 +36,7 @@ #include <linux/moduleparam.h> #include <linux/delay.h> #include <linux/slab.h> +#include <linux/percpu-rwsem.h> #include <linux/torture.h> MODULE_LICENSE("GPL"); @@ -91,11 +94,13 @@ struct lock_torture_ops { void (*init)(void); int (*writelock)(void); void (*write_delay)(struct torture_random_state *trsp); + void (*task_boost)(struct torture_random_state *trsp); void (*writeunlock)(void); int (*readlock)(void); void (*read_delay)(struct torture_random_state *trsp); void (*readunlock)(void); - unsigned long flags; + + unsigned long flags; /* for irq spinlocks */ const char *name; }; @@ -139,9 +144,15 @@ static void torture_lock_busted_write_unlock(void) /* BUGGY, do not use in real life!!! */ } +static void torture_boost_dummy(struct torture_random_state *trsp) +{ + /* Only rtmutexes care about priority */ +} + static struct lock_torture_ops lock_busted_ops = { .writelock = torture_lock_busted_write_lock, .write_delay = torture_lock_busted_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_lock_busted_write_unlock, .readlock = NULL, .read_delay = NULL, @@ -185,6 +196,7 @@ static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock) static struct lock_torture_ops spin_lock_ops = { .writelock = torture_spin_lock_write_lock, .write_delay = torture_spin_lock_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_spin_lock_write_unlock, .readlock = NULL, .read_delay = NULL, @@ -211,6 +223,7 @@ __releases(torture_spinlock) static struct lock_torture_ops spin_lock_irq_ops = { .writelock = torture_spin_lock_write_lock_irq, .write_delay = torture_spin_lock_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_lock_spin_write_unlock_irq, .readlock = NULL, .read_delay = NULL, @@ -275,6 +288,7 @@ static void torture_rwlock_read_unlock(void) __releases(torture_rwlock) static struct lock_torture_ops rw_lock_ops = { .writelock = torture_rwlock_write_lock, .write_delay = torture_rwlock_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_rwlock_write_unlock, .readlock = torture_rwlock_read_lock, .read_delay = torture_rwlock_read_delay, @@ -315,6 +329,7 @@ __releases(torture_rwlock) static struct lock_torture_ops rw_lock_irq_ops = { .writelock = torture_rwlock_write_lock_irq, .write_delay = torture_rwlock_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_rwlock_write_unlock_irq, .readlock = torture_rwlock_read_lock_irq, .read_delay = torture_rwlock_read_delay, @@ -354,6 +369,7 @@ static void torture_mutex_unlock(void) __releases(torture_mutex) static struct lock_torture_ops mutex_lock_ops = { .writelock = torture_mutex_lock, .write_delay = torture_mutex_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_mutex_unlock, .readlock = NULL, .read_delay = NULL, @@ -361,6 +377,90 @@ static struct lock_torture_ops mutex_lock_ops = { .name = "mutex_lock" }; +#ifdef CONFIG_RT_MUTEXES +static DEFINE_RT_MUTEX(torture_rtmutex); + +static int torture_rtmutex_lock(void) __acquires(torture_rtmutex) +{ + rt_mutex_lock(&torture_rtmutex); + return 0; +} + +static void torture_rtmutex_boost(struct torture_random_state *trsp) +{ + int policy; + struct sched_param param; + const unsigned int factor = 50000; /* yes, quite arbitrary */ + + if (!rt_task(current)) { + /* + * (1) Boost priority once every ~50k operations. When the + * task tries to take the lock, the rtmutex it will account + * for the new priority, and do any corresponding pi-dance. + */ + if (!(torture_random(trsp) % + (cxt.nrealwriters_stress * factor))) { + policy = SCHED_FIFO; + param.sched_priority = MAX_RT_PRIO - 1; + } else /* common case, do nothing */ + return; + } else { + /* + * The task will remain boosted for another ~500k operations, + * then restored back to its original prio, and so forth. + * + * When @trsp is nil, we want to force-reset the task for + * stopping the kthread. + */ + if (!trsp || !(torture_random(trsp) % + (cxt.nrealwriters_stress * factor * 2))) { + policy = SCHED_NORMAL; + param.sched_priority = 0; + } else /* common case, do nothing */ + return; + } + + sched_setscheduler_nocheck(current, policy, ¶m); +} + +static void torture_rtmutex_delay(struct torture_random_state *trsp) +{ + const unsigned long shortdelay_us = 2; + const unsigned long longdelay_ms = 100; + + /* + * We want a short delay mostly to emulate likely code, and + * we want a long delay occasionally to force massive contention. + */ + if (!(torture_random(trsp) % + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms); + if (!(torture_random(trsp) % + (cxt.nrealwriters_stress * 2 * shortdelay_us))) + udelay(shortdelay_us); +#ifdef CONFIG_PREEMPT + if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) + preempt_schedule(); /* Allow test to be preempted. */ +#endif +} + +static void torture_rtmutex_unlock(void) __releases(torture_rtmutex) +{ + rt_mutex_unlock(&torture_rtmutex); +} + +static struct lock_torture_ops rtmutex_lock_ops = { + .writelock = torture_rtmutex_lock, + .write_delay = torture_rtmutex_delay, + .task_boost = torture_rtmutex_boost, + .writeunlock = torture_rtmutex_unlock, + .readlock = NULL, + .read_delay = NULL, + .readunlock = NULL, + .name = "rtmutex_lock" +}; +#endif + static DECLARE_RWSEM(torture_rwsem); static int torture_rwsem_down_write(void) __acquires(torture_rwsem) { @@ -419,6 +519,7 @@ static void torture_rwsem_up_read(void) __releases(torture_rwsem) static struct lock_torture_ops rwsem_lock_ops = { .writelock = torture_rwsem_down_write, .write_delay = torture_rwsem_write_delay, + .task_boost = torture_boost_dummy, .writeunlock = torture_rwsem_up_write, .readlock = torture_rwsem_down_read, .read_delay = torture_rwsem_read_delay, @@ -426,6 +527,48 @@ static struct lock_torture_ops rwsem_lock_ops = { .name = "rwsem_lock" }; +#include <linux/percpu-rwsem.h> +static struct percpu_rw_semaphore pcpu_rwsem; + +void torture_percpu_rwsem_init(void) +{ + BUG_ON(percpu_init_rwsem(&pcpu_rwsem)); +} + +static int torture_percpu_rwsem_down_write(void) __acquires(pcpu_rwsem) +{ + percpu_down_write(&pcpu_rwsem); + return 0; +} + +static void torture_percpu_rwsem_up_write(void) __releases(pcpu_rwsem) +{ + percpu_up_write(&pcpu_rwsem); +} + +static int torture_percpu_rwsem_down_read(void) __acquires(pcpu_rwsem) +{ + percpu_down_read(&pcpu_rwsem); + return 0; +} + +static void torture_percpu_rwsem_up_read(void) __releases(pcpu_rwsem) +{ + percpu_up_read(&pcpu_rwsem); +} + +static struct lock_torture_ops percpu_rwsem_lock_ops = { + .init = torture_percpu_rwsem_init, + .writelock = torture_percpu_rwsem_down_write, + .write_delay = torture_rwsem_write_delay, + .task_boost = torture_boost_dummy, + .writeunlock = torture_percpu_rwsem_up_write, + .readlock = torture_percpu_rwsem_down_read, + .read_delay = torture_rwsem_read_delay, + .readunlock = torture_percpu_rwsem_up_read, + .name = "percpu_rwsem_lock" +}; + /* * Lock torture writer kthread. Repeatedly acquires and releases * the lock, checking for duplicate acquisitions. @@ -442,6 +585,7 @@ static int lock_torture_writer(void *arg) if ((torture_random(&rand) & 0xfffff) == 0) schedule_timeout_uninterruptible(1); + cxt.cur_ops->task_boost(&rand); cxt.cur_ops->writelock(); if (WARN_ON_ONCE(lock_is_write_held)) lwsp->n_lock_fail++; @@ -456,6 +600,8 @@ static int lock_torture_writer(void *arg) stutter_wait("lock_torture_writer"); } while (!torture_must_stop()); + + cxt.cur_ops->task_boost(NULL); /* reset prio */ torture_kthread_stopping("lock_torture_writer"); return 0; } @@ -642,7 +788,11 @@ static int __init lock_torture_init(void) &spin_lock_ops, &spin_lock_irq_ops, &rw_lock_ops, &rw_lock_irq_ops, &mutex_lock_ops, +#ifdef CONFIG_RT_MUTEXES + &rtmutex_lock_ops, +#endif &rwsem_lock_ops, + &percpu_rwsem_lock_ops, }; if (!torture_init_begin(torture_type, verbose, &torture_runnable)) @@ -661,11 +811,11 @@ static int __init lock_torture_init(void) for (i = 0; i < ARRAY_SIZE(torture_ops); i++) pr_alert(" %s", torture_ops[i]->name); pr_alert("\n"); - torture_init_end(); - return -EINVAL; + firsterr = -EINVAL; + goto unwind; } if (cxt.cur_ops->init) - cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */ + cxt.cur_ops->init(); if (nwriters_stress >= 0) cxt.nrealwriters_stress = nwriters_stress; @@ -676,6 +826,10 @@ static int __init lock_torture_init(void) if (strncmp(torture_type, "mutex", 5) == 0) cxt.debug_lock = true; #endif +#ifdef CONFIG_DEBUG_RT_MUTEXES + if (strncmp(torture_type, "rtmutex", 7) == 0) + cxt.debug_lock = true; +#endif #ifdef CONFIG_DEBUG_SPINLOCK if ((strncmp(torture_type, "spin", 4) == 0) || (strncmp(torture_type, "rw_lock", 7) == 0)) diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index f32567254867..f231e0bb311c 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -17,50 +17,43 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ __init_rwsem(&brw->rw_sem, name, rwsem_key); - atomic_set(&brw->write_ctr, 0); + rcu_sync_init(&brw->rss, RCU_SCHED_SYNC); atomic_set(&brw->slow_read_ctr, 0); init_waitqueue_head(&brw->write_waitq); return 0; } +EXPORT_SYMBOL_GPL(__percpu_init_rwsem); void percpu_free_rwsem(struct percpu_rw_semaphore *brw) { + /* + * XXX: temporary kludge. The error path in alloc_super() + * assumes that percpu_free_rwsem() is safe after kzalloc(). + */ + if (!brw->fast_read_ctr) + return; + + rcu_sync_dtor(&brw->rss); free_percpu(brw->fast_read_ctr); brw->fast_read_ctr = NULL; /* catch use after free bugs */ } /* - * This is the fast-path for down_read/up_read, it only needs to ensure - * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the - * fast per-cpu counter. The writer uses synchronize_sched_expedited() to - * serialize with the preempt-disabled section below. - * - * The nontrivial part is that we should guarantee acquire/release semantics - * in case when - * - * R_W: down_write() comes after up_read(), the writer should see all - * changes done by the reader - * or - * W_R: down_read() comes after up_write(), the reader should see all - * changes done by the writer + * This is the fast-path for down_read/up_read. If it succeeds we rely + * on the barriers provided by rcu_sync_enter/exit; see the comments in + * percpu_down_write() and percpu_up_write(). * * If this helper fails the callers rely on the normal rw_semaphore and * atomic_dec_and_test(), so in this case we have the necessary barriers. - * - * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or - * __this_cpu_add() below can be reordered with any LOAD/STORE done by the - * reader inside the critical section. See the comments in down_write and - * up_write below. */ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) { - bool success = false; + bool success; preempt_disable(); - if (likely(!atomic_read(&brw->write_ctr))) { + success = rcu_sync_is_idle(&brw->rss); + if (likely(success)) __this_cpu_add(*brw->fast_read_ctr, val); - success = true; - } preempt_enable(); return success; @@ -77,16 +70,17 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) void percpu_down_read(struct percpu_rw_semaphore *brw) { might_sleep(); - if (likely(update_fast_ctr(brw, +1))) { - rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); + rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); + + if (likely(update_fast_ctr(brw, +1))) return; - } - down_read(&brw->rw_sem); + /* Avoid rwsem_acquire_read() and rwsem_release() */ + __down_read(&brw->rw_sem); atomic_inc(&brw->slow_read_ctr); - /* avoid up_read()->rwsem_release() */ __up_read(&brw->rw_sem); } +EXPORT_SYMBOL_GPL(percpu_down_read); int percpu_down_read_trylock(struct percpu_rw_semaphore *brw) { @@ -112,6 +106,7 @@ void percpu_up_read(struct percpu_rw_semaphore *brw) if (atomic_dec_and_test(&brw->slow_read_ctr)) wake_up_all(&brw->write_waitq); } +EXPORT_SYMBOL_GPL(percpu_up_read); static int clear_fast_ctr(struct percpu_rw_semaphore *brw) { @@ -126,33 +121,17 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) return sum; } -/* - * A writer increments ->write_ctr to force the readers to switch to the - * slow mode, note the atomic_read() check in update_fast_ctr(). - * - * After that the readers can only inc/dec the slow ->slow_read_ctr counter, - * ->fast_read_ctr is stable. Once the writer moves its sum into the slow - * counter it represents the number of active readers. - * - * Finally the writer takes ->rw_sem for writing and blocks the new readers, - * then waits until the slow counter becomes zero. - */ void percpu_down_write(struct percpu_rw_semaphore *brw) { - /* tell update_fast_ctr() there is a pending writer */ - atomic_inc(&brw->write_ctr); /* - * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read - * so that update_fast_ctr() can't succeed. - * - * 2. Ensures we see the result of every previous this_cpu_add() in - * update_fast_ctr(). + * Make rcu_sync_is_idle() == F and thus disable the fast-path in + * percpu_down_read() and percpu_up_read(), and wait for gp pass. * - * 3. Ensures that if any reader has exited its critical section via - * fast-path, it executes a full memory barrier before we return. - * See R_W case in the comment above update_fast_ctr(). + * The latter synchronises us with the preceding readers which used + * the fast-past, so we can not miss the result of __this_cpu_add() + * or anything else inside their criticial sections. */ - synchronize_sched_expedited(); + rcu_sync_enter(&brw->rss); /* exclude other writers, and block the new readers completely */ down_write(&brw->rw_sem); @@ -163,16 +142,17 @@ void percpu_down_write(struct percpu_rw_semaphore *brw) /* wait for all readers to complete their percpu_up_read() */ wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); } +EXPORT_SYMBOL_GPL(percpu_down_write); void percpu_up_write(struct percpu_rw_semaphore *brw) { /* release the lock, but the readers can't use the fast-path */ up_write(&brw->rw_sem); /* - * Insert the barrier before the next fast-path in down_read, - * see W_R case in the comment above update_fast_ctr(). + * Enable the fast-path in percpu_down_read() and percpu_up_read() + * but only after another gp pass; this adds the necessary barrier + * to ensure the reader can't miss the changes done by us. */ - synchronize_sched_expedited(); - /* the last writer unblocks update_fast_ctr() */ - atomic_dec(&brw->write_ctr); + rcu_sync_exit(&brw->rss); } +EXPORT_SYMBOL_GPL(percpu_up_write); |