locking/percpu-rwsem: Optimize readers and reduce global impact

Currently the percpu-rwsem switches to (global) atomic ops while a writer is waiting; which could be quite a while and slows down releasing the readers. This patch cures this problem by ordering the reader-state vs reader-count (see the comments in __percpu_down_read() and percpu_down_write()). This changes a global atomic op into a full memory barrier, which doesn't have the global cacheline contention. This also enables using the percpu-rwsem with rcu_sync disabled in order to bias the implementation differently, reducing the writer latency by adding some cost to readers. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Paul McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-kernel@vger.kernel.org [ Fixed modular build. ] Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Peter Zijlstra <peterz@infradead.org> 2016-07-14 21:08:46 +0300
committer: Ingo Molnar <mingo@kernel.org> 2016-08-10 15:34:01 +0300
commit: 80127a39681bd68c959f0953f84a830cbd7c3b1c (patch)
tree: 223bcc2a5cbec5c0873f8fae85a98797f94e6c56 /include/linux/percpu-rwsem.h
parent: 08be8f63c40c030b5cf95b4368e314e563a86301 (diff)
download: linux-80127a39681bd68c959f0953f84a830cbd7c3b1c.tar.xz
1 files changed, 75 insertions, 9 deletions
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c2fa3ecb0dce..146efefde2a1 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -10,30 +10,96 @@
 
 struct percpu_rw_semaphore {
 	struct rcu_sync		rss;
-	unsigned int __percpu	*fast_read_ctr;
+	unsigned int __percpu	*read_count;
 	struct rw_semaphore	rw_sem;
-	atomic_t		slow_read_ctr;
-	wait_queue_head_t	write_waitq;
+	wait_queue_head_t	writer;
+	int			readers_block;
 };
 
-extern void percpu_down_read(struct percpu_rw_semaphore *);
-extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
-extern void percpu_up_read(struct percpu_rw_semaphore *);
+extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
+extern void __percpu_up_read(struct percpu_rw_semaphore *);
+
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+	might_sleep();
+
+	rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+
+	preempt_disable();
+	/*
+	 * We are in an RCU-sched read-side critical section, so the writer
+	 * cannot both change sem->state from readers_fast and start checking
+	 * counters while we are here. So if we see !sem->state, we know that
+	 * the writer won't be checking until we're past the preempt_enable()
+	 * and that one the synchronize_sched() is done, the writer will see
+	 * anything we did within this RCU-sched read-size critical section.
+	 */
+	__this_cpu_inc(*sem->read_count);
+	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+		__percpu_down_read(sem, false); /* Unconditional memory barrier */
+	preempt_enable();
+	/*
+	 * The barrier() from preempt_enable() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+}
+
+static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+{
+	int ret = 1;
+
+	preempt_disable();
+	/*
+	 * Same as in percpu_down_read().
+	 */
+	__this_cpu_inc(*sem->read_count);
+	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+		ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
+	preempt_enable();
+	/*
+	 * The barrier() from preempt_enable() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+
+	if (ret)
+		rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
+
+	return ret;
+}
+
+static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+	/*
+	 * The barrier() in preempt_disable() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+	preempt_disable();
+	/*
+	 * Same as in percpu_down_read().
+	 */
+	if (likely(rcu_sync_is_idle(&sem->rss)))
+		__this_cpu_dec(*sem->read_count);
+	else
+		__percpu_up_read(sem); /* Unconditional memory barrier */
+	preempt_enable();
+
+	rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
+}
 
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
 extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
 				const char *, struct lock_class_key *);
+
 extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
-#define percpu_init_rwsem(brw)	\
+#define percpu_init_rwsem(sem)					\
 ({								\
 	static struct lock_class_key rwsem_key;			\
-	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
+	__percpu_init_rwsem(sem, #sem, &rwsem_key);		\
 })
 
-
 #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
 
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
author	Peter Zijlstra <peterz@infradead.org>	2016-07-14 21:08:46 +0300
committer	Ingo Molnar <mingo@kernel.org>	2016-08-10 15:34:01 +0300
commit	80127a39681bd68c959f0953f84a830cbd7c3b1c (patch)
tree	223bcc2a5cbec5c0873f8fae85a98797f94e6c56 /include/linux/percpu-rwsem.h
parent	08be8f63c40c030b5cf95b4368e314e563a86301 (diff)
download	linux-80127a39681bd68c959f0953f84a830cbd7c3b1c.tar.xz