author:    John David Anglin <dave.anglin@bell.net>  2022-01-05 00:44:32 +0300
committer: Helge Deller <deller@gmx.de>  2022-01-07 03:29:21 +0300
commit:    d0585d742ff2d82accd26c661c60a6d260429c4a (patch)
tree:      90f590646e9483128d54301315c21f7f7046d291  /arch/parisc/include
parent:    20dda87bdc6567e864942ead40bc149ebbe3ae79 (diff)
download:  linux-d0585d742ff2d82accd26c661c60a6d260429c4a.tar.xz
parisc: Rewrite light-weight syscall and futex code
The parisc architecture lacks general hardware support for compare and swap. In particular, it is difficult to implement software atomic support for userspace. Page faults in critical regions can cause processes to sleep and block the forward progress of other processes. Thus, it is essential that page faults be disabled in critical regions. For performance reasons, we also need to disable external interrupts in critical regions.

In order to do this, we need a mechanism to trigger COW breaks outside the critical region. Fortunately, parisc has the "stbys,e" instruction. When the leftmost byte of a word is addressed, this instruction triggers all the exceptions of a normal store but it does not write to memory. Thus, we can use it to trigger COW breaks outside the critical region without modifying the data that is to be updated atomically.

COW breaks occur randomly, so even if we have previously executed a "stbys,e" instruction, we still need to disable page faults around the critical region. If a fault occurs in the critical region, we return -EAGAIN. I had to add a wrapper around _arch_futex_atomic_op_inuser() as I found in testing that returning -EAGAIN caused problems for some processes, even though it is listed as a possible return value.

The patch implements the above. The code no longer attempts to sleep with interrupts disabled, and I haven't seen any stalls with the change. I have attempted to merge common code and streamline the fast path. In the futex code, we only compute the spinlock address once. I eliminated some debug code in the original CAS routine that just made the flow more complicated.

I don't clip the arguments when called from wide mode. As a result, the LWS routines should work when called from 64-bit processes.

I defined the TASK_PAGEFAULT_DISABLED offset for use in the lws_pagefault_disable and lws_pagefault_enable macros. Since we now disable interrupts on the gateway page where necessary, it might be possible to allow processes to be scheduled when they are on the gateway page.

The change has been tested on c8000 and rp3440. It improves glibc build and test time by about 10%.

In v2, I removed the lws_atomic_xchg and lws_atomic_store calls. I also removed the bug fixes that were not directly related to this patch.

In v3, I removed the code to force interruptions from arch_futex_atomic_op_inuser(). It is always called with page faults disabled, so this code had no effect.

In v4, I fixed a typo in the depi_safe line.

In v5, I moved the code to disable/enable page faults inside the spinlocks.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
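As a hedged illustration only (the real trigger lives in the LWS assembly in arch/parisc/kernel/syscall.S, not in this header; the helper name and the inline-asm constraints here are assumptions), the "stbys,e" pre-touch could look roughly like this in C with GCC extended asm:

/* Sketch, not the patch's code: take any page fault or COW break on
 * the futex word here, before page faults and interrupts are disabled.
 * With a word-aligned address, "stbys,e" raises every exception a
 * normal store would raise but writes no bytes, so the value that will
 * be updated atomically is left untouched.
 */
static inline void futex_cow_break(u32 __user *uaddr)
{
	asm volatile("stbys,e %%r0, 0(%0)" : : "r" (uaddr) : "memory");
}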
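The wrapper mentioned above is not part of the header diff below. Purely as a sketch of the idea, with the retry shape assumed rather than taken from the patch, it could look like this:

/* Hedged sketch: hide -EAGAIN from callers that were seen to mishandle
 * it.  The _arch_futex_atomic_op_inuser() name comes from the commit
 * message; the loop is an assumption, and a real version would likely
 * fault the page in (e.g. with get_user()) between attempts.
 */
static inline int
futex_atomic_op_wrapper(int op, int oparg, int *oval, u32 __user *uaddr)
{
	int ret;

	do {
		ret = _arch_futex_atomic_op_inuser(op, oparg, oval, uaddr);
	} while (ret == -EAGAIN);

	return ret;
}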
Diffstat (limited to 'arch/parisc/include')
-rw-r--r--  arch/parisc/include/asm/futex.h  59
1 file changed, 37 insertions(+), 22 deletions(-)
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 9cd4dd6e63ad..b5835325d44b 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -8,39 +8,47 @@
#include <asm/errno.h>
/* The following has to match the LWS code in syscall.S. We have
- sixteen four-word locks. */
+ * 256 four-word locks. We use bits 20-27 of the futex virtual
+ * address for the hash index.
+ */
+
+static inline unsigned long _futex_hash_index(unsigned long ua)
+{
+ return (ua >> 2) & 0x3fc;
+}
static inline void
-_futex_spin_lock(u32 __user *uaddr)
+_futex_spin_lock_irqsave(arch_spinlock_t *s, unsigned long *flags)
{
- extern u32 lws_lock_start[];
- long index = ((long)uaddr & 0x7f8) >> 1;
- arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index];
- preempt_disable();
+ local_irq_save(*flags);
arch_spin_lock(s);
}
static inline void
-_futex_spin_unlock(u32 __user *uaddr)
+_futex_spin_unlock_irqrestore(arch_spinlock_t *s, unsigned long *flags)
{
- extern u32 lws_lock_start[];
- long index = ((long)uaddr & 0x7f8) >> 1;
- arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index];
arch_spin_unlock(s);
- preempt_enable();
+ local_irq_restore(*flags);
}
static inline int
arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
+ extern u32 lws_lock_start[];
+ unsigned long ua = (unsigned long)uaddr;
+ arch_spinlock_t *s;
+ unsigned long flags;
int oldval, ret;
u32 tmp;
- ret = -EFAULT;
+ s = (arch_spinlock_t *)&lws_lock_start[_futex_hash_index(ua)];
+ _futex_spin_lock_irqsave(s, &flags);
- _futex_spin_lock(uaddr);
- if (unlikely(get_user(oldval, uaddr) != 0))
+ /* Return -EFAULT if we encounter a page fault or COW break */
+ if (unlikely(get_user(oldval, uaddr) != 0)) {
+ ret = -EFAULT;
goto out_pagefault_enable;
+ }
ret = 0;
tmp = oldval;
@@ -63,13 +71,14 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
break;
default:
ret = -ENOSYS;
+ goto out_pagefault_enable;
}
- if (ret == 0 && unlikely(put_user(tmp, uaddr) != 0))
+ if (unlikely(put_user(tmp, uaddr) != 0))
ret = -EFAULT;
out_pagefault_enable:
- _futex_spin_unlock(uaddr);
+ _futex_spin_unlock_irqrestore(s, &flags);
if (!ret)
*oval = oldval;
@@ -81,7 +90,11 @@ static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
+ extern u32 lws_lock_start[];
+ unsigned long ua = (unsigned long)uaddr;
+ arch_spinlock_t *s;
u32 val;
+ unsigned long flags;
/* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
* our gateway page, and causes no end of trouble...
@@ -94,23 +107,25 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
/* HPPA has no cmpxchg in hardware and therefore the
* best we can do here is use an array of locks. The
- * lock selected is based on a hash of the userspace
- * address. This should scale to a couple of CPUs.
+ * lock selected is based on a hash of the virtual
+ * address of the futex. This should scale to a couple
+ * of CPUs.
*/
- _futex_spin_lock(uaddr);
+ s = (arch_spinlock_t *)&lws_lock_start[_futex_hash_index(ua)];
+ _futex_spin_lock_irqsave(s, &flags);
if (unlikely(get_user(val, uaddr) != 0)) {
- _futex_spin_unlock(uaddr);
+ _futex_spin_unlock_irqrestore(s, &flags);
return -EFAULT;
}
if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) {
- _futex_spin_unlock(uaddr);
+ _futex_spin_unlock_irqrestore(s, &flags);
return -EFAULT;
}
*uval = val;
- _futex_spin_unlock(uaddr);
+ _futex_spin_unlock_irqrestore(s, &flags);
return 0;
}
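To make the new hashing concrete: the "bits 20-27" comment appears to use PA-RISC's MSB-first bit numbering, which corresponds to bits 4-11 of the address in the usual LSB-0 convention. A small standalone sketch of the computation (the example address is arbitrary):

#include <stdio.h>

/* Mirrors _futex_hash_index() from the diff: the result indexes the
 * u32 array lws_lock_start[] in steps of four words, because each of
 * the 256 locks occupies four 32-bit words (16 bytes).
 */
static unsigned long futex_hash_index(unsigned long ua)
{
	return (ua >> 2) & 0x3fc;
}

int main(void)
{
	unsigned long ua = 0xfaf01238UL;	/* arbitrary futex address */
	unsigned long idx = futex_hash_index(ua);

	/* (0xfaf01238 >> 2) & 0x3fc = 0x8c: word index 140, lock 35;
	 * equivalently, address bits 4-11 ((ua >> 4) & 0xff) pick the lock.
	 */
	printf("word index %lu -> lock %lu\n", idx, idx / 4);
	return 0;
}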