6 files changed, 313 insertions, 199 deletions
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index be14c58cb989..c1ea14e3c927 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/processor.h>
+#include <linux/export.h>
 #include <linux/delay.h>
 #include <asm/div64.h>
 #include <asm/timex.h>
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 08f60a42b9a6..d026debf250c 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -34,8 +34,7 @@ SYM_FUNC_START(__memmove)
 	la	%r3,256(%r3)
 	brctg	%r0,.Lmemmove_forward_loop
 .Lmemmove_forward_remainder:
-	larl	%r5,.Lmemmove_mvc
-	ex	%r4,0(%r5)
+	exrl	%r4,.Lmemmove_mvc
 .Lmemmove_exit:
 	BR_EX	%r14
 .Lmemmove_reverse:
@@ -83,8 +82,7 @@ SYM_FUNC_START(__memset)
 	la	%r1,256(%r1)
 	brctg	%r3,.Lmemset_clear_loop
 .Lmemset_clear_remainder:
-	larl	%r3,.Lmemset_xc
-	ex	%r4,0(%r3)
+	exrl	%r4,.Lmemset_xc
 .Lmemset_exit:
 	BR_EX	%r14
 .Lmemset_fill:
@@ -102,8 +100,7 @@ SYM_FUNC_START(__memset)
 	brctg	%r5,.Lmemset_fill_loop
 .Lmemset_fill_remainder:
 	stc	%r3,0(%r1)
-	larl	%r5,.Lmemset_mvc
-	ex	%r4,0(%r5)
+	exrl	%r4,.Lmemset_mvc
 	BR_EX	%r14
 .Lmemset_fill_exit:
 	stc	%r3,0(%r1)
@@ -132,8 +129,7 @@ SYM_FUNC_START(__memcpy)
 	lgr	%r1,%r2
 	jnz	.Lmemcpy_loop
 .Lmemcpy_remainder:
-	larl	%r5,.Lmemcpy_mvc
-	ex	%r4,0(%r5)
+	exrl	%r4,.Lmemcpy_mvc
 .Lmemcpy_exit:
 	BR_EX	%r14
 .Lmemcpy_loop:
@@ -175,8 +171,7 @@ SYM_FUNC_START(__memset\bits)
 	brctg	%r5,.L__memset_loop\bits
 .L__memset_remainder\bits:
 	\insn	%r3,0(%r1)
-	larl	%r5,.L__memset_mvc\bits
-	ex	%r4,0(%r5)
+	exrl	%r4,.L__memset_mvc\bits
 	BR_EX	%r14
 .L__memset_store\bits:
 	\insn	%r3,0(%r2)
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index a81a01c44927..ad9da4038511 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -10,11 +10,13 @@
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
+#include <linux/sysctl.h>
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
 #include <linux/io.h>
 #include <asm/alternative.h>
+#include <asm/machine.h>
 #include <asm/asm.h>
 
 int spin_retry = -1;
@@ -37,6 +39,23 @@ static int __init spin_retry_setup(char *str)
 }
 __setup("spin_retry=", spin_retry_setup);
 
+static const struct ctl_table s390_spin_sysctl_table[] = {	/* sysctl entries exposed by this file */
+	{
+		.procname	= "spin_retry",	/* entry name */
+		.data		= &spin_retry,	/* reads and writes go to the global spin_retry */
+		.maxlen		= sizeof(int),
+		.mode		= 0644,	/* writable by owner, readable by all */
+		.proc_handler	= proc_dointvec,	/* plain integer handler; this entry sets no min/max bounds */
+	},
+};
+
+static int __init init_s390_spin_sysctls(void)	/* boot-time hook; always returns 0 */
+{
+	register_sysctl_init("kernel", s390_spin_sysctl_table);	/* attach the table under the "kernel" sysctl path */
+	return 0;
+}
+arch_initcall(init_s390_spin_sysctls);	/* invoked through the arch initcall mechanism */
+
 struct spin_wait {
 	struct spin_wait *next, *prev;
 	int node_id;
@@ -141,7 +160,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
 
 	ix = get_lowcore()->spinlock_index++;
 	barrier();
-	lockval = SPINLOCK_LOCKVAL;	/* cpu + 1 */
+	lockval = spinlock_lockval();	/* cpu + 1 */
 	node = this_cpu_ptr(&spin_wait[ix]);
 	node->prev = node->next = NULL;
 	node_id = node->node_id;
@@ -212,7 +231,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
 		if (count-- >= 0)
 			continue;
 		count = spin_retry;
-		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+		if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1))
 			smp_yield_cpu(owner - 1);
 	}
 
@@ -232,7 +251,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
 {
 	int lockval, old, new, owner, count;
 
-	lockval = SPINLOCK_LOCKVAL;	/* cpu + 1 */
+	lockval = spinlock_lockval();	/* cpu + 1 */
 
 	/* Pass the virtual CPU to the lock holder if it is not running */
 	owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL);
@@ -255,7 +274,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
 		if (count-- >= 0)
 			continue;
 		count = spin_retry;
-		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+		if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1))
 			smp_yield_cpu(owner - 1);
 	}
 }
@@ -271,7 +290,7 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
 
 int arch_spin_trylock_retry(arch_spinlock_t *lp)
 {
-	int cpu = SPINLOCK_LOCKVAL;
+	int cpu = spinlock_lockval();
 	int owner, count;
 
 	for (count = spin_retry; count > 0; count--) {
@@ -337,7 +356,7 @@ void arch_spin_relax(arch_spinlock_t *lp)
 	cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK;
 	if (!cpu)
 		return;
-	if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
+	if (machine_is_lpar() && !arch_vcpu_is_preempted(cpu - 1))
 		return;
 	smp_yield_cpu(cpu - 1);
 }
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 373fa1f01937..099de76e8b1a 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -78,50 +78,6 @@ EXPORT_SYMBOL(strnlen);
 #endif
 
 /**
- * strcpy - Copy a %NUL terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- *
- * returns a pointer to @dest
- */
-#ifdef __HAVE_ARCH_STRCPY
-char *strcpy(char *dest, const char *src)
-{
-	char *ret = dest;
-
-	asm volatile(
-		"	lghi	0,0\n"
-		"0:	mvst	%[dest],%[src]\n"
-		"	jo	0b\n"
-		: [dest] "+&a" (dest), [src] "+&a" (src)
-		:
-		: "cc", "memory", "0");
-	return ret;
-}
-EXPORT_SYMBOL(strcpy);
-#endif
-
-/**
- * strncpy - Copy a length-limited, %NUL-terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @n: The maximum number of bytes to copy
- *
- * The result is not %NUL-terminated if the source exceeds
- * @n bytes.
- */
-#ifdef __HAVE_ARCH_STRNCPY
-char *strncpy(char *dest, const char *src, size_t n)
-{
-	size_t len = __strnend(src, n) - src;
-	memset(dest + len, 0, n - len);
-	memcpy(dest, src, len);
-	return dest;
-}
-EXPORT_SYMBOL(strncpy);
-#endif
-
-/**
  * strcat - Append one %NUL-terminated string to another
  * @dest: The string to be appended to
  * @src: The string to append to it
@@ -181,9 +137,6 @@ EXPORT_SYMBOL(strlcat);
  * @n: The maximum numbers of bytes to copy
  *
  * returns a pointer to @dest
- *
- * Note that in contrast to strncpy, strncat ensures the result is
- * terminated.
  */
 #ifdef __HAVE_ARCH_STRNCAT
 char *strncat(char *dest, const char *src, size_t n)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index c7c269d5c491..1a6ba105e071 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -8,73 +8,85 @@
  *		 Gerald Schaefer (gerald.schaefer@de.ibm.com)
  */
 
+#include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <asm/asm-extable.h>
 #include <asm/ctlreg.h>
+#include <asm/skey.h>
 
 #ifdef CONFIG_DEBUG_ENTRY
 void debug_user_asce(int exit)
 {
+	struct lowcore *lc = get_lowcore();	/* fetched once; used by both the check and the panic report */
 	struct ctlreg cr1, cr7;
 
 	local_ctl_store(1, &cr1);
 	local_ctl_store(7, &cr7);
-	if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val)
+	if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val)	/* CR1 and CR7 are both compared against the user ASCE */
 		return;
 	panic("incorrect ASCE on kernel %s\n"
 	      "cr1:    %016lx cr7:  %016lx\n"
 	      "kernel: %016lx user: %016lx\n",
 	      exit ? "exit" : "entry", cr1.val, cr7.val,
-	      get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val);
+	      lc->kernel_asce.val, lc->user_asce.val);	/* NOTE(review): kernel_asce is still reported but is no longer part of the check - confirm intended */
 }
 #endif /*CONFIG_DEBUG_ENTRY */
 
-static unsigned long raw_copy_from_user_key(void *to, const void __user *from,
-					    unsigned long size, unsigned long key)
+union oac {	/* control word the copy helpers in this file load into r0 ahead of mvcos */
+	unsigned int val;	/* the whole word, as handed to the asm */
+	struct {
+		struct {
+			unsigned short key : 4;	/* set from the caller's key argument */
+			unsigned short	   : 4;
+			unsigned short as  : 2;	/* callers store PSW_BITS_AS_SECONDARY here */
+			unsigned short	   : 4;
+			unsigned short k   : 1;	/* presumably "key field is valid" - confirm */
+			unsigned short a   : 1;	/* presumably "as field is valid" - confirm */
+		} oac1;	/* filled in by raw_copy_to_user_key() */
+		struct {
+			unsigned short key : 4;
+			unsigned short	   : 4;
+			unsigned short as  : 2;
+			unsigned short	   : 4;
+			unsigned short k   : 1;
+			unsigned short a   : 1;
+		} oac2;	/* same layout as oac1; filled in by raw_copy_from_user_key() */
+	};
+};
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key)
 {
-	unsigned long rem;
+	unsigned long osize;	/* value of size before the current mvcos attempt */
 	union oac spec = {
 		.oac2.key = key,
 		.oac2.as = PSW_BITS_AS_SECONDARY,
 		.oac2.k = 1,
 		.oac2.a = 1,
 	};
+	int cc;	/* condition code captured after mvcos */
 
-	asm volatile(
-		"	lr	0,%[spec]\n"
-		"0:	mvcos	0(%[to]),0(%[from]),%[size]\n"
-		"1:	jz	5f\n"
-		"	algr	%[size],%[val]\n"
-		"	slgr	%[from],%[val]\n"
-		"	slgr	%[to],%[val]\n"
-		"	j	0b\n"
-		"2:	la	%[rem],4095(%[from])\n"	/* rem = from + 4095 */
-		"	nr	%[rem],%[val]\n"	/* rem = (from + 4095) & -4096 */
-		"	slgr	%[rem],%[from]\n"
-		"	clgr	%[size],%[rem]\n"	/* copy crosses next page boundary? */
-		"	jnh	6f\n"
-		"3:	mvcos	0(%[to]),0(%[from]),%[rem]\n"
-		"4:	slgr	%[size],%[rem]\n"
-		"	j	6f\n"
-		"5:	slgr	%[size],%[size]\n"
-		"6:\n"
-		EX_TABLE(0b, 2b)
-		EX_TABLE(1b, 2b)
-		EX_TABLE(3b, 6b)
-		EX_TABLE(4b, 6b)
-		: [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem)
-		: [val] "a" (-4096UL), [spec] "d" (spec.val)
-		: "cc", "memory", "0");
-	return size;
-}
-
-unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	return raw_copy_from_user_key(to, from, n, 0);
+	while (1) {	/* one mvcos attempt per pass; the only exit is the return below */
+		osize = size;
+		asm_inline volatile(
+			"	lr	%%r0,%[spec]\n"	/* control word goes into r0 */
+			"0:	mvcos	%[to],%[from],%[size]\n"
+			"1:	nopr	%%r7\n"
+			CC_IPM(cc)
+			EX_TABLE_UA_MVCOS_FROM(0b, 0b)	/* fixup entries for labels 0 and 1; the handler is defined outside this file */
+			EX_TABLE_UA_MVCOS_FROM(1b, 0b)
+			: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to)
+			: [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from)
+			: CC_CLOBBER_LIST("memory", "0"));
+		if (CC_TRANSFORM(cc) == 0)	/* cc 0 ends the loop */
+			return osize - size;	/* 0 unless size changed during the asm (presumably via the fault fixup - confirm) */
+		size -= 4096;	/* cc != 0: advance by 4096 (presumably the amount one mvcos handles - confirm) and retry */
+		to += 4096;
+		from += 4096;
+	}
 }
-EXPORT_SYMBOL(raw_copy_from_user);
 
 unsigned long _copy_from_user_key(void *to, const void __user *from,
 				  unsigned long n, unsigned long key)
@@ -93,50 +105,37 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
 }
 EXPORT_SYMBOL(_copy_from_user_key);
 
-static unsigned long raw_copy_to_user_key(void __user *to, const void *from,
-					  unsigned long size, unsigned long key)
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key)
 {
-	unsigned long rem;
+	unsigned long osize;	/* value of size before the current mvcos attempt */
 	union oac spec = {
 		.oac1.key = key,
 		.oac1.as = PSW_BITS_AS_SECONDARY,
 		.oac1.k = 1,
 		.oac1.a = 1,
 	};
+	int cc;	/* condition code captured after mvcos */
 
-	asm volatile(
-		"	lr	0,%[spec]\n"
-		"0:	mvcos	0(%[to]),0(%[from]),%[size]\n"
-		"1:	jz	5f\n"
-		"	algr	%[size],%[val]\n"
-		"	slgr	%[to],%[val]\n"
-		"	slgr	%[from],%[val]\n"
-		"	j	0b\n"
-		"2:	la	%[rem],4095(%[to])\n"	/* rem = to + 4095 */
-		"	nr	%[rem],%[val]\n"	/* rem = (to + 4095) & -4096 */
-		"	slgr	%[rem],%[to]\n"
-		"	clgr	%[size],%[rem]\n"	/* copy crosses next page boundary? */
-		"	jnh	6f\n"
-		"3:	mvcos	0(%[to]),0(%[from]),%[rem]\n"
-		"4:	slgr	%[size],%[rem]\n"
-		"	j	6f\n"
-		"5:	slgr	%[size],%[size]\n"
-		"6:\n"
-		EX_TABLE(0b, 2b)
-		EX_TABLE(1b, 2b)
-		EX_TABLE(3b, 6b)
-		EX_TABLE(4b, 6b)
-		: [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem)
-		: [val] "a" (-4096UL), [spec] "d" (spec.val)
-		: "cc", "memory", "0");
-	return size;
-}
-
-unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	return raw_copy_to_user_key(to, from, n, 0);
+	while (1) {	/* one mvcos attempt per pass; the only exit is the return below */
+		osize = size;
+		asm_inline volatile(
+			"	lr	%%r0,%[spec]\n"	/* control word goes into r0 */
+			"0:	mvcos	%[to],%[from],%[size]\n"
+			"1:	nopr	%%r7\n"
+			CC_IPM(cc)
+			EX_TABLE_UA_MVCOS_TO(0b, 0b)	/* fixup entries for labels 0 and 1; the handler is defined outside this file */
+			EX_TABLE_UA_MVCOS_TO(1b, 0b)
+			: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+			: [spec] "d" (spec.val), [from] "Q" (*(const char *)from)
+			: CC_CLOBBER_LIST("memory", "0"));
+		if (CC_TRANSFORM(cc) == 0)	/* cc 0 ends the loop */
+			return osize - size;	/* 0 unless size changed during the asm (presumably via the fault fixup - confirm) */
+		size -= 4096;	/* cc != 0: advance by 4096 (presumably the amount one mvcos handles - confirm) and retry */
+		to += 4096;
+		from += 4096;
+	}
 }
-EXPORT_SYMBOL(raw_copy_to_user);
 
 unsigned long _copy_to_user_key(void __user *to, const void *from,
 				unsigned long n, unsigned long key)
@@ -149,38 +148,188 @@ unsigned long _copy_to_user_key(void __user *to, const void *from,
 }
 EXPORT_SYMBOL(_copy_to_user_key);
 
-unsigned long __clear_user(void __user *to, unsigned long size)
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsigned int *uval,
+						    unsigned int old, unsigned int new,
+						    unsigned int mask, unsigned long key)
 {
-	unsigned long rem;
-	union oac spec = {
-		.oac1.as = PSW_BITS_AS_SECONDARY,
-		.oac1.a = 1,
-	};
+	unsigned long count;	/* retry budget; loaded and decremented inside the asm */
+	unsigned int prev;	/* word value seen at address; copied to *uval */
+	bool sacf_flag;
+	int rc = 0;	/* stays 0 unless changed through the %[rc] operand or the count check below */
+
+	skey_regions_initialize();
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile(
+		"20:	spka	0(%[key])\n"	/* switch to the caller-supplied access key */
+		"	sacf	256\n"
+		"	llill	%[count],%[max_loops]\n"	/* count = CMPXCHG_USER_KEY_MAX_LOOPS */
+		"0:	l	%[prev],%[address]\n"	/* fetch the containing word */
+		"1:	nr	%[prev],%[mask]\n"	/* keep only the bits selected by mask */
+		"	xilf	%[mask],0xffffffff\n"	/* invert mask */
+		"	or	%[new],%[prev]\n"	/* new |= surrounding bits */
+		"	or	%[prev],%[tmp]\n"	/* prev = surrounding bits | old */
+		"2:	lr	%[tmp],%[prev]\n"	/* remember the value cs is about to compare with */
+		"3:	cs	%[prev],%[new],%[address]\n"
+		"4:	jnl	5f\n"	/* leave unless cs reported a mismatch */
+		"	xr	%[tmp],%[prev]\n"	/* bits that differ between expected and actual */
+		"	xr	%[new],%[tmp]\n"	/* carry those differences over into new */
+		"	nr	%[tmp],%[mask]\n"	/* any difference inside the inverted mask? */
+		"	jnz	5f\n"	/* yes: stop */
+		"	brct	%[count],2b\n"	/* no: retry while count stays non-zero */
+		"5:	sacf	768\n"
+		"	spka	%[default_key]\n"	/* back to PAGE_DEFAULT_KEY */
+		"21:\n"
+		EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])	/* fixup entries resume at label 5; handler defined outside this file */
+		EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+		EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+		EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+		SKEY_REGION(20b, 21b)
+		: [rc] "+&d" (rc),
+		[prev] "=&d" (prev),
+		[address] "+Q" (*(int *)address),
+		[tmp] "+&d" (old),
+		[new] "+&d" (new),
+		[mask] "+&d" (mask),
+		[count] "=a" (count)
+		: [key] "%[count]" (key << 4),
+		[default_key] "J" (PAGE_DEFAULT_KEY),
+		[max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+		: "memory", "cc");
+	disable_sacf_uaccess(sacf_flag);
+	*uval = prev;	/* full word; callers shift out the part they want */
+	if (!count)	/* retry budget used up */
+		rc = -EAGAIN;
+	return rc;
+
+int __kprobes __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
+				  unsigned char old, unsigned char new, unsigned long key)
+{
+	unsigned int prev, shift, mask, _old, _new;
+	int rc;
+
+	shift = (3 ^ (address & 3)) << 3;	/* bit position of the byte in its word: offset 0 -> 24, offset 3 -> 0 */
+	address ^= address & 3;	/* round down to the containing 4-byte word */
+	_old = (unsigned int)old << shift;
+	_new = (unsigned int)new << shift;
+	mask = ~(0xff << shift);	/* every bit except the target byte */
+	rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
+	*uval = prev >> shift;	/* target byte of the word the helper reported */
+	return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key1);
+
+int __kprobes __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
+				  unsigned short old, unsigned short new, unsigned long key)
+{
+	unsigned int prev, shift, mask, _old, _new;
+	int rc;
+
+	shift = (2 ^ (address & 2)) << 3;	/* bit position of the halfword in its word: offset 0 -> 16, offset 2 -> 0 */
+	address ^= address & 2;	/* clears bit 1 only; NOTE(review): bit 0 is untouched, so a 2-byte aligned address is assumed - confirm */
+	_old = (unsigned int)old << shift;
+	_new = (unsigned int)new << shift;
+	mask = ~(0xffff << shift);	/* every bit except the target halfword */
+	rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
+	*uval = prev >> shift;	/* target halfword of the word the helper reported */
+	return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key2);
+
+int __kprobes __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
+				  unsigned int old, unsigned int new, unsigned long key)
+{
+	unsigned int prev = old;	/* compare value going into cs; read back afterwards */
+	bool sacf_flag;
+	int rc = 0;	/* stays 0 unless changed through the %[rc] operand of the fixup entries */
+
+	skey_regions_initialize();
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile(
+		"20:	spka	0(%[key])\n"	/* switch to the caller-supplied access key */
+		"	sacf	256\n"
+		"0:	cs	%[prev],%[new],%[address]\n"	/* 32-bit compare and swap */
+		"1:	sacf	768\n"
+		"	spka	%[default_key]\n"	/* back to PAGE_DEFAULT_KEY */
+		"21:\n"
+		EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])	/* fixup entries resume at label 1; handler defined outside this file */
+		EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+		SKEY_REGION(20b, 21b)
+		: [rc] "+&d" (rc),
+		[prev] "+&d" (prev),
+		[address] "+Q" (*(int *)address)
+		: [new] "d" (new),
+		[key] "a" (key << 4),
+		[default_key] "J" (PAGE_DEFAULT_KEY)
+		: "memory", "cc");
+	disable_sacf_uaccess(sacf_flag);
+	*uval = prev;	/* prev as left by the asm */
+	return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key4);
+
+int __kprobes __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
+				  unsigned long old, unsigned long new, unsigned long key)
+{
+	unsigned long prev = old;	/* compare value going into csg; read back afterwards */
+	bool sacf_flag;
+	int rc = 0;	/* stays 0 unless changed through the %[rc] operand of the fixup entries */
+
+	skey_regions_initialize();
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile(
+		"20:	spka	0(%[key])\n"	/* switch to the caller-supplied access key */
+		"	sacf	256\n"
+		"0:	csg	%[prev],%[new],%[address]\n"	/* 64-bit compare and swap */
+		"1:	sacf	768\n"
+		"	spka	%[default_key]\n"	/* back to PAGE_DEFAULT_KEY */
+		"21:\n"
+		EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])	/* fixup entries resume at label 1; handler defined outside this file */
+		EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+		SKEY_REGION(20b, 21b)
+		: [rc] "+&d" (rc),
+		[prev] "+&d" (prev),
+		[address] "+QS" (*(long *)address)
+		: [new] "d" (new),
+		[key] "a" (key << 4),
+		[default_key] "J" (PAGE_DEFAULT_KEY)
+		: "memory", "cc");
+	disable_sacf_uaccess(sacf_flag);
+	*uval = prev;	/* prev as left by the asm */
+	return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key8);
+
+int __kprobes __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
+				   __uint128_t old, __uint128_t new, unsigned long key)
+{
+	__uint128_t prev = old;	/* compare value going into cdsg; read back afterwards */
+	bool sacf_flag;
+	int rc = 0;	/* stays 0 unless changed through the %[rc] operand of the fixup entries */
 
-	asm volatile(
-		"	lr	0,%[spec]\n"
-		"0:	mvcos	0(%[to]),0(%[zeropg]),%[size]\n"
-		"1:	jz	5f\n"
-		"	algr	%[size],%[val]\n"
-		"	slgr	%[to],%[val]\n"
-		"	j	0b\n"
-		"2:	la	%[rem],4095(%[to])\n"	/* rem = to + 4095 */
-		"	nr	%[rem],%[val]\n"	/* rem = (to + 4095) & -4096 */
-		"	slgr	%[rem],%[to]\n"
-		"	clgr	%[size],%[rem]\n"	/* copy crosses next page boundary? */
-		"	jnh	6f\n"
-		"3:	mvcos	0(%[to]),0(%[zeropg]),%[rem]\n"
-		"4:	slgr	%[size],%[rem]\n"
-		"	j	6f\n"
-		"5:	slgr	%[size],%[size]\n"
-		"6:\n"
-		EX_TABLE(0b, 2b)
-		EX_TABLE(1b, 2b)
-		EX_TABLE(3b, 6b)
-		EX_TABLE(4b, 6b)
-		: [size] "+&a" (size), [to] "+&a" (to), [rem] "=&a" (rem)
-		: [val] "a" (-4096UL), [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val)
-		: "cc", "memory", "0");
-	return size;
+	skey_regions_initialize();
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile(
+		"20:	spka	0(%[key])\n"	/* switch to the caller-supplied access key */
+		"	sacf	256\n"
+		"0:	cdsg	%[prev],%[new],%[address]\n"	/* 128-bit compare and swap on register pairs */
+		"1:	sacf	768\n"
+		"	spka	%[default_key]\n"	/* back to PAGE_DEFAULT_KEY */
+		"21:\n"
+		EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])	/* fixup entries resume at label 1; handler defined outside this file */
+		EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+		SKEY_REGION(20b, 21b)
+		: [rc] "+&d" (rc),
+		[prev] "+&d" (prev),
+		[address] "+QS" (*(__int128_t *)address)
+		: [new] "d" (new),
+		[key] "a" (key << 4),
+		[default_key] "J" (PAGE_DEFAULT_KEY)
+		: "memory", "cc");
+	disable_sacf_uaccess(sacf_flag);
+	*uval = prev;	/* prev as left by the asm */
+	return rc;
 }
-EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__cmpxchg_user_key16);
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index fb924a8041dc..ce7bcf7c0032 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -15,7 +15,6 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
 		     const unsigned long * __restrict p2)
 {
 	asm volatile(
-		"	larl	1,2f\n"
 		"	aghi	%0,-1\n"
 		"	jm	3f\n"
 		"	srlg	0,%0,8\n"
@@ -25,12 +24,12 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
 		"	la	%1,256(%1)\n"
 		"	la	%2,256(%2)\n"
 		"	brctg	0,0b\n"
-		"1:	ex	%0,0(1)\n"
+		"1:	exrl	%0,2f\n"
 		"	j	3f\n"
 		"2:	xc	0(1,%1),0(%2)\n"
 		"3:\n"
-		: : "d" (bytes), "a" (p1), "a" (p2)
-		: "0", "1", "cc", "memory");
+		: "+d" (bytes), "+a" (p1), "+a" (p2)	/* read-write: aghi and la modify all three operands */
+		: : "0", "cc", "memory");
 }
 
 static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
@@ -38,9 +37,8 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
 		     const unsigned long * __restrict p3)
 {
 	asm volatile(
-		"	larl	1,2f\n"
 		"	aghi	%0,-1\n"
-		"	jm	3f\n"
+		"	jm	4f\n"
 		"	srlg	0,%0,8\n"
 		"	ltgr	0,0\n"
 		"	jz	1f\n"
@@ -50,14 +48,14 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
 		"	la	%2,256(%2)\n"
 		"	la	%3,256(%3)\n"
 		"	brctg	0,0b\n"
-		"1:	ex	%0,0(1)\n"
-		"	ex	%0,6(1)\n"
-		"	j	3f\n"
+		"1:	exrl	%0,2f\n"
+		"	exrl	%0,3f\n"
+		"	j	4f\n"
 		"2:	xc	0(1,%1),0(%2)\n"
-		"	xc	0(1,%1),0(%3)\n"
-		"3:\n"
+		"3:	xc	0(1,%1),0(%3)\n"
+		"4:\n"
 		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
-		: : "0", "1", "cc", "memory");
+		: : "0", "cc", "memory");
 }
 
 static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
@@ -66,9 +64,8 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
 		     const unsigned long * __restrict p4)
 {
 	asm volatile(
-		"	larl	1,2f\n"
 		"	aghi	%0,-1\n"
-		"	jm	3f\n"
+		"	jm	5f\n"
 		"	srlg	0,%0,8\n"
 		"	ltgr	0,0\n"
 		"	jz	1f\n"
@@ -80,16 +77,16 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
 		"	la	%3,256(%3)\n"
 		"	la	%4,256(%4)\n"
 		"	brctg	0,0b\n"
-		"1:	ex	%0,0(1)\n"
-		"	ex	%0,6(1)\n"
-		"	ex	%0,12(1)\n"
-		"	j	3f\n"
+		"1:	exrl	%0,2f\n"
+		"	exrl	%0,3f\n"
+		"	exrl	%0,4f\n"
+		"	j	5f\n"
 		"2:	xc	0(1,%1),0(%2)\n"
-		"	xc	0(1,%1),0(%3)\n"
-		"	xc	0(1,%1),0(%4)\n"
-		"3:\n"
+		"3:	xc	0(1,%1),0(%3)\n"
+		"4:	xc	0(1,%1),0(%4)\n"
+		"5:\n"
 		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
-		: : "0", "1", "cc", "memory");
+		: : "0", "cc", "memory");
 }
 
 static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
@@ -101,7 +98,6 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
 	asm volatile(
-		"	larl	1,2f\n"
 		"	aghi	%0,-1\n"
-		"	jm	3f\n"
+		"	jm	6f\n"
 		"	srlg	0,%0,8\n"
 		"	ltgr	0,0\n"
 		"	jz	1f\n"
@@ -115,19 +111,19 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
 		"	la	%4,256(%4)\n"
 		"	la	%5,256(%5)\n"
 		"	brctg	0,0b\n"
-		"1:	ex	%0,0(1)\n"
-		"	ex	%0,6(1)\n"
-		"	ex	%0,12(1)\n"
-		"	ex	%0,18(1)\n"
-		"	j	3f\n"
+		"1:	exrl	%0,2f\n"	/* templates are addressed by label, so no base register (r1) is needed */
+		"	exrl	%0,3f\n"
+		"	exrl	%0,4f\n"
+		"	exrl	%0,5f\n"
+		"	j	6f\n"
 		"2:	xc	0(1,%1),0(%2)\n"
-		"	xc	0(1,%1),0(%3)\n"
-		"	xc	0(1,%1),0(%4)\n"
-		"	xc	0(1,%1),0(%5)\n"
-		"3:\n"
+		"3:	xc	0(1,%1),0(%3)\n"
+		"4:	xc	0(1,%1),0(%4)\n"
+		"5:	xc	0(1,%1),0(%5)\n"
+		"6:\n"
 		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
 		  "+a" (p5)
-		: : "0", "1", "cc", "memory");
+		: : "0", "cc", "memory");
 }
 
 struct xor_block_template xor_block_xc = {