149 files changed, 2539 insertions, 2960 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 0c16dc443e2f..bf680c26a33c 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -74,8 +74,8 @@ config S390
 	select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_CPU_FINALIZE_INIT
-	select ARCH_HAS_CRC32
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
@@ -103,6 +103,7 @@ config S390
 	select ARCH_HAS_UBSAN
 	select ARCH_HAS_VDSO_TIME_DATA
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_HAVE_TRACE_MMIO_ACCESS
 	select ARCH_INLINE_READ_LOCK
 	select ARCH_INLINE_READ_LOCK_BH
 	select ARCH_INLINE_READ_LOCK_IRQ
@@ -132,6 +133,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
+	select ARCH_MODULE_NEEDS_WEAK_PER_CPU
 	select ARCH_STACKWALK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC
@@ -150,6 +152,7 @@ config S390
 	select ARCH_WANT_KERNEL_PMD_MKWRITE
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+	select ARCH_WANTS_THP_SWAP
 	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS2
 	select DCACHE_WORD_ACCESS if !KMSAN
@@ -176,10 +179,10 @@ config S390
 	select HAVE_ARCH_KCSAN
 	select HAVE_ARCH_KMSAN
 	select HAVE_ARCH_KFENCE
+	select HAVE_ARCH_KSTACK_ERASE
 	select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_SOFT_DIRTY
-	select HAVE_ARCH_STACKLEAK
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_VMAP_STACK
@@ -199,7 +202,6 @@ config S390
 	select HAVE_GUP_FAST
 	select HAVE_FENTRY
 	select HAVE_FTRACE_GRAPH_FUNC
-	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_FUNCTION_GRAPH_FREGS
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index dd7ba7587dd5..ad2b0baa527c 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -12,6 +12,7 @@
 #define KMSG_COMPONENT	"appldata"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/export.h>
 #include <linux/module.h>
 #include <linux/sched/stat.h>
 #include <linux/init.h>
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index bee49626be4b..02f2cf082748 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -19,15 +19,15 @@ CC_FLAGS_MARCH_MINIMUM := -march=z10
 
 KBUILD_AFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_AFLAGS_DECOMPRESSOR))
 KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_CFLAGS_DECOMPRESSOR))
-KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM)
-KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM)
+KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM) -D__DISABLE_EXPORTS
+KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM) -D__DISABLE_EXPORTS
 
 CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
 
 obj-y	:= head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
 obj-y	+= string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
 obj-y	+= version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o
-obj-y	+= uv.o printk.o
+obj-y	+= uv.o printk.o trampoline.o
 obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 obj-y	+= $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
 obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
index 79afb5fa7f1f..25a20986b96e 100644
--- a/arch/s390/boot/als.c
+++ b/arch/s390/boot/als.c
@@ -65,7 +65,7 @@ static void facility_mismatch(void)
 	boot_emerg("The Linux kernel requires more recent processor hardware\n");
 	boot_emerg("Detected machine-type number: %4x\n", id.machine);
 	print_missing_facilities();
-	boot_emerg("See Principles of Operations for facility bits\n");
+	boot_emerg("See z/Architecture Principles of Operation - Facility Indications\n");
 	disabled_wait();
 }
 
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index e045cae6e80a..c0152db285f0 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -6,7 +6,7 @@
 
 #define IPL_START	0x200
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/printk.h>
 #include <asm/physmem_info.h>
@@ -74,6 +74,7 @@ void print_stacktrace(unsigned long sp);
 void error(char *m);
 int get_random(unsigned long limit, unsigned long *value);
 void boot_rb_dump(void);
+void __noreturn jump_to_kernel(psw_t *psw);
 
 #ifndef boot_fmt
 #define boot_fmt(fmt)	fmt
@@ -121,5 +122,5 @@ static inline bool intersects(unsigned long addr0, unsigned long size0,
 {
 	return addr0 + size0 > addr1 && addr1 + size1 > addr0;
 }
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c
index 0846e2b249c6..c4130a80b058 100644
--- a/arch/s390/boot/ipl_data.c
+++ b/arch/s390/boot/ipl_data.c
@@ -16,7 +16,9 @@ struct ipl_lowcore {
 	struct ccw0	ccwpgm[2];			/* 0x0008 */
 	u8		fill[56];			/* 0x0018 */
 	struct ccw0	ccwpgmcc[20];			/* 0x0050 */
-	u8		pad_0xf0[0x01a0-0x00f0];	/* 0x00f0 */
+	u8		pad_0xf0[0x0140-0x00f0];	/* 0x00f0 */
+	psw_t		svc_old_psw;			/* 0x0140 */
+	u8		pad_0x150[0x01a0-0x0150];	/* 0x0150 */
 	psw_t		restart_psw;			/* 0x01a0 */
 	psw_t		external_new_psw;		/* 0x01b0 */
 	psw_t		svc_new_psw;			/* 0x01c0 */
@@ -75,6 +77,11 @@ static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = {
 		[18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
 		[19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI),
 	},
+	/*
+	 * Let GDB's lx-symbols command find the jump_to_kernel symbol
+	 * without having to load decompressor symbols.
+	 */
+	.svc_old_psw	  = { .mask = 0, .addr = (unsigned long)jump_to_kernel },
 	.restart_psw	  = { .mask = 0, .addr = IPL_START, },
 	.external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, },
 	.svc_new_psw	  = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, },
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index da8337e63a3e..93684a775716 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -384,7 +384,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
 		kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE);
 		boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax);
 		boot_debug("kernel image:        0x%016lx-0x%016lx (kaslr)\n", kernel_start,
-			   kernel_size + kernel_size);
+			   kernel_start + kernel_size);
 	} else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) {
 		kernel_start = round_down(vmax - kernel_size, THREAD_SIZE);
 		boot_debug("kernel image:        0x%016lx-0x%016lx (constrained)\n", kernel_start,
@@ -642,5 +642,5 @@ void startup_kernel(void)
 	psw.addr = __kaslr_offset + vmlinux.entry;
 	psw.mask = PSW_KERNEL_BITS;
 	boot_debug("Starting kernel at:  0x%016lx\n", psw.addr);
-	__load_psw(psw);
+	jump_to_kernel(&psw);
 }
diff --git a/arch/s390/boot/trampoline.S b/arch/s390/boot/trampoline.S
new file mode 100644
index 000000000000..1cb5adf005ea
--- /dev/null
+++ b/arch/s390/boot/trampoline.S
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+
+# jump_to_kernel(psw_t *psw) loads the PSW found at 0(%r2), it does not return.
+# Identical to __load_psw(), kept separate as lx-symbols (GDB) breaks on it.
+SYM_CODE_START(jump_to_kernel)
+	lpswe 0(%r2)
+SYM_CODE_END(jump_to_kernel)
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 8ecad727497e..6b33429f1c4d 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -248,7 +248,6 @@ CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
 CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
 CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
 CONFIG_NETFILTER_XT_MATCH_CPU=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
 CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
 CONFIG_NETFILTER_XT_MATCH_DSCP=m
 CONFIG_NETFILTER_XT_MATCH_ESP=m
@@ -804,8 +803,6 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA3_256_S390=m
 CONFIG_CRYPTO_SHA3_512_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
@@ -819,6 +816,7 @@ CONFIG_PKEY_EP11=m
 CONFIG_PKEY_PCKMO=m
 CONFIG_PKEY_UV=m
 CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_PHMAC_S390=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_SYSTEM_BLACKLIST_KEYRING=y
 CONFIG_CRYPTO_KRB5=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index c13a77765162..b75eb2775850 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -239,7 +239,6 @@ CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
 CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
 CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
 CONFIG_NETFILTER_XT_MATCH_CPU=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
 CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
 CONFIG_NETFILTER_XT_MATCH_DSCP=m
 CONFIG_NETFILTER_XT_MATCH_ESP=m
@@ -791,8 +790,6 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA3_256_S390=m
 CONFIG_CRYPTO_SHA3_512_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
@@ -806,6 +803,7 @@ CONFIG_PKEY_EP11=m
 CONFIG_PKEY_PCKMO=m
 CONFIG_PKEY_UV=m
 CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_PHMAC_S390=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_SYSTEM_BLACKLIST_KEYRING=y
 CONFIG_CRYPTO_KRB5=m
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
index e2c27588b21a..03f73fbd38b6 100644
--- a/arch/s390/crypto/Kconfig
+++ b/arch/s390/crypto/Kconfig
@@ -2,26 +2,6 @@
 
 menu "Accelerated Cryptographic Algorithms for CPU (s390)"
 
-config CRYPTO_SHA512_S390
-	tristate "Hash functions: SHA-384 and SHA-512"
-	select CRYPTO_HASH
-	help
-	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
-
-	  Architecture: s390
-
-	  It is available as of z10.
-
-config CRYPTO_SHA1_S390
-	tristate "Hash functions: SHA-1"
-	select CRYPTO_HASH
-	help
-	  SHA-1 secure hash algorithm (FIPS 180)
-
-	  Architecture: s390
-
-	  It is available as of z990.
-
 config CRYPTO_SHA3_256_S390
 	tristate "Hash functions: SHA3-224 and SHA3-256"
 	select CRYPTO_HASH
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 21757d86cd49..998f4b656b18 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -3,8 +3,6 @@
 # Cryptographic API
 #
 
-obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
@@ -13,4 +11,5 @@ obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
 obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
 obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o
+obj-$(CONFIG_CRYPTO_PHMAC_S390) += phmac_s390.o
 obj-y += arch_random.o
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index a8a2407381af..083e8d5eada2 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -6,6 +6,7 @@
  * Author(s): Harald Freudenberger
  */
 
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/atomic.h>
 #include <linux/random.h>
diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c
index 93a1098d9f8d..58444da9b004 100644
--- a/arch/s390/crypto/hmac_s390.c
+++ b/arch/s390/crypto/hmac_s390.c
@@ -290,6 +290,7 @@ static int s390_hmac_export(struct shash_desc *desc, void *out)
 	struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int bs = crypto_shash_blocksize(desc->tfm);
 	unsigned int ds = bs / 2;
+	u64 lo = ctx->buflen[0];
 	union {
 		u8 *u8;
 		u64 *u64;
@@ -301,9 +302,10 @@ static int s390_hmac_export(struct shash_desc *desc, void *out)
 	else
 		memcpy(p.u8, ctx->param, ds);
 	p.u8 += ds;
-	put_unaligned(ctx->buflen[0], p.u64++);
+	lo += bs;
+	put_unaligned(lo, p.u64++);
 	if (ds == SHA512_DIGEST_SIZE)
-		put_unaligned(ctx->buflen[1], p.u64);
+		put_unaligned(ctx->buflen[1] + (lo < bs), p.u64);
 	return err;
 }
 
@@ -316,14 +318,16 @@ static int s390_hmac_import(struct shash_desc *desc, const void *in)
 		const u8 *u8;
 		const u64 *u64;
 	} p = { .u8 = in };
+	u64 lo;
 	int err;
 
 	err = s390_hmac_sha2_init(desc);
 	memcpy(ctx->param, p.u8, ds);
 	p.u8 += ds;
-	ctx->buflen[0] = get_unaligned(p.u64++);
+	lo = get_unaligned(p.u64++);
+	ctx->buflen[0] = lo - bs;
 	if (ds == SHA512_DIGEST_SIZE)
-		ctx->buflen[1] = get_unaligned(p.u64);
+		ctx->buflen[1] = get_unaligned(p.u64) - (lo < bs);
 	if (ctx->buflen[0] | ctx->buflen[1])
 		ctx->gr0.ikp = 1;
 	return err;
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 8a340c16acb4..a624a43a2b54 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -1633,7 +1633,7 @@ static int __init paes_s390_init(void)
 	/* with this pseudo devie alloc and start a crypto engine */
 	paes_crypto_engine =
 		crypto_engine_alloc_init_and_set(paes_dev.this_device,
-						 true, NULL, false, MAX_QLEN);
+						 true, false, MAX_QLEN);
 	if (!paes_crypto_engine) {
 		rc = -ENOMEM;
 		goto out_err;
diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c
new file mode 100644
index 000000000000..7ecfdc4fba2d
--- /dev/null
+++ b/arch/s390/crypto/phmac_s390.c
@@ -0,0 +1,1048 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright IBM Corp. 2025
+ *
+ * s390 specific HMAC support for protected keys.
+ */
+
+#define KMSG_COMPONENT	"phmac_s390"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <asm/cpacf.h>
+#include <asm/pkey.h>
+#include <crypto/engine.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha2.h>
+#include <linux/atomic.h>
+#include <linux/cpufeature.h>
+#include <linux/delay.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+static struct crypto_engine *phmac_crypto_engine;
+#define MAX_QLEN 10
+
+/*
+ * A simple hash walk helper: walk state plus a cursor into the current hunk
+ */
+
+struct hash_walk_helper {
+	struct crypto_hash_walk walk;	/* underlying crypto hash walk state */
+	const u8 *walkaddr;		/* next unprocessed byte of the hunk */
+	int walkbytes;			/* bytes left in the hunk, < 0 on error */
+};
+
+/*
+ * Start the hash walk for req and point walkaddr/walkbytes at its first
+ * hunk. Returns 0 on success or negative value on error.
+ */
+static inline int hwh_prepare(struct ahash_request *req,
+			      struct hash_walk_helper *hwh)
+{
+	hwh->walkbytes = crypto_hash_walk_first(req, &hwh->walk);
+	if (hwh->walkbytes >= 0) {
+		hwh->walkaddr = hwh->walk.data;
+		return 0;
+	}
+	return hwh->walkbytes;
+}
+
+/*
+ * Consume n bytes of the current hunk. Once the hunk is used up the
+ * next one is pulled from the hash walk and walkaddr/walkbytes are
+ * reloaded. A negative n ends the walk via crypto_hash_walk_done().
+ * Returns 0 on success or negative value on error.
+ */
+static inline int hwh_advance(struct hash_walk_helper *hwh, int n)
+{
+	int next;
+
+	if (n < 0)
+		return crypto_hash_walk_done(&hwh->walk, n);
+
+	hwh->walkaddr += n;
+	hwh->walkbytes -= n;
+	if (hwh->walkbytes > 0)
+		return 0;
+
+	next = crypto_hash_walk_done(&hwh->walk, 0);
+	hwh->walkbytes = next;
+	if (next >= 0)
+		hwh->walkaddr = hwh->walk.data;
+
+	return next < 0 ? next : 0;
+}
+
+/*
+ * KMAC param block layout for sha2 function codes:
+ * The layout of the param block for the KMAC instruction depends on the
+ * blocksize of the used hashing sha2-algorithm function codes. The param block
+ * contains the hash chaining value (cv), the input message bit-length (imbl)
+ * and the hmac-secret (key). To prevent code duplication, the sizes of all
+ * these are calculated based on the blocksize.
+ *
+ * param-block:
+ * +-------+
+ * | cv    |
+ * +-------+
+ * | imbl  |
+ * +-------+
+ * | key   |
+ * +-------+
+ *
+ * sizes:
+ * part | sh2-alg | calculation  | size | type
+ * -----+---------+--------------+------+--------
+ * cv   | 224/256 | blocksize/2  |   32 |  u32[8]
+ *      | 384/512 |              |   64 |  u64[8]
+ * imbl | 224/256 | blocksize/8  |    8 |     u64
+ *      | 384/512 |              |   16 |    u128
+ * key  | 224/256 | blocksize+32 |   96 |  u8[96]
+ *      | 384/512 |              |  160 | u8[160]
+ */
+
+#define MAX_DIGEST_SIZE		SHA512_DIGEST_SIZE
+#define MAX_IMBL_SIZE		sizeof(u128)
+#define MAX_BLOCK_SIZE		SHA512_BLOCK_SIZE
+
+#define SHA2_CV_SIZE(bs)	((bs) >> 1)
+#define SHA2_IMBL_SIZE(bs)	((bs) >> 3)
+
+#define SHA2_IMBL_OFFSET(bs)	(SHA2_CV_SIZE(bs))
+#define SHA2_KEY_OFFSET(bs)	(SHA2_CV_SIZE(bs) + SHA2_IMBL_SIZE(bs))
+
+#define PHMAC_MAX_KEYSIZE       256
+#define PHMAC_SHA256_PK_SIZE	(SHA256_BLOCK_SIZE + 32)
+#define PHMAC_SHA512_PK_SIZE	(SHA512_BLOCK_SIZE + 32)
+#define PHMAC_MAX_PK_SIZE	PHMAC_SHA512_PK_SIZE
+
+/* phmac protected key struct, filled via pkey_key2protkey() in convert_key() */
+struct phmac_protkey {
+	u32 type;			/* key type, passed by reference to pkey */
+	u32 len;			/* nr of protkey[] bytes copied into the param block */
+	u8 protkey[PHMAC_MAX_PK_SIZE];	/* the protected key value */
+};
+
+#define PK_STATE_NO_KEY		     0
+#define PK_STATE_CONVERT_IN_PROGRESS 1
+#define PK_STATE_VALID		     2
+
+/* phmac tfm context */
+struct phmac_tfm_ctx {
+	/* source key material used to derive a protected key from */
+	u8 keybuf[PHMAC_MAX_KEYSIZE];
+	unsigned int keylen;
+
+	/* cpacf function code for this key, 0 makes init() fail with -ENOKEY */
+	long fc;
+
+	/* nr of requests enqueued via crypto engine which use this tfm ctx */
+	atomic_t via_engine_ctr;
+
+	/* spinlock to atomically read/update all the following fields */
+	spinlock_t pk_lock;
+
+	/* see PK_STATE* defines above, < 0 holds convert failure rc  */
+	int pk_state;
+	/* if state is valid, pk holds the protected key */
+	struct phmac_protkey pk;
+};
+
+union kmac_gr0 {
+	unsigned long reg;	/* whole value, handed to _cpacf_kmac() by address */
+	struct {
+		unsigned long		: 48;
+		unsigned long ikp	:  1;
+		unsigned long iimp	:  1;	/* set to 1 by update, to 0 by final */
+		unsigned long ccup	:  1;
+		unsigned long		:  6;
+		unsigned long fc	:  7;	/* function code taken from the tfm ctx */
+	};
+};
+
+struct kmac_sha2_ctx {
+	u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + PHMAC_MAX_PK_SIZE];	/* cv, imbl, key */
+	union kmac_gr0 gr0;	/* function code and flags for _cpacf_kmac() */
+	u8 buf[MAX_BLOCK_SIZE];	/* holds the not yet processed partial block */
+	u64 buflen[2];		/* message bytes so far: [0] low, [1] high 64 bits */
+};
+
+/* phmac request context, zeroed as a whole by phmac_init() */
+struct phmac_req_ctx {
+	struct hash_walk_helper hwh;	/* walk over the request's input data */
+	struct kmac_sha2_ctx kmac_ctx;	/* KMAC state, wiped when a request ends */
+	bool final;	/* set by final()/finup() before the engine hand-over */
+};
+
+/*
+ * Pkey 'token' struct used to derive a protected key value from a clear key.
+ */
+struct hmac_clrkey_token {
+	u8  type;	/* set to 0x00 by make_clrkey_token() */
+	u8  res0[3];
+	u8  version;	/* set to 0x02 by make_clrkey_token() */
+	u8  res1[3];
+	u32 keytype;	/* PKEY_KEYTYPE_HMAC_512 or PKEY_KEYTYPE_HMAC_1024 */
+	u32 len;	/* hash block size in bytes: 64 or 128 */
+	u8 key[];	/* the clear key, or its digest if longer than len */
+} __packed;
+
+static int hash_key(const u8 *in, unsigned int inlen,
+		    u8 *digest, unsigned int digestsize)
+{
+	unsigned long func;	/* cpacf KLMD function code */
+	union {			/* KLMD param block: hash values h[], then mbl */
+		struct sha256_paramblock {
+			u32 h[8];
+			u64 mbl;	/* preset with inlen * 8 by PARAM_INIT */
+		} sha256;
+		struct sha512_paramblock {
+			u64 h[8];
+			u128 mbl;	/* preset with inlen * 8 by PARAM_INIT */
+		} sha512;
+	} __packed param;
+
+#define PARAM_INIT(x, y, z)		   \
+	param.sha##x.h[0] = SHA##y ## _H0; \
+	param.sha##x.h[1] = SHA##y ## _H1; \
+	param.sha##x.h[2] = SHA##y ## _H2; \
+	param.sha##x.h[3] = SHA##y ## _H3; \
+	param.sha##x.h[4] = SHA##y ## _H4; \
+	param.sha##x.h[5] = SHA##y ## _H5; \
+	param.sha##x.h[6] = SHA##y ## _H6; \
+	param.sha##x.h[7] = SHA##y ## _H7; \
+	param.sha##x.mbl = (z)
+
+	switch (digestsize) {	/* selects KLMD function and initial hash values */
+	case SHA224_DIGEST_SIZE:
+		func = CPACF_KLMD_SHA_256;	/* SHA-224 constants, 256 bit function */
+		PARAM_INIT(256, 224, inlen * 8);
+		break;
+	case SHA256_DIGEST_SIZE:
+		func = CPACF_KLMD_SHA_256;
+		PARAM_INIT(256, 256, inlen * 8);
+		break;
+	case SHA384_DIGEST_SIZE:
+		func = CPACF_KLMD_SHA_512;	/* SHA-384 constants, 512 bit function */
+		PARAM_INIT(512, 384, inlen * 8);
+		break;
+	case SHA512_DIGEST_SIZE:
+		func = CPACF_KLMD_SHA_512;
+		PARAM_INIT(512, 512, inlen * 8);
+		break;
+	default:
+		return -EINVAL;	/* digest is left untouched */
+	}
+
+#undef PARAM_INIT
+
+	cpacf_klmd(func, &param, in, inlen);	/* hash all of in[] in one call */
+
+	memcpy(digest, &param, digestsize);	/* leading bytes of h[] are the digest */
+
+	return 0;
+}
+
+/*
+ * make_clrkey_token() - wrap the clear key into a pkey clearkey token.
+ * @clrkey, @clrkeylen: the raw hmac clear key
+ * @digestsize: digest size of the hash, selects key type and block size
+ * @dest: receives the token header followed by the key field
+ *
+ * Bytes of the key field beyond the stored key are left untouched.
+ * Returns 0, -EINVAL for an unknown digest size or hash_key()'s error.
+ */
+static inline int make_clrkey_token(const u8 *clrkey, size_t clrkeylen,
+				    unsigned int digestsize, u8 *dest)
+{
+	struct hmac_clrkey_token *tok = (struct hmac_clrkey_token *)dest;
+	unsigned int bs;
+
+	tok->type = 0x00;
+	tok->version = 0x02;
+
+	if (digestsize == SHA224_DIGEST_SIZE ||
+	    digestsize == SHA256_DIGEST_SIZE) {
+		tok->keytype = PKEY_KEYTYPE_HMAC_512;
+		bs = 64;
+	} else if (digestsize == SHA384_DIGEST_SIZE ||
+		   digestsize == SHA512_DIGEST_SIZE) {
+		tok->keytype = PKEY_KEYTYPE_HMAC_1024;
+		bs = 128;
+	} else {
+		return -EINVAL;
+	}
+	tok->len = bs;
+
+	/* a key longer than the block size is replaced by its digest */
+	if (clrkeylen > bs)
+		return hash_key(clrkey, clrkeylen, tok->key, digestsize);
+
+	memcpy(tok->key, clrkey, clrkeylen);
+	return 0;
+}
+
+/*
+ * phmac_tfm_ctx_setkey() - Store the key material in the tfm context.
+ * Returns -EINVAL (context untouched) if the key exceeds the key buffer.
+ */
+static inline int phmac_tfm_ctx_setkey(struct phmac_tfm_ctx *tfm_ctx,
+				       const u8 *key, unsigned int keylen)
+{
+	if (keylen <= sizeof(tfm_ctx->keybuf)) {
+		tfm_ctx->keylen = keylen;
+		memcpy(tfm_ctx->keybuf, key, keylen);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Convert raw key material into a protected key via PKEY api. May sleep.
+ * Returns 0, -EINTR on an interrupted sleep or the last pkey rc.
+ */
+static inline int convert_key(const u8 *key, unsigned int keylen,
+			      struct phmac_protkey *pk)
+{
+	int rc, i;
+
+	pk->len = sizeof(pk->protkey);	/* in: size of the protkey buffer */
+
+	/*
+	 * Up to 5 attempts. After -EBUSY (busy card) sleep 200, 400, 800
+	 * and 1600 ms - in total 3 s; other failures retry right away.
+	 */
+	for (rc = -EIO, i = 0; rc && i < 5; i++) {
+		if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) {
+			rc = -EINTR;
+			goto out;
+		}
+		rc = pkey_key2protkey(key, keylen,
+				      pk->protkey, &pk->len, &pk->type,
+				      PKEY_XFLAG_NOMEMALLOC);
+	}
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+/*
+ * (Re-)Convert the raw key material from the tfm ctx into a protected
+ * key via convert_key() function. Update the pk_state and, on success,
+ * the protected key (type, len and value) in the tfm context.
+ * Please note this function may be invoked concurrently with the very
+ * same tfm context. The pk_lock spinlock in the context ensures an
+ * atomic update of the pk and the pk state but does not guarantee any
+ * order of update. So a fresh converted valid protected key may get
+ * updated with an 'old' expired key value. As the cpacf instructions
+ * detect this, refuse to operate with an invalid key and the calling
+ * code triggers a (re-)conversion this does no harm. This may lead to
+ * unnecessary additional conversion but never to invalid data on the
+ * hash operation.
+ */
+static int phmac_convert_key(struct phmac_tfm_ctx *tfm_ctx)
+{
+	struct phmac_protkey pk;	/* stack copy, filled outside of the lock */
+	int rc;
+
+	spin_lock_bh(&tfm_ctx->pk_lock);
+	tfm_ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+	spin_unlock_bh(&tfm_ctx->pk_lock);
+
+	rc = convert_key(tfm_ctx->keybuf, tfm_ctx->keylen, &pk);
+
+	/* update context */
+	spin_lock_bh(&tfm_ctx->pk_lock);
+	if (rc) {
+		tfm_ctx->pk_state = rc;	/* keep the failure rc, pk is not touched */
+	} else {
+		tfm_ctx->pk_state = PK_STATE_VALID;
+		tfm_ctx->pk = pk;
+	}
+	spin_unlock_bh(&tfm_ctx->pk_lock);
+
+	memzero_explicit(&pk, sizeof(pk));	/* wipe the stack copy of the key */
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+/*
+ * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize
+ *
+ * @param: KMAC param block, the imbl field sits at SHA2_IMBL_OFFSET(blocksize)
+ * @buflen_lo, @buflen_hi: low and high 64 bits of the message length in bytes
+ * @blocksize: SHA256_BLOCK_SIZE stores a u64 (only buflen_lo is used),
+ *	       SHA512_BLOCK_SIZE stores a u128; any other value is ignored
+ */
+static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo,
+				      u64 buflen_hi, unsigned int blocksize)
+{
+	void *imbl = param + SHA2_IMBL_OFFSET(blocksize);
+	u128 nbytes = ((u128)buflen_hi << 64) + buflen_lo;
+
+	if (blocksize == SHA256_BLOCK_SIZE)
+		*(u64 *)imbl = buflen_lo * BITS_PER_BYTE;
+	else if (blocksize == SHA512_BLOCK_SIZE)
+		*(u128 *)imbl = nbytes << 3;
+}
+
+static int phmac_kmac_update(struct ahash_request *req, bool maysleep)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+	struct hash_walk_helper *hwh = &req_ctx->hwh;
+	unsigned int bs = crypto_ahash_blocksize(tfm);
+	unsigned int offset, k, n;
+	int rc = 0;
+
+	/*
+	 * Feeds the mapped walk block-wise into KMAC. With !maysleep an invalid
+	 * protected key yields -EKEYEXPIRED instead of a key re-conversion.
+	 * On partial processing or failure the walk is NOT unmapped here, so a
+	 * follow up task may reuse it or must unmap it on unrecoverable failure.
+	 */
+
+	while (hwh->walkbytes > 0) {
+		/* check sha2 context buffer */
+		offset = ctx->buflen[0] % bs;	/* bytes already held in ctx->buf */
+		if (offset + hwh->walkbytes < bs)
+			goto store;	/* no full block yet, only buffer the data */
+
+		if (offset) {
+			/* fill ctx buffer up to blocksize and process this block */
+			n = bs - offset;
+			memcpy(ctx->buf + offset, hwh->walkaddr, n);
+			ctx->gr0.iimp = 1;
+			for (;;) {
+				k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs);
+				if (likely(k == bs))
+					break;
+				if (unlikely(k > 0)) {
+					/*
+					 * Can't deal with hunks smaller than blocksize.
+					 * And kmac should always return the nr of
+					 * processed bytes as 0 or a multiple of the
+					 * blocksize.
+					 */
+					rc = -EIO;
+					goto out;
+				}
+				/* protected key is invalid and needs re-conversion */
+				if (!maysleep) {
+					rc = -EKEYEXPIRED;
+					goto out;
+				}
+				rc = phmac_convert_key(tfm_ctx);
+				if (rc)
+					goto out;
+				/* take over the fresh protected key, then retry the block */
+				spin_lock_bh(&tfm_ctx->pk_lock);
+				memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+				       tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+				spin_unlock_bh(&tfm_ctx->pk_lock);
+			}
+			ctx->buflen[0] += n;
+			if (ctx->buflen[0] < n)	/* low word wrapped, carry into high */
+				ctx->buflen[1]++;
+			rc = hwh_advance(hwh, n);
+			if (unlikely(rc))
+				goto out;
+			offset = 0;
+		}
+
+		/* process as many blocks as possible from the walk */
+		while (hwh->walkbytes >= bs) {
+			n = (hwh->walkbytes / bs) * bs;
+			ctx->gr0.iimp = 1;
+			k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, hwh->walkaddr, n);
+			if (likely(k > 0)) {
+				/* account for the k bytes kmac did process */
+				ctx->buflen[0] += k;
+				if (ctx->buflen[0] < k)
+					ctx->buflen[1]++;
+				rc = hwh_advance(hwh, k);
+				if (unlikely(rc))
+					goto out;
+			}
+			if (unlikely(k < n)) {
+				/* protected key is invalid and needs re-conversion */
+				if (!maysleep) {
+					rc = -EKEYEXPIRED;
+					goto out;
+				}
+				rc = phmac_convert_key(tfm_ctx);
+				if (rc)
+					goto out;
+				spin_lock_bh(&tfm_ctx->pk_lock);
+				memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+				       tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+				spin_unlock_bh(&tfm_ctx->pk_lock);
+			}
+		}
+
+store:
+		/* store incomplete block in context buffer */
+		if (hwh->walkbytes) {
+			memcpy(ctx->buf + offset, hwh->walkaddr, hwh->walkbytes);
+			ctx->buflen[0] += hwh->walkbytes;
+			if (ctx->buflen[0] < hwh->walkbytes)
+				ctx->buflen[1]++;
+			rc = hwh_advance(hwh, hwh->walkbytes);
+			if (unlikely(rc))
+				goto out;
+		}
+
+	} /* end of while (hwh->walkbytes > 0) */
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int phmac_kmac_final(struct ahash_request *req, bool maysleep)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+	unsigned int ds = crypto_ahash_digestsize(tfm);
+	unsigned int bs = crypto_ahash_blocksize(tfm);
+	unsigned int k, n;
+	int rc = 0;
+
+	n = ctx->buflen[0] % bs;	/* bytes still buffered in ctx->buf */
+	ctx->gr0.iimp = 0;		/* update() runs kmac with iimp = 1 */
+	kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs);
+	for (;;) {
+		k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, n);
+		if (likely(k == n))
+			break;
+		if (unlikely(k > 0)) {
+			/* Only a part of the buffered data got processed. */
+			rc = -EIO;
+			goto out;
+		}
+		/* protected key is invalid and needs re-conversion */
+		if (!maysleep) {
+			rc = -EKEYEXPIRED;
+			goto out;
+		}
+		rc = phmac_convert_key(tfm_ctx);
+		if (rc)
+			goto out;
+		spin_lock_bh(&tfm_ctx->pk_lock);
+		memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+		       tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+		spin_unlock_bh(&tfm_ctx->pk_lock);
+	}
+
+	memcpy(req->result, ctx->param, ds);	/* result = first ds bytes of the cv */
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int phmac_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+	unsigned int bs = crypto_ahash_blocksize(tfm);
+	int rc = 0;
+
+	/* zero request context (includes the kmac sha2 context) */
+	memset(req_ctx, 0, sizeof(*req_ctx));
+
+	/*
+	 * setkey() should have set a valid fc into the tfm context, fail
+	 * with -ENOKEY if not. Else copy it into gr0 of the kmac context.
+	 */
+	if (!tfm_ctx->fc) {
+		rc = -ENOKEY;
+		goto out;
+	}
+	kmac_ctx->gr0.fc = tfm_ctx->fc;
+
+	/*
+	 * Copy the pk from tfm ctx into kmac ctx. The protected key
+	 * may be outdated but update() and final() will handle this.
+	 */
+	spin_lock_bh(&tfm_ctx->pk_lock);
+	memcpy(kmac_ctx->param + SHA2_KEY_OFFSET(bs),
+	       tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+	spin_unlock_bh(&tfm_ctx->pk_lock);
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int phmac_update(struct ahash_request *req)
+{
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+	struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+	struct hash_walk_helper *hwh = &req_ctx->hwh;
+	int rc;
+
+	/* prep the walk in the request context */
+	rc = hwh_prepare(req, hwh);
+	if (rc)
+		goto out;
+
+	/* Try synchronous operation if no active engine usage */
+	if (!atomic_read(&tfm_ctx->via_engine_ctr)) {
+		rc = phmac_kmac_update(req, false);
+		if (rc == 0)
+			goto out;
+	}
+
+	/*
+	 * Sync operation not attempted (requests already enqueued via
+	 * engine, rc is still 0) or key expired: fall back to async.
+	 * Mark tfm as using engine to serialize requests.
+	 */
+	if (rc == 0 || rc == -EKEYEXPIRED) {
+		atomic_inc(&tfm_ctx->via_engine_ctr);
+		rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req);
+		if (rc != -EINPROGRESS)
+			atomic_dec(&tfm_ctx->via_engine_ctr);
+	}
+
+	if (rc != -EINPROGRESS) {
+		hwh_advance(hwh, rc);	/* pass the error on to end the walk */
+		memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+	}
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int phmac_final(struct ahash_request *req)
+{
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+	struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+	int rc = 0;
+
+	/* Try synchronous operation if no active engine usage */
+	if (!atomic_read(&tfm_ctx->via_engine_ctr)) {
+		rc = phmac_kmac_final(req, false);
+		if (rc == 0)
+			goto out;
+	}
+
+	/*
+	 * Sync operation not attempted (requests already enqueued via
+	 * engine, rc is still 0) or key expired: fall back to async.
+	 * Mark tfm as using engine to serialize requests.
+	 */
+	if (rc == 0 || rc == -EKEYEXPIRED) {
+		req->nbytes = 0;	/* presumably: no input data for the engine */
+		req_ctx->final = true;
+		atomic_inc(&tfm_ctx->via_engine_ctr);
+		rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req);
+		if (rc != -EINPROGRESS)
+			atomic_dec(&tfm_ctx->via_engine_ctr);
+	}
+
+out:
+	if (rc != -EINPROGRESS)	/* done or failed: wipe the kmac state */
+		memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int phmac_finup(struct ahash_request *req)
+{
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+	struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+	struct hash_walk_helper *hwh = &req_ctx->hwh;
+	int rc;
+
+	/* prep the walk in the request context */
+	rc = hwh_prepare(req, hwh);
+	if (rc)
+		goto out;
+
+	/* Try synchronous operations if no active engine usage */
+	if (!atomic_read(&tfm_ctx->via_engine_ctr)) {
+		rc = phmac_kmac_update(req, false);
+		if (rc == 0)
+			req->nbytes = 0;	/* the sync update took all input */
+	}
+	if (!rc && !req->nbytes && !atomic_read(&tfm_ctx->via_engine_ctr)) {
+		rc = phmac_kmac_final(req, false);
+		if (rc == 0)
+			goto out;
+	}
+
+	/*
+	 * Sync operation not attempted or not finished (requests
+	 * enqueued via engine) or key expired: fall back to async.
+	 * Mark tfm as using engine to serialize requests.
+	 */
+	if (rc == 0 || rc == -EKEYEXPIRED) {
+		req_ctx->final = true;
+		atomic_inc(&tfm_ctx->via_engine_ctr);
+		rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req);
+		if (rc != -EINPROGRESS)
+			atomic_dec(&tfm_ctx->via_engine_ctr);
+	}
+
+	if (rc != -EINPROGRESS)
+		hwh_advance(hwh, rc);	/* pass the error on to end the walk */
+
+out:
+	if (rc != -EINPROGRESS)	/* done or failed: wipe the kmac state */
+		memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int phmac_digest(struct ahash_request *req)
+{
+	int rc;
+
+	rc = phmac_init(req);	/* ahash .digest: init followed by finup */
+	if (rc)
+		goto out;
+
+	rc = phmac_finup(req);	/* may return -EINPROGRESS when handed to the engine */
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int phmac_setkey(struct crypto_ahash *tfm,
+			const u8 *key, unsigned int keylen)
+{
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+	unsigned int ds = crypto_ahash_digestsize(tfm);
+	unsigned int bs = crypto_ahash_blocksize(tfm);
+	unsigned int tmpkeylen;
+	u8 *tmpkey = NULL;	/* only allocated on the selftest path, freed at out */
+	int rc = 0;
+
+	if (!crypto_ahash_tested(tfm)) {
+		/*
+		 * selftest running: key is a raw hmac clear key and needs
+		 * to get embedded into a 'clear key token' in order to have
+		 * it correctly processed by the pkey module.
+		 */
+		tmpkeylen = sizeof(struct hmac_clrkey_token) + bs;
+		tmpkey = kzalloc(tmpkeylen, GFP_KERNEL);
+		if (!tmpkey) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		rc = make_clrkey_token(key, keylen, ds, tmpkey);
+		if (rc)
+			goto out;
+		keylen = tmpkeylen;	/* from here on use the token instead of the raw key */
+		key = tmpkey;
+	}
+
+	/* copy raw key into tfm context */
+	rc = phmac_tfm_ctx_setkey(tfm_ctx, key, keylen);
+	if (rc)
+		goto out;
+
+	/* convert raw key into protected key */
+	rc = phmac_convert_key(tfm_ctx);
+	if (rc)
+		goto out;
+
+	/* set fc by digest size; pk type: HMAC_512 (sha224/256), HMAC_1024 (sha384/512) */
+	switch (ds) {
+	case SHA224_DIGEST_SIZE:
+		if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512)
+			rc = -EINVAL;
+		else
+			tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_224;
+		break;
+	case SHA256_DIGEST_SIZE:
+		if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512)
+			rc = -EINVAL;
+		else
+			tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_256;
+		break;
+	case SHA384_DIGEST_SIZE:
+		if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024)
+			rc = -EINVAL;
+		else
+			tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_384;
+		break;
+	case SHA512_DIGEST_SIZE:
+		if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024)
+			rc = -EINVAL;
+		else
+			tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_512;
+		break;
+	default:	/* digest size matches none of the four supported variants */
+		tfm_ctx->fc = 0;
+		rc = -EINVAL;
+	}
+
+out:
+	kfree(tmpkey);	/* still NULL when the selftest path was not taken */
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int phmac_export(struct ahash_request *req, void *out)
+{
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+
+	memcpy(out, ctx, sizeof(*ctx));	/* exported state is the raw kmac_sha2_ctx */
+
+	return 0;
+}
+
+static int phmac_import(struct ahash_request *req, const void *in)
+{
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+
+	memset(req_ctx, 0, sizeof(*req_ctx));	/* resets the whole request context */
+	memcpy(ctx, in, sizeof(*ctx));	/* then only kmac_ctx is restored from in */
+
+	return 0;
+}
+
+static int phmac_init_tfm(struct crypto_ahash *tfm)
+{
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+
+	memset(tfm_ctx, 0, sizeof(*tfm_ctx));	/* also zeroes fc and via_engine_ctr */
+	spin_lock_init(&tfm_ctx->pk_lock);
+
+	crypto_ahash_set_reqsize(tfm, sizeof(struct phmac_req_ctx));	/* per-request ctx size */
+
+	return 0;
+}
+
+static void phmac_exit_tfm(struct crypto_ahash *tfm)
+{
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+
+	memzero_explicit(tfm_ctx->keybuf, sizeof(tfm_ctx->keybuf));	/* wipe the key buffer */
+	memzero_explicit(&tfm_ctx->pk, sizeof(tfm_ctx->pk));	/* wipe the pk struct */
+}
+
+static int phmac_do_one_request(struct crypto_engine *engine, void *areq)
+{
+	struct ahash_request *req = ahash_request_cast(areq);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+	struct hash_walk_helper *hwh = &req_ctx->hwh;
+	int rc = -EINVAL;	/* result if neither the update nor the final step runs */
+
+	/*
+	 * Three kinds of requests come in here:
+	 * update when req->nbytes > 0 and req_ctx->final is false
+	 * final when req->nbytes = 0 and req_ctx->final is true
+	 * finup when req->nbytes > 0 and req_ctx->final is true
+	 * For update and finup the hwh walk needs to be prepared and
+	 * up to date but the actual nr of bytes in req->nbytes may be
+	 * any non zero number. For final there is no hwh walk needed.
+	 */
+
+	if (req->nbytes) {
+		rc = phmac_kmac_update(req, true);
+		if (rc == -EKEYEXPIRED) {
+			/*
+			 * Protected key expired, conversion is in process.
+			 * Trigger a re-schedule of this request by returning
+			 * -ENOSPC ("hardware queue full") to the crypto engine.
+			 * To avoid immediate re-invocation of this callback,
+			 * tell scheduler to voluntarily give up the CPU here.
+			 */
+			pr_debug("rescheduling request\n");
+			cond_resched();
+			return -ENOSPC;	/* not finalized; via_engine_ctr stays incremented */
+		} else if (rc) {
+			hwh_advance(hwh, rc);
+			goto out;
+		}
+		req->nbytes = 0;	/* update done: a retry after -ENOSPC below skips it */
+	}
+
+	if (req_ctx->final) {
+		rc = phmac_kmac_final(req, true);
+		if (rc == -EKEYEXPIRED) {
+			/*
+			 * Protected key expired, conversion is in process.
+			 * Trigger a re-schedule of this request by returning
+			 * -ENOSPC ("hardware queue full") to the crypto engine.
+			 * To avoid immediate re-invocation of this callback,
+			 * tell scheduler to voluntarily give up the CPU here.
+			 */
+			pr_debug("rescheduling request\n");
+			cond_resched();
+			return -ENOSPC;	/* not finalized; via_engine_ctr stays incremented */
+		}
+	}
+
+out:
+	if (rc || req_ctx->final)	/* kmac_ctx survives only a successful plain update */
+		memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+	pr_debug("request complete with rc=%d\n", rc);
+	local_bh_disable();
+	atomic_dec(&tfm_ctx->via_engine_ctr);	/* pairs with the atomic_inc before the transfer */
+	crypto_finalize_hash_request(engine, req, rc);
+	local_bh_enable();
+	return rc;
+}
+
+#define S390_ASYNC_PHMAC_ALG(x)						\
+{									\
+	.base = {							\
+		.init	  = phmac_init,					\
+		.update	  = phmac_update,				\
+		.final	  = phmac_final,				\
+		.finup	  = phmac_finup,				\
+		.digest	  = phmac_digest,				\
+		.setkey	  = phmac_setkey,				\
+		.import	  = phmac_import,				\
+		.export	  = phmac_export,				\
+		.init_tfm = phmac_init_tfm,				\
+		.exit_tfm = phmac_exit_tfm,				\
+		.halg = {						\
+			.digestsize = SHA##x##_DIGEST_SIZE,		\
+			.statesize  = sizeof(struct kmac_sha2_ctx),	\
+			.base = {					\
+				.cra_name = "phmac(sha" #x ")",		\
+				.cra_driver_name = "phmac_s390_sha" #x,	\
+				.cra_blocksize = SHA##x##_BLOCK_SIZE,	\
+				.cra_priority = 400,			\
+				.cra_flags = CRYPTO_ALG_ASYNC |		\
+					     CRYPTO_ALG_NO_FALLBACK,	\
+				.cra_ctxsize = sizeof(struct phmac_tfm_ctx), \
+				.cra_module = THIS_MODULE,		\
+			},						\
+		},							\
+	},								\
+	.op = {								\
+		.do_one_request = phmac_do_one_request,			\
+	},								\
+}
+
+static struct phmac_alg {
+	unsigned int fc;	/* KMAC function code, probed in s390_phmac_init() */
+	struct ahash_engine_alg alg;
+	bool registered;	/* set after successful registration, checked at exit */
+} phmac_algs[] = {
+	{
+		.fc = CPACF_KMAC_PHMAC_SHA_224,
+		.alg = S390_ASYNC_PHMAC_ALG(224),
+	}, {
+		.fc = CPACF_KMAC_PHMAC_SHA_256,
+		.alg = S390_ASYNC_PHMAC_ALG(256),
+	}, {
+		.fc = CPACF_KMAC_PHMAC_SHA_384,
+		.alg = S390_ASYNC_PHMAC_ALG(384),
+	}, {
+		.fc = CPACF_KMAC_PHMAC_SHA_512,
+		.alg = S390_ASYNC_PHMAC_ALG(512),
+	}
+};
+
+static struct miscdevice phmac_dev = {
+	.name	= "phmac",
+	.minor	= MISC_DYNAMIC_MINOR,
+};
+
+static void s390_phmac_exit(void)
+{
+	struct phmac_alg *phmac;
+	int i;
+
+	if (phmac_crypto_engine) {	/* NULL if init failed to alloc or start the engine */
+		crypto_engine_stop(phmac_crypto_engine);
+		crypto_engine_exit(phmac_crypto_engine);
+	}
+
+	for (i = ARRAY_SIZE(phmac_algs) - 1; i >= 0; i--) {	/* reverse of init order */
+		phmac = &phmac_algs[i];
+		if (phmac->registered)	/* skip algs that init did not register */
+			crypto_engine_unregister_ahash(&phmac->alg);
+	}
+
+	misc_deregister(&phmac_dev);
+}
+
+static int __init s390_phmac_init(void)
+{
+	struct phmac_alg *phmac;
+	int i, rc;
+
+	/* for selftest cpacf klmd subfunction is needed */
+	if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_256))
+		return -ENODEV;
+	if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_512))
+		return -ENODEV;
+
+	/* register a simple phmac pseudo misc device */
+	rc = misc_register(&phmac_dev);
+	if (rc)
+		return rc;	/* nothing set up yet, so no cleanup needed */
+
+	/* with this pseudo device alloc and start a crypto engine */
+	phmac_crypto_engine =
+		crypto_engine_alloc_init_and_set(phmac_dev.this_device,
+						 true, false, MAX_QLEN);
+	if (!phmac_crypto_engine) {
+		rc = -ENOMEM;
+		goto out_err;
+	}
+	rc = crypto_engine_start(phmac_crypto_engine);
+	if (rc) {
+		crypto_engine_exit(phmac_crypto_engine);
+		phmac_crypto_engine = NULL;	/* keeps s390_phmac_exit() off the freed engine */
+		goto out_err;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(phmac_algs); i++) {
+		phmac = &phmac_algs[i];
+		if (!cpacf_query_func(CPACF_KMAC, phmac->fc))
+			continue;	/* fc not available: alg stays unregistered */
+		rc = crypto_engine_register_ahash(&phmac->alg);
+		if (rc)
+			goto out_err;
+		phmac->registered = true;
+		pr_debug("%s registered\n", phmac->alg.base.halg.base.cra_name);
+	}
+
+	return 0;
+
+out_err:
+	s390_phmac_exit();	/* undoes everything set up so far, incl. the misc device */
+	return rc;
+}
+
+module_init(s390_phmac_init);
+module_exit(s390_phmac_exit);
+
+MODULE_ALIAS_CRYPTO("phmac(sha224)");
+MODULE_ALIAS_CRYPTO("phmac(sha256)");
+MODULE_ALIAS_CRYPTO("phmac(sha384)");
+MODULE_ALIAS_CRYPTO("phmac(sha512)");
+
+MODULE_DESCRIPTION("S390 HMAC driver for protected keys");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index d757ccbce2b4..cadb4b13622a 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -27,6 +27,9 @@ struct s390_sha_ctx {
 			u64 state[SHA512_DIGEST_SIZE / sizeof(u64)];
 			u64 count_hi;
 		} sha512;
+		struct {
+			__le64 state[SHA3_STATE_SIZE / sizeof(u64)];
+		} sha3;
 	};
 	int func;		/* KIMD function to use */
 	bool first_message_part;
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
deleted file mode 100644
index d229cbd2ba22..000000000000
--- a/arch/s390/crypto/sha1_s390.c
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA1 Secure Hash Algorithm.
- *
- * Derived from cryptoapi implementation, adapted for in-place
- * scatterlist interface.  Originally based on the public domain
- * implementation written by Steve Reid.
- *
- * s390 Version:
- *   Copyright IBM Corp. 2003, 2007
- *   Author(s): Thomas Spatzier
- *		Jan Glauber (jan.glauber@de.ibm.com)
- *
- * Derived from "crypto/sha1_generic.c"
- *   Copyright (c) Alan Smithee.
- *   Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- *   Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- */
-#include <asm/cpacf.h>
-#include <crypto/internal/hash.h>
-#include <crypto/sha1.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include "sha.h"
-
-static int s390_sha1_init(struct shash_desc *desc)
-{
-	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA1_H0;
-	sctx->state[1] = SHA1_H1;
-	sctx->state[2] = SHA1_H2;
-	sctx->state[3] = SHA1_H3;
-	sctx->state[4] = SHA1_H4;
-	sctx->count = 0;
-	sctx->func = CPACF_KIMD_SHA_1;
-
-	return 0;
-}
-
-static int s390_sha1_export(struct shash_desc *desc, void *out)
-{
-	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	struct sha1_state *octx = out;
-
-	octx->count = sctx->count;
-	memcpy(octx->state, sctx->state, sizeof(octx->state));
-	return 0;
-}
-
-static int s390_sha1_import(struct shash_desc *desc, const void *in)
-{
-	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	const struct sha1_state *ictx = in;
-
-	sctx->count = ictx->count;
-	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
-	sctx->func = CPACF_KIMD_SHA_1;
-	return 0;
-}
-
-static struct shash_alg alg = {
-	.digestsize	=	SHA1_DIGEST_SIZE,
-	.init		=	s390_sha1_init,
-	.update		=	s390_sha_update_blocks,
-	.finup		=	s390_sha_finup,
-	.export		=	s390_sha1_export,
-	.import		=	s390_sha1_import,
-	.descsize	=	S390_SHA_CTX_SIZE,
-	.statesize	=	SHA1_STATE_SIZE,
-	.base		=	{
-		.cra_name	=	"sha1",
-		.cra_driver_name=	"sha1-s390",
-		.cra_priority	=	300,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA1_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-static int __init sha1_s390_init(void)
-{
-	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
-		return -ENODEV;
-	return crypto_register_shash(&alg);
-}
-
-static void __exit sha1_s390_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init);
-module_exit(sha1_s390_fini);
-
-MODULE_ALIAS_CRYPTO("sha1");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
index 4a7731ac6bcd..03bb4f4bab70 100644
--- a/arch/s390/crypto/sha3_256_s390.c
+++ b/arch/s390/crypto/sha3_256_s390.c
@@ -35,23 +35,33 @@ static int sha3_256_init(struct shash_desc *desc)
 static int sha3_256_export(struct shash_desc *desc, void *out)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	struct sha3_state *octx = out;
+	union {
+		u8 *u8;
+		u64 *u64;
+	} p = { .u8 = out };
+	int i;
 
 	if (sctx->first_message_part) {
-		memset(sctx->state, 0, sizeof(sctx->state));
-		sctx->first_message_part = 0;
+		memset(out, 0, SHA3_STATE_SIZE);
+		return 0;
 	}
-	memcpy(octx->st, sctx->state, sizeof(octx->st));
+	for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
+		put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++);
 	return 0;
 }
 
 static int sha3_256_import(struct shash_desc *desc, const void *in)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	const struct sha3_state *ictx = in;
-
+	union {
+		const u8 *u8;
+		const u64 *u64;
+	} p = { .u8 = in };
+	int i;
+
+	for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
+		sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++));
 	sctx->count = 0;
-	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
 	sctx->first_message_part = 0;
 	sctx->func = CPACF_KIMD_SHA3_256;
 
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
index 018f02fff444..a5c9690eecb1 100644
--- a/arch/s390/crypto/sha3_512_s390.c
+++ b/arch/s390/crypto/sha3_512_s390.c
@@ -34,24 +34,33 @@ static int sha3_512_init(struct shash_desc *desc)
 static int sha3_512_export(struct shash_desc *desc, void *out)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	struct sha3_state *octx = out;
-
+	union {
+		u8 *u8;
+		u64 *u64;
+	} p = { .u8 = out };
+	int i;
 
 	if (sctx->first_message_part) {
-		memset(sctx->state, 0, sizeof(sctx->state));
-		sctx->first_message_part = 0;
+		memset(out, 0, SHA3_STATE_SIZE);
+		return 0;
 	}
-	memcpy(octx->st, sctx->state, sizeof(octx->st));
+	for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
+		put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++);
 	return 0;
 }
 
 static int sha3_512_import(struct shash_desc *desc, const void *in)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	const struct sha3_state *ictx = in;
-
+	union {
+		const u8 *u8;
+		const u64 *u64;
+	} p = { .u8 = in };
+	int i;
+
+	for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
+		sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++));
 	sctx->count = 0;
-	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
 	sctx->first_message_part = 0;
 	sctx->func = CPACF_KIMD_SHA3_512;
 
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
deleted file mode 100644
index 33711a29618c..000000000000
--- a/arch/s390/crypto/sha512_s390.c
+++ /dev/null
@@ -1,151 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA512 and SHA38 Secure Hash Algorithm.
- *
- * Copyright IBM Corp. 2007
- * Author(s): Jan Glauber (jang@de.ibm.com)
- */
-#include <asm/cpacf.h>
-#include <crypto/internal/hash.h>
-#include <crypto/sha2.h>
-#include <linux/cpufeature.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include "sha.h"
-
-static int sha512_init(struct shash_desc *desc)
-{
-	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->sha512.state[0] = SHA512_H0;
-	ctx->sha512.state[1] = SHA512_H1;
-	ctx->sha512.state[2] = SHA512_H2;
-	ctx->sha512.state[3] = SHA512_H3;
-	ctx->sha512.state[4] = SHA512_H4;
-	ctx->sha512.state[5] = SHA512_H5;
-	ctx->sha512.state[6] = SHA512_H6;
-	ctx->sha512.state[7] = SHA512_H7;
-	ctx->count = 0;
-	ctx->sha512.count_hi = 0;
-	ctx->func = CPACF_KIMD_SHA_512;
-
-	return 0;
-}
-
-static int sha512_export(struct shash_desc *desc, void *out)
-{
-	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	struct sha512_state *octx = out;
-
-	octx->count[0] = sctx->count;
-	octx->count[1] = sctx->sha512.count_hi;
-	memcpy(octx->state, sctx->state, sizeof(octx->state));
-	return 0;
-}
-
-static int sha512_import(struct shash_desc *desc, const void *in)
-{
-	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	const struct sha512_state *ictx = in;
-
-	sctx->count = ictx->count[0];
-	sctx->sha512.count_hi = ictx->count[1];
-
-	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
-	sctx->func = CPACF_KIMD_SHA_512;
-	return 0;
-}
-
-static struct shash_alg sha512_alg = {
-	.digestsize	=	SHA512_DIGEST_SIZE,
-	.init		=	sha512_init,
-	.update		=	s390_sha_update_blocks,
-	.finup		=	s390_sha_finup,
-	.export		=	sha512_export,
-	.import		=	sha512_import,
-	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	SHA512_STATE_SIZE,
-	.base		=	{
-		.cra_name	=	"sha512",
-		.cra_driver_name=	"sha512-s390",
-		.cra_priority	=	300,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA512_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-MODULE_ALIAS_CRYPTO("sha512");
-
-static int sha384_init(struct shash_desc *desc)
-{
-	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->sha512.state[0] = SHA384_H0;
-	ctx->sha512.state[1] = SHA384_H1;
-	ctx->sha512.state[2] = SHA384_H2;
-	ctx->sha512.state[3] = SHA384_H3;
-	ctx->sha512.state[4] = SHA384_H4;
-	ctx->sha512.state[5] = SHA384_H5;
-	ctx->sha512.state[6] = SHA384_H6;
-	ctx->sha512.state[7] = SHA384_H7;
-	ctx->count = 0;
-	ctx->sha512.count_hi = 0;
-	ctx->func = CPACF_KIMD_SHA_512;
-
-	return 0;
-}
-
-static struct shash_alg sha384_alg = {
-	.digestsize	=	SHA384_DIGEST_SIZE,
-	.init		=	sha384_init,
-	.update		=	s390_sha_update_blocks,
-	.finup		=	s390_sha_finup,
-	.export		=	sha512_export,
-	.import		=	sha512_import,
-	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	SHA512_STATE_SIZE,
-	.base		=	{
-		.cra_name	=	"sha384",
-		.cra_driver_name=	"sha384-s390",
-		.cra_priority	=	300,
-		.cra_blocksize	=	SHA384_BLOCK_SIZE,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_ctxsize	=	sizeof(struct s390_sha_ctx),
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-MODULE_ALIAS_CRYPTO("sha384");
-
-static int __init init(void)
-{
-	int ret;
-
-	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
-		return -ENODEV;
-	if ((ret = crypto_register_shash(&sha512_alg)) < 0)
-		goto out;
-	if ((ret = crypto_register_shash(&sha384_alg)) < 0)
-		crypto_unregister_shash(&sha512_alg);
-out:
-	return ret;
-}
-
-static void __exit fini(void)
-{
-	crypto_unregister_shash(&sha512_alg);
-	crypto_unregister_shash(&sha384_alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index b5e2c365ea05..d6f839618794 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -9,6 +9,7 @@
  */
 
 #include <crypto/internal/hash.h>
+#include <linux/export.h>
 #include <linux/module.h>
 #include <asm/cpacf.h>
 #include "sha.h"
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index 83ebf54cca6b..4dc2e068e0ff 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -48,7 +48,7 @@ void hypfs_sprp_exit(void);
 
 int __hypfs_fs_init(void);
 
-static inline int hypfs_fs_init(void)
+static __always_inline int hypfs_fs_init(void)
 {
 	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
 		return __hypfs_fs_init();
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
index 7090eff27fef..b5218135b8fe 100644
--- a/arch/s390/hypfs/hypfs_diag.h
+++ b/arch/s390/hypfs/hypfs_diag.h
@@ -19,7 +19,7 @@ int diag204_store(void *buf, int pages);
 int __hypfs_diag_fs_init(void);
 void __hypfs_diag_fs_exit(void);
 
-static inline int hypfs_diag_fs_init(void)
+static __always_inline int hypfs_diag_fs_init(void)
 {
 	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
 		return __hypfs_diag_fs_init();
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index c7bf60a541e9..1c56480def9e 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -51,7 +51,7 @@
 					 ALT_TYPE_SPEC << ALT_TYPE_SHIFT	| \
 					 (facility) << ALT_DATA_SHIFT)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/types.h>
 #include <linux/stddef.h>
@@ -183,7 +183,7 @@ static inline void apply_alternatives(struct alt_instr *start, struct alt_instr
 /* Use this macro if clobbers are needed without inputs. */
 #define ASM_NO_INPUT_CLOBBER(clobber...) : clobber
 
-#else  /* __ASSEMBLY__ */
+#else  /* __ASSEMBLER__ */
 
 /*
  * Issue one struct alt_instr descriptor entry (need to put it into
@@ -233,6 +233,6 @@ static inline void apply_alternatives(struct alt_instr *start, struct alt_instr
 	.popsection
 .endm
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* _ASM_S390_ALTERNATIVE_H */
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 395b02d6a133..352108727d7e 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -103,7 +103,7 @@ struct ap_tapq_hwinfo {
 			unsigned int accel :  1; /* A */
 			unsigned int ep11  :  1; /* X */
 			unsigned int apxa  :  1; /* APXA */
-			unsigned int	   :  1;
+			unsigned int slcf  :  1; /* Cmd filtering avail. */
 			unsigned int class :  8;
 			unsigned int bs	   :  2; /* SE bind/assoc */
 			unsigned int	   : 14;
diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h
index 11f615eb0066..1cfffad9eea0 100644
--- a/arch/s390/include/asm/asm-const.h
+++ b/arch/s390/include/asm/asm-const.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_S390_ASM_CONST_H
 #define _ASM_S390_ASM_CONST_H
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 #  define stringify_in_c(...)	__VA_ARGS__
 #else
 /* This version of stringify will deal with commas... */
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 54cb97603ec0..4bc5317fbb12 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -129,6 +129,10 @@
 #define CPACF_KMAC_HMAC_SHA_256	0x71
 #define CPACF_KMAC_HMAC_SHA_384	0x72
 #define CPACF_KMAC_HMAC_SHA_512	0x73
+#define CPACF_KMAC_PHMAC_SHA_224	0x78
+#define CPACF_KMAC_PHMAC_SHA_256	0x79
+#define CPACF_KMAC_PHMAC_SHA_384	0x7a
+#define CPACF_KMAC_PHMAC_SHA_512	0x7b
 
 /*
  * Function codes for the PCKMO (PERFORM CRYPTOGRAPHIC KEY MANAGEMENT)
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index 26c710cd3485..5672e3fab52b 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -9,7 +9,7 @@
 #ifndef _ASM_S390_CPU_H
 #define _ASM_S390_CPU_H
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/types.h>
 #include <linux/jump_label.h>
@@ -24,5 +24,5 @@ struct cpuid
 
 DECLARE_STATIC_KEY_FALSE(cpu_has_bear);
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/cpu_mf-insn.h b/arch/s390/include/asm/cpu_mf-insn.h
index a68b362e0964..941663939cc7 100644
--- a/arch/s390/include/asm/cpu_mf-insn.h
+++ b/arch/s390/include/asm/cpu_mf-insn.h
@@ -8,7 +8,7 @@
 #ifndef _ASM_S390_CPU_MF_INSN_H
 #define _ASM_S390_CPU_MF_INSN_H
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 /* Macro to generate the STCCTM instruction with a customized
  * M3 field designating the counter set.
@@ -17,6 +17,6 @@
 	.insn	rsy,0xeb0000000017,\r1,\m3 & 0xf,\db2
 .endm
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif
diff --git a/arch/s390/include/asm/ctlreg.h b/arch/s390/include/asm/ctlreg.h
index e6527f51ad0b..e93cc240a1ed 100644
--- a/arch/s390/include/asm/ctlreg.h
+++ b/arch/s390/include/asm/ctlreg.h
@@ -80,7 +80,7 @@
 #define CR14_EXTERNAL_DAMAGE_SUBMASK		BIT(CR14_EXTERNAL_DAMAGE_SUBMASK_BIT)
 #define CR14_WARNING_SUBMASK			BIT(CR14_WARNING_SUBMASK_BIT)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/bug.h>
 
@@ -252,5 +252,5 @@ union ctlreg15 {
 	};
 };
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* __ASM_S390_CTLREG_H */
diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h
index 390906b8e386..e3ad6798d0cd 100644
--- a/arch/s390/include/asm/dwarf.h
+++ b/arch/s390/include/asm/dwarf.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_S390_DWARF_H
 #define _ASM_S390_DWARF_H
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 #define CFI_STARTPROC		.cfi_startproc
 #define CFI_ENDPROC		.cfi_endproc
@@ -33,6 +33,6 @@
 	.cfi_sections .eh_frame, .debug_frame
 #endif
 
-#endif	/* __ASSEMBLY__ */
+#endif	/* __ASSEMBLER__ */
 
 #endif	/* _ASM_S390_DWARF_H */
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 35555c944630..979af986a8fe 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -59,4 +59,14 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 
 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
 
+static __always_inline bool arch_in_rcu_eqs(void)
+{
+	if (IS_ENABLED(CONFIG_KVM))
+		return current->flags & PF_VCPU;	/* true while current has PF_VCPU set */
+
+	return false;	/* without CONFIG_KVM this is always false */
+}
+
+#define arch_in_rcu_eqs arch_in_rcu_eqs
+
 #endif
diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h
index e0a06060afdd..225ee89c3f5e 100644
--- a/arch/s390/include/asm/extmem.h
+++ b/arch/s390/include/asm/extmem.h
@@ -6,7 +6,7 @@
 
 #ifndef _ASM_S390X_DCSS_H
 #define _ASM_S390X_DCSS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 /*
  * DCSS segment is defined as a contiguous range of pages using DEFSEG command.
diff --git a/arch/s390/include/asm/fpu-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h
index d296322be4bc..cc0468fdf2d0 100644
--- a/arch/s390/include/asm/fpu-insn-asm.h
+++ b/arch/s390/include/asm/fpu-insn-asm.h
@@ -16,7 +16,7 @@
 #error only <asm/fpu-insn.h> can be included directly
 #endif
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 /* Macros to generate vector instruction byte code */
 
@@ -750,5 +750,5 @@
 	MRXBOPC	0, 0x77, v1, v2, v3
 .endm
 
-#endif	/* __ASSEMBLY__ */
+#endif	/* __ASSEMBLER__ */
 #endif	/* __ASM_S390_FPU_INSN_ASM_H */
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
index f668bffd6dd3..135bb89c0a89 100644
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -9,7 +9,7 @@
 
 #include <asm/fpu-insn-asm.h>
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/instrumented.h>
 #include <asm/asm-extable.h>
@@ -475,5 +475,5 @@ static __always_inline void fpu_vzero(u8 v)
 		     : "memory");
 }
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif	/* __ASM_S390_FPU_INSN_H */
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 185331e91f83..bee2d16c2951 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -5,7 +5,7 @@
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #define MCOUNT_INSN_SIZE	6
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 #include <asm/stacktrace.h>
 
 static __always_inline unsigned long return_address(unsigned int n)
@@ -134,7 +134,7 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *op, struct ftrace_regs *fregs);
 #define ftrace_graph_func ftrace_graph_func
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #ifdef CONFIG_FUNCTION_TRACER
 
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 9f2814d0e1e9..66c5808fd011 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -110,7 +110,6 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
 unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
 int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
 void __gmap_zap(struct gmap *, unsigned long gaddr);
 void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
 
@@ -134,7 +133,6 @@ int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned
 
 void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
 			     unsigned long gaddr, unsigned long vmaddr);
-int s390_disable_cow_sharing(void);
 int s390_replace_asce(struct gmap *gmap);
 void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
 int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
new file mode 100644
index 000000000000..5356446a61c4
--- /dev/null
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2025
+ */
+
+#ifndef _ASM_S390_GMAP_HELPERS_H
+#define _ASM_S390_GMAP_HELPERS_H
+
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+int gmap_helper_disable_cow_sharing(void);
+
+#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index bde6a496df5f..697497e7d13e 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -25,7 +25,7 @@
 #define EXT_IRQ_CP_SERVICE	0x2603
 #define EXT_IRQ_IUCV		0x4000
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/hardirq.h>
 #include <linux/percpu.h>
@@ -120,6 +120,6 @@ void irq_subclass_unregister(enum irq_subclass subclass);
 
 #define irq_canonicalize(irq)  (irq)
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index bf78cf381dfc..d9cbc18f6b2e 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -4,7 +4,7 @@
 
 #define HAVE_JUMP_LABEL_BATCH
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/types.h>
 #include <linux/stringify.h>
@@ -51,5 +51,5 @@ label:
 	return true;
 }
 
-#endif  /* __ASSEMBLY__ */
+#endif  /* __ASSEMBLER__ */
 #endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index cb89e54ada25..f870d09515cc 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -716,6 +716,9 @@ extern char sie_exit;
 bool kvm_s390_pv_is_protected(struct kvm *kvm);
 bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
 
+extern int kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
+				   u64 *gprs, unsigned long gasce);
+
 extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
 extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
 
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index e99e9c87b1ce..d9c853db9a40 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -22,7 +22,7 @@
 
 #define LOWCORE_ALT_ADDRESS	_AC(0x70000, UL)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 struct pgm_tdb {
 	u64 data[32];
@@ -237,7 +237,7 @@ static inline void set_prefix(__u32 address)
 	asm volatile("spx %0" : : "Q" (address) : "memory");
 }
 
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
 
 .macro GET_LC reg
 	ALTERNATIVE "lghi	\reg,0",					\
@@ -251,5 +251,5 @@ static inline void set_prefix(__u32 address)
 		ALT_FEATURE(MFEATURE_LOWCORE)
 .endm
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* _ASM_S390_LOWCORE_H */
diff --git a/arch/s390/include/asm/machine.h b/arch/s390/include/asm/machine.h
index 8abe5afdbfc4..9bd4a9dc7778 100644
--- a/arch/s390/include/asm/machine.h
+++ b/arch/s390/include/asm/machine.h
@@ -20,7 +20,7 @@
 #define MFEATURE_LPAR		9
 #define MFEATURE_DIAG288	10
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/bitops.h>
 #include <asm/alternative.h>
@@ -100,5 +100,5 @@ DEFINE_MACHINE_HAS_FEATURE(lpar, MFEATURE_LPAR)
 #define machine_is_kvm	machine_has_kvm
 #define machine_is_lpar	machine_has_lpar
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* __ASM_S390_MACHINE_H */
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
index b85e13505a0f..28c83ec1f243 100644
--- a/arch/s390/include/asm/mem_encrypt.h
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -2,11 +2,11 @@
 #ifndef S390_MEM_ENCRYPT_H__
 #define S390_MEM_ENCRYPT_H__
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 int set_memory_encrypted(unsigned long vaddr, int numpages);
 int set_memory_decrypted(unsigned long vaddr, int numpages);
 
-#endif	/* __ASSEMBLY__ */
+#endif	/* __ASSEMBLER__ */
 
 #endif	/* S390_MEM_ENCRYPT_H__ */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 227466ce9e41..6454c1531854 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -33,7 +33,7 @@
 #define MCCK_CODE_FC_VALID		BIT(63 - 43)
 #define MCCK_CODE_CPU_TIMER_VALID	BIT(63 - 46)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 union mci {
 	unsigned long val;
@@ -104,5 +104,5 @@ void nmi_free_mcesa(u64 *mcesad);
 void s390_handle_mcck(void);
 void s390_do_machine_check(struct pt_regs *regs);
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 192835a3e24d..81c4813cff18 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_S390_EXPOLINE_H
 #define _ASM_S390_EXPOLINE_H
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/types.h>
 #include <asm/facility.h>
@@ -26,8 +26,6 @@ static inline bool nospec_uses_trampoline(void)
 	return __is_defined(CC_USING_EXPOLINE) && !nospec_disable;
 }
 
-#ifdef CONFIG_EXPOLINE_EXTERN
-
 void __s390_indirect_jump_r1(void);
 void __s390_indirect_jump_r2(void);
 void __s390_indirect_jump_r3(void);
@@ -44,8 +42,6 @@ void __s390_indirect_jump_r13(void);
 void __s390_indirect_jump_r14(void);
 void __s390_indirect_jump_r15(void);
 
-#endif
-
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index cb15dd25bf21..6ce6b56e282b 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -3,9 +3,10 @@
 #define _ASM_S390_NOSPEC_ASM_H
 
 #include <linux/linkage.h>
+#include <linux/export.h>
 #include <asm/dwarf.h>
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 #ifdef CC_USING_EXPOLINE
 
@@ -128,6 +129,6 @@
 	.endm
 #endif /* CC_USING_EXPOLINE */
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* _ASM_S390_NOSPEC_ASM_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 4e5dbabdf202..9240a363c893 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -33,7 +33,7 @@
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 
 #include <asm/setup.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 void __storage_key_init_range(unsigned long start, unsigned long end);
 
@@ -130,11 +130,19 @@ typedef pte_t *pgtable_t;
 static inline void page_set_storage_key(unsigned long addr,
 					unsigned char skey, int mapped)
 {
-	if (!mapped)
-		asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0"
-			     : : "d" (skey), "a" (addr));
-	else
-		asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+	if (!mapped) {
+		asm volatile(
+			"	.insn	rrf,0xb22b0000,%[skey],%[addr],8,0"
+			:
+			: [skey] "d" (skey), [addr] "a" (addr)
+			: "memory");
+	} else {
+		asm volatile(
+			"	sske	 %[skey],%[addr]"
+			:
+			: [skey] "d" (skey), [addr] "a" (addr)
+			: "memory");
+	}
 }
 
 static inline unsigned char page_get_storage_key(unsigned long addr)
@@ -274,7 +282,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
 
 #define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
 
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
 
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 42d7cc4262ca..d12e17201661 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -25,6 +25,7 @@ enum zpci_ioat_dtype {
 #define ZPCI_KEY			(PAGE_DEFAULT_KEY << 5)
 
 #define ZPCI_TABLE_SIZE_RT	(1UL << 42)
+#define ZPCI_TABLE_SIZE_RS	(1UL << 53)
 
 #define ZPCI_IOTA_STO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
 #define ZPCI_IOTA_RTTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
@@ -55,6 +56,8 @@ enum zpci_ioat_dtype {
 #define ZPCI_PT_BITS			8
 #define ZPCI_ST_SHIFT			(ZPCI_PT_BITS + PAGE_SHIFT)
 #define ZPCI_RT_SHIFT			(ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RS_SHIFT			(ZPCI_RT_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RF_SHIFT			(ZPCI_RS_SHIFT + ZPCI_TABLE_BITS)
 
 #define ZPCI_RTE_FLAG_MASK		0x3fffUL
 #define ZPCI_RTE_ADDR_MASK		(~ZPCI_RTE_FLAG_MASK)
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 84f6b8357b45..96af7d964014 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -16,10 +16,9 @@
  * For 64 bit module code, the module may be more than 4G above the
  * per cpu area, use weak definitions to force the compiler to
  * generate external references.
+ * Therefore, s390 selects ARCH_MODULE_NEEDS_WEAK_PER_CPU in
+ * arch/s390/Kconfig.
  */
-#if defined(MODULE)
-#define ARCH_NEEDS_WEAK_PER_CPU
-#endif
 
 /*
  * We use a compare-and-swap loop since that uses less cpu cycles than
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 005497ffebda..5345398df653 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -97,7 +97,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 	if (!table)
 		return NULL;
 	crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
-	if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) {
+	if (!pagetable_pmd_ctor(mm, virt_to_ptdesc(table))) {
 		crst_table_free(mm, table);
 		return NULL;
 	}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f8a6b54986ec..c1a7a92f0575 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -915,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd)
 }
 #endif
 
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
 {
 	return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
 }
@@ -963,6 +963,12 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
 	return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY));
 }
 
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_soft_dirty(pmd)		pmd_soft_dirty(pmd)
+#define pmd_swp_mksoft_dirty(pmd)	pmd_mksoft_dirty(pmd)
+#define pmd_swp_clear_soft_dirty(pmd)	pmd_clear_soft_dirty(pmd)
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
 /*
  * query functions pte_write/pte_dirty/pte_young only work if
  * pte_present() is true. Undefined behaviour if not..
@@ -1448,16 +1454,6 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 	return pte_mkyoung(__pte);
 }
 
-static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
-{
-	unsigned long physpage = page_to_phys(page);
-	pte_t __pte = mk_pte_phys(physpage, pgprot);
-
-	if (pte_write(__pte) && PageDirty(page))
-		__pte = pte_mkdirty(__pte);
-	return __pte;
-}
-
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
 #define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
@@ -1879,7 +1875,6 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 #define pmdp_collapse_flush pmdp_collapse_flush
 
 #define pfn_pmd(pfn, pgprot)	mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot))
-#define mk_pmd(page, pgprot)	pfn_pmd(page_to_pfn(page), (pgprot))
 
 static inline int pmd_trans_huge(pmd_t pmd)
 {
@@ -1990,6 +1985,45 @@ static inline unsigned long __swp_offset_rste(swp_entry_t entry)
 
 #define __rste_to_swp_entry(rste)	((swp_entry_t) { rste })
 
+/*
+ * s390 has different layouts for PTE and region / segment table entries (RSTE).
+ * This is also true for swap entries, and their swap type and offset encoding.
+ * For hugetlbfs PTE_MARKER support, s390 has internal __swp_type_rste() and
+ * __swp_offset_rste() helpers to correctly handle RSTE swap entries.
+ *
+ * But common swap code does not know about this difference, and only uses
+ * __swp_type(), __swp_offset() and __swp_entry() helpers for conversion between
+ * arch-dependent and arch-independent representation of swp_entry_t for all
+ * pagetable levels. On s390, those helpers only work for PTE swap entries.
+ *
+ * Therefore, implement __pmd_to_swp_entry() to build a fake PTE swap entry
+ * and return the arch-dependent representation of that. Correspondingly,
+ * implement __swp_entry_to_pmd() to convert that into a proper PMD swap
+ * entry again. With this, the arch-dependent swp_entry_t representation will
+ * always look like a PTE swap entry in common code.
+ *
+ * This is somewhat similar to fake PTEs in hugetlbfs code for s390, but only
+ * requires conversion of the swap type and offset, and not all the possible
+ * PTE bits.
+ */
+static inline swp_entry_t __pmd_to_swp_entry(pmd_t pmd)
+{
+	swp_entry_t rste_entry = __rste_to_swp_entry(pmd_val(pmd));
+	unsigned long type = __swp_type_rste(rste_entry);
+	unsigned long offset = __swp_offset_rste(rste_entry);
+
+	/* Re-encode type and offset in the PTE swap layout used by common code. */
+	return __pte_to_swp_entry(mk_swap_pte(type, offset));
+}
+
+static inline pmd_t __swp_entry_to_pmd(swp_entry_t arch_entry)
+{
+	unsigned long type = __swp_type(arch_entry);
+	unsigned long offset = __swp_offset(arch_entry);
+
+	return __pmd(mk_swap_rste(type, offset));	/* back to RSTE layout */
+}
+
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern void vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 6c8063cb8fe7..6a9c08b80eda 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -26,7 +26,7 @@
 
 #define RESTART_FLAG_CTLREGS	_AC(1 << 0, U)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/cpumask.h>
 #include <linux/linkage.h>
@@ -418,6 +418,6 @@ static __always_inline void bpon(void)
 		);
 }
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 62c0ab4a4b9d..dfa770b15fad 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -54,7 +54,7 @@
 			 PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
 			 PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 struct psw_bits {
 	unsigned long	     :	1;
@@ -265,7 +265,7 @@ static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *r
 	addr = kernel_stack_pointer(regs) + n * sizeof(long);
 	if (!regs_within_kernel_stack(regs, addr))
 		return 0;
-	return READ_ONCE_NOCHECK(addr);
+	return READ_ONCE_NOCHECK(*(unsigned long *)addr);
 }
 
 /**
@@ -292,5 +292,5 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
 	regs->gprs[2] = rc;
 }
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h
index e297bcfc476f..4c7a43bc43a1 100644
--- a/arch/s390/include/asm/purgatory.h
+++ b/arch/s390/include/asm/purgatory.h
@@ -7,11 +7,11 @@
 
 #ifndef _S390_PURGATORY_H_
 #define _S390_PURGATORY_H_
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/purgatory.h>
 
 int verify_sha256_digest(void);
 
-#endif	/* __ASSEMBLY__ */
+#endif	/* __ASSEMBLER__ */
 #endif /* _S390_PURGATORY_H_ */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 1e62919bacf4..0f184dbdbe5e 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -21,7 +21,7 @@
 #define SCLP_ERRNOTIFY_AQ_INFO_LOG		2
 #define SCLP_ERRNOTIFY_AQ_OPTICS_DATA		3
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 #include <linux/uio.h>
 #include <asm/chpid.h>
 #include <asm/cpu.h>
@@ -199,5 +199,5 @@ static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
 	return _sclp_get_core_info(info);
 }
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 031e881b4d88..7c57ac968bf6 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -24,7 +24,7 @@
 
 #define LEGACY_COMMAND_LINE_SIZE	896
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <asm/lowcore.h>
 #include <asm/types.h>
@@ -41,6 +41,8 @@ struct parmarea {
 	char command_line[COMMAND_LINE_SIZE];		/* 0x10480 */
 };
 
+extern char arch_hw_string[128];
+
 extern struct parmarea parmarea;
 
 extern unsigned int zlib_dfltcc_support;
@@ -100,5 +102,5 @@ static __always_inline u32 gen_lpswe(unsigned long addr)
 	BUILD_BUG_ON(addr > 0xfff);
 	return 0xb2b20000 | addr;
 }
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* _ASM_S390_SETUP_H */
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 472943b77066..97d77868f83c 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -36,7 +36,7 @@
 #define SIGP_STATUS_INCORRECT_STATE	0x00000200UL
 #define SIGP_STATUS_NOT_RUNNING		0x00000400UL
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <asm/asm.h>
 
@@ -68,6 +68,6 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
 	return cc;
 }
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/skey.h b/arch/s390/include/asm/skey.h
new file mode 100644
index 000000000000..84e7cf28b712
--- /dev/null
+++ b/arch/s390/include/asm/skey.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SKEY_H
+#define __ASM_SKEY_H
+
+#include <asm/rwonce.h>
+
+struct skey_region {
+	unsigned long start;
+	unsigned long end;
+};
+
+#define SKEY_REGION(_start, _end)			\
+	stringify_in_c(.section .skey_region,"a";)	\
+	stringify_in_c(.balign 8;)			\
+	stringify_in_c(.quad (_start);)			\
+	stringify_in_c(.quad (_end);)			\
+	stringify_in_c(.previous)
+
+extern int skey_regions_initialized;
+extern struct skey_region __skey_region_start[];
+extern struct skey_region __skey_region_end[];
+
+void __skey_regions_initialize(void);
+
+/* Run __skey_regions_initialize() unless skey_regions_initialized reads non-zero. */
+static inline void skey_regions_initialize(void)
+{
+	if (!READ_ONCE(skey_regions_initialized))
+		__skey_regions_initialize();
+}
+
+#endif /* __ASM_SKEY_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 0213ec800b57..bd4cb00ccd5e 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -24,6 +24,18 @@ static inline long syscall_get_nr(struct task_struct *task,
 		(regs->int_code & 0xffff) : -1;
 }
 
+static inline void syscall_set_nr(struct task_struct *task,
+				  struct pt_regs *regs,
+				  int nr)
+{
+	/*
+	 * No validity check as in syscall_get_nr(): this is only called while
+	 * the target task is stopped for tracing on syscall entry.
+	 */
+	regs->int_code &= ~0xffff;
+	regs->int_code |= nr & 0xffff;
+}
+
 static inline void syscall_rollback(struct task_struct *task,
 				    struct pt_regs *regs)
 {
@@ -76,6 +88,15 @@ static inline void syscall_get_arguments(struct task_struct *task,
 	args[0] = regs->orig_gpr2 & mask;
 }
 
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 const unsigned long *args)
+{
+	regs->orig_gpr2 = *args++;		/* args[0] */
+	for (int gpr = 3; gpr <= 7; gpr++)
+		regs->gprs[gpr] = *args++;	/* args[1] .. args[5] */
+}
+
 static inline int syscall_get_arch(struct task_struct *task)
 {
 #ifdef CONFIG_COMPAT
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 391eb04d26d8..f6ed2c8192c8 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -24,7 +24,7 @@
 
 #define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 /*
  * low level task data that entry.S needs immediate access to
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index bed8d0b5a282..59dfb8780f62 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -196,13 +196,6 @@ static inline unsigned long get_tod_clock_fast(void)
 	asm volatile("stckf %0" : "=Q" (clk) : : "cc");
 	return clk;
 }
-
-static inline cycles_t get_cycles(void)
-{
-	return (cycles_t) get_tod_clock() >> 2;
-}
-#define get_cycles get_cycles
-
 int get_phys_clock(unsigned long *clock);
 void init_cpu_timer(void);
 
@@ -230,6 +223,12 @@ static inline unsigned long get_tod_clock_monotonic(void)
 	return tod;
 }
 
+static inline cycles_t get_cycles(void)
+{
+	return (cycles_t)get_tod_clock_monotonic() >> 2;	/* cast applies before the shift */
+}
+#define get_cycles get_cycles
+
 /**
  * tod_to_ns - convert a TOD format value to nanoseconds
  * @todval: to be converted TOD format value
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index f20601995bb0..1e50f6f1ad9d 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -36,11 +36,12 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
 
 #include <asm/tlbflush.h>
 #include <asm-generic/tlb.h>
+#include <asm/gmap.h>
 
 /*
  * Release the page cache reference for a pte removed by
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
- * has already been freed, so just do free_page_and_swap_cache.
+ * has already been freed, so just do free_folio_and_swap_cache.
  *
  * s390 doesn't delay rmap removal.
  */
@@ -49,7 +50,7 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
 {
 	VM_WARN_ON_ONCE(delay_rmap);
 
-	free_page_and_swap_cache(page);
+	free_folio_and_swap_cache(page_folio(page));
 	return false;
 }
 
diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h
index f76e5fdff23a..71c8b6f76cdd 100644
--- a/arch/s390/include/asm/tpi.h
+++ b/arch/s390/include/asm/tpi.h
@@ -5,7 +5,7 @@
 #include <linux/types.h>
 #include <uapi/asm/schid.h>
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 /* I/O-Interruption Code as stored by TEST PENDING INTERRUPTION (TPI). */
 struct tpi_info {
@@ -32,6 +32,6 @@ struct tpi_adapter_info {
 	u32 :27;
 } __packed __aligned(4);
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* _ASM_S390_TPI_H */
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
index 0b5d550a0478..53695b2196f7 100644
--- a/arch/s390/include/asm/types.h
+++ b/arch/s390/include/asm/types.h
@@ -5,7 +5,7 @@
 
 #include <uapi/asm/types.h>
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 union register_pair {
 	unsigned __int128 pair;
@@ -15,5 +15,5 @@ union register_pair {
 	};
 };
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* _ASM_S390_TYPES_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index a43fc88c0050..3e5b8b677057 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -473,188 +473,30 @@ do {									\
 
 void __cmpxchg_user_key_called_with_bad_pointer(void);
 
-#define CMPXCHG_USER_KEY_MAX_LOOPS 128
-
-static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
-					      __uint128_t old, __uint128_t new,
-					      unsigned long key, int size)
+int __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
+			unsigned char old, unsigned char new, unsigned long key);
+int __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
+			unsigned short old, unsigned short new, unsigned long key);
+int __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
+			unsigned int old, unsigned int new, unsigned long key);
+int __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
+			unsigned long old, unsigned long new, unsigned long key);
+int __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
+			 __uint128_t old, __uint128_t new, unsigned long key);
+
+static __always_inline int _cmpxchg_user_key(unsigned long address, void *uval,
+					     __uint128_t old, __uint128_t new,
+					     unsigned long key, int size)
 {
-	bool sacf_flag;
-	int rc = 0;
-
 	switch (size) {
-	case 1: {
-		unsigned int prev, shift, mask, _old, _new;
-		unsigned long count;
-
-		shift = (3 ^ (address & 3)) << 3;
-		address ^= address & 3;
-		_old = ((unsigned int)old & 0xff) << shift;
-		_new = ((unsigned int)new & 0xff) << shift;
-		mask = ~(0xff << shift);
-		sacf_flag = enable_sacf_uaccess();
-		asm_inline volatile(
-			"	spka	0(%[key])\n"
-			"	sacf	256\n"
-			"	llill	%[count],%[max_loops]\n"
-			"0:	l	%[prev],%[address]\n"
-			"1:	nr	%[prev],%[mask]\n"
-			"	xilf	%[mask],0xffffffff\n"
-			"	or	%[new],%[prev]\n"
-			"	or	%[prev],%[tmp]\n"
-			"2:	lr	%[tmp],%[prev]\n"
-			"3:	cs	%[prev],%[new],%[address]\n"
-			"4:	jnl	5f\n"
-			"	xr	%[tmp],%[prev]\n"
-			"	xr	%[new],%[tmp]\n"
-			"	nr	%[tmp],%[mask]\n"
-			"	jnz	5f\n"
-			"	brct	%[count],2b\n"
-			"5:	sacf	768\n"
-			"	spka	%[default_key]\n"
-			EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
-			EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
-			EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
-			EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
-			: [rc] "+&d" (rc),
-			  [prev] "=&d" (prev),
-			  [address] "+Q" (*(int *)address),
-			  [tmp] "+&d" (_old),
-			  [new] "+&d" (_new),
-			  [mask] "+&d" (mask),
-			  [count] "=a" (count)
-			: [key] "%[count]" (key << 4),
-			  [default_key] "J" (PAGE_DEFAULT_KEY),
-			  [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
-			: "memory", "cc");
-		disable_sacf_uaccess(sacf_flag);
-		*(unsigned char *)uval = prev >> shift;
-		if (!count)
-			rc = -EAGAIN;
-		return rc;
-	}
-	case 2: {
-		unsigned int prev, shift, mask, _old, _new;
-		unsigned long count;
-
-		shift = (2 ^ (address & 2)) << 3;
-		address ^= address & 2;
-		_old = ((unsigned int)old & 0xffff) << shift;
-		_new = ((unsigned int)new & 0xffff) << shift;
-		mask = ~(0xffff << shift);
-		sacf_flag = enable_sacf_uaccess();
-		asm_inline volatile(
-			"	spka	0(%[key])\n"
-			"	sacf	256\n"
-			"	llill	%[count],%[max_loops]\n"
-			"0:	l	%[prev],%[address]\n"
-			"1:	nr	%[prev],%[mask]\n"
-			"	xilf	%[mask],0xffffffff\n"
-			"	or	%[new],%[prev]\n"
-			"	or	%[prev],%[tmp]\n"
-			"2:	lr	%[tmp],%[prev]\n"
-			"3:	cs	%[prev],%[new],%[address]\n"
-			"4:	jnl	5f\n"
-			"	xr	%[tmp],%[prev]\n"
-			"	xr	%[new],%[tmp]\n"
-			"	nr	%[tmp],%[mask]\n"
-			"	jnz	5f\n"
-			"	brct	%[count],2b\n"
-			"5:	sacf	768\n"
-			"	spka	%[default_key]\n"
-			EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
-			EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
-			EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
-			EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
-			: [rc] "+&d" (rc),
-			  [prev] "=&d" (prev),
-			  [address] "+Q" (*(int *)address),
-			  [tmp] "+&d" (_old),
-			  [new] "+&d" (_new),
-			  [mask] "+&d" (mask),
-			  [count] "=a" (count)
-			: [key] "%[count]" (key << 4),
-			  [default_key] "J" (PAGE_DEFAULT_KEY),
-			  [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
-			: "memory", "cc");
-		disable_sacf_uaccess(sacf_flag);
-		*(unsigned short *)uval = prev >> shift;
-		if (!count)
-			rc = -EAGAIN;
-		return rc;
-	}
-	case 4:	{
-		unsigned int prev = old;
-
-		sacf_flag = enable_sacf_uaccess();
-		asm_inline volatile(
-			"	spka	0(%[key])\n"
-			"	sacf	256\n"
-			"0:	cs	%[prev],%[new],%[address]\n"
-			"1:	sacf	768\n"
-			"	spka	%[default_key]\n"
-			EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
-			EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
-			: [rc] "+&d" (rc),
-			  [prev] "+&d" (prev),
-			  [address] "+Q" (*(int *)address)
-			: [new] "d" ((unsigned int)new),
-			  [key] "a" (key << 4),
-			  [default_key] "J" (PAGE_DEFAULT_KEY)
-			: "memory", "cc");
-		disable_sacf_uaccess(sacf_flag);
-		*(unsigned int *)uval = prev;
-		return rc;
-	}
-	case 8: {
-		unsigned long prev = old;
-
-		sacf_flag = enable_sacf_uaccess();
-		asm_inline volatile(
-			"	spka	0(%[key])\n"
-			"	sacf	256\n"
-			"0:	csg	%[prev],%[new],%[address]\n"
-			"1:	sacf	768\n"
-			"	spka	%[default_key]\n"
-			EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
-			EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
-			: [rc] "+&d" (rc),
-			  [prev] "+&d" (prev),
-			  [address] "+QS" (*(long *)address)
-			: [new] "d" ((unsigned long)new),
-			  [key] "a" (key << 4),
-			  [default_key] "J" (PAGE_DEFAULT_KEY)
-			: "memory", "cc");
-		disable_sacf_uaccess(sacf_flag);
-		*(unsigned long *)uval = prev;
-		return rc;
-	}
-	case 16: {
-		__uint128_t prev = old;
-
-		sacf_flag = enable_sacf_uaccess();
-		asm_inline volatile(
-			"	spka	0(%[key])\n"
-			"	sacf	256\n"
-			"0:	cdsg	%[prev],%[new],%[address]\n"
-			"1:	sacf	768\n"
-			"	spka	%[default_key]\n"
-			EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
-			EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
-			: [rc] "+&d" (rc),
-			  [prev] "+&d" (prev),
-			  [address] "+QS" (*(__int128_t *)address)
-			: [new] "d" (new),
-			  [key] "a" (key << 4),
-			  [default_key] "J" (PAGE_DEFAULT_KEY)
-			: "memory", "cc");
-		disable_sacf_uaccess(sacf_flag);
-		*(__uint128_t *)uval = prev;
-		return rc;
-	}
+	case 1:  return __cmpxchg_user_key1(address, uval, old, new, key);
+	case 2:  return __cmpxchg_user_key2(address, uval, old, new, key);
+	case 4:  return __cmpxchg_user_key4(address, uval, old, new, key);
+	case 8:  return __cmpxchg_user_key8(address, uval, old, new, key);
+	case 16: return __cmpxchg_user_key16(address, uval, old, new, key);
+	default: __cmpxchg_user_key_called_with_bad_pointer();
 	}
-	__cmpxchg_user_key_called_with_bad_pointer();
-	return rc;
+	return 0;
 }
 
 /**
@@ -686,8 +528,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
 	BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval)));		\
 	might_fault();							\
 	__chk_user_ptr(__ptr);						\
-	__cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval),	\
-			   (old), (new), (key), sizeof(*(__ptr)));	\
+	_cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval),	\
+			  (old), (new), (key), sizeof(*(__ptr)));	\
 })
 
 #endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index b008402ec9aa..8018549a1ad2 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -16,7 +16,6 @@
 #include <linux/bug.h>
 #include <linux/sched.h>
 #include <asm/page.h>
-#include <asm/gmap.h>
 #include <asm/asm.h>
 
 #define UVC_CC_OK	0
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 420a073fdde5..8e2fffa0ca68 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -4,11 +4,11 @@
 
 #include <vdso/datapage.h>
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 int vdso_getcpu_init(void);
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #define __VDSO_PAGES	4
 
diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h
index f8713ce39bb2..6741a27199f8 100644
--- a/arch/s390/include/asm/vdso/getrandom.h
+++ b/arch/s390/include/asm/vdso/getrandom.h
@@ -3,7 +3,7 @@
 #ifndef __ASM_VDSO_GETRANDOM_H
 #define __ASM_VDSO_GETRANDOM_H
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <vdso/datapage.h>
 #include <asm/vdso/vsyscall.h>
@@ -23,6 +23,6 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig
 	return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags);
 }
 
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
 
 #endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
index fb4564308e9d..c31ac5f61c83 100644
--- a/arch/s390/include/asm/vdso/gettimeofday.h
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -16,13 +16,7 @@
 
 static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd)
 {
-	u64 adj, now;
-
-	now = get_tod_clock();
-	adj = vd->arch_data.tod_steering_end - now;
-	if (unlikely((s64) adj > 0))
-		now += (vd->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
-	return now;
+	return get_tod_clock() - vd->arch_data.tod_delta;
 }
 
 static __always_inline
diff --git a/arch/s390/include/asm/vdso/time_data.h b/arch/s390/include/asm/vdso/time_data.h
index 8a08752422e6..25c4e0d9f596 100644
--- a/arch/s390/include/asm/vdso/time_data.h
+++ b/arch/s390/include/asm/vdso/time_data.h
@@ -5,8 +5,7 @@
 #include <linux/types.h>
 
 struct arch_vdso_time_data {
-	__s64 tod_steering_delta;
-	__u64 tod_steering_end;
+	__s64 tod_delta;
 };
 
 #endif /* __S390_ASM_VDSO_TIME_DATA_H */
diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h
index d346ebe51301..b00acec8ddbc 100644
--- a/arch/s390/include/asm/vdso/vsyscall.h
+++ b/arch/s390/include/asm/vdso/vsyscall.h
@@ -2,7 +2,7 @@
 #ifndef __ASM_VDSO_VSYSCALL_H
 #define __ASM_VDSO_VSYSCALL_H
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <linux/hrtimer.h>
 #include <vdso/datapage.h>
@@ -11,6 +11,6 @@
 /* The asm-generic header needs to be included after the definitions above */
 #include <asm-generic/vdso/vsyscall.h>
 
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
 
 #endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index bb0826024bb9..ea202072f1ad 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -242,7 +242,8 @@
 #define PTRACE_OLDSETOPTIONS		21
 #define PTRACE_SYSEMU			31
 #define PTRACE_SYSEMU_SINGLESTEP	32
-#ifndef __ASSEMBLY__
+
+#ifndef __ASSEMBLER__
 #include <linux/stddef.h>
 #include <linux/types.h>
 
@@ -450,6 +451,6 @@ struct user_regs_struct {
 	unsigned long ieee_instruction_pointer;	/* obsolete, always 0 */
 };
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* _UAPI_S390_PTRACE_H */
diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h
index a3e1cf168553..d804d1a5b1b3 100644
--- a/arch/s390/include/uapi/asm/schid.h
+++ b/arch/s390/include/uapi/asm/schid.h
@@ -4,7 +4,7 @@
 
 #include <linux/types.h>
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 struct subchannel_id {
 	__u32 cssid : 8;
@@ -15,6 +15,6 @@ struct subchannel_id {
 	__u32 sch_no : 16;
 } __attribute__ ((packed, aligned(4)));
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* _UAPIASM_SCHID_H */
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
index 84457dbb26b4..4ab468c5032e 100644
--- a/arch/s390/include/uapi/asm/types.h
+++ b/arch/s390/include/uapi/asm/types.h
@@ -10,7 +10,7 @@
 
 #include <asm-generic/int-ll64.h>
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 typedef unsigned long addr_t;
 typedef __signed__ long saddr_t;
@@ -25,6 +25,6 @@ typedef struct {
 	};
 } __attribute__((packed, aligned(4))) __vector128;
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* _UAPI_S390_TYPES_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index db5f3a3faefb..eb06ff888314 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -41,12 +41,12 @@ obj-y	+= processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o vdso.o cpufeature.o
 obj-y	+= sysinfo.o lgr.o os_info.o ctlreg.o
 obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
-obj-y	+= entry.o reipl.o kdebugfs.o alternative.o
+obj-y	+= entry.o reipl.o kdebugfs.o alternative.o skey.o
 obj-y	+= nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
 obj-y	+= smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
 obj-y	+= diag/
 
-extra-y				+= vmlinux.lds
+always-$(KBUILD_BUILTIN)	+= vmlinux.lds
 
 obj-$(CONFIG_SYSFS)		+= nospec-sysfs.o
 CFLAGS_REMOVE_nospec-branch.o	+= $(CC_FLAGS_EXPOLINE)
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
index 4b9b34f95d72..3bebc47beeab 100644
--- a/arch/s390/kernel/cpacf.c
+++ b/arch/s390/kernel/cpacf.c
@@ -101,7 +101,7 @@ static const struct bin_attribute *const cpacf_attrs[] = {
 
 static const struct attribute_group cpacf_attr_grp = {
 	.name = "cpacf",
-	.bin_attrs_new = cpacf_attrs,
+	.bin_attrs = cpacf_attrs,
 };
 
 static int __init cpacf_init(void)
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
index 76210f001028..c9eef9ed876b 100644
--- a/arch/s390/kernel/cpufeature.c
+++ b/arch/s390/kernel/cpufeature.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/cpufeature.h>
+#include <linux/export.h>
 #include <linux/bug.h>
 #include <asm/machine.h>
 #include <asm/elf.h>
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index adb164223f8c..d4839de8ce9d 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/crash_dump.h>
+#include <linux/export.h>
 #include <asm/lowcore.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c
index 8cc26cf2c64a..a0501f4c7e7a 100644
--- a/arch/s390/kernel/ctlreg.c
+++ b/arch/s390/kernel/ctlreg.c
@@ -5,6 +5,7 @@
 
 #include <linux/irqflags.h>
 #include <linux/spinlock.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/smp.h>
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 2a41be2f7925..c62100dc62c8 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1677,7 +1677,7 @@ EXPORT_SYMBOL(debug_dflt_header_fn);
 
 /*
  * prints debug data sprintf-formatted:
- * debug_sprinf_event/exception calls must be used together with this view
+ * debug_sprintf_event/exception calls must be used together with this view
  */
 
 #define DEBUG_SPRINTF_MAX_ARGS 10
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 94eb8168ea44..63a1d4226ff8 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -17,7 +17,6 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
-#include <linux/export.h>
 #include <linux/kallsyms.h>
 #include <linux/reboot.h>
 #include <linux/kprobes.h>
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 54cf0923050f..9adfbdd377dc 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -105,6 +105,8 @@ static inline void strim_all(char *str)
 	}
 }
 
+char arch_hw_string[128];
+
 static noinline __init void setup_arch_string(void)
 {
 	struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
@@ -131,6 +133,7 @@ static noinline __init void setup_arch_string(void)
 			machine_is_vm() ? "z/VM" :
 			machine_is_kvm() ? "KVM" : "unknown");
 	}
+	sprintf(arch_hw_string, "HW: %s (%s)", mstr, hvstr);
 	dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
 }
 
@@ -154,6 +157,7 @@ void __init __do_early_pgm_check(struct pt_regs *regs)
 
 	regs->int_code = lc->pgm_int_code;
 	regs->int_parm_long = lc->trans_exc_code;
+	regs->last_break = lc->pgm_last_break;
 	ip = __rewind_psw(regs->psw, regs->int_code >> 16);
 
 	/* Monitor Event? Might be a warning */
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 0f00f4b06d51..75b0fbb236d0 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -124,7 +124,7 @@ _LPP_OFFSET	= __LC_LPP
 #endif
 
 	.macro STACKLEAK_ERASE
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#ifdef CONFIG_KSTACK_ERASE
 	brasl	%r14,stackleak_erase_on_task_stack
 #endif
 	.endm
diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c
index f02127219a27..d028b0be5c1d 100644
--- a/arch/s390/kernel/facility.c
+++ b/arch/s390/kernel/facility.c
@@ -3,6 +3,7 @@
  * Copyright IBM Corp. 2023
  */
 
+#include <linux/export.h>
 #include <asm/facility.h>
 
 unsigned int stfle_size(void)
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 6f2e87920288..03a8973aec3c 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -5,6 +5,8 @@
  * Copyright IBM Corp. 2015
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
+
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index ff15f91affde..961a3d60a4dd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -596,7 +596,7 @@ static struct attribute *ipl_fcp_attrs[] = {
 
 static const struct attribute_group ipl_fcp_attr_group = {
 	.attrs = ipl_fcp_attrs,
-	.bin_attrs_new = ipl_fcp_bin_attrs,
+	.bin_attrs = ipl_fcp_bin_attrs,
 };
 
 static struct attribute *ipl_nvme_attrs[] = {
@@ -610,7 +610,7 @@ static struct attribute *ipl_nvme_attrs[] = {
 
 static const struct attribute_group ipl_nvme_attr_group = {
 	.attrs = ipl_nvme_attrs,
-	.bin_attrs_new = ipl_nvme_bin_attrs,
+	.bin_attrs = ipl_nvme_bin_attrs,
 };
 
 static struct attribute *ipl_eckd_attrs[] = {
@@ -623,7 +623,7 @@ static struct attribute *ipl_eckd_attrs[] = {
 
 static const struct attribute_group ipl_eckd_attr_group = {
 	.attrs = ipl_eckd_attrs,
-	.bin_attrs_new = ipl_eckd_bin_attrs,
+	.bin_attrs = ipl_eckd_bin_attrs,
 };
 
 /* CCW ipl device attributes */
@@ -920,7 +920,7 @@ static struct attribute *reipl_fcp_attrs[] = {
 
 static const struct attribute_group reipl_fcp_attr_group = {
 	.attrs = reipl_fcp_attrs,
-	.bin_attrs_new = reipl_fcp_bin_attrs,
+	.bin_attrs = reipl_fcp_bin_attrs,
 };
 
 static struct kobj_attribute sys_reipl_fcp_clear_attr =
@@ -958,7 +958,7 @@ static struct attribute *reipl_nvme_attrs[] = {
 
 static const struct attribute_group reipl_nvme_attr_group = {
 	.attrs = reipl_nvme_attrs,
-	.bin_attrs_new = reipl_nvme_bin_attrs
+	.bin_attrs = reipl_nvme_bin_attrs
 };
 
 static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
@@ -1051,7 +1051,7 @@ static struct attribute *reipl_eckd_attrs[] = {
 
 static const struct attribute_group reipl_eckd_attr_group = {
 	.attrs = reipl_eckd_attrs,
-	.bin_attrs_new = reipl_eckd_bin_attrs
+	.bin_attrs = reipl_eckd_bin_attrs
 };
 
 static ssize_t reipl_eckd_clear_show(struct kobject *kobj,
@@ -1596,7 +1596,7 @@ static const struct bin_attribute *const dump_fcp_bin_attrs[] = {
 static const struct attribute_group dump_fcp_attr_group = {
 	.name  = IPL_FCP_STR,
 	.attrs = dump_fcp_attrs,
-	.bin_attrs_new = dump_fcp_bin_attrs,
+	.bin_attrs = dump_fcp_bin_attrs,
 };
 
 /* NVME dump device attributes */
@@ -1630,7 +1630,7 @@ static const struct bin_attribute *const dump_nvme_bin_attrs[] = {
 static const struct attribute_group dump_nvme_attr_group = {
 	.name  = IPL_NVME_STR,
 	.attrs = dump_nvme_attrs,
-	.bin_attrs_new = dump_nvme_bin_attrs,
+	.bin_attrs = dump_nvme_bin_attrs,
 };
 
 /* ECKD dump device attributes */
@@ -1664,7 +1664,7 @@ static const struct bin_attribute *const dump_eckd_bin_attrs[] = {
 static const struct attribute_group dump_eckd_attr_group = {
 	.name  = IPL_ECKD_STR,
 	.attrs = dump_eckd_attrs,
-	.bin_attrs_new = dump_eckd_bin_attrs,
+	.bin_attrs = dump_eckd_bin_attrs,
 };
 
 /* CCW dump device attributes */
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 3da371c144eb..11f33243a23f 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/kernel_stat.h>
+#include <linux/utsname.h>
 #include <linux/cpufeature.h>
 #include <linux/init.h>
 #include <linux/errno.h>
@@ -21,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/sched/signal.h>
 #include <linux/kvm_host.h>
-#include <linux/export.h>
 #include <asm/lowcore.h>
 #include <asm/ctlreg.h>
 #include <asm/fpu.h>
@@ -116,18 +116,82 @@ static __always_inline char *u64_to_hex(char *dest, u64 val)
 	return dest;
 }
 
+static notrace void nmi_print_info(void)	/* emit machine check details line by line */
+{
+	struct lowcore *lc = get_lowcore();
+	char message[100];	/* line buffer, rebuilt from the start for every output line */
+	char *ptr;	/* current write position inside message */
+	int i;
+
+	ptr = nmi_puts(message, "Unrecoverable machine check, code: ");
+	ptr = u64_to_hex(ptr, lc->mcck_interruption_code);
+	ptr = nmi_puts(ptr, "\n");
+	sclp_emergency_printk(message);
+
+	ptr = nmi_puts(message, init_utsname()->release);
+	ptr = nmi_puts(ptr, "\n");
+	sclp_emergency_printk(message);
+
+	ptr = nmi_puts(message, arch_hw_string); /* NOTE(review): char[128] source, confirm fit */
+	ptr = nmi_puts(ptr, "\n");
+	sclp_emergency_printk(message);
+
+	ptr = nmi_puts(message, "PSW: ");
+	ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask);
+	ptr = nmi_puts(ptr, " ");
+	ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr);
+	ptr = nmi_puts(ptr, " PFX: ");
+	ptr = u64_to_hex(ptr, (u64)get_lowcore());	/* lowcore address printed as "PFX" */
+	ptr = nmi_puts(ptr, "\n");
+	sclp_emergency_printk(message);
+
+	ptr = nmi_puts(message, "LBA: ");
+	ptr = u64_to_hex(ptr, lc->last_break_save_area);
+	ptr = nmi_puts(ptr, " EDC: ");
+	ptr = u64_to_hex(ptr, lc->external_damage_code);
+	ptr = nmi_puts(ptr, " FSA: ");
+	ptr = u64_to_hex(ptr, lc->failing_storage_address);
+	ptr = nmi_puts(ptr, "\n");
+	sclp_emergency_printk(message);
+
+	ptr = nmi_puts(message, "CRS:\n");
+	sclp_emergency_printk(message);
+	ptr = message;
+	for (i = 0; i < 16; i++) {	/* all 16 entries of cregs_save_area */
+		ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val);
+		ptr = nmi_puts(ptr, " ");
+		if ((i + 1) % 4 == 0) {	/* four values per output line */
+			ptr = nmi_puts(ptr, "\n");
+			sclp_emergency_printk(message);
+			ptr = message;
+		}
+	}
+
+	ptr = nmi_puts(message, "GPRS:\n");
+	sclp_emergency_printk(message);
+	ptr = message;
+	for (i = 0; i < 16; i++) {	/* all 16 entries of gpregs_save_area */
+		ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]);
+		ptr = nmi_puts(ptr, " ");
+		if ((i + 1) % 4 == 0) {	/* four values per output line */
+			ptr = nmi_puts(ptr, "\n");
+			sclp_emergency_printk(message);
+			ptr = message;
+		}
+	}
+
+	ptr = nmi_puts(message, "System stopped\n");
+	sclp_emergency_printk(message);
+}
+
 static notrace void s390_handle_damage(void)
 {
 	struct lowcore *lc = get_lowcore();
 	union ctlreg0 cr0, cr0_new;
-	char message[100];
 	psw_t psw_save;
-	char *ptr;
 
 	smp_emergency_stop();
 	diag_amode31_ops.diag308_reset();
-	ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
-	u64_to_hex(ptr, lc->mcck_interruption_code);
 
 	/*
 	 * Disable low address protection and make machine check new PSW a
@@ -141,7 +205,7 @@ static notrace void s390_handle_damage(void)
 	psw_bits(lc->mcck_new_psw).io = 0;
 	psw_bits(lc->mcck_new_psw).ext = 0;
 	psw_bits(lc->mcck_new_psw).wait = 1;
-	sclp_emergency_printk(message);
+	nmi_print_info();
 
 	/*
 	 * Restore machine check new PSW and control register 0 to original
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 6a262e198e35..4d09954ebf49 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -14,7 +14,6 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
-#include <linux/export.h>
 #include <linux/miscdevice.h>
 #include <linux/perf_event.h>
 
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 91469401f2c9..f432869f8921 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -14,7 +14,6 @@
 #include <linux/percpu.h>
 #include <linux/pid.h>
 #include <linux/notifier.h>
-#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/moduleparam.h>
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 2b9611c4718e..91b8716c883a 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -12,7 +12,6 @@
 #include <linux/perf_event.h>
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
-#include <linux/export.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 63875270941b..f373a1009c45 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -13,7 +13,6 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
-#include <linux/export.h>
 #include <linux/io.h>
 #include <linux/perf_event.h>
 #include <asm/ctlreg.h>
@@ -696,7 +695,7 @@ static const char * const paicrypt_ctrnames[] = {
 	[111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
 	[112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
 	[113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
-	[114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A",
+	[114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256",
 	[115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
 	[116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
 	[117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index fd14d5ebccbc..d827473e7f87 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -14,7 +14,6 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
-#include <linux/export.h>
 #include <linux/io.h>
 #include <linux/perf_event.h>
 #include <asm/ctlreg.h>
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 9637aee43c40..f55f09cda6f8 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -27,7 +27,6 @@
 #include <linux/compat.h>
 #include <linux/kprobes.h>
 #include <linux/random.h>
-#include <linux/export.h>
 #include <linux/init_task.h>
 #include <linux/entry-common.h>
 #include <linux/io.h>
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index e1240f6b29fa..494216c4b4f3 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -1209,7 +1209,7 @@ static int s390_runtime_instr_set(struct task_struct *target,
 
 static const struct user_regset s390_regsets[] = {
 	{
-		.core_note_type = NT_PRSTATUS,
+		USER_REGSET_NOTE_TYPE(PRSTATUS),
 		.n = sizeof(s390_regs) / sizeof(long),
 		.size = sizeof(long),
 		.align = sizeof(long),
@@ -1217,7 +1217,7 @@ static const struct user_regset s390_regsets[] = {
 		.set = s390_regs_set,
 	},
 	{
-		.core_note_type = NT_PRFPREG,
+		USER_REGSET_NOTE_TYPE(PRFPREG),
 		.n = sizeof(s390_fp_regs) / sizeof(long),
 		.size = sizeof(long),
 		.align = sizeof(long),
@@ -1225,7 +1225,7 @@ static const struct user_regset s390_regsets[] = {
 		.set = s390_fpregs_set,
 	},
 	{
-		.core_note_type = NT_S390_SYSTEM_CALL,
+		USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL),
 		.n = 1,
 		.size = sizeof(unsigned int),
 		.align = sizeof(unsigned int),
@@ -1233,7 +1233,7 @@ static const struct user_regset s390_regsets[] = {
 		.set = s390_system_call_set,
 	},
 	{
-		.core_note_type = NT_S390_LAST_BREAK,
+		USER_REGSET_NOTE_TYPE(S390_LAST_BREAK),
 		.n = 1,
 		.size = sizeof(long),
 		.align = sizeof(long),
@@ -1241,7 +1241,7 @@ static const struct user_regset s390_regsets[] = {
 		.set = s390_last_break_set,
 	},
 	{
-		.core_note_type = NT_S390_TDB,
+		USER_REGSET_NOTE_TYPE(S390_TDB),
 		.n = 1,
 		.size = 256,
 		.align = 1,
@@ -1249,7 +1249,7 @@ static const struct user_regset s390_regsets[] = {
 		.set = s390_tdb_set,
 	},
 	{
-		.core_note_type = NT_S390_VXRS_LOW,
+		USER_REGSET_NOTE_TYPE(S390_VXRS_LOW),
 		.n = __NUM_VXRS_LOW,
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
@@ -1257,7 +1257,7 @@ static const struct user_regset s390_regsets[] = {
 		.set = s390_vxrs_low_set,
 	},
 	{
-		.core_note_type = NT_S390_VXRS_HIGH,
+		USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH),
 		.n = __NUM_VXRS_HIGH,
 		.size = sizeof(__vector128),
 		.align = sizeof(__vector128),
@@ -1265,7 +1265,7 @@ static const struct user_regset s390_regsets[] = {
 		.set = s390_vxrs_high_set,
 	},
 	{
-		.core_note_type = NT_S390_GS_CB,
+		USER_REGSET_NOTE_TYPE(S390_GS_CB),
 		.n = sizeof(struct gs_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
@@ -1273,7 +1273,7 @@ static const struct user_regset s390_regsets[] = {
 		.set = s390_gs_cb_set,
 	},
 	{
-		.core_note_type = NT_S390_GS_BC,
+		USER_REGSET_NOTE_TYPE(S390_GS_BC),
 		.n = sizeof(struct gs_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
@@ -1281,7 +1281,7 @@ static const struct user_regset s390_regsets[] = {
 		.set = s390_gs_bc_set,
 	},
 	{
-		.core_note_type = NT_S390_RI_CB,
+		USER_REGSET_NOTE_TYPE(S390_RI_CB),
 		.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
@@ -1413,7 +1413,7 @@ static int s390_compat_last_break_set(struct task_struct *target,
 
 static const struct user_regset s390_compat_regsets[] = {
 	{
-		.core_note_type = NT_PRSTATUS,
+		USER_REGSET_NOTE_TYPE(PRSTATUS),
 		.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
 		.align = sizeof(compat_long_t),
@@ -1421,7 +1421,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_compat_regs_set,
 	},
 	{
-		.core_note_type = NT_PRFPREG,
+		USER_REGSET_NOTE_TYPE(PRFPREG),
 		.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
 		.align = sizeof(compat_long_t),
@@ -1429,7 +1429,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_fpregs_set,
 	},
 	{
-		.core_note_type = NT_S390_SYSTEM_CALL,
+		USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL),
 		.n = 1,
 		.size = sizeof(compat_uint_t),
 		.align = sizeof(compat_uint_t),
@@ -1437,7 +1437,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_system_call_set,
 	},
 	{
-		.core_note_type = NT_S390_LAST_BREAK,
+		USER_REGSET_NOTE_TYPE(S390_LAST_BREAK),
 		.n = 1,
 		.size = sizeof(long),
 		.align = sizeof(long),
@@ -1445,7 +1445,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_compat_last_break_set,
 	},
 	{
-		.core_note_type = NT_S390_TDB,
+		USER_REGSET_NOTE_TYPE(S390_TDB),
 		.n = 1,
 		.size = 256,
 		.align = 1,
@@ -1453,7 +1453,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_tdb_set,
 	},
 	{
-		.core_note_type = NT_S390_VXRS_LOW,
+		USER_REGSET_NOTE_TYPE(S390_VXRS_LOW),
 		.n = __NUM_VXRS_LOW,
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
@@ -1461,7 +1461,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_vxrs_low_set,
 	},
 	{
-		.core_note_type = NT_S390_VXRS_HIGH,
+		USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH),
 		.n = __NUM_VXRS_HIGH,
 		.size = sizeof(__vector128),
 		.align = sizeof(__vector128),
@@ -1469,7 +1469,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_vxrs_high_set,
 	},
 	{
-		.core_note_type = NT_S390_HIGH_GPRS,
+		USER_REGSET_NOTE_TYPE(S390_HIGH_GPRS),
 		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
 		.align = sizeof(compat_long_t),
@@ -1477,7 +1477,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_compat_regs_high_set,
 	},
 	{
-		.core_note_type = NT_S390_GS_CB,
+		USER_REGSET_NOTE_TYPE(S390_GS_CB),
 		.n = sizeof(struct gs_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
@@ -1485,7 +1485,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_gs_cb_set,
 	},
 	{
-		.core_note_type = NT_S390_GS_BC,
+		USER_REGSET_NOTE_TYPE(S390_GS_BC),
 		.n = sizeof(struct gs_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
@@ -1493,7 +1493,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_gs_bc_set,
 	},
 	{
-		.core_note_type = NT_S390_RI_CB,
+		USER_REGSET_NOTE_TYPE(S390_RI_CB),
 		.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f244c5560e7f..7b529868789f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -605,7 +605,7 @@ static void __init reserve_crashkernel(void)
 	int rc;
 
 	rc = parse_crashkernel(boot_command_line, ident_map_size,
-			       &crash_size, &crash_base, NULL, NULL);
+			       &crash_size, &crash_base, NULL, NULL, NULL);
 
 	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
 	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
@@ -719,6 +719,11 @@ static void __init memblock_add_physmem_info(void)
 	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
 }
 
+static void __init setup_high_memory(void)
+{
+	high_memory = __va(ident_map_size);	/* __va() of the identity mapping size */
+}
+
 /*
  * Reserve memory used for lowcore.
  */
@@ -951,6 +956,7 @@ void __init setup_arch(char **cmdline_p)
 
 	free_physmem_info();
 	setup_memory_end();
+	setup_high_memory();
 	memblock_dump_all();
 	setup_memory();
 
diff --git a/arch/s390/kernel/skey.c b/arch/s390/kernel/skey.c
new file mode 100644
index 000000000000..ba049fd103c2
--- /dev/null
+++ b/arch/s390/kernel/skey.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/rwonce.h>
+#include <asm/page.h>
+#include <asm/skey.h>
+
+int skey_regions_initialized;
+
+static inline unsigned long load_real_address(unsigned long address)	/* LRA wrapper */
+{
+	unsigned long real;	/* output operand of the LRA instruction */
+
+	asm volatile(
+		"	lra	%[real],0(%[address])\n"
+		: [real] "=d" (real)
+		: [address] "a" (address)
+		: "cc");	/* the instruction modifies the condition code */
+	return real;	/* NOTE(review): condition code is not checked - confirm this is intended */
+}
+
+/*
+ * Set the storage key of every page of each registered region (entries from
+ * __skey_region_start up to __skey_region_end) to PAGE_DEFAULT_KEY, then set
+ * skey_regions_initialized. Useful for code executed with a non-default key.
+ */
+void __skey_regions_initialize(void)
+{
+	unsigned long address, real;
+	struct skey_region *r, *end;
+
+	r = __skey_region_start;
+	end = __skey_region_end;
+	while (r < end) {
+		address = r->start & PAGE_MASK;	/* begin at the page containing r->start */
+		do {
+			real = load_real_address(address);
+			page_set_storage_key(real, PAGE_DEFAULT_KEY, 1);
+			address += PAGE_SIZE;
+		} while (address < r->end);	/* r->end is exclusive; first page is always set */
+		r++;
+	}
+	/*
+	 * Make sure storage keys are initialized before
+	 * skey_regions_initialized is changed.
+	 */
+	barrier();
+	WRITE_ONCE(skey_regions_initialized, 1);
+}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 81f12bb77f62..e88ebe5339fc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -175,13 +175,10 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
 
 static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 {
-	int order;
-
 	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
 		return;
-	order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
 	pcpu->ec_clk = get_tod_clock_fast();
-	pcpu_sigp_retry(pcpu, order, 0);
+	pcpu_sigp_retry(pcpu, SIGP_EXTERNAL_CALL, 0);
 }
 
 static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
@@ -433,16 +430,16 @@ void notrace smp_emergency_stop(void)
 	cpumask_copy(&cpumask, cpu_online_mask);
 	cpumask_clear_cpu(smp_processor_id(), &cpumask);
 
-	end = get_tod_clock() + (1000000UL << 12);
+	end = get_tod_clock_monotonic() + (1000000UL << 12);
 	for_each_cpu(cpu, &cpumask) {
 		struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
 		set_bit(ec_stop_cpu, &pcpu->ec_mask);
 		while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
 				   0, NULL) == SIGP_CC_BUSY &&
-		       get_tod_clock() < end)
+		       get_tod_clock_monotonic() < end)
 			cpu_relax();
 	}
-	while (get_tod_clock() < end) {
+	while (get_tod_clock_monotonic() < end) {
 		for_each_cpu(cpu, &cpumask)
 			if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
 				cpumask_clear_cpu(cpu, &cpumask);
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index d40f0b983e74..f4ccdbed4b89 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -5,6 +5,8 @@
  * Copyright IBM Corp. 2016
  * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
  */
+
+#include <linux/export.h>
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index a4569b96ef06..8a6744d658db 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -470,3 +470,5 @@
 465  common	listxattrat		sys_listxattrat			sys_listxattrat
 466  common	removexattrat		sys_removexattrat		sys_removexattrat
 467  common	open_tree_attr		sys_open_tree_attr		sys_open_tree_attr
+468  common	file_getattr		sys_file_getattr		sys_file_getattr
+469  common	file_setattr		sys_file_setattr		sys_file_setattr
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index fed17d407a44..63517b85f4c9 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -69,8 +69,6 @@ unsigned char ptff_function_mask[16];
 
 static unsigned long lpar_offset;
 static unsigned long initial_leap_seconds;
-static unsigned long tod_steering_end;
-static long tod_steering_delta;
 
 /*
  * Get time offsets with PTFF
@@ -80,9 +78,7 @@ void __init time_early_init(void)
 	struct ptff_qto qto;
 	struct ptff_qui qui;
 
-	/* Initialize TOD steering parameters */
-	tod_steering_end = tod_clock_base.tod;
-	vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end;
+	vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod;
 
 	if (!test_facility(28))
 		return;
@@ -226,21 +222,7 @@ void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
 
 static u64 read_tod_clock(struct clocksource *cs)
 {
-	unsigned long now, adj;
-
-	preempt_disable(); /* protect from changes to steering parameters */
-	now = get_tod_clock();
-	adj = tod_steering_end - now;
-	if (unlikely((s64) adj > 0))
-		/*
-		 * manually steer by 1 cycle every 2^16 cycles. This
-		 * corresponds to shifting the tod delta by 15. 1s is
-		 * therefore steered in ~9h. The adjust will decrease
-		 * over time, until it finally reaches 0.
-		 */
-		now += (tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
-	preempt_enable();
-	return now;
+	return get_tod_clock_monotonic();
 }
 
 static struct clocksource clocksource_tod = {
@@ -369,26 +351,11 @@ static inline int check_sync_clock(void)
  */
 static void clock_sync_global(long delta)
 {
-	unsigned long now, adj;
 	struct ptff_qto qto;
 
 	/* Fixup the monotonic sched clock. */
 	tod_clock_base.eitod += delta;
-	/* Adjust TOD steering parameters. */
-	now = get_tod_clock();
-	adj = tod_steering_end - now;
-	if (unlikely((s64) adj >= 0))
-		/* Calculate how much of the old adjustment is left. */
-		tod_steering_delta = (tod_steering_delta < 0) ?
-			-(adj >> 15) : (adj >> 15);
-	tod_steering_delta += delta;
-	if ((abs(tod_steering_delta) >> 48) != 0)
-		panic("TOD clock sync offset %li is too large to drift\n",
-		      tod_steering_delta);
-	tod_steering_end = now + (abs(tod_steering_delta) << 15);
-	vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end;
-	vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta;
-
+	vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod;
 	/* Update LPAR offset. */
 	if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
 		lpar_offset = qto.tod_epoch_difference;
@@ -430,7 +397,7 @@ struct clock_sync_data {
 /*
  * Server Time Protocol (STP) code.
  */
-static bool stp_online;
+static bool stp_online = true;
 static struct stp_sstpi stp_info;
 static void *stp_page;
 
@@ -456,7 +423,6 @@ static void __init stp_reset(void)
 	if (rc == 0)
 		set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
 	else if (stp_online) {
-		pr_warn("The real or virtual hardware system does not provide an STP interface\n");
 		free_page((unsigned long) stp_page);
 		stp_page = NULL;
 		stp_online = false;
@@ -580,7 +546,7 @@ static int stp_sync_clock(void *data)
 		atomic_dec(&sync->cpus);
 		/* Wait for in_sync to be set. */
 		while (READ_ONCE(sync->in_sync) == 0)
-			__udelay(1);
+			;
 	}
 	if (sync->in_sync != 1)
 		/* Didn't work. Clear per-cpu in sync bit again. */
@@ -591,81 +557,6 @@ static int stp_sync_clock(void *data)
 	return 0;
 }
 
-static int stp_clear_leap(void)
-{
-	struct __kernel_timex txc;
-	int ret;
-
-	memset(&txc, 0, sizeof(txc));
-
-	ret = do_adjtimex(&txc);
-	if (ret < 0)
-		return ret;
-
-	txc.modes = ADJ_STATUS;
-	txc.status &= ~(STA_INS|STA_DEL);
-	return do_adjtimex(&txc);
-}
-
-static void stp_check_leap(void)
-{
-	struct stp_stzi stzi;
-	struct stp_lsoib *lsoib = &stzi.lsoib;
-	struct __kernel_timex txc;
-	int64_t timediff;
-	int leapdiff, ret;
-
-	if (!stp_info.lu || !check_sync_clock()) {
-		/*
-		 * Either a scheduled leap second was removed by the operator,
-		 * or STP is out of sync. In both cases, clear the leap second
-		 * kernel flags.
-		 */
-		if (stp_clear_leap() < 0)
-			pr_err("failed to clear leap second flags\n");
-		return;
-	}
-
-	if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) {
-		pr_err("stzi failed\n");
-		return;
-	}
-
-	timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC;
-	leapdiff = lsoib->nlso - lsoib->also;
-
-	if (leapdiff != 1 && leapdiff != -1) {
-		pr_err("Cannot schedule %d leap seconds\n", leapdiff);
-		return;
-	}
-
-	if (timediff < 0) {
-		if (stp_clear_leap() < 0)
-			pr_err("failed to clear leap second flags\n");
-	} else if (timediff < 7200) {
-		memset(&txc, 0, sizeof(txc));
-		ret = do_adjtimex(&txc);
-		if (ret < 0)
-			return;
-
-		txc.modes = ADJ_STATUS;
-		if (leapdiff > 0)
-			txc.status |= STA_INS;
-		else
-			txc.status |= STA_DEL;
-		ret = do_adjtimex(&txc);
-		if (ret < 0)
-			pr_err("failed to set leap second flags\n");
-		/* arm Timer to clear leap second flags */
-		mod_timer(&stp_timer, jiffies + secs_to_jiffies(14400));
-	} else {
-		/* The day the leap second is scheduled for hasn't been reached. Retry
-		 * in one hour.
-		 */
-		mod_timer(&stp_timer, jiffies + secs_to_jiffies(3600));
-	}
-}
-
 /*
  * STP work. Check for the STP state and take over the clock
  * synchronization if the STP clock source is usable.
@@ -707,8 +598,6 @@ static void stp_work_fn(struct work_struct *work)
 		 * Retry after a second.
 		 */
 		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
-	else if (stp_info.lu)
-		stp_check_leap();
 
 out_unlock:
 	mutex_unlock(&stp_mutex);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 3df048e190b1..46569b8e47dd 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -531,11 +531,11 @@ static const struct cpumask *cpu_drawer_mask(int cpu)
 }
 
 static struct sched_domain_topology_level s390_topology[] = {
-	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
-	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
-	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
-	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
-	{ cpu_cpu_mask, SD_INIT_NAME(PKG) },
+	SDTL_INIT(cpu_thread_mask, cpu_smt_flags, SMT),
+	SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
+	SDTL_INIT(cpu_book_mask, NULL, BOOK),
+	SDTL_INIT(cpu_drawer_mask, NULL, DRAWER),
+	SDTL_INIT(cpu_cpu_mask, NULL, PKG),
 	{ NULL, },
 };
 
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index cd44be2b6ce8..0f88caca4eaf 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -1,4 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 4ab0b6b4866e..47f574cd1728 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -7,6 +7,7 @@
 #define KMSG_COMPONENT "prot_virt"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/sizes.h>
@@ -15,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/pagewalk.h>
+#include <linux/backing-dev.h>
 #include <asm/facility.h>
 #include <asm/sections.h>
 #include <asm/uv.h>
@@ -135,7 +137,7 @@ int uv_destroy_folio(struct folio *folio)
 {
 	int rc;
 
-	/* See gmap_make_secure(): large folios cannot be secure */
+	/* Large folios cannot be secure */
 	if (unlikely(folio_test_large(folio)))
 		return 0;
 
@@ -184,7 +186,7 @@ int uv_convert_from_secure_folio(struct folio *folio)
 {
 	int rc;
 
-	/* See gmap_make_secure(): large folios cannot be secure */
+	/* Large folios cannot be secure */
 	if (unlikely(folio_test_large(folio)))
 		return 0;
 
@@ -324,32 +326,87 @@ static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct u
 }
 
 /**
- * s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split.
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and
+ *			       split the folio if it is large.
  * @mm:    the mm containing the folio to work on
  * @folio: the folio
- * @split: whether to split a large folio
  *
  * Context: Must be called while holding an extra reference to the folio;
  *          the mm lock should not be held.
- * Return: 0 if the folio was split successfully;
- *         -EAGAIN if the folio was not split successfully but another attempt
- *                 can be made, or if @split was set to false;
- *         -EINVAL in case of other errors. See split_folio().
+ * Return: 0 if the operation was successful;
+ *	   -EAGAIN if splitting the large folio was not successful,
+ *		   but another attempt can be made;
+ *	   -EINVAL in case of other folio splitting errors. See split_folio().
  */
-static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
 {
-	int rc;
+	int rc, tried_splits;
 
 	lockdep_assert_not_held(&mm->mmap_lock);
 	folio_wait_writeback(folio);
 	lru_add_drain_all();
-	if (split) {
+
+	if (!folio_test_large(folio))
+		return 0;
+
+	for (tried_splits = 0; tried_splits < 2; tried_splits++) {
+		struct address_space *mapping;
+		loff_t lstart, lend;
+		struct inode *inode;
+
 		folio_lock(folio);
 		rc = split_folio(folio);
+		if (rc != -EBUSY) {
+			folio_unlock(folio);
+			return rc;
+		}
+
+		/*
+		 * Splitting can fail with -EBUSY for various reasons, but we
+		 * have to handle one case explicitly for now: some mappings
+		 * don't allow for splitting dirty folios; writeback will
+		 * mark them clean again, including marking all page table
+		 * entries mapping the folio read-only, to catch future write
+		 * attempts.
+		 *
+		 * While the system should be writing back dirty folios in the
+		 * background, we obtained this folio by looking up a writable
+		 * page table entry. On these problematic mappings, writable
+		 * page table entries imply dirty folios, preventing the
+		 * split in the first place.
+		 *
+		 * To prevent a livelock when triggering writeback manually and
+		 * letting the caller look up the folio again in the page
+		 * table (turning it dirty), immediately try to split again.
+		 *
+		 * This is only a problem for some mappings (e.g., XFS);
+		 * mappings that do not support writeback (e.g., shmem) are not
+		 * affected.
+		 */
+		if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
+		    !folio->mapping || !mapping_can_writeback(folio->mapping)) {
+			folio_unlock(folio);
+			break;
+		}
+
+		/*
+		 * Ideally, we'd only trigger writeback on this exact folio. But
+		 * there is no easy way to do that, so we'll stabilize the
+		 * mapping while we still hold the folio lock, so we can drop
+		 * the folio lock to trigger writeback on the range currently
+		 * covered by the folio instead.
+		 */
+		mapping = folio->mapping;
+		lstart = folio_pos(folio);
+		lend = lstart + folio_size(folio) - 1;
+		inode = igrab(mapping->host);
 		folio_unlock(folio);
 
-		if (rc != -EBUSY)
-			return rc;
+		if (unlikely(!inode))
+			break;
+
+		filemap_write_and_wait_range(mapping, lstart, lend);
+		iput(mapping->host);
 	}
 	return -EAGAIN;
 }
@@ -393,8 +450,11 @@ int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header
 	folio_walk_end(&fw, vma);
 	mmap_read_unlock(mm);
 
-	if (rc == -E2BIG || rc == -EBUSY)
-		rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG);
+	if (rc == -E2BIG || rc == -EBUSY) {
+		rc = s390_wiggle_split_folio(mm, folio);
+		if (!rc)
+			rc = -EAGAIN;
+	}
 	folio_put(folio);
 
 	return rc;
@@ -403,15 +463,15 @@ EXPORT_SYMBOL_GPL(make_hva_secure);
 
 /*
  * To be called with the folio locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the folio concurrently. Having 2
- * parallel arch_make_folio_accessible is fine, as the UV calls will become a
- * no-op if the folio is already exported.
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will
+ * become a no-op if the folio is already exported.
  */
 int arch_make_folio_accessible(struct folio *folio)
 {
 	int rc = 0;
 
-	/* See gmap_make_secure(): large folios cannot be secure */
+	/* Large folios cannot be secure */
 	if (unlikely(folio_test_large(folio)))
 		return 0;
 
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index ff1ddba96352..1c606dfa595d 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -71,6 +71,13 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	__end_ro_after_init = .;
 
+	. = ALIGN(8);
+	.skey_region_table : {
+		__skey_region_start = .;
+		KEEP(*(.skey_region))
+		__skey_region_end = .;
+	}
+
 	.data.rel.ro : {
 		*(.data.rel.ro .data.rel.ro.*)
 	}
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index f0ffe874adc2..9a723c48b05a 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
 
 kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 74f73141f9b9..53233dec8cad 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -11,12 +11,30 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
 #include "trace.h"
 #include "trace-s390.h"
 #include "gaccess.h"
 
+static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
+{
+	struct kvm_memslot_iter iter;
+	struct kvm_memory_slot *slot;
+	struct kvm_memslots *slots;
+	unsigned long start, end;
+
+	slots = kvm_vcpu_memslots(vcpu);
+
+	kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
+		slot = iter.slot;
+		start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn));
+		end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages));
+		gmap_helper_discard(vcpu->kvm->mm, start, end);
+	}
+}
+
 static int diag_release_pages(struct kvm_vcpu *vcpu)
 {
 	unsigned long start, end;
@@ -32,12 +50,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
 	VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
 
+	mmap_read_lock(vcpu->kvm->mm);
 	/*
 	 * We checked for start >= end above, so lets check for the
 	 * fast path (no prefix swap page involved)
 	 */
 	if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
-		gmap_discard(vcpu->arch.gmap, start, end);
+		do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end));
 	} else {
 		/*
 		 * This is slow path.  gmap_discard will check for start
@@ -45,13 +64,14 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 		 * prefix and let gmap_discard make some of these calls
 		 * NOPs.
 		 */
-		gmap_discard(vcpu->arch.gmap, start, prefix);
+		do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix));
 		if (start <= prefix)
-			gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+			do_discard_gfn_range(vcpu, 0, 1);
 		if (end > prefix + PAGE_SIZE)
-			gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
-		gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+			do_discard_gfn_range(vcpu, 1, 2);
+		do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end));
 	}
+	mmap_read_unlock(vcpu->kvm->mm);
 	return 0;
 }
 
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index f6fded15633a..21c2e61fece4 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -16,9 +16,10 @@
 #include <asm/gmap.h>
 #include <asm/dat-bits.h>
 #include "kvm-s390.h"
-#include "gmap.h"
 #include "gaccess.h"
 
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
 /*
  * vaddress union in order to easily decode a virtual address into its
  * region first index, region second index etc. parts.
@@ -318,7 +319,7 @@ enum prot_type {
 	PROT_TYPE_DAT  = 3,
 	PROT_TYPE_IEP  = 4,
 	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
-	PROT_NONE,
+	PROT_TYPE_DUMMY,
 };
 
 static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
@@ -334,7 +335,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
 	switch (code) {
 	case PGM_PROTECTION:
 		switch (prot) {
-		case PROT_NONE:
+		case PROT_TYPE_DUMMY:
 			/* We should never get here, acts like termination */
 			WARN_ON_ONCE(1);
 			break;
@@ -804,7 +805,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
 			gpa = kvm_s390_real_to_abs(vcpu, ga);
 			if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
 				rc = PGM_ADDRESSING;
-				prot = PROT_NONE;
+				prot = PROT_TYPE_DUMMY;
 			}
 		}
 		if (rc)
@@ -962,7 +963,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
 		if (rc == PGM_PROTECTION)
 			prot = PROT_TYPE_KEYC;
 		else
-			prot = PROT_NONE;
+			prot = PROT_TYPE_DUMMY;
 		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
 	}
 out_unlock:
diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c
index a6d1dbb04c97..56ef153eb8fe 100644
--- a/arch/s390/kvm/gmap-vsie.c
+++ b/arch/s390/kvm/gmap-vsie.c
@@ -22,7 +22,6 @@
 #include <asm/uv.h>
 
 #include "kvm-s390.h"
-#include "gmap.h"
 
 /**
  * gmap_find_shadow - find a specific asce in the list of shadow tables
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
deleted file mode 100644
index 6d8944d1b4a0..000000000000
--- a/arch/s390/kvm/gmap.c
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Guest memory management for KVM/s390
- *
- * Copyright IBM Corp. 2008, 2020, 2024
- *
- *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
- *               Martin Schwidefsky <schwidefsky@de.ibm.com>
- *               David Hildenbrand <david@redhat.com>
- *               Janosch Frank <frankja@linux.vnet.ibm.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/pgtable.h>
-#include <linux/pagemap.h>
-
-#include <asm/lowcore.h>
-#include <asm/gmap.h>
-#include <asm/uv.h>
-
-#include "gmap.h"
-
-/**
- * gmap_make_secure() - make one guest page secure
- * @gmap: the guest gmap
- * @gaddr: the guest address that needs to be made secure
- * @uvcb: the UVCB specifying which operation needs to be performed
- *
- * Context: needs to be called with kvm->srcu held.
- * Return: 0 on success, < 0 in case of error.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
-{
-	struct kvm *kvm = gmap->private;
-	unsigned long vmaddr;
-
-	lockdep_assert_held(&kvm->srcu);
-
-	vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
-	if (kvm_is_error_hva(vmaddr))
-		return -EFAULT;
-	return make_hva_secure(gmap->mm, vmaddr, uvcb);
-}
-
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
-	struct uv_cb_cts uvcb = {
-		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
-		.header.len = sizeof(uvcb),
-		.guest_handle = gmap->guest_handle,
-		.gaddr = gaddr,
-	};
-
-	return gmap_make_secure(gmap, gaddr, &uvcb);
-}
-
-/**
- * __gmap_destroy_page() - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @page: the page to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- *
- * Context: must be called holding the mm lock for gmap->mm
- */
-static int __gmap_destroy_page(struct gmap *gmap, struct page *page)
-{
-	struct folio *folio = page_folio(page);
-	int rc;
-
-	/*
-	 * See gmap_make_secure(): large folios cannot be secure. Small
-	 * folio implies FW_LEVEL_PTE.
-	 */
-	if (folio_test_large(folio))
-		return -EFAULT;
-
-	rc = uv_destroy_folio(folio);
-	/*
-	 * Fault handlers can race; it is possible that two CPUs will fault
-	 * on the same secure page. One CPU can destroy the page, reboot,
-	 * re-enter secure mode and import it, while the second CPU was
-	 * stuck at the beginning of the handler. At some point the second
-	 * CPU will be able to progress, and it will not be able to destroy
-	 * the page. In that case we do not want to terminate the process,
-	 * we instead try to export the page.
-	 */
-	if (rc)
-		rc = uv_convert_from_secure_folio(folio);
-
-	return rc;
-}
-
-/**
- * gmap_destroy_page() - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- *
- * Context: may sleep.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
-{
-	struct page *page;
-	int rc = 0;
-
-	mmap_read_lock(gmap->mm);
-	page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr));
-	if (page)
-		rc = __gmap_destroy_page(gmap, page);
-	kvm_release_page_clean(page);
-	mmap_read_unlock(gmap->mm);
-	return rc;
-}
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
deleted file mode 100644
index c8f031c9ea5f..000000000000
--- a/arch/s390/kvm/gmap.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  KVM guest address space mapping code
- *
- *    Copyright IBM Corp. 2007, 2016, 2025
- *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- *               Claudio Imbrenda <imbrenda@linux.ibm.com>
- */
-
-#ifndef ARCH_KVM_S390_GMAP_H
-#define ARCH_KVM_S390_GMAP_H
-
-#define GMAP_SHADOW_FAKE_TABLE 1ULL
-
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
-
-/**
- * gmap_shadow_valid - check if a shadow guest address space matches the
- *                     given properties and is still valid
- * @sg: pointer to the shadow guest address space structure
- * @asce: ASCE for which the shadow table is requested
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns 1 if the gmap shadow is still valid and matches the given
- * properties, the caller can continue using it. Returns 0 otherwise, the
- * caller has to request a new shadow gmap in this case.
- *
- */
-static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
-{
-	if (sg->removed)
-		return 0;
-	return sg->orig_asce == asce && sg->edat_level == edat_level;
-}
-
-#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a06a000f196c..c7908950c1f4 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -21,7 +21,6 @@
 #include "gaccess.h"
 #include "trace.h"
 #include "trace-s390.h"
-#include "gmap.h"
 
 u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
 {
@@ -545,7 +544,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
 			  guest_uvcb->header.cmd);
 		return 0;
 	}
-	rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+	rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb);
 	/*
 	 * If the unpin did not succeed, the guest will exit again for the UVC
 	 * and we will retry the unpin.
@@ -653,10 +652,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 		break;
 	case ICPT_PV_PREF:
 		rc = 0;
-		gmap_convert_to_secure(vcpu->arch.gmap,
-				       kvm_s390_get_prefix(vcpu));
-		gmap_convert_to_secure(vcpu->arch.gmap,
-				       kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+		kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu));
+		kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
 		break;
 	default:
 		return -EOPNOTSUPP;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 60c360c18690..2a92a8b9e4c2 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -14,6 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/hrtimer.h>
+#include <linux/export.h>
 #include <linux/mmu_context.h>
 #include <linux/nospec.h>
 #include <linux/signal.h>
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3f3175193fd7..bf6fa8b9ca73 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/compiler.h>
+#include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
 #include <linux/hrtimer.h>
@@ -40,6 +41,7 @@
 #include <asm/machine.h>
 #include <asm/stp.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/nmi.h>
 #include <asm/isc.h>
 #include <asm/sclp.h>
@@ -52,7 +54,6 @@
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "pci.h"
-#include "gmap.h"
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -2674,7 +2675,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		if (r)
 			break;
 
-		r = s390_disable_cow_sharing();
+		mmap_write_lock(kvm->mm);
+		r = gmap_helper_disable_cow_sharing();
+		mmap_write_unlock(kvm->mm);
 		if (r)
 			break;
 
@@ -4973,7 +4976,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
 		 * previous protected guest. The old pages need to be destroyed
 		 * so the new guest can use them.
 		 */
-		if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) {
+		if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
 			/*
 			 * Either KVM messed up the secure guest mapping or the
 			 * same page is mapped into multiple secure guests.
@@ -4995,7 +4998,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
 		 * guest has not been imported yet. Try to import the page into
 		 * the protected guest.
 		 */
-		rc = gmap_convert_to_secure(vcpu->arch.gmap, gaddr);
+		rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
 		if (rc == -EINVAL)
 			send_sig(SIGSEGV, current, 0);
 		if (rc != -ENXIO)
@@ -5060,6 +5063,30 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 	return vcpu_post_run_handle_fault(vcpu);
 }
 
+int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
+				    u64 *gprs, unsigned long gasce)
+{
+	int ret;
+
+	guest_state_enter_irqoff();
+
+	/*
+	 * The guest_state_{enter,exit}_irqoff() functions inform lockdep and
+	 * tracing that entry to the guest will enable host IRQs, and exit from
+	 * the guest will disable host IRQs.
+	 *
+	 * We must not use lockdep/tracing/RCU in this critical section, so we
+	 * use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
+	 */
+	arch_local_irq_enable();
+	ret = sie64a(scb, gprs, gasce);
+	arch_local_irq_disable();
+
+	guest_state_exit_irqoff();
+
+	return ret;
+}
+
 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
@@ -5080,20 +5107,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		kvm_vcpu_srcu_read_unlock(vcpu);
 		/*
 		 * As PF_VCPU will be used in fault handler, between
-		 * guest_enter and guest_exit should be no uaccess.
+		 * guest_timing_enter_irqoff and guest_timing_exit_irqoff
+		 * there should be no uaccess.
 		 */
-		local_irq_disable();
-		guest_enter_irqoff();
-		__disable_cpu_timer_accounting(vcpu);
-		local_irq_enable();
 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
 			memcpy(sie_page->pv_grregs,
 			       vcpu->run->s.regs.gprs,
 			       sizeof(sie_page->pv_grregs));
 		}
-		exit_reason = sie64a(vcpu->arch.sie_block,
-				     vcpu->run->s.regs.gprs,
-				     vcpu->arch.gmap->asce);
+
+		local_irq_disable();
+		guest_timing_enter_irqoff();
+		__disable_cpu_timer_accounting(vcpu);
+
+		exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
+						      vcpu->run->s.regs.gprs,
+						      vcpu->arch.gmap->asce);
+
+		__enable_cpu_timer_accounting(vcpu);
+		guest_timing_exit_irqoff();
+		local_irq_enable();
+
 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
 			memcpy(vcpu->run->s.regs.gprs,
 			       sie_page->pv_grregs,
@@ -5109,10 +5143,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
 			}
 		}
-		local_irq_disable();
-		__enable_cpu_timer_accounting(vcpu);
-		guest_exit_irqoff();
-		local_irq_enable();
 		kvm_vcpu_srcu_read_lock(vcpu);
 
 		rc = vcpu_post_run(vcpu, exit_reason);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 8d3bbb2dd8d2..c44fe0c3a097 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -308,6 +308,9 @@ int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
 				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
 int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
 			      u16 *rc, u16 *rrc);
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb);
 
 static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
 {
@@ -319,6 +322,41 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
 	return vcpu->arch.pv.handle;
 }
 
+/**
+ * __kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mmap lock of the guest's mm (kvm->mm)
+ */
+static inline int __kvm_s390_pv_destroy_page(struct page *page)
+{
+	struct folio *folio = page_folio(page);
+	int rc;
+
+	/* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. */
+	if (folio_test_large(folio))
+		return -EFAULT;
+
+	rc = uv_destroy_folio(folio);
+	/*
+	 * Fault handlers can race; it is possible that two CPUs will fault
+	 * on the same secure page. One CPU can destroy the page, reboot,
+	 * re-enter secure mode and import it, while the second CPU was
+	 * stuck at the beginning of the handler. At some point the second
+	 * CPU will be able to progress, and it will not be able to destroy
+	 * the page. In that case we do not want to terminate the process,
+	 * we instead try to export the page.
+	 */
+	if (rc)
+		rc = uv_convert_from_secure_folio(folio);
+
+	return rc;
+}
+
 /* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -398,6 +436,10 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
 				 unsigned long end);
 void kvm_s390_vsie_init(struct kvm *kvm);
 void kvm_s390_vsie_destroy(struct kvm *kvm);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+
+/* implemented in gmap-vsie.c */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
 
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1a49b89706f8..9253c70897a8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1248,6 +1248,8 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
 
 static int handle_essa(struct kvm_vcpu *vcpu)
 {
+	lockdep_assert_held(&vcpu->kvm->srcu);
+
 	/* entries expected to be 1FF */
 	int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
 	unsigned long *cbrlo;
@@ -1297,12 +1299,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 		/* Retry the ESSA instruction */
 		kvm_s390_retry_instr(vcpu);
 	} else {
-		int srcu_idx;
-
 		mmap_read_lock(vcpu->kvm->mm);
-		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 		i = __do_essa(vcpu, orc);
-		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
 		mmap_read_unlock(vcpu->kvm->mm);
 		if (i < 0)
 			return i;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 22c012aa5206..25ede8354514 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -5,6 +5,8 @@
  * Copyright IBM Corp. 2019, 2020
  *    Author(s): Janosch Frank <frankja@linux.ibm.com>
  */
+
+#include <linux/export.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/minmax.h>
@@ -17,7 +19,6 @@
 #include <linux/sched/mm.h>
 #include <linux/mmu_notifier.h>
 #include "kvm-s390.h"
-#include "gmap.h"
 
 bool kvm_s390_pv_is_protected(struct kvm *kvm)
 {
@@ -34,6 +35,64 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
 
 /**
+ * kvm_s390_pv_make_secure() - make one guest page secure
+ * @kvm: the guest
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error.
+ */
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
+{
+	unsigned long vmaddr;
+
+	lockdep_assert_held(&kvm->srcu);
+
+	vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
+	if (kvm_is_error_hva(vmaddr))
+		return -EFAULT;
+	return make_hva_secure(kvm->mm, vmaddr, uvcb);
+}
+
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
+{
+	struct uv_cb_cts uvcb = {
+		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+		.header.len = sizeof(uvcb),
+		.guest_handle = kvm_s390_pv_get_handle(kvm),
+		.gaddr = gaddr,
+	};
+
+	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
+}
+
+/**
+ * kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @kvm: the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
+{
+	struct page *page;
+	int rc = 0;
+
+	mmap_read_lock(kvm->mm);
+	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+	if (page)
+		rc = __kvm_s390_pv_destroy_page(page);
+	kvm_release_page_clean(page);
+	mmap_read_unlock(kvm->mm);
+	return rc;
+}
+
+/**
  * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
  * be destroyed
  *
@@ -638,7 +697,7 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
 		.tweak[0] = tweak,
 		.tweak[1] = offset,
 	};
-	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
 	unsigned long vmaddr;
 	bool unlocked;
 
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a78df3a4f353..347268f89f2f 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -23,7 +23,6 @@
 #include <asm/facility.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
-#include "gmap.h"
 
 enum vsie_page_flags {
 	VSIE_PAGE_IN_USE = 0,
@@ -68,6 +67,24 @@ struct vsie_page {
 	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
 };
 
+/**
+ * gmap_shadow_valid() - check if a shadow guest address space matches the
+ *                       given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise; the
+ * caller has to request a new shadow gmap in this case.
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+	if (sg->removed)
+		return 0;
+	return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
 /* trigger a validity icpt for the given scb */
 static int set_validity_icpt(struct kvm_s390_sie_block *scb,
 			     __u16 reason_code)
@@ -1153,10 +1170,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	    vcpu->arch.sie_block->fpf & FPF_BPBC)
 		set_thread_flag(TIF_ISOLATE_BP_GUEST);
 
-	local_irq_disable();
-	guest_enter_irqoff();
-	local_irq_enable();
-
 	/*
 	 * Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
 	 * and VCPU requests also hinder the vSIE from running and lead
@@ -1166,15 +1179,16 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
 	current->thread.gmap_int_code = 0;
 	barrier();
-	if (!kvm_s390_vcpu_sie_inhibited(vcpu))
-		rc = sie64a(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
+	if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
+		local_irq_disable();
+		guest_timing_enter_irqoff();
+		rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
+		guest_timing_exit_irqoff();
+		local_irq_enable();
+	}
 	barrier();
 	vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
 
-	local_irq_disable();
-	guest_exit_irqoff();
-	local_irq_enable();
-
 	/* restore guest state for bp isolation override */
 	if (!guest_bp_isolation)
 		clear_thread_flag(TIF_ISOLATE_BP_GUEST);
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index cd35cdbfa871..f43f897d3fc0 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,6 @@
 # Makefile for s390-specific library files..
 #
 
-obj-y += crypto/
 lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
 lib-y += csum-partial.o
 obj-y += mem.o xor.o
@@ -25,6 +24,3 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 
 obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
-
-obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o
-crc32-s390-y := crc32.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/lib/crc32-vx.h b/arch/s390/lib/crc32-vx.h
deleted file mode 100644
index 652c96e1a822..000000000000
--- a/arch/s390/lib/crc32-vx.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _CRC32_VX_S390_H
-#define _CRC32_VX_S390_H
-
-#include <linux/types.h>
-
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-
-#endif /* _CRC32_VX_S390_H */
diff --git a/arch/s390/lib/crc32.c b/arch/s390/lib/crc32.c
deleted file mode 100644
index 3c4b344417c1..000000000000
--- a/arch/s390/lib/crc32.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CRC-32 implemented with the z/Architecture Vector Extension Facility.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-#define KMSG_COMPONENT	"crc32-vx"
-#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-#define VX_MIN_LEN		64
-#define VX_ALIGNMENT		16L
-#define VX_ALIGN_MASK		(VX_ALIGNMENT - 1)
-
-/*
- * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
- *
- * Creates a function to perform a particular CRC-32 computation. Depending
- * on the message buffer, the hardware-accelerated or software implementation
- * is used.   Note that the message buffer is aligned to improve fetch
- * operations of VECTOR LOAD MULTIPLE instructions.
- */
-#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw)		    \
-	u32 ___fname(u32 crc, const u8 *data, size_t datalen)		    \
-	{								    \
-		unsigned long prealign, aligned, remaining;		    \
-		DECLARE_KERNEL_FPU_ONSTACK16(vxstate);			    \
-									    \
-		if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx())  \
-			return ___crc32_sw(crc, data, datalen);		    \
-									    \
-		if ((unsigned long)data & VX_ALIGN_MASK) {		    \
-			prealign = VX_ALIGNMENT -			    \
-				  ((unsigned long)data & VX_ALIGN_MASK);    \
-			datalen -= prealign;				    \
-			crc = ___crc32_sw(crc, data, prealign);		    \
-			data = (void *)((unsigned long)data + prealign);    \
-		}							    \
-									    \
-		aligned = datalen & ~VX_ALIGN_MASK;			    \
-		remaining = datalen & VX_ALIGN_MASK;			    \
-									    \
-		kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);		    \
-		crc = ___crc32_vx(crc, data, aligned);			    \
-		kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);		    \
-									    \
-		if (remaining)						    \
-			crc = ___crc32_sw(crc, data + aligned, remaining);  \
-									    \
-		return crc;						    \
-	}								    \
-	EXPORT_SYMBOL(___fname);
-
-DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
-DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
-DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base)
-
-u32 crc32_optimizations(void)
-{
-	if (cpu_has_vx()) {
-		return CRC32_LE_OPTIMIZATION |
-		       CRC32_BE_OPTIMIZATION |
-		       CRC32C_OPTIMIZATION;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
-MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility");
-MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c
deleted file mode 100644
index fed7c9c70d05..000000000000
--- a/arch/s390/lib/crc32be-vx.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Hardware-accelerated CRC-32 variants for Linux on z Systems
- *
- * Use the z/Architecture Vector Extension Facility to accelerate the
- * computing of CRC-32 checksums.
- *
- * This CRC-32 implementation algorithm processes the most-significant
- * bit first (BE).
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#include <linux/types.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-/* Vector register range containing CRC-32 constants */
-#define CONST_R1R2		9
-#define CONST_R3R4		10
-#define CONST_R5		11
-#define CONST_R6		12
-#define CONST_RU_POLY		13
-#define CONST_CRC_POLY		14
-
-/*
- * The CRC-32 constant block contains reduction constants to fold and
- * process particular chunks of the input data stream in parallel.
- *
- * For the CRC-32 variants, the constants are precomputed according to
- * these definitions:
- *
- *	R1 = x4*128+64 mod P(x)
- *	R2 = x4*128    mod P(x)
- *	R3 = x128+64   mod P(x)
- *	R4 = x128      mod P(x)
- *	R5 = x96       mod P(x)
- *	R6 = x64       mod P(x)
- *
- *	Barret reduction constant, u, is defined as floor(x**64 / P(x)).
- *
- *	where P(x) is the polynomial in the normal domain and the P'(x) is the
- *	polynomial in the reversed (bitreflected) domain.
- *
- * Note that the constant definitions below are extended in order to compute
- * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
- * The rightmost doubleword can be 0 to prevent contribution to the result or
- * can be multiplied by 1 to perform an XOR without the need for a separate
- * VECTOR EXCLUSIVE OR instruction.
- *
- * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
- *
- *	P(x)  = 0x04C11DB7
- *	P'(x) = 0xEDB88320
- */
-
-static unsigned long constants_CRC_32_BE[] = {
-	0x08833794c, 0x0e6228b11,	/* R1, R2 */
-	0x0c5b9cd4c, 0x0e8a45605,	/* R3, R4 */
-	0x0f200aa66, 1UL << 32,		/* R5, x32 */
-	0x0490d678d, 1,			/* R6, 1 */
-	0x104d101df, 0,			/* u */
-	0x104C11DB7, 0,			/* P(x) */
-};
-
-/**
- * crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers
- * @crc: Initial CRC value, typically ~0.
- * @buf: Input buffer pointer, performance might be improved if the
- *	  buffer is on a doubleword boundary.
- * @size: Size of the buffer, must be 64 bytes or greater.
- *
- * Register usage:
- *	V0:	Initial CRC value and intermediate constants and results.
- *	V1..V4:	Data for CRC computation.
- *	V5..V8:	Next data chunks that are fetched from the input buffer.
- *	V9..V14: CRC-32 constants.
- */
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
-	/* Load CRC-32 constants */
-	fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE);
-	fpu_vzero(0);
-
-	/* Load the initial CRC value into the leftmost word of V0. */
-	fpu_vlvgf(0, crc, 0);
-
-	/* Load a 64-byte data chunk and XOR with CRC */
-	fpu_vlm(1, 4, buf);
-	fpu_vx(1, 0, 1);
-	buf += 64;
-	size -= 64;
-
-	while (size >= 64) {
-		/* Load the next 64-byte data chunk into V5 to V8 */
-		fpu_vlm(5, 8, buf);
-
-		/*
-		 * Perform a GF(2) multiplication of the doublewords in V1 with
-		 * the reduction constants in V0.  The intermediate result is
-		 * then folded (accumulated) with the next data chunk in V5 and
-		 * stored in V1.  Repeat this step for the register contents
-		 * in V2, V3, and V4 respectively.
-		 */
-		fpu_vgfmag(1, CONST_R1R2, 1, 5);
-		fpu_vgfmag(2, CONST_R1R2, 2, 6);
-		fpu_vgfmag(3, CONST_R1R2, 3, 7);
-		fpu_vgfmag(4, CONST_R1R2, 4, 8);
-		buf += 64;
-		size -= 64;
-	}
-
-	/* Fold V1 to V4 into a single 128-bit value in V1 */
-	fpu_vgfmag(1, CONST_R3R4, 1, 2);
-	fpu_vgfmag(1, CONST_R3R4, 1, 3);
-	fpu_vgfmag(1, CONST_R3R4, 1, 4);
-
-	while (size >= 16) {
-		fpu_vl(2, buf);
-		fpu_vgfmag(1, CONST_R3R4, 1, 2);
-		buf += 16;
-		size -= 16;
-	}
-
-	/*
-	 * The R5 constant is used to fold a 128-bit value into an 96-bit value
-	 * that is XORed with the next 96-bit input data chunk.  To use a single
-	 * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to
-	 * form an intermediate 96-bit value (with appended zeros) which is then
-	 * XORed with the intermediate reduction result.
-	 */
-	fpu_vgfmg(1, CONST_R5, 1);
-
-	/*
-	 * Further reduce the remaining 96-bit value to a 64-bit value using a
-	 * single VGFMG, the rightmost doubleword is multiplied with 0x1. The
-	 * intermediate result is then XORed with the product of the leftmost
-	 * doubleword with R6.	The result is a 64-bit value and is subject to
-	 * the Barret reduction.
-	 */
-	fpu_vgfmg(1, CONST_R6, 1);
-
-	/*
-	 * The input values to the Barret reduction are the degree-63 polynomial
-	 * in V1 (R(x)), degree-32 generator polynomial, and the reduction
-	 * constant u.	The Barret reduction result is the CRC value of R(x) mod
-	 * P(x).
-	 *
-	 * The Barret reduction algorithm is defined as:
-	 *
-	 *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
-	 *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
-	 *    3. C(x)  = R(x) XOR T2(x) mod x^32
-	 *
-	 * Note: To compensate the division by x^32, use the vector unpack
-	 * instruction to move the leftmost word into the leftmost doubleword
-	 * of the vector register.  The rightmost doubleword is multiplied
-	 * with zero to not contribute to the intermediate results.
-	 */
-
-	/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
-	fpu_vupllf(2, 1);
-	fpu_vgfmg(2, CONST_RU_POLY, 2);
-
-	/*
-	 * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in
-	 * V2 and XOR the intermediate result, T2(x),  with the value in V1.
-	 * The final result is in the rightmost word of V2.
-	 */
-	fpu_vupllf(2, 2);
-	fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
-	return fpu_vlgvf(2, 3);
-}
diff --git a/arch/s390/lib/crc32le-vx.c b/arch/s390/lib/crc32le-vx.c
deleted file mode 100644
index 2f629f394df7..000000000000
--- a/arch/s390/lib/crc32le-vx.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Hardware-accelerated CRC-32 variants for Linux on z Systems
- *
- * Use the z/Architecture Vector Extension Facility to accelerate the
- * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet
- * and Castagnoli.
- *
- * This CRC-32 implementation algorithm is bitreflected and processes
- * the least-significant bit first (Little-Endian).
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#include <linux/types.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-/* Vector register range containing CRC-32 constants */
-#define CONST_PERM_LE2BE	9
-#define CONST_R2R1		10
-#define CONST_R4R3		11
-#define CONST_R5		12
-#define CONST_RU_POLY		13
-#define CONST_CRC_POLY		14
-
-/*
- * The CRC-32 constant block contains reduction constants to fold and
- * process particular chunks of the input data stream in parallel.
- *
- * For the CRC-32 variants, the constants are precomputed according to
- * these definitions:
- *
- *	R1 = [(x4*128+32 mod P'(x) << 32)]' << 1
- *	R2 = [(x4*128-32 mod P'(x) << 32)]' << 1
- *	R3 = [(x128+32 mod P'(x) << 32)]'   << 1
- *	R4 = [(x128-32 mod P'(x) << 32)]'   << 1
- *	R5 = [(x64 mod P'(x) << 32)]'	    << 1
- *	R6 = [(x32 mod P'(x) << 32)]'	    << 1
- *
- *	The bitreflected Barret reduction constant, u', is defined as
- *	the bit reversal of floor(x**64 / P(x)).
- *
- *	where P(x) is the polynomial in the normal domain and the P'(x) is the
- *	polynomial in the reversed (bitreflected) domain.
- *
- * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
- *
- *	P(x)  = 0x04C11DB7
- *	P'(x) = 0xEDB88320
- *
- * CRC-32C (Castagnoli) polynomials:
- *
- *	P(x)  = 0x1EDC6F41
- *	P'(x) = 0x82F63B78
- */
-
-static unsigned long constants_CRC_32_LE[] = {
-	0x0f0e0d0c0b0a0908, 0x0706050403020100,	/* BE->LE mask */
-	0x1c6e41596, 0x154442bd4,		/* R2, R1 */
-	0x0ccaa009e, 0x1751997d0,		/* R4, R3 */
-	0x0, 0x163cd6124,			/* R5 */
-	0x0, 0x1f7011641,			/* u' */
-	0x0, 0x1db710641			/* P'(x) << 1 */
-};
-
-static unsigned long constants_CRC_32C_LE[] = {
-	0x0f0e0d0c0b0a0908, 0x0706050403020100,	/* BE->LE mask */
-	0x09e4addf8, 0x740eef02,		/* R2, R1 */
-	0x14cd00bd6, 0xf20c0dfe,		/* R4, R3 */
-	0x0, 0x0dd45aab8,			/* R5 */
-	0x0, 0x0dea713f1,			/* u' */
-	0x0, 0x105ec76f0			/* P'(x) << 1 */
-};
-
-/**
- * crc32_le_vgfm_generic - Compute CRC-32 (LE variant) with vector registers
- * @crc: Initial CRC value, typically ~0.
- * @buf: Input buffer pointer, performance might be improved if the
- *	 buffer is on a doubleword boundary.
- * @size: Size of the buffer, must be 64 bytes or greater.
- * @constants: CRC-32 constant pool base pointer.
- *
- * Register usage:
- *	V0:	  Initial CRC value and intermediate constants and results.
- *	V1..V4:	  Data for CRC computation.
- *	V5..V8:	  Next data chunks that are fetched from the input buffer.
- *	V9:	  Constant for BE->LE conversion and shift operations
- *	V10..V14: CRC-32 constants.
- */
-static u32 crc32_le_vgfm_generic(u32 crc, unsigned char const *buf, size_t size, unsigned long *constants)
-{
-	/* Load CRC-32 constants */
-	fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_POLY, constants);
-
-	/*
-	 * Load the initial CRC value.
-	 *
-	 * The CRC value is loaded into the rightmost word of the
-	 * vector register and is later XORed with the LSB portion
-	 * of the loaded input data.
-	 */
-	fpu_vzero(0);			/* Clear V0 */
-	fpu_vlvgf(0, crc, 3);		/* Load CRC into rightmost word */
-
-	/* Load a 64-byte data chunk and XOR with CRC */
-	fpu_vlm(1, 4, buf);
-	fpu_vperm(1, 1, 1, CONST_PERM_LE2BE);
-	fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
-	fpu_vperm(3, 3, 3, CONST_PERM_LE2BE);
-	fpu_vperm(4, 4, 4, CONST_PERM_LE2BE);
-
-	fpu_vx(1, 0, 1);		/* V1 ^= CRC */
-	buf += 64;
-	size -= 64;
-
-	while (size >= 64) {
-		fpu_vlm(5, 8, buf);
-		fpu_vperm(5, 5, 5, CONST_PERM_LE2BE);
-		fpu_vperm(6, 6, 6, CONST_PERM_LE2BE);
-		fpu_vperm(7, 7, 7, CONST_PERM_LE2BE);
-		fpu_vperm(8, 8, 8, CONST_PERM_LE2BE);
-		/*
-		 * Perform a GF(2) multiplication of the doublewords in V1 with
-		 * the R1 and R2 reduction constants in V0.  The intermediate
-		 * result is then folded (accumulated) with the next data chunk
-		 * in V5 and stored in V1. Repeat this step for the register
-		 * contents in V2, V3, and V4 respectively.
-		 */
-		fpu_vgfmag(1, CONST_R2R1, 1, 5);
-		fpu_vgfmag(2, CONST_R2R1, 2, 6);
-		fpu_vgfmag(3, CONST_R2R1, 3, 7);
-		fpu_vgfmag(4, CONST_R2R1, 4, 8);
-		buf += 64;
-		size -= 64;
-	}
-
-	/*
-	 * Fold V1 to V4 into a single 128-bit value in V1.  Multiply V1 with R3
-	 * and R4 and accumulating the next 128-bit chunk until a single 128-bit
-	 * value remains.
-	 */
-	fpu_vgfmag(1, CONST_R4R3, 1, 2);
-	fpu_vgfmag(1, CONST_R4R3, 1, 3);
-	fpu_vgfmag(1, CONST_R4R3, 1, 4);
-
-	while (size >= 16) {
-		fpu_vl(2, buf);
-		fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
-		fpu_vgfmag(1, CONST_R4R3, 1, 2);
-		buf += 16;
-		size -= 16;
-	}
-
-	/*
-	 * Set up a vector register for byte shifts.  The shift value must
-	 * be loaded in bits 1-4 in byte element 7 of a vector register.
-	 * Shift by 8 bytes: 0x40
-	 * Shift by 4 bytes: 0x20
-	 */
-	fpu_vleib(9, 0x40, 7);
-
-	/*
-	 * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
-	 * to move R4 into the rightmost doubleword and set the leftmost
-	 * doubleword to 0x1.
-	 */
-	fpu_vsrlb(0, CONST_R4R3, 9);
-	fpu_vleig(0, 1, 0);
-
-	/*
-	 * Compute GF(2) product of V1 and V0.	The rightmost doubleword
-	 * of V1 is multiplied with R4.  The leftmost doubleword of V1 is
-	 * multiplied by 0x1 and is then XORed with rightmost product.
-	 * Implicitly, the intermediate leftmost product becomes padded
-	 */
-	fpu_vgfmg(1, 0, 1);
-
-	/*
-	 * Now do the final 32-bit fold by multiplying the rightmost word
-	 * in V1 with R5 and XOR the result with the remaining bits in V1.
-	 *
-	 * To achieve this by a single VGFMAG, right shift V1 by a word
-	 * and store the result in V2 which is then accumulated.  Use the
-	 * vector unpack instruction to load the rightmost half of the
-	 * doubleword into the rightmost doubleword element of V1; the other
-	 * half is loaded in the leftmost doubleword.
-	 * The vector register with CONST_R5 contains the R5 constant in the
-	 * rightmost doubleword and the leftmost doubleword is zero to ignore
-	 * the leftmost product of V1.
-	 */
-	fpu_vleib(9, 0x20, 7);		  /* Shift by words */
-	fpu_vsrlb(2, 1, 9);		  /* Store remaining bits in V2 */
-	fpu_vupllf(1, 1);		  /* Split rightmost doubleword */
-	fpu_vgfmag(1, CONST_R5, 1, 2);	  /* V1 = (V1 * R5) XOR V2 */
-
-	/*
-	 * Apply a Barret reduction to compute the final 32-bit CRC value.
-	 *
-	 * The input values to the Barret reduction are the degree-63 polynomial
-	 * in V1 (R(x)), degree-32 generator polynomial, and the reduction
-	 * constant u.	The Barret reduction result is the CRC value of R(x) mod
-	 * P(x).
-	 *
-	 * The Barret reduction algorithm is defined as:
-	 *
-	 *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
-	 *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
-	 *    3. C(x)  = R(x) XOR T2(x) mod x^32
-	 *
-	 *  Note: The leftmost doubleword of vector register containing
-	 *  CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
-	 *  is zero and does not contribute to the final result.
-	 */
-
-	/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
-	fpu_vupllf(2, 1);
-	fpu_vgfmg(2, CONST_RU_POLY, 2);
-
-	/*
-	 * Compute the GF(2) product of the CRC polynomial with T1(x) in
-	 * V2 and XOR the intermediate result, T2(x), with the value in V1.
-	 * The final result is stored in word element 2 of V2.
-	 */
-	fpu_vupllf(2, 2);
-	fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
-
-	return fpu_vlgvf(2, 2);
-}
-
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
-	return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32_LE[0]);
-}
-
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
-	return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32C_LE[0]);
-}
diff --git a/arch/s390/lib/crypto/Kconfig b/arch/s390/lib/crypto/Kconfig
deleted file mode 100644
index e3f855ef4393..000000000000
--- a/arch/s390/lib/crypto/Kconfig
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-config CRYPTO_CHACHA_S390
-	tristate
-	default CRYPTO_LIB_CHACHA
-	select CRYPTO_LIB_CHACHA_GENERIC
-	select CRYPTO_ARCH_HAVE_LIB_CHACHA
-
-config CRYPTO_SHA256_S390
-	tristate
-	default CRYPTO_LIB_SHA256
-	select CRYPTO_ARCH_HAVE_LIB_SHA256
-	select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/s390/lib/crypto/Makefile b/arch/s390/lib/crypto/Makefile
deleted file mode 100644
index 920197967f46..000000000000
--- a/arch/s390/lib/crypto/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
-chacha_s390-y := chacha-glue.o chacha-s390.o
-
-obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256.o
diff --git a/arch/s390/lib/crypto/chacha-glue.c b/arch/s390/lib/crypto/chacha-glue.c
deleted file mode 100644
index f95ba3483bbc..000000000000
--- a/arch/s390/lib/crypto/chacha-glue.c
+++ /dev/null
@@ -1,56 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ChaCha stream cipher (s390 optimized)
- *
- * Copyright IBM Corp. 2021
- */
-
-#define KMSG_COMPONENT "chacha_s390"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <crypto/chacha.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <asm/fpu.h>
-#include "chacha-s390.h"
-
-void hchacha_block_arch(const struct chacha_state *state,
-			u32 out[HCHACHA_OUT_WORDS], int nrounds)
-{
-	/* TODO: implement hchacha_block_arch() in assembly */
-	hchacha_block_generic(state, out, nrounds);
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
-		       unsigned int bytes, int nrounds)
-{
-	/* s390 chacha20 implementation has 20 rounds hard-coded,
-	 * it cannot handle a block of data or less, but otherwise
-	 * it can handle data of arbitrary size
-	 */
-	if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) {
-		chacha_crypt_generic(state, dst, src, bytes, nrounds);
-	} else {
-		DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
-
-		kernel_fpu_begin(&vxstate, KERNEL_VXR);
-		chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]);
-		kernel_fpu_end(&vxstate, KERNEL_VXR);
-
-		state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) /
-				CHACHA_BLOCK_SIZE;
-	}
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-bool chacha_is_arch_optimized(void)
-{
-	return cpu_has_vx();
-}
-EXPORT_SYMBOL(chacha_is_arch_optimized);
-
-MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/s390/lib/crypto/chacha-s390.S b/arch/s390/lib/crypto/chacha-s390.S
deleted file mode 100644
index 63f3102678c0..000000000000
--- a/arch/s390/lib/crypto/chacha-s390.S
+++ /dev/null
@@ -1,908 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Original implementation written by Andy Polyakov, @dot-asm.
- * This is an adaptation of the original code for kernel use.
- *
- * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/nospec-insn.h>
-#include <asm/fpu-insn.h>
-
-#define SP	%r15
-#define FRAME	(16 * 8 + 4 * 8)
-
-	.data
-	.balign	32
-
-SYM_DATA_START_LOCAL(sigma)
-	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
-	.long	1,0,0,0
-	.long	2,0,0,0
-	.long	3,0,0,0
-	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap
-
-	.long	0,1,2,3
-	.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
-	.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
-	.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
-	.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
-SYM_DATA_END(sigma)
-
-	.previous
-
-	GEN_BR_THUNK %r14
-
-	.text
-
-#############################################################################
-# void chacha20_vx_4x(u8 *out, counst u8 *inp, size_t len,
-#		      counst u32 *key, const u32 *counter)
-
-#define	OUT		%r2
-#define	INP		%r3
-#define	LEN		%r4
-#define	KEY		%r5
-#define	COUNTER		%r6
-
-#define BEPERM		%v31
-#define CTR		%v26
-
-#define K0		%v16
-#define K1		%v17
-#define K2		%v18
-#define K3		%v19
-
-#define XA0		%v0
-#define XA1		%v1
-#define XA2		%v2
-#define XA3		%v3
-
-#define XB0		%v4
-#define XB1		%v5
-#define XB2		%v6
-#define XB3		%v7
-
-#define XC0		%v8
-#define XC1		%v9
-#define XC2		%v10
-#define XC3		%v11
-
-#define XD0		%v12
-#define XD1		%v13
-#define XD2		%v14
-#define XD3		%v15
-
-#define XT0		%v27
-#define XT1		%v28
-#define XT2		%v29
-#define XT3		%v30
-
-SYM_FUNC_START(chacha20_vx_4x)
-	stmg	%r6,%r7,6*8(SP)
-
-	larl	%r7,sigma
-	lhi	%r0,10
-	lhi	%r1,0
-
-	VL	K0,0,,%r7		# load sigma
-	VL	K1,0,,KEY		# load key
-	VL	K2,16,,KEY
-	VL	K3,0,,COUNTER		# load counter
-
-	VL	BEPERM,0x40,,%r7
-	VL	CTR,0x50,,%r7
-
-	VLM	XA0,XA3,0x60,%r7,4	# load [smashed] sigma
-
-	VREPF	XB0,K1,0		# smash the key
-	VREPF	XB1,K1,1
-	VREPF	XB2,K1,2
-	VREPF	XB3,K1,3
-
-	VREPF	XD0,K3,0
-	VREPF	XD1,K3,1
-	VREPF	XD2,K3,2
-	VREPF	XD3,K3,3
-	VAF	XD0,XD0,CTR
-
-	VREPF	XC0,K2,0
-	VREPF	XC1,K2,1
-	VREPF	XC2,K2,2
-	VREPF	XC3,K2,3
-
-.Loop_4x:
-	VAF	XA0,XA0,XB0
-	VX	XD0,XD0,XA0
-	VERLLF	XD0,XD0,16
-
-	VAF	XA1,XA1,XB1
-	VX	XD1,XD1,XA1
-	VERLLF	XD1,XD1,16
-
-	VAF	XA2,XA2,XB2
-	VX	XD2,XD2,XA2
-	VERLLF	XD2,XD2,16
-
-	VAF	XA3,XA3,XB3
-	VX	XD3,XD3,XA3
-	VERLLF	XD3,XD3,16
-
-	VAF	XC0,XC0,XD0
-	VX	XB0,XB0,XC0
-	VERLLF	XB0,XB0,12
-
-	VAF	XC1,XC1,XD1
-	VX	XB1,XB1,XC1
-	VERLLF	XB1,XB1,12
-
-	VAF	XC2,XC2,XD2
-	VX	XB2,XB2,XC2
-	VERLLF	XB2,XB2,12
-
-	VAF	XC3,XC3,XD3
-	VX	XB3,XB3,XC3
-	VERLLF	XB3,XB3,12
-
-	VAF	XA0,XA0,XB0
-	VX	XD0,XD0,XA0
-	VERLLF	XD0,XD0,8
-
-	VAF	XA1,XA1,XB1
-	VX	XD1,XD1,XA1
-	VERLLF	XD1,XD1,8
-
-	VAF	XA2,XA2,XB2
-	VX	XD2,XD2,XA2
-	VERLLF	XD2,XD2,8
-
-	VAF	XA3,XA3,XB3
-	VX	XD3,XD3,XA3
-	VERLLF	XD3,XD3,8
-
-	VAF	XC0,XC0,XD0
-	VX	XB0,XB0,XC0
-	VERLLF	XB0,XB0,7
-
-	VAF	XC1,XC1,XD1
-	VX	XB1,XB1,XC1
-	VERLLF	XB1,XB1,7
-
-	VAF	XC2,XC2,XD2
-	VX	XB2,XB2,XC2
-	VERLLF	XB2,XB2,7
-
-	VAF	XC3,XC3,XD3
-	VX	XB3,XB3,XC3
-	VERLLF	XB3,XB3,7
-
-	VAF	XA0,XA0,XB1
-	VX	XD3,XD3,XA0
-	VERLLF	XD3,XD3,16
-
-	VAF	XA1,XA1,XB2
-	VX	XD0,XD0,XA1
-	VERLLF	XD0,XD0,16
-
-	VAF	XA2,XA2,XB3
-	VX	XD1,XD1,XA2
-	VERLLF	XD1,XD1,16
-
-	VAF	XA3,XA3,XB0
-	VX	XD2,XD2,XA3
-	VERLLF	XD2,XD2,16
-
-	VAF	XC2,XC2,XD3
-	VX	XB1,XB1,XC2
-	VERLLF	XB1,XB1,12
-
-	VAF	XC3,XC3,XD0
-	VX	XB2,XB2,XC3
-	VERLLF	XB2,XB2,12
-
-	VAF	XC0,XC0,XD1
-	VX	XB3,XB3,XC0
-	VERLLF	XB3,XB3,12
-
-	VAF	XC1,XC1,XD2
-	VX	XB0,XB0,XC1
-	VERLLF	XB0,XB0,12
-
-	VAF	XA0,XA0,XB1
-	VX	XD3,XD3,XA0
-	VERLLF	XD3,XD3,8
-
-	VAF	XA1,XA1,XB2
-	VX	XD0,XD0,XA1
-	VERLLF	XD0,XD0,8
-
-	VAF	XA2,XA2,XB3
-	VX	XD1,XD1,XA2
-	VERLLF	XD1,XD1,8
-
-	VAF	XA3,XA3,XB0
-	VX	XD2,XD2,XA3
-	VERLLF	XD2,XD2,8
-
-	VAF	XC2,XC2,XD3
-	VX	XB1,XB1,XC2
-	VERLLF	XB1,XB1,7
-
-	VAF	XC3,XC3,XD0
-	VX	XB2,XB2,XC3
-	VERLLF	XB2,XB2,7
-
-	VAF	XC0,XC0,XD1
-	VX	XB3,XB3,XC0
-	VERLLF	XB3,XB3,7
-
-	VAF	XC1,XC1,XD2
-	VX	XB0,XB0,XC1
-	VERLLF	XB0,XB0,7
-	brct	%r0,.Loop_4x
-
-	VAF	XD0,XD0,CTR
-
-	VMRHF	XT0,XA0,XA1		# transpose data
-	VMRHF	XT1,XA2,XA3
-	VMRLF	XT2,XA0,XA1
-	VMRLF	XT3,XA2,XA3
-	VPDI	XA0,XT0,XT1,0b0000
-	VPDI	XA1,XT0,XT1,0b0101
-	VPDI	XA2,XT2,XT3,0b0000
-	VPDI	XA3,XT2,XT3,0b0101
-
-	VMRHF	XT0,XB0,XB1
-	VMRHF	XT1,XB2,XB3
-	VMRLF	XT2,XB0,XB1
-	VMRLF	XT3,XB2,XB3
-	VPDI	XB0,XT0,XT1,0b0000
-	VPDI	XB1,XT0,XT1,0b0101
-	VPDI	XB2,XT2,XT3,0b0000
-	VPDI	XB3,XT2,XT3,0b0101
-
-	VMRHF	XT0,XC0,XC1
-	VMRHF	XT1,XC2,XC3
-	VMRLF	XT2,XC0,XC1
-	VMRLF	XT3,XC2,XC3
-	VPDI	XC0,XT0,XT1,0b0000
-	VPDI	XC1,XT0,XT1,0b0101
-	VPDI	XC2,XT2,XT3,0b0000
-	VPDI	XC3,XT2,XT3,0b0101
-
-	VMRHF	XT0,XD0,XD1
-	VMRHF	XT1,XD2,XD3
-	VMRLF	XT2,XD0,XD1
-	VMRLF	XT3,XD2,XD3
-	VPDI	XD0,XT0,XT1,0b0000
-	VPDI	XD1,XT0,XT1,0b0101
-	VPDI	XD2,XT2,XT3,0b0000
-	VPDI	XD3,XT2,XT3,0b0101
-
-	VAF	XA0,XA0,K0
-	VAF	XB0,XB0,K1
-	VAF	XC0,XC0,K2
-	VAF	XD0,XD0,K3
-
-	VPERM	XA0,XA0,XA0,BEPERM
-	VPERM	XB0,XB0,XB0,BEPERM
-	VPERM	XC0,XC0,XC0,BEPERM
-	VPERM	XD0,XD0,XD0,BEPERM
-
-	VLM	XT0,XT3,0,INP,0
-
-	VX	XT0,XT0,XA0
-	VX	XT1,XT1,XB0
-	VX	XT2,XT2,XC0
-	VX	XT3,XT3,XD0
-
-	VSTM	XT0,XT3,0,OUT,0
-
-	la	INP,0x40(INP)
-	la	OUT,0x40(OUT)
-	aghi	LEN,-0x40
-
-	VAF	XA0,XA1,K0
-	VAF	XB0,XB1,K1
-	VAF	XC0,XC1,K2
-	VAF	XD0,XD1,K3
-
-	VPERM	XA0,XA0,XA0,BEPERM
-	VPERM	XB0,XB0,XB0,BEPERM
-	VPERM	XC0,XC0,XC0,BEPERM
-	VPERM	XD0,XD0,XD0,BEPERM
-
-	clgfi	LEN,0x40
-	jl	.Ltail_4x
-
-	VLM	XT0,XT3,0,INP,0
-
-	VX	XT0,XT0,XA0
-	VX	XT1,XT1,XB0
-	VX	XT2,XT2,XC0
-	VX	XT3,XT3,XD0
-
-	VSTM	XT0,XT3,0,OUT,0
-
-	la	INP,0x40(INP)
-	la	OUT,0x40(OUT)
-	aghi	LEN,-0x40
-	je	.Ldone_4x
-
-	VAF	XA0,XA2,K0
-	VAF	XB0,XB2,K1
-	VAF	XC0,XC2,K2
-	VAF	XD0,XD2,K3
-
-	VPERM	XA0,XA0,XA0,BEPERM
-	VPERM	XB0,XB0,XB0,BEPERM
-	VPERM	XC0,XC0,XC0,BEPERM
-	VPERM	XD0,XD0,XD0,BEPERM
-
-	clgfi	LEN,0x40
-	jl	.Ltail_4x
-
-	VLM	XT0,XT3,0,INP,0
-
-	VX	XT0,XT0,XA0
-	VX	XT1,XT1,XB0
-	VX	XT2,XT2,XC0
-	VX	XT3,XT3,XD0
-
-	VSTM	XT0,XT3,0,OUT,0
-
-	la	INP,0x40(INP)
-	la	OUT,0x40(OUT)
-	aghi	LEN,-0x40
-	je	.Ldone_4x
-
-	VAF	XA0,XA3,K0
-	VAF	XB0,XB3,K1
-	VAF	XC0,XC3,K2
-	VAF	XD0,XD3,K3
-
-	VPERM	XA0,XA0,XA0,BEPERM
-	VPERM	XB0,XB0,XB0,BEPERM
-	VPERM	XC0,XC0,XC0,BEPERM
-	VPERM	XD0,XD0,XD0,BEPERM
-
-	clgfi	LEN,0x40
-	jl	.Ltail_4x
-
-	VLM	XT0,XT3,0,INP,0
-
-	VX	XT0,XT0,XA0
-	VX	XT1,XT1,XB0
-	VX	XT2,XT2,XC0
-	VX	XT3,XT3,XD0
-
-	VSTM	XT0,XT3,0,OUT,0
-
-.Ldone_4x:
-	lmg	%r6,%r7,6*8(SP)
-	BR_EX	%r14
-
-.Ltail_4x:
-	VLR	XT0,XC0
-	VLR	XT1,XD0
-
-	VST	XA0,8*8+0x00,,SP
-	VST	XB0,8*8+0x10,,SP
-	VST	XT0,8*8+0x20,,SP
-	VST	XT1,8*8+0x30,,SP
-
-	lghi	%r1,0
-
-.Loop_tail_4x:
-	llgc	%r5,0(%r1,INP)
-	llgc	%r6,8*8(%r1,SP)
-	xr	%r6,%r5
-	stc	%r6,0(%r1,OUT)
-	la	%r1,1(%r1)
-	brct	LEN,.Loop_tail_4x
-
-	lmg	%r6,%r7,6*8(SP)
-	BR_EX	%r14
-SYM_FUNC_END(chacha20_vx_4x)
-
-#undef	OUT
-#undef	INP
-#undef	LEN
-#undef	KEY
-#undef	COUNTER
-
-#undef BEPERM
-
-#undef K0
-#undef K1
-#undef K2
-#undef K3
-
-
-#############################################################################
-# void chacha20_vx(u8 *out, counst u8 *inp, size_t len,
-#		   counst u32 *key, const u32 *counter)
-
-#define	OUT		%r2
-#define	INP		%r3
-#define	LEN		%r4
-#define	KEY		%r5
-#define	COUNTER		%r6
-
-#define BEPERM		%v31
-
-#define K0		%v27
-#define K1		%v24
-#define K2		%v25
-#define K3		%v26
-
-#define A0		%v0
-#define B0		%v1
-#define C0		%v2
-#define D0		%v3
-
-#define A1		%v4
-#define B1		%v5
-#define C1		%v6
-#define D1		%v7
-
-#define A2		%v8
-#define B2		%v9
-#define C2		%v10
-#define D2		%v11
-
-#define A3		%v12
-#define B3		%v13
-#define C3		%v14
-#define D3		%v15
-
-#define A4		%v16
-#define B4		%v17
-#define C4		%v18
-#define D4		%v19
-
-#define A5		%v20
-#define B5		%v21
-#define C5		%v22
-#define D5		%v23
-
-#define T0		%v27
-#define T1		%v28
-#define T2		%v29
-#define T3		%v30
-
-SYM_FUNC_START(chacha20_vx)
-	clgfi	LEN,256
-	jle	chacha20_vx_4x
-	stmg	%r6,%r7,6*8(SP)
-
-	lghi	%r1,-FRAME
-	lgr	%r0,SP
-	la	SP,0(%r1,SP)
-	stg	%r0,0(SP)		# back-chain
-
-	larl	%r7,sigma
-	lhi	%r0,10
-
-	VLM	K1,K2,0,KEY,0		# load key
-	VL	K3,0,,COUNTER		# load counter
-
-	VLM	K0,BEPERM,0,%r7,4	# load sigma, increments, ...
-
-.Loop_outer_vx:
-	VLR	A0,K0
-	VLR	B0,K1
-	VLR	A1,K0
-	VLR	B1,K1
-	VLR	A2,K0
-	VLR	B2,K1
-	VLR	A3,K0
-	VLR	B3,K1
-	VLR	A4,K0
-	VLR	B4,K1
-	VLR	A5,K0
-	VLR	B5,K1
-
-	VLR	D0,K3
-	VAF	D1,K3,T1		# K[3]+1
-	VAF	D2,K3,T2		# K[3]+2
-	VAF	D3,K3,T3		# K[3]+3
-	VAF	D4,D2,T2		# K[3]+4
-	VAF	D5,D2,T3		# K[3]+5
-
-	VLR	C0,K2
-	VLR	C1,K2
-	VLR	C2,K2
-	VLR	C3,K2
-	VLR	C4,K2
-	VLR	C5,K2
-
-	VLR	T1,D1
-	VLR	T2,D2
-	VLR	T3,D3
-
-.Loop_vx:
-	VAF	A0,A0,B0
-	VAF	A1,A1,B1
-	VAF	A2,A2,B2
-	VAF	A3,A3,B3
-	VAF	A4,A4,B4
-	VAF	A5,A5,B5
-	VX	D0,D0,A0
-	VX	D1,D1,A1
-	VX	D2,D2,A2
-	VX	D3,D3,A3
-	VX	D4,D4,A4
-	VX	D5,D5,A5
-	VERLLF	D0,D0,16
-	VERLLF	D1,D1,16
-	VERLLF	D2,D2,16
-	VERLLF	D3,D3,16
-	VERLLF	D4,D4,16
-	VERLLF	D5,D5,16
-
-	VAF	C0,C0,D0
-	VAF	C1,C1,D1
-	VAF	C2,C2,D2
-	VAF	C3,C3,D3
-	VAF	C4,C4,D4
-	VAF	C5,C5,D5
-	VX	B0,B0,C0
-	VX	B1,B1,C1
-	VX	B2,B2,C2
-	VX	B3,B3,C3
-	VX	B4,B4,C4
-	VX	B5,B5,C5
-	VERLLF	B0,B0,12
-	VERLLF	B1,B1,12
-	VERLLF	B2,B2,12
-	VERLLF	B3,B3,12
-	VERLLF	B4,B4,12
-	VERLLF	B5,B5,12
-
-	VAF	A0,A0,B0
-	VAF	A1,A1,B1
-	VAF	A2,A2,B2
-	VAF	A3,A3,B3
-	VAF	A4,A4,B4
-	VAF	A5,A5,B5
-	VX	D0,D0,A0
-	VX	D1,D1,A1
-	VX	D2,D2,A2
-	VX	D3,D3,A3
-	VX	D4,D4,A4
-	VX	D5,D5,A5
-	VERLLF	D0,D0,8
-	VERLLF	D1,D1,8
-	VERLLF	D2,D2,8
-	VERLLF	D3,D3,8
-	VERLLF	D4,D4,8
-	VERLLF	D5,D5,8
-
-	VAF	C0,C0,D0
-	VAF	C1,C1,D1
-	VAF	C2,C2,D2
-	VAF	C3,C3,D3
-	VAF	C4,C4,D4
-	VAF	C5,C5,D5
-	VX	B0,B0,C0
-	VX	B1,B1,C1
-	VX	B2,B2,C2
-	VX	B3,B3,C3
-	VX	B4,B4,C4
-	VX	B5,B5,C5
-	VERLLF	B0,B0,7
-	VERLLF	B1,B1,7
-	VERLLF	B2,B2,7
-	VERLLF	B3,B3,7
-	VERLLF	B4,B4,7
-	VERLLF	B5,B5,7
-
-	VSLDB	C0,C0,C0,8
-	VSLDB	C1,C1,C1,8
-	VSLDB	C2,C2,C2,8
-	VSLDB	C3,C3,C3,8
-	VSLDB	C4,C4,C4,8
-	VSLDB	C5,C5,C5,8
-	VSLDB	B0,B0,B0,4
-	VSLDB	B1,B1,B1,4
-	VSLDB	B2,B2,B2,4
-	VSLDB	B3,B3,B3,4
-	VSLDB	B4,B4,B4,4
-	VSLDB	B5,B5,B5,4
-	VSLDB	D0,D0,D0,12
-	VSLDB	D1,D1,D1,12
-	VSLDB	D2,D2,D2,12
-	VSLDB	D3,D3,D3,12
-	VSLDB	D4,D4,D4,12
-	VSLDB	D5,D5,D5,12
-
-	VAF	A0,A0,B0
-	VAF	A1,A1,B1
-	VAF	A2,A2,B2
-	VAF	A3,A3,B3
-	VAF	A4,A4,B4
-	VAF	A5,A5,B5
-	VX	D0,D0,A0
-	VX	D1,D1,A1
-	VX	D2,D2,A2
-	VX	D3,D3,A3
-	VX	D4,D4,A4
-	VX	D5,D5,A5
-	VERLLF	D0,D0,16
-	VERLLF	D1,D1,16
-	VERLLF	D2,D2,16
-	VERLLF	D3,D3,16
-	VERLLF	D4,D4,16
-	VERLLF	D5,D5,16
-
-	VAF	C0,C0,D0
-	VAF	C1,C1,D1
-	VAF	C2,C2,D2
-	VAF	C3,C3,D3
-	VAF	C4,C4,D4
-	VAF	C5,C5,D5
-	VX	B0,B0,C0
-	VX	B1,B1,C1
-	VX	B2,B2,C2
-	VX	B3,B3,C3
-	VX	B4,B4,C4
-	VX	B5,B5,C5
-	VERLLF	B0,B0,12
-	VERLLF	B1,B1,12
-	VERLLF	B2,B2,12
-	VERLLF	B3,B3,12
-	VERLLF	B4,B4,12
-	VERLLF	B5,B5,12
-
-	VAF	A0,A0,B0
-	VAF	A1,A1,B1
-	VAF	A2,A2,B2
-	VAF	A3,A3,B3
-	VAF	A4,A4,B4
-	VAF	A5,A5,B5
-	VX	D0,D0,A0
-	VX	D1,D1,A1
-	VX	D2,D2,A2
-	VX	D3,D3,A3
-	VX	D4,D4,A4
-	VX	D5,D5,A5
-	VERLLF	D0,D0,8
-	VERLLF	D1,D1,8
-	VERLLF	D2,D2,8
-	VERLLF	D3,D3,8
-	VERLLF	D4,D4,8
-	VERLLF	D5,D5,8
-
-	VAF	C0,C0,D0
-	VAF	C1,C1,D1
-	VAF	C2,C2,D2
-	VAF	C3,C3,D3
-	VAF	C4,C4,D4
-	VAF	C5,C5,D5
-	VX	B0,B0,C0
-	VX	B1,B1,C1
-	VX	B2,B2,C2
-	VX	B3,B3,C3
-	VX	B4,B4,C4
-	VX	B5,B5,C5
-	VERLLF	B0,B0,7
-	VERLLF	B1,B1,7
-	VERLLF	B2,B2,7
-	VERLLF	B3,B3,7
-	VERLLF	B4,B4,7
-	VERLLF	B5,B5,7
-
-	VSLDB	C0,C0,C0,8
-	VSLDB	C1,C1,C1,8
-	VSLDB	C2,C2,C2,8
-	VSLDB	C3,C3,C3,8
-	VSLDB	C4,C4,C4,8
-	VSLDB	C5,C5,C5,8
-	VSLDB	B0,B0,B0,12
-	VSLDB	B1,B1,B1,12
-	VSLDB	B2,B2,B2,12
-	VSLDB	B3,B3,B3,12
-	VSLDB	B4,B4,B4,12
-	VSLDB	B5,B5,B5,12
-	VSLDB	D0,D0,D0,4
-	VSLDB	D1,D1,D1,4
-	VSLDB	D2,D2,D2,4
-	VSLDB	D3,D3,D3,4
-	VSLDB	D4,D4,D4,4
-	VSLDB	D5,D5,D5,4
-	brct	%r0,.Loop_vx
-
-	VAF	A0,A0,K0
-	VAF	B0,B0,K1
-	VAF	C0,C0,K2
-	VAF	D0,D0,K3
-	VAF	A1,A1,K0
-	VAF	D1,D1,T1		# +K[3]+1
-
-	VPERM	A0,A0,A0,BEPERM
-	VPERM	B0,B0,B0,BEPERM
-	VPERM	C0,C0,C0,BEPERM
-	VPERM	D0,D0,D0,BEPERM
-
-	clgfi	LEN,0x40
-	jl	.Ltail_vx
-
-	VAF	D2,D2,T2		# +K[3]+2
-	VAF	D3,D3,T3		# +K[3]+3
-	VLM	T0,T3,0,INP,0
-
-	VX	A0,A0,T0
-	VX	B0,B0,T1
-	VX	C0,C0,T2
-	VX	D0,D0,T3
-
-	VLM	K0,T3,0,%r7,4		# re-load sigma and increments
-
-	VSTM	A0,D0,0,OUT,0
-
-	la	INP,0x40(INP)
-	la	OUT,0x40(OUT)
-	aghi	LEN,-0x40
-	je	.Ldone_vx
-
-	VAF	B1,B1,K1
-	VAF	C1,C1,K2
-
-	VPERM	A0,A1,A1,BEPERM
-	VPERM	B0,B1,B1,BEPERM
-	VPERM	C0,C1,C1,BEPERM
-	VPERM	D0,D1,D1,BEPERM
-
-	clgfi	LEN,0x40
-	jl	.Ltail_vx
-
-	VLM	A1,D1,0,INP,0
-
-	VX	A0,A0,A1
-	VX	B0,B0,B1
-	VX	C0,C0,C1
-	VX	D0,D0,D1
-
-	VSTM	A0,D0,0,OUT,0
-
-	la	INP,0x40(INP)
-	la	OUT,0x40(OUT)
-	aghi	LEN,-0x40
-	je	.Ldone_vx
-
-	VAF	A2,A2,K0
-	VAF	B2,B2,K1
-	VAF	C2,C2,K2
-
-	VPERM	A0,A2,A2,BEPERM
-	VPERM	B0,B2,B2,BEPERM
-	VPERM	C0,C2,C2,BEPERM
-	VPERM	D0,D2,D2,BEPERM
-
-	clgfi	LEN,0x40
-	jl	.Ltail_vx
-
-	VLM	A1,D1,0,INP,0
-
-	VX	A0,A0,A1
-	VX	B0,B0,B1
-	VX	C0,C0,C1
-	VX	D0,D0,D1
-
-	VSTM	A0,D0,0,OUT,0
-
-	la	INP,0x40(INP)
-	la	OUT,0x40(OUT)
-	aghi	LEN,-0x40
-	je	.Ldone_vx
-
-	VAF	A3,A3,K0
-	VAF	B3,B3,K1
-	VAF	C3,C3,K2
-	VAF	D2,K3,T3		# K[3]+3
-
-	VPERM	A0,A3,A3,BEPERM
-	VPERM	B0,B3,B3,BEPERM
-	VPERM	C0,C3,C3,BEPERM
-	VPERM	D0,D3,D3,BEPERM
-
-	clgfi	LEN,0x40
-	jl	.Ltail_vx
-
-	VAF	D3,D2,T1		# K[3]+4
-	VLM	A1,D1,0,INP,0
-
-	VX	A0,A0,A1
-	VX	B0,B0,B1
-	VX	C0,C0,C1
-	VX	D0,D0,D1
-
-	VSTM	A0,D0,0,OUT,0
-
-	la	INP,0x40(INP)
-	la	OUT,0x40(OUT)
-	aghi	LEN,-0x40
-	je	.Ldone_vx
-
-	VAF	A4,A4,K0
-	VAF	B4,B4,K1
-	VAF	C4,C4,K2
-	VAF	D4,D4,D3		# +K[3]+4
-	VAF	D3,D3,T1		# K[3]+5
-	VAF	K3,D2,T3		# K[3]+=6
-
-	VPERM	A0,A4,A4,BEPERM
-	VPERM	B0,B4,B4,BEPERM
-	VPERM	C0,C4,C4,BEPERM
-	VPERM	D0,D4,D4,BEPERM
-
-	clgfi	LEN,0x40
-	jl	.Ltail_vx
-
-	VLM	A1,D1,0,INP,0
-
-	VX	A0,A0,A1
-	VX	B0,B0,B1
-	VX	C0,C0,C1
-	VX	D0,D0,D1
-
-	VSTM	A0,D0,0,OUT,0
-
-	la	INP,0x40(INP)
-	la	OUT,0x40(OUT)
-	aghi	LEN,-0x40
-	je	.Ldone_vx
-
-	VAF	A5,A5,K0
-	VAF	B5,B5,K1
-	VAF	C5,C5,K2
-	VAF	D5,D5,D3		# +K[3]+5
-
-	VPERM	A0,A5,A5,BEPERM
-	VPERM	B0,B5,B5,BEPERM
-	VPERM	C0,C5,C5,BEPERM
-	VPERM	D0,D5,D5,BEPERM
-
-	clgfi	LEN,0x40
-	jl	.Ltail_vx
-
-	VLM	A1,D1,0,INP,0
-
-	VX	A0,A0,A1
-	VX	B0,B0,B1
-	VX	C0,C0,C1
-	VX	D0,D0,D1
-
-	VSTM	A0,D0,0,OUT,0
-
-	la	INP,0x40(INP)
-	la	OUT,0x40(OUT)
-	lhi	%r0,10
-	aghi	LEN,-0x40
-	jne	.Loop_outer_vx
-
-.Ldone_vx:
-	lmg	%r6,%r7,FRAME+6*8(SP)
-	la	SP,FRAME(SP)
-	BR_EX	%r14
-
-.Ltail_vx:
-	VSTM	A0,D0,8*8,SP,3
-	lghi	%r1,0
-
-.Loop_tail_vx:
-	llgc	%r5,0(%r1,INP)
-	llgc	%r6,8*8(%r1,SP)
-	xr	%r6,%r5
-	stc	%r6,0(%r1,OUT)
-	la	%r1,1(%r1)
-	brct	LEN,.Loop_tail_vx
-
-	lmg	%r6,%r7,FRAME+6*8(SP)
-	la	SP,FRAME(SP)
-	BR_EX	%r14
-SYM_FUNC_END(chacha20_vx)
-
-.previous
diff --git a/arch/s390/lib/crypto/chacha-s390.h b/arch/s390/lib/crypto/chacha-s390.h
deleted file mode 100644
index 733744ce30f5..000000000000
--- a/arch/s390/lib/crypto/chacha-s390.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * s390 ChaCha stream cipher.
- *
- * Copyright IBM Corp. 2021
- */
-
-#ifndef _CHACHA_S390_H
-#define _CHACHA_S390_H
-
-void chacha20_vx(u8 *out, const u8 *inp, size_t len, const u32 *key,
-		 const u32 *counter);
-
-#endif /* _CHACHA_S390_H */
diff --git a/arch/s390/lib/crypto/sha256.c b/arch/s390/lib/crypto/sha256.c
deleted file mode 100644
index 7dfe120fafab..000000000000
--- a/arch/s390/lib/crypto/sha256.c
+++ /dev/null
@@ -1,47 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SHA-256 optimized using the CP Assist for Cryptographic Functions (CPACF)
- *
- * Copyright 2025 Google LLC
- */
-#include <asm/cpacf.h>
-#include <crypto/internal/sha2.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_cpacf_sha256);
-
-void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
-			const u8 *data, size_t nblocks)
-{
-	if (static_branch_likely(&have_cpacf_sha256))
-		cpacf_kimd(CPACF_KIMD_SHA_256, state, data,
-			   nblocks * SHA256_BLOCK_SIZE);
-	else
-		sha256_blocks_generic(state, data, nblocks);
-}
-EXPORT_SYMBOL_GPL(sha256_blocks_arch);
-
-bool sha256_is_arch_optimized(void)
-{
-	return static_key_enabled(&have_cpacf_sha256);
-}
-EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
-
-static int __init sha256_s390_mod_init(void)
-{
-	if (cpu_have_feature(S390_CPU_FEATURE_MSA) &&
-	    cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
-		static_branch_enable(&have_cpacf_sha256);
-	return 0;
-}
-subsys_initcall(sha256_s390_mod_init);
-
-static void __exit sha256_s390_mod_exit(void)
-{
-}
-module_exit(sha256_s390_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA-256 using the CP Assist for Cryptographic Functions (CPACF)");
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index be14c58cb989..c1ea14e3c927 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/processor.h>
+#include <linux/export.h>
 #include <linux/delay.h>
 #include <asm/div64.h>
 #include <asm/timex.h>
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index fa7d98fa1320..1a6ba105e071 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -8,11 +8,13 @@
  *		 Gerald Schaefer (gerald.schaefer@de.ibm.com)
  */
 
+#include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <asm/asm-extable.h>
 #include <asm/ctlreg.h>
+#include <asm/skey.h>
 
 #ifdef CONFIG_DEBUG_ENTRY
 void debug_user_asce(int exit)
@@ -145,3 +147,189 @@ unsigned long _copy_to_user_key(void __user *to, const void *from,
 	return raw_copy_to_user_key(to, from, n, key);
 }
 EXPORT_SYMBOL(_copy_to_user_key);
+
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsigned int *uval,
+						    unsigned int old, unsigned int new,
+						    unsigned int mask, unsigned long key)
+{
+	unsigned long count;
+	unsigned int prev;
+	bool sacf_flag;
+	int rc = 0;
+
+	skey_regions_initialize();
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile(
+		"20:	spka	0(%[key])\n"
+		"	sacf	256\n"
+		"	llill	%[count],%[max_loops]\n"
+		"0:	l	%[prev],%[address]\n"
+		"1:	nr	%[prev],%[mask]\n"
+		"	xilf	%[mask],0xffffffff\n"
+		"	or	%[new],%[prev]\n"
+		"	or	%[prev],%[tmp]\n"
+		"2:	lr	%[tmp],%[prev]\n"
+		"3:	cs	%[prev],%[new],%[address]\n"
+		"4:	jnl	5f\n"
+		"	xr	%[tmp],%[prev]\n"
+		"	xr	%[new],%[tmp]\n"
+		"	nr	%[tmp],%[mask]\n"
+		"	jnz	5f\n"
+		"	brct	%[count],2b\n"
+		"5:	sacf	768\n"
+		"	spka	%[default_key]\n"
+		"21:\n"
+		EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+		EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+		EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+		EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+		SKEY_REGION(20b, 21b)
+		: [rc] "+&d" (rc),
+		[prev] "=&d" (prev),
+		[address] "+Q" (*(int *)address),
+		[tmp] "+&d" (old),
+		[new] "+&d" (new),
+		[mask] "+&d" (mask),
+		[count] "=a" (count)
+		: [key] "%[count]" (key << 4),
+		[default_key] "J" (PAGE_DEFAULT_KEY),
+		[max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+		: "memory", "cc");
+	disable_sacf_uaccess(sacf_flag);
+	*uval = prev;
+	if (!count)
+		rc = -EAGAIN;
+	return rc;
+}
+
+int __kprobes __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
+				  unsigned char old, unsigned char new, unsigned long key)
+{
+	unsigned int prev, shift, mask, _old, _new;
+	int rc;
+
+	shift = (3 ^ (address & 3)) << 3;
+	address ^= address & 3;
+	_old = (unsigned int)old << shift;
+	_new = (unsigned int)new << shift;
+	mask = ~(0xff << shift);
+	rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
+	*uval = prev >> shift;
+	return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key1);
+
+int __kprobes __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
+				  unsigned short old, unsigned short new, unsigned long key)
+{
+	unsigned int prev, shift, mask, _old, _new;
+	int rc;
+
+	shift = (2 ^ (address & 2)) << 3;
+	address ^= address & 2;
+	_old = (unsigned int)old << shift;
+	_new = (unsigned int)new << shift;
+	mask = ~(0xffff << shift);
+	rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
+	*uval = prev >> shift;
+	return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key2);
+
+int __kprobes __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
+				  unsigned int old, unsigned int new, unsigned long key)
+{
+	unsigned int prev = old;
+	bool sacf_flag;
+	int rc = 0;
+
+	skey_regions_initialize();
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile(
+		"20:	spka	0(%[key])\n"
+		"	sacf	256\n"
+		"0:	cs	%[prev],%[new],%[address]\n"
+		"1:	sacf	768\n"
+		"	spka	%[default_key]\n"
+		"21:\n"
+		EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+		EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+		SKEY_REGION(20b, 21b)
+		: [rc] "+&d" (rc),
+		[prev] "+&d" (prev),
+		[address] "+Q" (*(int *)address)
+		: [new] "d" (new),
+		[key] "a" (key << 4),
+		[default_key] "J" (PAGE_DEFAULT_KEY)
+		: "memory", "cc");
+	disable_sacf_uaccess(sacf_flag);
+	*uval = prev;
+	return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key4);
+
+int __kprobes __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
+				  unsigned long old, unsigned long new, unsigned long key)
+{
+	unsigned long prev = old;
+	bool sacf_flag;
+	int rc = 0;
+
+	skey_regions_initialize();
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile(
+		"20:	spka	0(%[key])\n"
+		"	sacf	256\n"
+		"0:	csg	%[prev],%[new],%[address]\n"
+		"1:	sacf	768\n"
+		"	spka	%[default_key]\n"
+		"21:\n"
+		EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+		EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+		SKEY_REGION(20b, 21b)
+		: [rc] "+&d" (rc),
+		[prev] "+&d" (prev),
+		[address] "+QS" (*(long *)address)
+		: [new] "d" (new),
+		[key] "a" (key << 4),
+		[default_key] "J" (PAGE_DEFAULT_KEY)
+		: "memory", "cc");
+	disable_sacf_uaccess(sacf_flag);
+	*uval = prev;
+	return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key8);
+
+int __kprobes __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
+				   __uint128_t old, __uint128_t new, unsigned long key)
+{
+	__uint128_t prev = old;
+	bool sacf_flag;
+	int rc = 0;
+
+	skey_regions_initialize();
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile(
+		"20:	spka	0(%[key])\n"
+		"	sacf	256\n"
+		"0:	cdsg	%[prev],%[new],%[address]\n"
+		"1:	sacf	768\n"
+		"	spka	%[default_key]\n"
+		"21:\n"
+		EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
+		EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+		SKEY_REGION(20b, 21b)
+		: [rc] "+&d" (rc),
+		[prev] "+&d" (prev),
+		[address] "+QS" (*(__int128_t *)address)
+		: [new] "d" (new),
+		[key] "a" (key << 4),
+		[default_key] "J" (PAGE_DEFAULT_KEY)
+		: "memory", "cc");
+	disable_sacf_uaccess(sacf_flag);
+	*uval = prev;
+	return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key16);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 9726b91fe7e4..bd0401cc7ca5 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -12,3 +12,5 @@ obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PTDUMP)		+= dump_pagetables.o
 obj-$(CONFIG_PGSTE)		+= gmap.o
 obj-$(CONFIG_PFAULT)		+= pfault.o
+
+obj-$(subst m,y,$(CONFIG_KVM))	+= gmap_helpers.o
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index d3e943752fa0..9af2aae0a515 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -147,11 +147,48 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
 	}
 }
 
+static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
+{
+	note_page(pt_st, addr, 4, pte_val(pte));
+}
+
+static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
+{
+	note_page(pt_st, addr, 3, pmd_val(pmd));
+}
+
+static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
+{
+	note_page(pt_st, addr, 2, pud_val(pud));
+}
+
+static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
+{
+	note_page(pt_st, addr, 1, p4d_val(p4d));
+}
+
+static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
+{
+	note_page(pt_st, addr, 0, pgd_val(pgd));
+}
+
+static void note_page_flush(struct ptdump_state *pt_st)
+{
+	pte_t pte_zero = {0};
+
+	note_page(pt_st, 0, -1, pte_val(pte_zero));
+}
+
 bool ptdump_check_wx(void)
 {
 	struct pg_state st = {
 		.ptdump = {
-			.note_page = note_page,
+			.note_page_pte = note_page_pte,
+			.note_page_pmd = note_page_pmd,
+			.note_page_pud = note_page_pud,
+			.note_page_p4d = note_page_p4d,
+			.note_page_pgd = note_page_pgd,
+			.note_page_flush = note_page_flush,
 			.range = (struct ptdump_range[]) {
 				{.start = 0, .end = max_addr},
 				{.start = 0, .end = 0},
@@ -190,7 +227,12 @@ static int ptdump_show(struct seq_file *m, void *v)
 {
 	struct pg_state st = {
 		.ptdump = {
-			.note_page = note_page,
+			.note_page_pte = note_page_pte,
+			.note_page_pmd = note_page_pmd,
+			.note_page_pud = note_page_pud,
+			.note_page_p4d = note_page_p4d,
+			.note_page_pgd = note_page_pgd,
+			.note_page_flush = note_page_flush,
 			.range = (struct ptdump_range[]) {
 				{.start = 0, .end = max_addr},
 				{.start = 0, .end = 0},
@@ -205,11 +247,9 @@ static int ptdump_show(struct seq_file *m, void *v)
 		.marker = markers,
 	};
 
-	get_online_mems();
 	mutex_lock(&cpa_mutex);
 	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 	mutex_unlock(&cpa_mutex);
-	put_online_mems();
 	return 0;
 }
 DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index da84ff6770de..e1ad05bfd28a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -40,7 +40,6 @@
 #include <asm/ptrace.h>
 #include <asm/fault.h>
 #include <asm/diag.h>
-#include <asm/gmap.h>
 #include <asm/irq.h>
 #include <asm/facility.h>
 #include <asm/uv.h>
@@ -442,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs)
 		if (rc)
 			BUG();
 	} else {
+		if (faulthandler_disabled())
+			return handle_fault_error_nolock(regs, 0);
 		mm = current->mm;
 		mmap_read_lock(mm);
 		vma = find_vma(mm, addr);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index a94bd4870c65..c7defe4ed1f6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/cpufeature.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/pagewalk.h>
 #include <linux/swap.h>
@@ -22,9 +23,9 @@
 #include <asm/page-states.h>
 #include <asm/pgalloc.h>
 #include <asm/machine.h>
+#include <asm/gmap_helpers.h>
 #include <asm/gmap.h>
 #include <asm/page.h>
-#include <asm/tlb.h>
 
 /*
  * The address is saved in a radix tree directly; NULL would be ambiguous,
@@ -620,63 +621,20 @@ EXPORT_SYMBOL(__gmap_link);
  */
 void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
 {
-	struct vm_area_struct *vma;
 	unsigned long vmaddr;
-	spinlock_t *ptl;
-	pte_t *ptep;
+
+	mmap_assert_locked(gmap->mm);
 
 	/* Find the vm address for the guest address */
 	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
 						   gaddr >> PMD_SHIFT);
 	if (vmaddr) {
 		vmaddr |= gaddr & ~PMD_MASK;
-
-		vma = vma_lookup(gmap->mm, vmaddr);
-		if (!vma || is_vm_hugetlb_page(vma))
-			return;
-
-		/* Get pointer to the page table entry */
-		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
-		if (likely(ptep)) {
-			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
-			pte_unmap_unlock(ptep, ptl);
-		}
+		gmap_helper_zap_one_page(gmap->mm, vmaddr);
 	}
 }
 EXPORT_SYMBOL_GPL(__gmap_zap);
 
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
-{
-	unsigned long gaddr, vmaddr, size;
-	struct vm_area_struct *vma;
-
-	mmap_read_lock(gmap->mm);
-	for (gaddr = from; gaddr < to;
-	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
-		/* Find the vm address for the guest address */
-		vmaddr = (unsigned long)
-			radix_tree_lookup(&gmap->guest_to_host,
-					  gaddr >> PMD_SHIFT);
-		if (!vmaddr)
-			continue;
-		vmaddr |= gaddr & ~PMD_MASK;
-		/* Find vma in the parent mm */
-		vma = find_vma(gmap->mm, vmaddr);
-		if (!vma)
-			continue;
-		/*
-		 * We do not discard pages that are backed by
-		 * hugetlbfs, so we don't have to refault them.
-		 */
-		if (is_vm_hugetlb_page(vma))
-			continue;
-		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
-		zap_page_range_single(vma, vmaddr, size, NULL);
-	}
-	mmap_read_unlock(gmap->mm);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
 static LIST_HEAD(gmap_notifier_list);
 static DEFINE_SPINLOCK(gmap_notifier_lock);
 
@@ -2269,138 +2227,6 @@ int s390_enable_sie(void)
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
-static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
-				   unsigned long end, struct mm_walk *walk)
-{
-	unsigned long *found_addr = walk->private;
-
-	/* Return 1 of the page is a zeropage. */
-	if (is_zero_pfn(pte_pfn(*pte))) {
-		/*
-		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
-		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
-		 * currently only works in COW mappings, which is also where
-		 * mm_forbids_zeropage() is checked.
-		 */
-		if (!is_cow_mapping(walk->vma->vm_flags))
-			return -EFAULT;
-
-		*found_addr = addr;
-		return 1;
-	}
-	return 0;
-}
-
-static const struct mm_walk_ops find_zeropage_ops = {
-	.pte_entry	= find_zeropage_pte_entry,
-	.walk_lock	= PGWALK_WRLOCK,
-};
-
-/*
- * Unshare all shared zeropages, replacing them by anonymous pages. Note that
- * we cannot simply zap all shared zeropages, because this could later
- * trigger unexpected userfaultfd missing events.
- *
- * This must be called after mm->context.allow_cow_sharing was
- * set to 0, to avoid future mappings of shared zeropages.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * and racing with walk_page_range_vma() calling pte_offset_map_lock()
- * would fail, it will never insert a page table containing empty zero
- * pages once mm_forbids_zeropage(mm) i.e.
- * mm->context.allow_cow_sharing is set to 0.
- */
-static int __s390_unshare_zeropages(struct mm_struct *mm)
-{
-	struct vm_area_struct *vma;
-	VMA_ITERATOR(vmi, mm, 0);
-	unsigned long addr;
-	vm_fault_t fault;
-	int rc;
-
-	for_each_vma(vmi, vma) {
-		/*
-		 * We could only look at COW mappings, but it's more future
-		 * proof to catch unexpected zeropages in other mappings and
-		 * fail.
-		 */
-		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
-			continue;
-		addr = vma->vm_start;
-
-retry:
-		rc = walk_page_range_vma(vma, addr, vma->vm_end,
-					 &find_zeropage_ops, &addr);
-		if (rc < 0)
-			return rc;
-		else if (!rc)
-			continue;
-
-		/* addr was updated by find_zeropage_pte_entry() */
-		fault = handle_mm_fault(vma, addr,
-					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
-					NULL);
-		if (fault & VM_FAULT_OOM)
-			return -ENOMEM;
-		/*
-		 * See break_ksm(): even after handle_mm_fault() returned 0, we
-		 * must start the lookup from the current address, because
-		 * handle_mm_fault() may back out if there's any difficulty.
-		 *
-		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
-		 * maybe they could trigger in the future on concurrent
-		 * truncation. In that case, the shared zeropage would be gone
-		 * and we can simply retry and make progress.
-		 */
-		cond_resched();
-		goto retry;
-	}
-
-	return 0;
-}
-
-static int __s390_disable_cow_sharing(struct mm_struct *mm)
-{
-	int rc;
-
-	if (!mm->context.allow_cow_sharing)
-		return 0;
-
-	mm->context.allow_cow_sharing = 0;
-
-	/* Replace all shared zeropages by anonymous pages. */
-	rc = __s390_unshare_zeropages(mm);
-	/*
-	 * Make sure to disable KSM (if enabled for the whole process or
-	 * individual VMAs). Note that nothing currently hinders user space
-	 * from re-enabling it.
-	 */
-	if (!rc)
-		rc = ksm_disable(mm);
-	if (rc)
-		mm->context.allow_cow_sharing = 1;
-	return rc;
-}
-
-/*
- * Disable most COW-sharing of memory pages for the whole process:
- * (1) Disable KSM and unmerge/unshare any KSM pages.
- * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
- *
- * Not that we currently don't bother with COW-shared pages that are shared
- * with parent/child processes due to fork().
- */
-int s390_disable_cow_sharing(void)
-{
-	int rc;
-
-	mmap_write_lock(current->mm);
-	rc = __s390_disable_cow_sharing(current->mm);
-	mmap_write_unlock(current->mm);
-	return rc;
-}
-EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
-
 /*
  * Enable storage key handling from now on and initialize the storage
  * keys with the default key.
@@ -2468,7 +2294,7 @@ int s390_enable_skey(void)
 		goto out_up;
 
 	mm->context.uses_skeys = 1;
-	rc = __s390_disable_cow_sharing(mm);
+	rc = gmap_helper_disable_cow_sharing();
 	if (rc) {
 		mm->context.uses_skeys = 0;
 		goto out_up;
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
new file mode 100644
index 000000000000..b63f427e7289
--- /dev/null
+++ b/arch/s390/mm/gmap_helpers.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2025
+ */
+
+#include <linux/export.h>
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagewalk.h>
+#include <linux/ksm.h>
+#include <asm/gmap_helpers.h>
+
+/**
+ * ptep_zap_swap_entry() - discard a swap entry.
+ * @mm: the mm
+ * @entry: the swap entry that needs to be zapped
+ *
+ * Discards the given swap entry. If the swap entry was an actual swap
+ * entry (and not a migration entry, for example), the actual swapped
+ * page is also discarded from swap.
+ */
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+	if (!non_swap_entry(entry))
+		dec_mm_counter(mm, MM_SWAPENTS);
+	else if (is_migration_entry(entry))
+		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
+	free_swap_and_cache(entry);
+}
+
+/**
+ * gmap_helper_zap_one_page() - discard a page if it was swapped.
+ * @mm: the mm
+ * @vmaddr: the userspace virtual address that needs to be discarded
+ *
+ * If the given address maps to a swap entry, discard it.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
+{
+	struct vm_area_struct *vma;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	mmap_assert_locked(mm);
+
+	/* Find the vma covering the userspace address */
+	vma = vma_lookup(mm, vmaddr);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return;
+
+	/* Get pointer to the page table entry */
+	ptep = get_locked_pte(mm, vmaddr, &ptl);
+	if (unlikely(!ptep))
+		return;
+	if (pte_swap(*ptep))
+		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+	pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
+
+/**
+ * gmap_helper_discard() - discard user pages in the given range
+ * @mm: the mm
+ * @vmaddr: starting userspace address
+ * @end: end address (first address outside the range)
+ *
+ * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+	struct vm_area_struct *vma;
+
+	mmap_assert_locked(mm);
+
+	while (vmaddr < end) {
+		vma = find_vma_intersection(mm, vmaddr, end);
+		if (!vma)
+			return;
+		if (!is_vm_hugetlb_page(vma))
+			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
+		vmaddr = vma->vm_end;
+	}
+}
+EXPORT_SYMBOL_GPL(gmap_helper_discard);
+
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+				   unsigned long end, struct mm_walk *walk)
+{
+	unsigned long *found_addr = walk->private;
+
+	/* Return 1 if the page is a zeropage. */
+	if (is_zero_pfn(pte_pfn(*pte))) {
+		/*
+		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
+		 * currently only works in COW mappings, which is also where
+		 * mm_forbids_zeropage() is checked.
+		 */
+		if (!is_cow_mapping(walk->vma->vm_flags))
+			return -EFAULT;
+
+		*found_addr = addr;
+		return 1;
+	}
+	return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+	.pte_entry      = find_zeropage_pte_entry,
+	.walk_lock      = PGWALK_WRLOCK,
+};
+
+/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages
+ * @mm: the mm whose zero pages are to be unshared
+ *
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+	unsigned long addr;
+	vm_fault_t fault;
+	int rc;
+
+	for_each_vma(vmi, vma) {
+		/*
+		 * We could only look at COW mappings, but it's more future
+		 * proof to catch unexpected zeropages in other mappings and
+		 * fail.
+		 */
+		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+			continue;
+		addr = vma->vm_start;
+
+retry:
+		rc = walk_page_range_vma(vma, addr, vma->vm_end,
+					 &find_zeropage_ops, &addr);
+		if (rc < 0)
+			return rc;
+		else if (!rc)
+			continue;
+
+		/* addr was updated by find_zeropage_pte_entry() */
+		fault = handle_mm_fault(vma, addr,
+					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+					NULL);
+		if (fault & VM_FAULT_OOM)
+			return -ENOMEM;
+		/*
+		 * See break_ksm(): even after handle_mm_fault() returned 0, we
+		 * must start the lookup from the current address, because
+		 * handle_mm_fault() may back out if there's any difficulty.
+		 *
+		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+		 * maybe they could trigger in the future on concurrent
+		 * truncation. In that case, the shared zeropage would be gone
+		 * and we can simply retry and make progress.
+		 */
+		cond_resched();
+		goto retry;
+	}
+
+	return 0;
+}
+
+/**
+ * gmap_helper_disable_cow_sharing() - disable all COW sharing
+ *
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
+ *
+ * Note that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int gmap_helper_disable_cow_sharing(void)
+{
+	struct mm_struct *mm = current->mm;
+	int rc;
+
+	mmap_assert_write_locked(mm);
+
+	if (!mm->context.allow_cow_sharing)
+		return 0;
+
+	mm->context.allow_cow_sharing = 0;
+
+	/* Replace all shared zeropages by anonymous pages. */
+	rc = __gmap_helper_unshare_zeropages(mm);
+	/*
+	 * Make sure to disable KSM (if enabled for the whole process or
+	 * individual VMAs). Note that nothing currently hinders user space
+	 * from re-enabling it.
+	 */
+	if (!rc)
+		rc = ksm_disable(mm);
+	if (rc)
+		mm->context.allow_cow_sharing = 1;
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index afa085e8186c..e4953453d254 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -40,7 +40,6 @@
 #include <asm/kfence.h>
 #include <asm/dma.h>
 #include <asm/abs_lowcore.h>
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/sclp.h>
@@ -143,7 +142,7 @@ bool force_dma_unencrypted(struct device *dev)
 }
 
 /* protected virtualization */
-static void pv_init(void)
+static void __init pv_init(void)
 {
 	if (!is_prot_virt_guest())
 		return;
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index d177bea0bd73..d2f6f1f6d2fc 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -12,8 +12,6 @@
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
 #include <asm/pgalloc.h>
-#include <asm/gmap.h>
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
 unsigned long *crst_table_alloc(struct mm_struct *mm)
@@ -144,7 +142,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
 	if (!ptdesc)
 		return NULL;
-	if (!pagetable_pte_ctor(ptdesc)) {
+	if (!pagetable_pte_ctor(mm, ptdesc)) {
 		pagetable_free(ptdesc);
 		return NULL;
 	}
@@ -175,11 +173,6 @@ void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
 	struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
 
 	call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
-	/*
-	 * THPs are not allowed for KVM guests. Warn if pgste ever reaches here.
-	 * Turn to the generic pte_free_defer() version once gmap is removed.
-	 */
-	WARN_ON_ONCE(mm_has_pgste(mm));
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 9901934284ec..60688be4e876 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/cpufeature.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -20,7 +21,6 @@
 #include <linux/ksm.h>
 #include <linux/mman.h>
 
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 448dd6ed1069..f48ef361bc83 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -64,13 +64,12 @@ void *vmem_crst_alloc(unsigned long val)
 
 pte_t __ref *vmem_pte_alloc(void)
 {
-	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
 	pte_t *pte;
 
 	if (slab_is_available())
-		pte = (pte_t *) page_table_alloc(&init_mm);
+		pte = (pte_t *)page_table_alloc(&init_mm);
 	else
-		pte = (pte_t *) memblock_alloc(size, size);
+		pte = (pte_t *)memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	if (!pte)
 		return NULL;
 	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
deleted file mode 100644
index 7822ea92e54a..000000000000
--- a/arch/s390/net/bpf_jit.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * BPF Jit compiler defines
- *
- * Copyright IBM Corp. 2012,2015
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#ifndef __ARCH_S390_NET_BPF_JIT_H
-#define __ARCH_S390_NET_BPF_JIT_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/filter.h>
-#include <linux/types.h>
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * Stackframe layout (packed stack):
- *
- *				    ^ high
- *	      +---------------+     |
- *	      | old backchain |     |
- *	      +---------------+     |
- *	      |   r15 - r6    |     |
- *	      +---------------+     |
- *	      | 4 byte align  |     |
- *	      | tail_call_cnt |     |
- * BFP	   -> +===============+     |
- *	      |		      |     |
- *	      |   BPF stack   |     |
- *	      |		      |     |
- * R15+160 -> +---------------+     |
- *	      | new backchain |     |
- * R15+152 -> +---------------+     |
- *	      | + 152 byte SA |     |
- * R15	   -> +---------------+     + low
- *
- * We get 160 bytes stack space from calling function, but only use
- * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
- *
- * The stack size used by the BPF program ("BPF stack" above) is passed
- * via "aux->stack_depth".
- */
-#define STK_SPACE_ADD	(160)
-#define STK_160_UNUSED	(160 - 12 * 8)
-#define STK_OFF		(STK_SPACE_ADD - STK_160_UNUSED)
-
-#define STK_OFF_R6	(160 - 11 * 8)	/* Offset of r6 on stack */
-#define STK_OFF_TCCNT	(160 - 12 * 8)	/* Offset of tail_call_cnt on stack */
-
-#endif /* __ARCH_S390_NET_BPF_JIT_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 0776dfde2dba..bb17efe29d65 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -32,7 +32,6 @@
 #include <asm/set_memory.h>
 #include <asm/text-patching.h>
 #include <asm/unwind.h>
-#include "bpf_jit.h"
 
 struct bpf_jit {
 	u32 seen;		/* Flags to remember seen eBPF instructions */
@@ -48,14 +47,13 @@ struct bpf_jit {
 	int lit64;		/* Current position in 64-bit literal pool */
 	int base_ip;		/* Base address for literal pool */
 	int exit_ip;		/* Address of exit */
-	int r1_thunk_ip;	/* Address of expoline thunk for 'br %r1' */
-	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
 	int tail_call_start;	/* Tail call start offset */
 	int excnt;		/* Number of exception table entries */
 	int prologue_plt_ret;	/* Return address for prologue hotpatch PLT */
 	int prologue_plt;	/* Start of prologue hotpatch PLT */
 	int kern_arena;		/* Pool offset of kernel arena address */
 	u64 user_arena;		/* User arena address */
+	u32 frame_off;		/* Offset of struct bpf_prog from %r15 */
 };
 
 #define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
@@ -127,6 +125,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 		jit->seen_regs |= (1 << r1);
 }
 
+static s32 off_to_pcrel(struct bpf_jit *jit, u32 off)
+{
+	return off - jit->prg;
+}
+
+static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr)
+{
+	if (jit->prg_buf)
+		return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg);
+	return 0;
+}
+
 #define REG_SET_SEEN(b1)					\
 ({								\
 	reg_set_seen(jit, b1);					\
@@ -201,7 +211,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 
 #define EMIT4_PCREL_RIC(op, mask, target)			\
 ({								\
-	int __rel = ((target) - jit->prg) / 2;			\
+	int __rel = off_to_pcrel(jit, target) / 2;		\
 	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
 })
 
@@ -239,7 +249,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 
 #define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target)	\
 ({								\
-	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	unsigned int rel = off_to_pcrel(jit, target) / 2;	\
 	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
 	       (op2) | (mask) << 12);				\
 	REG_SET_SEEN(b1);					\
@@ -248,7 +258,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 
 #define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target)	\
 ({								\
-	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	unsigned int rel = off_to_pcrel(jit, target) / 2;	\
 	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
 		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
 	REG_SET_SEEN(b1);					\
@@ -257,29 +267,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 
 #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
 ({								\
-	int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2;	\
+	int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\
 	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
 
+static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel)
+{
+	u32 pc32dbl = (s32)(pcrel / 2);	/* bytes -> halfwords, cut to 32 bits */
+
+	_EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff);	/* 16-bit halves */
+}
+
+static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel)
+{
+	emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel);	/* b into op */
+	REG_SET_SEEN(b);	/* record b as used */
+}
+
 #define EMIT6_PCREL_RILB(op, b, target)				\
-({								\
-	unsigned int rel = (int)((target) - jit->prg) / 2;	\
-	_EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
-	REG_SET_SEEN(b);					\
-})
+	emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target))
 
-#define EMIT6_PCREL_RIL(op, target)				\
-({								\
-	unsigned int rel = (int)((target) - jit->prg) / 2;	\
-	_EMIT6((op) | rel >> 16, rel & 0xffff);			\
-})
+#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr)			\
+	emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr))
+
+static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel)
+{
+	emit6_pcrel_ril(jit, op | mask << 20, pcrel);	/* mask at bit 20 of op */
+}
 
 #define EMIT6_PCREL_RILC(op, mask, target)			\
-({								\
-	EMIT6_PCREL_RIL((op) | (mask) << 20, (target));		\
-})
+	emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target))
+
+#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr)		\
+	emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr))
 
 #define _EMIT6_IMM(op, imm)					\
 ({								\
@@ -404,11 +426,25 @@ static void jit_fill_hole(void *area, unsigned int size)
 }
 
 /*
+ * Caller-allocated part of the frame.
+ * Thanks to packed stack, its otherwise unused initial part can be used for
+ * the BPF stack and for the next frame.
+ */
+struct prog_frame {
+	u64 unused[8];		/* reusable for BPF stack / next frame */
+	/* BPF stack starts here and grows towards 0 */
+	u32 tail_call_cnt;	/* zeroed by the main program's prologue */
+	u32 pad;		/* keeps r6[] at an 8-byte offset */
+	u64 r6[10];  /* r6 - r15 */
+	u64 backchain;		/* caller's %r15, stored by the prologue */
+} __packed;
+
+/*
  * Save registers from "rs" (register start) to "re" (register end) on stack
  */
 static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
 {
-	u32 off = STK_OFF_R6 + (rs - 6) * 8;
+	u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8;
 
 	if (rs == re)
 		/* stg %rs,off(%r15) */
@@ -421,12 +457,9 @@ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
 /*
  * Restore registers from "rs" (register start) to "re" (register end) on stack
  */
-static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
+static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
 {
-	u32 off = STK_OFF_R6 + (rs - 6) * 8;
-
-	if (jit->seen & SEEN_STACK)
-		off += STK_OFF + stack_depth;
+	u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8;
 
 	if (rs == re)
 		/* lg %rs,off(%r15) */
@@ -470,8 +503,7 @@ static int get_end(u16 seen_regs, int start)
  * Save and restore clobbered registers (6-15) on stack.
  * We save/restore registers in chunks with gap >= 2 registers.
  */
-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
-			      u16 extra_regs)
+static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs)
 {
 	u16 seen_regs = jit->seen_regs | extra_regs;
 	const int last = 15, save_restore_size = 6;
@@ -494,7 +526,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
 		if (op == REGS_SAVE)
 			save_regs(jit, rs, re);
 		else
-			restore_regs(jit, rs, re, stack_depth);
+			restore_regs(jit, rs, re);
 		re++;
 	} while (re <= last);
 }
@@ -503,7 +535,7 @@ static void bpf_skip(struct bpf_jit *jit, int size)
 {
 	if (size >= 6 && !is_valid_rel(size)) {
 		/* brcl 0xf,size */
-		EMIT6_PCREL_RIL(0xc0f4000000, size);
+		EMIT6_PCREL_RILC(0xc0040000, 0xf, size);
 		size -= 6;
 	} else if (size >= 4 && is_valid_rel(size)) {
 		/* brc 0xf,size */
@@ -544,18 +576,27 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
 {
 	memcpy(plt, &bpf_plt, sizeof(*plt));
 	plt->ret = ret;
-	plt->target = target;
+	/*
+	 * (target == NULL) implies that the branch to this PLT entry was
+	 * patched and became a no-op. However, some CPU could have jumped
+	 * to this PLT entry before patching and may be still executing it.
+	 *
+	 * Since the intention in this case is to make the PLT entry a no-op,
+	 * make the target point to the return label instead of NULL.
+	 */
+	plt->target = target ?: ret;
 }
 
 /*
  * Emit function prologue
  *
  * Save registers and create stack frame if necessary.
- * See stack frame layout description in "bpf_jit.h"!
+ * Stack frame layout is described by struct prog_frame.
  */
-static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
-			     u32 stack_depth)
+static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
 {
+	BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD);
+
 	/* No-op for hotpatching */
 	/* brcl 0,prologue_plt */
 	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
@@ -563,8 +604,9 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
 
 	if (!bpf_is_subprog(fp)) {
 		/* Initialize the tail call counter in the main program. */
-		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
-		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
+		/* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */
+		_EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt),
+		       0xf000 | offsetof(struct prog_frame, tail_call_cnt));
 	} else {
 		/*
 		 * Skip the tail call counter initialization in subprograms.
@@ -587,7 +629,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
 		jit->seen_regs |= NVREGS;
 	} else {
 		/* Save registers */
-		save_restore_regs(jit, REGS_SAVE, stack_depth,
+		save_restore_regs(jit, REGS_SAVE,
 				  fp->aux->exception_boundary ? NVREGS : 0);
 	}
 	/* Setup literal pool */
@@ -605,43 +647,32 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
 	}
 	/* Setup stack and backchain */
 	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
-		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
-			/* lgr %w1,%r15 (backchain) */
-			EMIT4(0xb9040000, REG_W1, REG_15);
-		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
-		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
-		/* aghi %r15,-STK_OFF */
-		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
-		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
-			/* stg %w1,152(%r15) (backchain) */
-			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
-				      REG_15, 152);
+		/* lgr %w1,%r15 (backchain) */
+		EMIT4(0xb9040000, REG_W1, REG_15);
+		/* la %bfp,unused_end(%r15) (BPF frame pointer) */
+		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15,
+			   offsetofend(struct prog_frame, unused));
+		/* aghi %r15,-frame_off */
+		EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
+		/* stg %w1,backchain(%r15) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+			      REG_15,
+			      offsetof(struct prog_frame, backchain));
 	}
 }
 
 /*
- * Emit an expoline for a jump that follows
- */
-static void emit_expoline(struct bpf_jit *jit)
-{
-	/* exrl %r0,.+10 */
-	EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
-	/* j . */
-	EMIT4_PCREL(0xa7f40000, 0);
-}
-
-/*
- * Emit __s390_indirect_jump_r1 thunk if necessary
+ * Jump using a register either directly or via an expoline thunk
  */
-static void emit_r1_thunk(struct bpf_jit *jit)
-{
-	if (nospec_uses_trampoline()) {
-		jit->r1_thunk_ip = jit->prg;
-		emit_expoline(jit);
-		/* br %r1 */
-		_EMIT2(0x07f1);
-	}
-}
+#define EMIT_JUMP_REG(reg) do {						\
+	if (nospec_uses_trampoline())					\
+		/* brcl 0xf,__s390_indirect_jump_rN */			\
+		EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f,			\
+				     __s390_indirect_jump_r ## reg);	\
+	else								\
+		/* br %rN; reg is a literal (pasted above) */		\
+		_EMIT2(0x07f0 | reg);					\
+} while (0)
 
 /*
  * Call r1 either directly or via __s390_indirect_jump_r1 thunk
@@ -650,7 +681,8 @@ static void call_r1(struct bpf_jit *jit)
 {
 	if (nospec_uses_trampoline())
 		/* brasl %r14,__s390_indirect_jump_r1 */
-		EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+				     __s390_indirect_jump_r1);
 	else
 		/* basr %r14,%r1 */
 		EMIT2(0x0d00, REG_14, REG_1);
@@ -659,23 +691,14 @@ static void call_r1(struct bpf_jit *jit)
 /*
  * Function epilogue
  */
-static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
+static void bpf_jit_epilogue(struct bpf_jit *jit)
 {
 	jit->exit_ip = jit->prg;
 	/* Load exit code: lgr %r2,%b0 */
 	EMIT4(0xb9040000, REG_2, BPF_REG_0);
 	/* Restore registers */
-	save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
-	if (nospec_uses_trampoline()) {
-		jit->r14_thunk_ip = jit->prg;
-		/* Generate __s390_indirect_jump_r14 thunk */
-		emit_expoline(jit);
-	}
-	/* br %r14 */
-	_EMIT2(0x07fe);
-
-	if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
-		emit_r1_thunk(jit);
+	save_restore_regs(jit, REGS_RESTORE, 0);
+	EMIT_JUMP_REG(14);
 
 	jit->prg = ALIGN(jit->prg, 8);
 	jit->prologue_plt = jit->prg;
@@ -856,7 +879,7 @@ static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
  * stack space for the large switch statement.
  */
 static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
-				 int i, bool extra_pass, u32 stack_depth)
+				 int i, bool extra_pass)
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
 	s32 branch_oc_off = insn->off;
@@ -1777,9 +1800,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		 * Note 2: We assume that the verifier does not let us call the
 		 * main program, which clears the tail call counter on entry.
 		 */
-		/* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
-		_EMIT6(0xd203f000 | STK_OFF_TCCNT,
-		       0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
+		/* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
+		_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
+		       0xf000 | (jit->frame_off +
+				 offsetof(struct prog_frame, tail_call_cnt)));
 
 		/* Sign-extend the kfunc arguments. */
 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
@@ -1830,10 +1854,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		 *         goto out;
 		 */
 
-		if (jit->seen & SEEN_STACK)
-			off = STK_OFF_TCCNT + STK_OFF + stack_depth;
-		else
-			off = STK_OFF_TCCNT;
+		off = jit->frame_off +
+		      offsetof(struct prog_frame, tail_call_cnt);
 		/* lhi %w0,1 */
 		EMIT4_IMM(0xa7080000, REG_W0, 1);
 		/* laal %w1,%w0,off(%r15) */
@@ -1863,7 +1885,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		/*
 		 * Restore registers before calling function
 		 */
-		save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
+		save_restore_regs(jit, REGS_RESTORE, 0);
 
 		/*
 		 * goto *(prog->bpf_func + tail_call_start);
@@ -1877,7 +1899,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 			/* aghi %r1,tail_call_start */
 			EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
 			/* brcl 0xf,__s390_indirect_jump_r1 */
-			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+			EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+					     __s390_indirect_jump_r1);
 		} else {
 			/* bc 0xf,tail_call_start(%r1) */
 			_EMIT4(0x47f01000 + jit->tail_call_start);
@@ -2155,7 +2178,7 @@ static int bpf_set_addr(struct bpf_jit *jit, int i)
  * Compile eBPF program into s390x code
  */
 static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
-			bool extra_pass, u32 stack_depth)
+			bool extra_pass)
 {
 	int i, insn_count, lit32_size, lit64_size;
 	u64 kern_arena;
@@ -2164,24 +2187,30 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
 	jit->lit64 = jit->lit64_start;
 	jit->prg = 0;
 	jit->excnt = 0;
+	if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
+		jit->frame_off = sizeof(struct prog_frame) -
+				 offsetofend(struct prog_frame, unused) +
+				 round_up(fp->aux->stack_depth, 8);
+	else
+		jit->frame_off = 0;
 
 	kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
 	if (kern_arena)
 		jit->kern_arena = _EMIT_CONST_U64(kern_arena);
 	jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
 
-	bpf_jit_prologue(jit, fp, stack_depth);
+	bpf_jit_prologue(jit, fp);
 	if (bpf_set_addr(jit, 0) < 0)
 		return -1;
 	for (i = 0; i < fp->len; i += insn_count) {
-		insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
+		insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
 		if (insn_count < 0)
 			return -1;
 		/* Next instruction address */
 		if (bpf_set_addr(jit, i + insn_count) < 0)
 			return -1;
 	}
-	bpf_jit_epilogue(jit, stack_depth);
+	bpf_jit_epilogue(jit);
 
 	lit32_size = jit->lit32 - jit->lit32_start;
 	lit64_size = jit->lit64 - jit->lit64_start;
@@ -2257,7 +2286,6 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
  */
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
-	u32 stack_depth = round_up(fp->aux->stack_depth, 8);
 	struct bpf_prog *tmp, *orig_fp = fp;
 	struct bpf_binary_header *header;
 	struct s390_jit_data *jit_data;
@@ -2310,7 +2338,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	 *   - 3:   Calculate program size and addrs array
 	 */
 	for (pass = 1; pass <= 3; pass++) {
-		if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
+		if (bpf_jit_prog(&jit, fp, extra_pass)) {
 			fp = orig_fp;
 			goto free_addrs;
 		}
@@ -2324,7 +2352,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		goto free_addrs;
 	}
 skip_init_ctx:
-	if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
+	if (bpf_jit_prog(&jit, fp, extra_pass)) {
 		bpf_jit_binary_free(header);
 		fp = orig_fp;
 		goto free_addrs;
@@ -2585,9 +2613,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	if (nr_stack_args > MAX_NR_STACK_ARGS)
 		return -ENOTSUPP;
 
-	/* Return to %r14, since func_addr and %r0 are not available. */
-	if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) ||
-	    (flags & BPF_TRAMP_F_INDIRECT))
+	/* Return to %r14 in the struct_ops case. */
+	if (flags & BPF_TRAMP_F_INDIRECT)
 		flags |= BPF_TRAMP_F_SKIP_FRAME;
 
 	/*
@@ -2645,9 +2672,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	/* stg %r1,backchain_off(%r15) */
 	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
 		      tjit->backchain_off);
-	/* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+	/* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */
 	_EMIT6(0xd203f000 | tjit->tccnt_off,
-	       0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
+	       0xf000 | (tjit->stack_size +
+			 offsetof(struct prog_frame, tail_call_cnt)));
 	/* stmg %r2,%rN,fwd_reg_args_off(%r15) */
 	if (nr_reg_args)
 		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
@@ -2784,8 +2812,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 				       (nr_stack_args * sizeof(u64) - 1) << 16 |
 				       tjit->stack_args_off,
 			       0xf000 | tjit->orig_stack_args_off);
-		/* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
-		_EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
+		/* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
+		_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
+		       0xf000 | tjit->tccnt_off);
 		/* lgr %r1,%r8 */
 		EMIT4(0xb9040000, REG_1, REG_8);
 		/* %r1() */
@@ -2842,22 +2871,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
 			      tjit->retval_off);
-	/* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
-	_EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+	/* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */
+	_EMIT6(0xd203f000 | (tjit->stack_size +
+			     offsetof(struct prog_frame, tail_call_cnt)),
 	       0xf000 | tjit->tccnt_off);
 	/* aghi %r15,stack_size */
 	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
-	/* Emit an expoline for the following indirect jump. */
-	if (nospec_uses_trampoline())
-		emit_expoline(jit);
 	if (flags & BPF_TRAMP_F_SKIP_FRAME)
-		/* br %r14 */
-		_EMIT2(0x07fe);
+		EMIT_JUMP_REG(14);
 	else
-		/* br %r1 */
-		_EMIT2(0x07f1);
-
-	emit_r1_thunk(jit);
+		EMIT_JUMP_REG(1);
 
 	return 0;
 }
diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c
index 79211bec0fc8..03089ef479b2 100644
--- a/arch/s390/net/pnet.c
+++ b/arch/s390/net/pnet.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/device.h>
+#include <linux/export.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/types.h>
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 81bdb54ad5e3..45a1c36c5a54 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -13,7 +13,6 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/err.h>
-#include <linux/export.h>
 #include <linux/delay.h>
 #include <linux/seq_file.h>
 #include <linux/jump_label.h>
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 2fbee3887d13..d930416d4c90 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
 	case PCI_ERS_RESULT_CAN_RECOVER:
 	case PCI_ERS_RESULT_RECOVERED:
 	case PCI_ERS_RESULT_NEED_RESET:
+	case PCI_ERS_RESULT_NONE:
 		return false;
 	default:
 		return true;
@@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver)
 		return false;
 	if (!driver->err_handler->error_detected)
 		return false;
-	if (!driver->err_handler->slot_reset)
-		return false;
-	if (!driver->err_handler->resume)
-		return false;
 	return true;
 }
 
@@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
 	struct zpci_dev *zdev = to_zpci(pdev);
 	int rc;
 
+	/* The underlying device may have been disabled by the event */
+	if (!zdev_enabled(zdev))
+		return PCI_ERS_RESULT_NEED_RESET;
+
 	pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
 	rc = zpci_reset_load_store_blocked(zdev);
 	if (rc) {
@@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
 		return PCI_ERS_RESULT_NEED_RESET;
 	}
 
-	if (driver->err_handler->mmio_enabled) {
+	if (driver->err_handler->mmio_enabled)
 		ers_res = driver->err_handler->mmio_enabled(pdev);
-		if (ers_result_indicates_abort(ers_res)) {
-			pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
-				pci_name(pdev));
-			return ers_res;
-		} else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
-			pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
-			return ers_res;
-		}
+	else
+		ers_res = PCI_ERS_RESULT_NONE;
+
+	if (ers_result_indicates_abort(ers_res)) {
+		pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+			pci_name(pdev));
+		return ers_res;
+	} else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+		pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+		return ers_res;
 	}
 
 	pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
@@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
 		return ers_res;
 	}
 	pdev->error_state = pci_channel_io_normal;
-	ers_res = driver->err_handler->slot_reset(pdev);
+
+	if (driver->err_handler->slot_reset)
+		ers_res = driver->err_handler->slot_reset(pdev);
+	else
+		ers_res = PCI_ERS_RESULT_NONE;
+
 	if (ers_result_indicates_abort(ers_res)) {
 		pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
 		return ers_res;
@@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
 		goto out_unlock;
 	}
 
-	if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+	if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
 		ers_res = zpci_event_do_error_state_clear(pdev, driver);
 		if (ers_result_indicates_abort(ers_res)) {
 			status_str = "failed (abort on MMIO enable)";
@@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
 	if (ers_res == PCI_ERS_RESULT_NEED_RESET)
 		ers_res = zpci_event_do_reset(pdev, driver);
 
+	/*
+	 * ers_res can be PCI_ERS_RESULT_NONE either because the driver
+	 * decided to return it, indicating that it abstains from voting
+	 * on how to recover, or because it didn't implement the callback.
+	 * Both cases assume that, if there is nothing else causing a
+	 * disconnect, we recovered successfully.
+	 */
+	if (ers_res == PCI_ERS_RESULT_NONE)
+		ers_res = PCI_ERS_RESULT_RECOVERED;
+
 	if (ers_res != PCI_ERS_RESULT_RECOVERED) {
 		pr_err("%s: Automatic recovery failed; operator intervention is required\n",
 		       pci_name(pdev));
@@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 	struct pci_dev *pdev = NULL;
 	pci_ers_result_t ers_res;
+	u32 fh = 0;
+	int rc;
 
 	zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
 		 ccdf->fid, ccdf->fh, ccdf->pec);
@@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 
 	if (zdev) {
 		mutex_lock(&zdev->state_lock);
+		rc = clp_refresh_fh(zdev->fid, &fh);
+		if (rc)
+			goto no_pdev;
+		if (!fh || ccdf->fh != fh) {
+			/* Ignore events with stale handles */
+			zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+				 ccdf->fid, fh, ccdf->fh);
+			goto no_pdev;
+		}
 		zpci_update_fh(zdev, ccdf->fh);
 		if (zdev->zbus->bus)
 			pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c
index ff34baf50a3e..df5b25dbe9ca 100644
--- a/arch/s390/pci/pci_kvm_hook.c
+++ b/arch/s390/pci/pci_kvm_hook.c
@@ -5,7 +5,9 @@
  * Copyright (C) IBM Corp. 2022.  All rights reserved.
  *	Author(s): Pierre Morel <pmorel@linux.ibm.com>
  */
+
 #include <linux/kvm_host.h>
+#include <linux/export.h>
 
 struct zpci_kvm_hook zpci_kvm_hook;
 EXPORT_SYMBOL_GPL(zpci_kvm_hook);
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 0ecad08e1b1e..0ee0924cfab7 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -218,7 +218,7 @@ static struct attribute *zpci_dev_attrs[] = {
 
 const struct attribute_group zpci_attr_group = {
 	.attrs = zpci_dev_attrs,
-	.bin_attrs_new = zpci_bin_attrs,
+	.bin_attrs = zpci_bin_attrs,
 };
 
 static struct attribute *pfip_attrs[] = {
diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
index 030efda05dbe..ecb38102187c 100644
--- a/arch/s390/purgatory/purgatory.c
+++ b/arch/s390/purgatory/purgatory.c
@@ -16,7 +16,7 @@ int verify_sha256_digest(void)
 {
 	struct kexec_sha_region *ptr, *end;
 	u8 digest[SHA256_DIGEST_SIZE];
-	struct sha256_state sctx;
+	struct sha256_ctx sctx;
 
 	sha256_init(&sctx);
 	end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);