summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig49
-rw-r--r--arch/s390/Kconfig.debug10
-rw-r--r--arch/s390/Makefile6
-rw-r--r--arch/s390/Makefile.postlink8
-rw-r--r--arch/s390/appldata/appldata_base.c1
-rw-r--r--arch/s390/boot/.gitignore1
-rw-r--r--arch/s390/boot/Makefile8
-rw-r--r--arch/s390/boot/als.c10
-rw-r--r--arch/s390/boot/alternative.c135
-rw-r--r--arch/s390/boot/boot.h39
-rw-r--r--arch/s390/boot/decompressor.c12
-rw-r--r--arch/s390/boot/head.S25
-rw-r--r--arch/s390/boot/ipl_data.c9
-rw-r--r--arch/s390/boot/ipl_parm.c60
-rw-r--r--arch/s390/boot/ipl_report.c3
-rw-r--r--arch/s390/boot/kaslr.c4
-rw-r--r--arch/s390/boot/pgm_check.c92
-rw-r--r--arch/s390/boot/pgm_check_info.c65
-rw-r--r--arch/s390/boot/physmem_info.c189
-rw-r--r--arch/s390/boot/printk.c227
-rw-r--r--arch/s390/boot/startup.c180
-rw-r--r--arch/s390/boot/string.c12
-rw-r--r--arch/s390/boot/trampoline.S9
-rw-r--r--arch/s390/boot/vmem.c142
-rw-r--r--arch/s390/boot/vmlinux.lds.S2
-rw-r--r--arch/s390/configs/debug_defconfig74
-rw-r--r--arch/s390/configs/defconfig69
-rw-r--r--arch/s390/configs/mmtypes.config2
-rw-r--r--arch/s390/configs/zfcpdump_defconfig5
-rw-r--r--arch/s390/crypto/Kconfig52
-rw-r--r--arch/s390/crypto/Makefile7
-rw-r--r--arch/s390/crypto/aes_s390.c45
-rw-r--r--arch/s390/crypto/arch_random.c1
-rw-r--r--arch/s390/crypto/chacha-glue.c130
-rw-r--r--arch/s390/crypto/chacha-s390.S908
-rw-r--r--arch/s390/crypto/chacha-s390.h14
-rw-r--r--arch/s390/crypto/ghash_s390.c104
-rw-r--r--arch/s390/crypto/hmac_s390.c178
-rw-r--r--arch/s390/crypto/paes_s390.c1815
-rw-r--r--arch/s390/crypto/phmac_s390.c1048
-rw-r--r--arch/s390/crypto/sha.h25
-rw-r--r--arch/s390/crypto/sha1_s390.c103
-rw-r--r--arch/s390/crypto/sha256_s390.c143
-rw-r--r--arch/s390/crypto/sha3_256_s390.c80
-rw-r--r--arch/s390/crypto/sha3_512_s390.c88
-rw-r--r--arch/s390/crypto/sha512_s390.c149
-rw-r--r--arch/s390/crypto/sha_common.c85
-rw-r--r--arch/s390/hypfs/hypfs.h2
-rw-r--r--arch/s390/hypfs/hypfs_dbfs.c19
-rw-r--r--arch/s390/hypfs/hypfs_diag.h2
-rw-r--r--arch/s390/hypfs/hypfs_diag0c.c5
-rw-r--r--arch/s390/hypfs/hypfs_diag_fs.c5
-rw-r--r--arch/s390/hypfs/hypfs_vm.c5
-rw-r--r--arch/s390/hypfs/inode.c7
-rw-r--r--arch/s390/include/asm/abs_lowcore.h7
-rw-r--r--arch/s390/include/asm/alternative.h17
-rw-r--r--arch/s390/include/asm/ap.h2
-rw-r--r--arch/s390/include/asm/appldata.h3
-rw-r--r--arch/s390/include/asm/asce.h36
-rw-r--r--arch/s390/include/asm/asm-const.h2
-rw-r--r--arch/s390/include/asm/asm-extable.h22
-rw-r--r--arch/s390/include/asm/asm.h2
-rw-r--r--arch/s390/include/asm/atomic_ops.h8
-rw-r--r--arch/s390/include/asm/bitops.h8
-rw-r--r--arch/s390/include/asm/boot_data.h51
-rw-r--r--arch/s390/include/asm/cpacf.h22
-rw-r--r--arch/s390/include/asm/cpu.h4
-rw-r--r--arch/s390/include/asm/cpu_mf-insn.h4
-rw-r--r--arch/s390/include/asm/cpu_mf.h6
-rw-r--r--arch/s390/include/asm/cpufeature.h15
-rw-r--r--arch/s390/include/asm/ctlreg.h4
-rw-r--r--arch/s390/include/asm/current.h18
-rw-r--r--arch/s390/include/asm/diag.h2
-rw-r--r--arch/s390/include/asm/diag288.h41
-rw-r--r--arch/s390/include/asm/dwarf.h4
-rw-r--r--arch/s390/include/asm/elf.h32
-rw-r--r--arch/s390/include/asm/entry-common.h10
-rw-r--r--arch/s390/include/asm/extmem.h2
-rw-r--r--arch/s390/include/asm/fpu-insn-asm.h4
-rw-r--r--arch/s390/include/asm/fpu-insn.h187
-rw-r--r--arch/s390/include/asm/fpu.h7
-rw-r--r--arch/s390/include/asm/ftrace.h5
-rw-r--r--arch/s390/include/asm/futex.h113
-rw-r--r--arch/s390/include/asm/gmap.h21
-rw-r--r--arch/s390/include/asm/gmap_helpers.h15
-rw-r--r--arch/s390/include/asm/hugetlb.h19
-rw-r--r--arch/s390/include/asm/io.h4
-rw-r--r--arch/s390/include/asm/irq.h5
-rw-r--r--arch/s390/include/asm/jump_label.h4
-rw-r--r--arch/s390/include/asm/kfence.h17
-rw-r--r--arch/s390/include/asm/kvm_host.h349
-rw-r--r--arch/s390/include/asm/kvm_host_types.h348
-rw-r--r--arch/s390/include/asm/lowcore.h28
-rw-r--r--arch/s390/include/asm/machine.h104
-rw-r--r--arch/s390/include/asm/march.h4
-rw-r--r--arch/s390/include/asm/mem_encrypt.h4
-rw-r--r--arch/s390/include/asm/mmu.h3
-rw-r--r--arch/s390/include/asm/mmu_context.h20
-rw-r--r--arch/s390/include/asm/nmi.h4
-rw-r--r--arch/s390/include/asm/nospec-branch.h8
-rw-r--r--arch/s390/include/asm/nospec-insn.h5
-rw-r--r--arch/s390/include/asm/page.h85
-rw-r--r--arch/s390/include/asm/pci.h7
-rw-r--r--arch/s390/include/asm/pci_clp.h4
-rw-r--r--arch/s390/include/asm/pci_dma.h3
-rw-r--r--arch/s390/include/asm/percpu.h5
-rw-r--r--arch/s390/include/asm/pgalloc.h3
-rw-r--r--arch/s390/include/asm/pgtable.h102
-rw-r--r--arch/s390/include/asm/physmem_info.h4
-rw-r--r--arch/s390/include/asm/pkey.h15
-rw-r--r--arch/s390/include/asm/processor.h29
-rw-r--r--arch/s390/include/asm/ptrace.h53
-rw-r--r--arch/s390/include/asm/purgatory.h4
-rw-r--r--arch/s390/include/asm/sclp.h6
-rw-r--r--arch/s390/include/asm/setup.h48
-rw-r--r--arch/s390/include/asm/sigp.h4
-rw-r--r--arch/s390/include/asm/skey.h32
-rw-r--r--arch/s390/include/asm/smp.h24
-rw-r--r--arch/s390/include/asm/spinlock.h20
-rw-r--r--arch/s390/include/asm/string.h20
-rw-r--r--arch/s390/include/asm/syscall.h27
-rw-r--r--arch/s390/include/asm/sysinfo.h28
-rw-r--r--arch/s390/include/asm/thread_info.h12
-rw-r--r--arch/s390/include/asm/timex.h18
-rw-r--r--arch/s390/include/asm/tlb.h7
-rw-r--r--arch/s390/include/asm/tlbflush.h8
-rw-r--r--arch/s390/include/asm/topology.h6
-rw-r--r--arch/s390/include/asm/tpi.h4
-rw-r--r--arch/s390/include/asm/types.h4
-rw-r--r--arch/s390/include/asm/uaccess.h730
-rw-r--r--arch/s390/include/asm/uv.h12
-rw-r--r--arch/s390/include/asm/vdso.h8
-rw-r--r--arch/s390/include/asm/vdso/getrandom.h16
-rw-r--r--arch/s390/include/asm/vdso/gettimeofday.h23
-rw-r--r--arch/s390/include/asm/vdso/time_data.h3
-rw-r--r--arch/s390/include/asm/vdso/vsyscall.h24
-rw-r--r--arch/s390/include/asm/word-at-a-time.h2
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h5
-rw-r--r--arch/s390/include/uapi/asm/schid.h4
-rw-r--r--arch/s390/include/uapi/asm/types.h4
-rw-r--r--arch/s390/kernel/Makefile4
-rw-r--r--arch/s390/kernel/abs_lowcore.c1
-rw-r--r--arch/s390/kernel/alternative.c65
-rw-r--r--arch/s390/kernel/asm-offsets.c13
-rw-r--r--arch/s390/kernel/cert_store.c4
-rw-r--r--arch/s390/kernel/cpacf.c2
-rw-r--r--arch/s390/kernel/cpufeature.c6
-rw-r--r--arch/s390/kernel/crash_dump.c65
-rw-r--r--arch/s390/kernel/ctlreg.c1
-rw-r--r--arch/s390/kernel/debug.c4
-rw-r--r--arch/s390/kernel/diag/diag.c4
-rw-r--r--arch/s390/kernel/dis.c1
-rw-r--r--arch/s390/kernel/dumpstack.c8
-rw-r--r--arch/s390/kernel/early.c124
-rw-r--r--arch/s390/kernel/entry.S46
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/facility.c1
-rw-r--r--arch/s390/kernel/fpu.c2
-rw-r--r--arch/s390/kernel/ftrace.c19
-rw-r--r--arch/s390/kernel/guarded_storage.c3
-rw-r--r--arch/s390/kernel/head64.S4
-rw-r--r--arch/s390/kernel/hiperdispatch.c3
-rw-r--r--arch/s390/kernel/ipl.c68
-rw-r--r--arch/s390/kernel/irq.c9
-rw-r--r--arch/s390/kernel/kprobes.c5
-rw-r--r--arch/s390/kernel/machine_kexec.c6
-rw-r--r--arch/s390/kernel/nmi.c85
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c14
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c171
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c9
-rw-r--r--arch/s390/kernel/perf_event.c1
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c6
-rw-r--r--arch/s390/kernel/perf_pai_ext.c4
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/processor.c30
-rw-r--r--arch/s390/kernel/ptrace.c98
-rw-r--r--arch/s390/kernel/setup.c69
-rw-r--r--arch/s390/kernel/skey.c48
-rw-r--r--arch/s390/kernel/smp.c30
-rw-r--r--arch/s390/kernel/stacktrace.c1
-rw-r--r--arch/s390/kernel/sthyi.c2
-rw-r--r--arch/s390/kernel/syscall.c51
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl3
-rw-r--r--arch/s390/kernel/sysinfo.c48
-rw-r--r--arch/s390/kernel/time.c132
-rw-r--r--arch/s390/kernel/topology.c29
-rw-r--r--arch/s390/kernel/traps.c117
-rw-r--r--arch/s390/kernel/unwind_bc.c2
-rw-r--r--arch/s390/kernel/uv.c467
-rw-r--r--arch/s390/kernel/vdso.c99
-rw-r--r--arch/s390/kernel/vdso32/Makefile2
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S7
-rw-r--r--arch/s390/kernel/vdso64/Makefile2
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S8
-rw-r--r--arch/s390/kernel/vmlinux.lds.S8
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/diag.c30
-rw-r--r--arch/s390/kvm/gaccess.c53
-rw-r--r--arch/s390/kvm/gmap-vsie.c141
-rw-r--r--arch/s390/kvm/intercept.c16
-rw-r--r--arch/s390/kvm/interrupt.c37
-rw-r--r--arch/s390/kvm/kvm-s390.c350
-rw-r--r--arch/s390/kvm/kvm-s390.h61
-rw-r--r--arch/s390/kvm/pci.c17
-rw-r--r--arch/s390/kvm/priv.c6
-rw-r--r--arch/s390/kvm/pv.c82
-rw-r--r--arch/s390/kvm/trace-s390.h4
-rw-r--r--arch/s390/kvm/vsie.c140
-rw-r--r--arch/s390/lib/Makefile3
-rw-r--r--arch/s390/lib/crc32-glue.c92
-rw-r--r--arch/s390/lib/crc32-vx.h12
-rw-r--r--arch/s390/lib/crc32be-vx.c174
-rw-r--r--arch/s390/lib/crc32le-vx.c240
-rw-r--r--arch/s390/lib/delay.c1
-rw-r--r--arch/s390/lib/spinlock.c31
-rw-r--r--arch/s390/lib/string.c47
-rw-r--r--arch/s390/lib/uaccess.c357
-rw-r--r--arch/s390/mm/Makefile4
-rw-r--r--arch/s390/mm/cmm.c6
-rw-r--r--arch/s390/mm/dump_pagetables.c56
-rw-r--r--arch/s390/mm/extable.c77
-rw-r--r--arch/s390/mm/extmem.c27
-rw-r--r--arch/s390/mm/fault.c37
-rw-r--r--arch/s390/mm/gmap.c858
-rw-r--r--arch/s390/mm/gmap_helpers.c223
-rw-r--r--arch/s390/mm/hugetlbpage.c11
-rw-r--r--arch/s390/mm/init.c42
-rw-r--r--arch/s390/mm/mmap.c9
-rw-r--r--arch/s390/mm/pageattr.c9
-rw-r--r--arch/s390/mm/pfault.c5
-rw-r--r--arch/s390/mm/pgalloc.c53
-rw-r--r--arch/s390/mm/pgtable.c90
-rw-r--r--arch/s390/mm/vmem.c16
-rw-r--r--arch/s390/net/bpf_jit.h55
-rw-r--r--arch/s390/net/bpf_jit_comp.c271
-rw-r--r--arch/s390/net/pnet.c1
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c89
-rw-r--r--arch/s390/pci/pci_bus.c48
-rw-r--r--arch/s390/pci/pci_bus.h7
-rw-r--r--arch/s390/pci/pci_clp.c7
-rw-r--r--arch/s390/pci/pci_event.c81
-rw-r--r--arch/s390/pci/pci_fixup.c23
-rw-r--r--arch/s390/pci/pci_insn.c12
-rw-r--r--arch/s390/pci/pci_iov.c56
-rw-r--r--arch/s390/pci/pci_iov.h7
-rw-r--r--arch/s390/pci/pci_kvm_hook.c2
-rw-r--r--arch/s390/pci/pci_mmio.c36
-rw-r--r--arch/s390/pci/pci_sysfs.c13
-rw-r--r--arch/s390/purgatory/Makefile6
-rw-r--r--arch/s390/purgatory/purgatory.c2
-rw-r--r--arch/s390/tools/gen_facilities.c3
-rw-r--r--arch/s390/tools/gen_opcode_table.c27
253 files changed, 8400 insertions, 7262 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 6e9545d8b0c7..bf680c26a33c 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -41,9 +41,6 @@ config AUDIT_ARCH
config NO_IOPORT_MAP
def_bool y
-config PCI_QUIRKS
- def_bool n
-
config ARCH_SUPPORTS_UPROBES
def_bool y
@@ -52,13 +49,19 @@ config KASAN_SHADOW_OFFSET
depends on KASAN
default 0x1C000000000000
-config GCC_ASM_FLAG_OUTPUT_BROKEN
+config CC_ASM_FLAG_OUTPUT_BROKEN
def_bool CC_IS_GCC && GCC_VERSION < 140200
help
GCC versions before 14.2.0 may die with an internal
compiler error in some configurations if flag output
operands are used within inline assemblies.
+config CC_HAS_ASM_AOR_FORMAT_FLAGS
+ def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100)
+ help
+ Clang versions before 19.1.0 do not support A,
+ O, and R inline assembly format flags.
+
config S390
def_bool y
#
@@ -67,12 +70,12 @@ config S390
imply IMA_SECURE_AND_OR_TRUSTED_BOOT
select ALTERNATE_USER_ADDRESS_SPACE
select ARCH_32BIT_USTAT_F_TINODE
- select ARCH_BINFMT_ELF_STATE
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
- select ARCH_HAS_CRC32
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
@@ -89,6 +92,7 @@ config S390
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_PREEMPT_LAZY
+ select ARCH_HAS_PTDUMP
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SCALED_CPUTIME
select ARCH_HAS_SET_DIRECT_MAP
@@ -99,6 +103,7 @@ config S390
select ARCH_HAS_UBSAN
select ARCH_HAS_VDSO_TIME_DATA
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_HAVE_TRACE_MMIO_ACCESS
select ARCH_INLINE_READ_LOCK
select ARCH_INLINE_READ_LOCK_BH
select ARCH_INLINE_READ_LOCK_IRQ
@@ -128,11 +133,13 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
+ select ARCH_MODULE_NEEDS_WEAK_PER_CPU
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
+ select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_USE_BUILTIN_BSWAP
@@ -141,9 +148,11 @@ config S390
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
+ select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_KERNEL_PMD_MKWRITE
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+ select ARCH_WANTS_THP_SWAP
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
select DCACHE_WORD_ACCESS if !KMSAN
@@ -156,9 +165,9 @@ config S390
select GENERIC_CPU_VULNERABILITIES
select GENERIC_ENTRY
select GENERIC_GETTIMEOFDAY
- select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
+ select GENERIC_VDSO_DATA_STORE
select GENERIC_VDSO_TIME_NS
select GENERIC_IOREMAP if PCI
select HAVE_ALIGNED_STRUCT_PAGE
@@ -170,14 +179,15 @@ config S390
select HAVE_ARCH_KCSAN
select HAVE_ARCH_KMSAN
select HAVE_ARCH_KFENCE
+ select HAVE_ARCH_KSTACK_ERASE
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY
- select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_VMAP_STACK
select HAVE_ASM_MODVERSIONS
+ select HAVE_BUILDTIME_MCOUNT_SORT
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
select HAVE_DEBUG_KMEMLEAK
@@ -192,7 +202,6 @@ config S390
select HAVE_GUP_FAST
select HAVE_FENTRY
select HAVE_FTRACE_GRAPH_FUNC
- select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_FREGS
@@ -234,6 +243,7 @@ config S390
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select HAVE_VIRT_CPU_ACCOUNTING_IDLE
+ select HOTPLUG_SMT
select IOMMU_HELPER if PCI
select IOMMU_SUPPORT if PCI
select KASAN_VMALLOC if KASAN
@@ -251,6 +261,7 @@ config S390
select PCI_DOMAINS if PCI
select PCI_MSI if PCI
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
+ select PCI_QUIRKS if PCI
select SPARSE_IRQ
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
@@ -324,6 +335,10 @@ config HAVE_MARCH_Z16_FEATURES
def_bool n
select HAVE_MARCH_Z15_FEATURES
+config HAVE_MARCH_Z17_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z16_FEATURES
+
choice
prompt "Processor type"
default MARCH_Z196
@@ -389,6 +404,14 @@ config MARCH_Z16
Select this to enable optimizations for IBM z16 (3931 and
3932 series).
+config MARCH_Z17
+ bool "IBM z17"
+ select HAVE_MARCH_Z17_FEATURES
+ depends on $(cc-option,-march=z17)
+ help
+ Select this to enable optimizations for IBM z17 (9175 and
+ 9176 series).
+
endchoice
config MARCH_Z10_TUNE
@@ -412,6 +435,9 @@ config MARCH_Z15_TUNE
config MARCH_Z16_TUNE
def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT
+config MARCH_Z17_TUNE
+ def_bool TUNE_Z17 || MARCH_Z17 && TUNE_DEFAULT
+
choice
prompt "Tune code generation"
default TUNE_DEFAULT
@@ -456,6 +482,10 @@ config TUNE_Z16
bool "IBM z16"
depends on $(cc-option,-mtune=z16)
+config TUNE_Z17
+ bool "IBM z17"
+ depends on $(cc-option,-mtune=z17)
+
endchoice
config 64BIT
@@ -623,6 +653,7 @@ endchoice
config RELOCATABLE
def_bool y
+ select ARCH_VMLINUX_NEEDS_RELOCS
help
This builds a kernel image that retains relocation information
so it can be loaded at an arbitrary address.
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index c4300ea4abf8..7955d7eee7d8 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -13,6 +13,16 @@ config DEBUG_ENTRY
If unsure, say N.
+config STRICT_MM_TYPECHECKS
+ bool "Strict Memory Management Type Checks"
+ depends on DEBUG_KERNEL
+ help
+ Enable strict type checking for memory management types like pte_t
+ and pmd_t. This generates slightly worse code and should be used
+ for debug builds.
+
+ If unsure, say N.
+
config CIO_INJECT
bool "CIO Inject interfaces"
depends on DEBUG_KERNEL && DEBUG_FS
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 3f25498dac65..7679bc16b692 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -15,14 +15,14 @@ KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
KBUILD_CFLAGS += -fPIC
-LDFLAGS_vmlinux := -no-pie --emit-relocs --discard-none
+LDFLAGS_vmlinux := $(call ld-option,-no-pie)
extra_tools := relocs
aflags_dwarf := -Wa,-gdwarf-2
KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
ifndef CONFIG_AS_IS_LLVM
KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
endif
-KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
+KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
@@ -48,6 +48,7 @@ mflags-$(CONFIG_MARCH_Z13) := -march=z13
mflags-$(CONFIG_MARCH_Z14) := -march=z14
mflags-$(CONFIG_MARCH_Z15) := -march=z15
mflags-$(CONFIG_MARCH_Z16) := -march=z16
+mflags-$(CONFIG_MARCH_Z17) := -march=z17
export CC_FLAGS_MARCH := $(mflags-y)
@@ -61,6 +62,7 @@ cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13
cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14
cflags-$(CONFIG_MARCH_Z15_TUNE) += -mtune=z15
cflags-$(CONFIG_MARCH_Z16_TUNE) += -mtune=z16
+cflags-$(CONFIG_MARCH_Z17_TUNE) += -mtune=z17
cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink
index df82f5410769..c2b737500a91 100644
--- a/arch/s390/Makefile.postlink
+++ b/arch/s390/Makefile.postlink
@@ -19,14 +19,8 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S
mkdir -p $(OUT_RELOCS); \
$(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S
-quiet_cmd_strip_relocs = RSTRIP $@
- cmd_strip_relocs = \
- $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \
- --remove-section='.rela.*' --remove-section='.rela__*' $@
-
-vmlinux: FORCE
+vmlinux.unstripped: FORCE
$(call cmd,relocs)
- $(call cmd,strip_relocs)
clean:
@rm -f $(OUT_RELOCS)/relocs.S
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index dd7ba7587dd5..ad2b0baa527c 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -12,6 +12,7 @@
#define KMSG_COMPONENT "appldata"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/export.h>
#include <linux/module.h>
#include <linux/sched/stat.h>
#include <linux/init.h>
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index f5ef099e2fd3..af2a6a7bc028 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -5,4 +5,5 @@ relocs.S
section_cmp.*
vmlinux
vmlinux.lds
+vmlinux.map
vmlinux.syms
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 8bc1308ac892..02f2cf082748 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -19,15 +19,15 @@ CC_FLAGS_MARCH_MINIMUM := -march=z10
KBUILD_AFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_AFLAGS_DECOMPRESSOR))
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_CFLAGS_DECOMPRESSOR))
-KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM)
-KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM)
+KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM) -D__DISABLE_EXPORTS
+KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM) -D__DISABLE_EXPORTS
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o
-obj-y += uv.o printk.o
+obj-y += version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o
+obj-y += uv.o printk.o trampoline.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
index 11e0c3d5dbc8..25a20986b96e 100644
--- a/arch/s390/boot/als.c
+++ b/arch/s390/boot/als.c
@@ -46,7 +46,7 @@ void print_missing_facilities(void)
* z/VM adds a four character prefix.
*/
if (strlen(als_str) > 70) {
- boot_printk("%s\n", als_str);
+ boot_emerg("%s\n", als_str);
*als_str = '\0';
}
u16_to_decimal(val_str, i * BITS_PER_LONG + j);
@@ -54,7 +54,7 @@ void print_missing_facilities(void)
first = 0;
}
}
- boot_printk("%s\n", als_str);
+ boot_emerg("%s\n", als_str);
}
static void facility_mismatch(void)
@@ -62,10 +62,10 @@ static void facility_mismatch(void)
struct cpuid id;
get_cpu_id(&id);
- boot_printk("The Linux kernel requires more recent processor hardware\n");
- boot_printk("Detected machine-type number: %4x\n", id.machine);
+ boot_emerg("The Linux kernel requires more recent processor hardware\n");
+ boot_emerg("Detected machine-type number: %4x\n", id.machine);
print_missing_facilities();
- boot_printk("See Principles of Operations for facility bits\n");
+ boot_emerg("See z/Architecture Principles of Operation - Facility Indications\n");
disabled_wait();
}
diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c
index abc08d2c873d..19ea7934b918 100644
--- a/arch/s390/boot/alternative.c
+++ b/arch/s390/boot/alternative.c
@@ -1,3 +1,138 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "alt: " fmt
+#include "boot.h"
+
+#define a_debug boot_debug
#include "../kernel/alternative.c"
+
+static void alt_debug_all(int type)
+{
+ int i;
+
+ switch (type) {
+ case ALT_TYPE_FACILITY:
+ for (i = 0; i < ARRAY_SIZE(alt_debug.facilities); i++)
+ alt_debug.facilities[i] = -1UL;
+ break;
+ case ALT_TYPE_FEATURE:
+ for (i = 0; i < ARRAY_SIZE(alt_debug.mfeatures); i++)
+ alt_debug.mfeatures[i] = -1UL;
+ break;
+ case ALT_TYPE_SPEC:
+ alt_debug.spec = 1;
+ break;
+ }
+}
+
+static void alt_debug_modify(int type, unsigned int nr, bool clear)
+{
+ switch (type) {
+ case ALT_TYPE_FACILITY:
+ if (clear)
+ __clear_facility(nr, alt_debug.facilities);
+ else
+ __set_facility(nr, alt_debug.facilities);
+ break;
+ case ALT_TYPE_FEATURE:
+ if (clear)
+ __clear_machine_feature(nr, alt_debug.mfeatures);
+ else
+ __set_machine_feature(nr, alt_debug.mfeatures);
+ break;
+ }
+}
+
+static char *alt_debug_parse(int type, char *str)
+{
+ unsigned long val, endval;
+ char *endp;
+ bool clear;
+ int i;
+
+ if (*str == ':') {
+ str++;
+ } else {
+ alt_debug_all(type);
+ return str;
+ }
+ clear = false;
+ if (*str == '!') {
+ alt_debug_all(type);
+ clear = true;
+ str++;
+ }
+ while (*str) {
+ val = simple_strtoull(str, &endp, 0);
+ if (str == endp)
+ break;
+ str = endp;
+ if (*str == '-') {
+ str++;
+ endval = simple_strtoull(str, &endp, 0);
+ if (str == endp)
+ break;
+ str = endp;
+ while (val <= endval) {
+ alt_debug_modify(type, val, clear);
+ val++;
+ }
+ } else {
+ alt_debug_modify(type, val, clear);
+ }
+ if (*str != ',')
+ break;
+ str++;
+ }
+ return str;
+}
+
+/*
+ * Use debug-alternative command line parameter for debugging:
+ * "debug-alternative"
+ * -> print debug message for every single alternative
+ *
+ * "debug-alternative=0;2"
+ * -> print debug message for all alternatives with type 0 and 2
+ *
+ * "debug-alternative=0:0-7"
+ * -> print debug message for all alternatives with type 0 and with
+ * facility numbers within the range of 0-7
+ * (if type 0 is ALT_TYPE_FACILITY)
+ *
+ * "debug-alternative=0:!8;1"
+ * -> print debug message for all alternatives with type 0, for all
+ * facility number, except facility 8, and in addition print all
+ * alternatives with type 1
+ */
+void alt_debug_setup(char *str)
+{
+ unsigned long type;
+ char *endp;
+ int i;
+
+ if (!str) {
+ alt_debug_all(ALT_TYPE_FACILITY);
+ alt_debug_all(ALT_TYPE_FEATURE);
+ alt_debug_all(ALT_TYPE_SPEC);
+ return;
+ }
+ while (*str) {
+ type = simple_strtoull(str, &endp, 0);
+ if (str == endp)
+ break;
+ str = endp;
+ switch (type) {
+ case ALT_TYPE_FACILITY:
+ case ALT_TYPE_FEATURE:
+ str = alt_debug_parse(type, str);
+ break;
+ case ALT_TYPE_SPEC:
+ alt_debug_all(ALT_TYPE_SPEC);
+ break;
+ }
+ if (*str != ';')
+ break;
+ str++;
+ }
+}
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 56244fe78182..c0152db285f0 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -6,15 +6,11 @@
#define IPL_START 0x200
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <linux/printk.h>
#include <asm/physmem_info.h>
-struct machine_info {
- unsigned char has_edat1 : 1;
- unsigned char has_edat2 : 1;
-};
-
struct vmlinux_info {
unsigned long entry;
unsigned long image_size; /* does not include .bss */
@@ -47,13 +43,16 @@ void physmem_set_usable_limit(unsigned long limit);
void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
void physmem_free(enum reserved_range_type type);
/* for continuous/multiple allocations per type */
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align);
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align);
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom);
/* for single allocations, 1 per type */
unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
unsigned long align, unsigned long min, unsigned long max,
bool die_on_oom);
unsigned long get_physmem_alloc_pos(void);
+void dump_physmem_reserved(void);
bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
unsigned long *intersection_start);
bool is_ipl_block_dump(void);
@@ -65,16 +64,34 @@ void parse_boot_command_line(void);
void verify_facilities(void);
void print_missing_facilities(void);
void sclp_early_setup_buffer(void);
-void print_pgm_check_info(void);
+void alt_debug_setup(char *str);
+void do_pgm_check(struct pt_regs *regs);
unsigned long randomize_within_range(unsigned long size, unsigned long align,
unsigned long min, unsigned long max);
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit);
-void __printf(1, 2) boot_printk(const char *fmt, ...);
+int __printf(1, 2) boot_printk(const char *fmt, ...);
void print_stacktrace(unsigned long sp);
void error(char *m);
int get_random(unsigned long limit, unsigned long *value);
+void boot_rb_dump(void);
+void __noreturn jump_to_kernel(psw_t *psw);
+
+#ifndef boot_fmt
+#define boot_fmt(fmt) fmt
+#endif
+
+#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__)
extern struct machine_info machine;
+extern int boot_console_loglevel;
+extern bool boot_ignore_loglevel;
/* Symbols defined by linker scripts */
extern const char kernel_version[];
@@ -105,5 +122,5 @@ static inline bool intersects(unsigned long addr0, unsigned long size0,
{
return addr0 + size0 > addr1 && addr1 + size1 > addr0;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index f478e8e9cbda..03500b9d9fb9 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
+#include <asm/boot_data.h>
#include <asm/page.h>
#include "decompressor.h"
#include "boot.h"
@@ -63,6 +64,15 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
#include "../../../../lib/decompress_unzstd.c"
#endif
+static void decompress_error(char *m)
+{
+ if (bootdebug)
+ boot_rb_dump();
+ boot_emerg("Decompression error: %s\n", m);
+ boot_emerg(" -- System halted\n");
+ disabled_wait();
+}
+
unsigned long mem_safe_offset(void)
{
return ALIGN(free_mem_end_ptr, PAGE_SIZE);
@@ -71,5 +81,5 @@ unsigned long mem_safe_offset(void)
void deploy_kernel(void *output)
{
__decompress(_compressed_start, _compressed_end - _compressed_start,
- NULL, NULL, output, vmlinux.image_size, NULL, error);
+ NULL, NULL, output, vmlinux.image_size, NULL, decompress_error);
}
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index 0a47b16f6412..0b511d5c030b 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -254,8 +254,9 @@ SYM_CODE_START_LOCAL(startup_normal)
xc 0xf00(256),0xf00
larl %r13,.Lctl
lctlg %c0,%c15,0(%r13) # load control registers
- stcke __LC_BOOT_CLOCK
- mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
+ larl %r13,tod_clock_base
+ stcke 0(%r13)
+ mvc __LC_LAST_UPDATE_CLOCK(8),1(%r13)
larl %r13,6f
spt 0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),0(%r13)
@@ -292,12 +293,6 @@ SYM_CODE_END(startup_normal)
#include "head_kdump.S"
-#
-# This program check is active immediately after kernel start
-# and until early_pgm_check_handler is set in kernel/early.c
-# It simply saves general/control registers and psw in
-# the save area and does disabled wait with a faulty address.
-#
SYM_CODE_START_LOCAL(startup_pgm_check_handler)
stmg %r8,%r15,__LC_SAVE_AREA
la %r8,4095
@@ -311,8 +306,18 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler)
oi __LC_RETURN_PSW+1,0x2 # set wait state bit
larl %r9,.Lold_psw_disabled_wait
stg %r9,__LC_PGM_NEW_PSW+8
- larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD
- brasl %r14,print_pgm_check_info
+ larl %r15,_dump_info_stack_end-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+ la %r2,STACK_FRAME_OVERHEAD(%r15)
+ mvc __PT_PSW(16,%r2),__LC_PSW_SAVE_AREA-4095(%r8)
+ mvc __PT_R0(128,%r2),__LC_GPREGS_SAVE_AREA-4095(%r8)
+ mvc __PT_LAST_BREAK(8,%r2),__LC_PGM_LAST_BREAK
+ mvc __PT_INT_CODE(4,%r2),__LC_PGM_INT_CODE
+ brasl %r14,do_pgm_check
+ larl %r9,startup_pgm_check_handler
+ stg %r9,__LC_PGM_NEW_PSW+8
+ mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ lpswe __LC_RETURN_PSW
.Lold_psw_disabled_wait:
la %r8,4095
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c
index 0846e2b249c6..c4130a80b058 100644
--- a/arch/s390/boot/ipl_data.c
+++ b/arch/s390/boot/ipl_data.c
@@ -16,7 +16,9 @@ struct ipl_lowcore {
struct ccw0 ccwpgm[2]; /* 0x0008 */
u8 fill[56]; /* 0x0018 */
struct ccw0 ccwpgmcc[20]; /* 0x0050 */
- u8 pad_0xf0[0x01a0-0x00f0]; /* 0x00f0 */
+ u8 pad_0xf0[0x0140-0x00f0]; /* 0x00f0 */
+ psw_t svc_old_psw; /* 0x0140 */
+ u8 pad_0x150[0x01a0-0x0150]; /* 0x0150 */
psw_t restart_psw; /* 0x01a0 */
psw_t external_new_psw; /* 0x01b0 */
psw_t svc_new_psw; /* 0x01c0 */
@@ -75,6 +77,11 @@ static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = {
[18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
[19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI),
},
+ /*
+ * Let the GDB's lx-symbols command find the jump_to_kernel symbol
+ * without having to load decompressor symbols.
+ */
+ .svc_old_psw = { .mask = 0, .addr = (unsigned long)jump_to_kernel },
.restart_psw = { .mask = 0, .addr = IPL_START, },
.external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, },
.svc_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, },
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 557462e62cd7..f584d7da29cb 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -5,6 +5,7 @@
#include <linux/pgtable.h>
#include <asm/abs_lowcore.h>
#include <asm/page-states.h>
+#include <asm/machine.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
#include <asm/sections.h>
@@ -34,29 +35,14 @@ int vmalloc_size_set;
static inline int __diag308(unsigned long subcode, void *addr)
{
- unsigned long reg1, reg2;
- union register_pair r1;
- psw_t old;
-
- r1.even = (unsigned long) addr;
- r1.odd = 0;
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ union register_pair r1 = { .even = (unsigned long)addr, .odd = 0 };
+
+ asm_inline volatile(
" diag %[r1],%[subcode],0x308\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [r1] "+&d" (r1.pair),
- [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- "+Q" (get_lowcore()->program_new_psw),
- "=Q" (old)
- : [subcode] "d" (subcode),
- [psw_old] "a" (&old),
- [psw_pgm] "a" (&get_lowcore()->program_new_psw)
+ "0:\n"
+ EX_TABLE(0b, 0b)
+ : [r1] "+d" (r1.pair)
+ : [subcode] "d" (subcode)
: "cc", "memory");
return r1.odd;
}
@@ -193,7 +179,7 @@ void setup_boot_command_line(void)
if (has_ebcdic_char(parmarea.command_line))
EBCASC(parmarea.command_line, COMMAND_LINE_SIZE);
/* copy arch command line */
- strcpy(early_command_line, strim(parmarea.command_line));
+ strscpy(early_command_line, strim(parmarea.command_line));
/* append IPL PARM data to the boot command line */
if (!is_prot_virt_guest() && ipl_block_valid)
@@ -215,7 +201,7 @@ static void check_cleared_facilities(void)
for (i = 0; i < ARRAY_SIZE(als); i++) {
if ((stfle_fac_list[i] & als[i]) != als[i]) {
- boot_printk("Warning: The Linux kernel requires facilities cleared via command line option\n");
+ boot_emerg("The Linux kernel requires facilities cleared via command line option\n");
print_missing_facilities();
break;
}
@@ -267,7 +253,8 @@ void parse_boot_command_line(void)
int rc;
__kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
- args = strcpy(command_line_buf, early_command_line);
+ strscpy(command_line_buf, early_command_line);
+ args = command_line_buf;
while (*args) {
args = next_arg(args, &param, &val);
@@ -295,6 +282,9 @@ void parse_boot_command_line(void)
if (!strcmp(param, "facilities") && val)
modify_fac_list(val);
+ if (!strcmp(param, "debug-alternative"))
+ alt_debug_setup(val);
+
if (!strcmp(param, "nokaslr"))
__kaslr_enabled = 0;
@@ -312,6 +302,24 @@ void parse_boot_command_line(void)
}
#endif
if (!strcmp(param, "relocate_lowcore") && test_facility(193))
- relocate_lowcore = 1;
+ set_machine_feature(MFEATURE_LOWCORE);
+ if (!strcmp(param, "earlyprintk"))
+ boot_earlyprintk = true;
+ if (!strcmp(param, "debug"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
+ if (!strcmp(param, "bootdebug")) {
+ bootdebug = true;
+ if (val)
+ strscpy(bootdebug_filter, val);
+ }
+ if (!strcmp(param, "quiet"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET;
+ if (!strcmp(param, "ignore_loglevel"))
+ boot_ignore_loglevel = true;
+ if (!strcmp(param, "loglevel")) {
+ boot_console_loglevel = simple_strtoull(val, NULL, 10);
+ if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN)
+ boot_console_loglevel = CONSOLE_LOGLEVEL_MIN;
+ }
}
}
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index d00898852a88..f73cd757a5f7 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void)
{
struct ipl_rb_certificate_entry *cert;
struct ipl_rb_component_entry *comp;
- size_t size;
/*
* Find the length for the IPL report boot data
@@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void)
return;
size = get_cert_comp_list_size();
- early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
+ early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int));
ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
copy_components_bootdata();
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index f864d2bff775..941f4c9e27cc 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -32,7 +32,7 @@ struct prng_parm {
static int check_prng(void)
{
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) {
- boot_printk("KASLR disabled: CPU has no PRNG\n");
+ boot_warn("KASLR disabled: CPU has no PRNG\n");
return 0;
}
if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
@@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a
* cannot have chains.
*
* On the other hand, "dynamic" or "repetitive" allocations are done via
- * physmem_alloc_top_down(). These allocations are tightly packed together
+ * physmem_alloc_or_die(). These allocations are tightly packed together
* top down from the end of online memory. physmem_alloc_pos represents
* current position where those allocations start.
*
diff --git a/arch/s390/boot/pgm_check.c b/arch/s390/boot/pgm_check.c
new file mode 100644
index 000000000000..fa621fa5bc02
--- /dev/null
+++ b/arch/s390/boot/pgm_check.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/stdarg.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "boot.h"
+
+void print_stacktrace(unsigned long sp)
+{
+ struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
+ (unsigned long)_stack_end };
+ bool first = true;
+
+ boot_emerg("Call Trace:\n");
+ while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
+ struct stack_frame *sf = (struct stack_frame *)sp;
+
+ if (first)
+ boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
+ else
+ boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
+ if (sf->back_chain <= sp)
+ break;
+ sp = sf->back_chain;
+ first = false;
+ }
+}
+
+extern struct exception_table_entry __start___ex_table[];
+extern struct exception_table_entry __stop___ex_table[];
+
+static inline unsigned long extable_insn(const struct exception_table_entry *x)
+{
+ return (unsigned long)&x->insn + x->insn;
+}
+
+static bool ex_handler(struct pt_regs *regs)
+{
+ const struct exception_table_entry *ex;
+
+ for (ex = __start___ex_table; ex < __stop___ex_table; ex++) {
+ if (extable_insn(ex) != regs->psw.addr)
+ continue;
+ if (ex->type != EX_TYPE_FIXUP)
+ return false;
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+ }
+ return false;
+}
+
+void do_pgm_check(struct pt_regs *regs)
+{
+ struct psw_bits *psw = &psw_bits(regs->psw);
+ unsigned long *gpregs = regs->gprs;
+
+ if (ex_handler(regs))
+ return;
+ if (bootdebug)
+ boot_rb_dump();
+ boot_emerg("Linux version %s\n", kernel_version);
+ if (!is_prot_virt_guest() && early_command_line[0])
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Kernel fault: interruption code %04x ilc:%d\n",
+ regs->int_code & 0xffff, regs->int_code >> 17);
+ if (kaslr_enabled()) {
+ boot_emerg("Kernel random base: %lx\n", __kaslr_offset);
+ boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys);
+ }
+ boot_emerg("PSW : %016lx %016lx (%pS)\n",
+ regs->psw.mask, regs->psw.addr, (void *)regs->psw.addr);
+ boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
+ psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
+ psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba);
+ boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
+ print_stacktrace(gpregs[15]);
+ boot_emerg("Last Breaking-Event-Address:\n");
+ boot_emerg(" [<%016lx>] %pS\n", regs->last_break, (void *)regs->last_break);
+ /* Convert to disabled wait PSW */
+ psw->io = 0;
+ psw->ext = 0;
+ psw->wait = 1;
+}
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
deleted file mode 100644
index 5abe59fb3bc0..000000000000
--- a/arch/s390/boot/pgm_check_info.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/stdarg.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <asm/stacktrace.h>
-#include <asm/boot_data.h>
-#include <asm/lowcore.h>
-#include <asm/setup.h>
-#include <asm/sclp.h>
-#include <asm/uv.h>
-#include "boot.h"
-
-void print_stacktrace(unsigned long sp)
-{
- struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
- (unsigned long)_stack_end };
- bool first = true;
-
- boot_printk("Call Trace:\n");
- while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
- struct stack_frame *sf = (struct stack_frame *)sp;
-
- boot_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" :
- " sp:%016lx [<%016lx>] %pS\n",
- sp, sf->gprs[8], (void *)sf->gprs[8]);
- if (sf->back_chain <= sp)
- break;
- sp = sf->back_chain;
- first = false;
- }
-}
-
-void print_pgm_check_info(void)
-{
- unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area;
- struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area);
-
- boot_printk("Linux version %s\n", kernel_version);
- if (!is_prot_virt_guest() && early_command_line[0])
- boot_printk("Kernel command line: %s\n", early_command_line);
- boot_printk("Kernel fault: interruption code %04x ilc:%x\n",
- get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1);
- if (kaslr_enabled()) {
- boot_printk("Kernel random base: %lx\n", __kaslr_offset);
- boot_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys);
- }
- boot_printk("PSW : %016lx %016lx (%pS)\n",
- get_lowcore()->psw_save_area.mask,
- get_lowcore()->psw_save_area.addr,
- (void *)get_lowcore()->psw_save_area.addr);
- boot_printk(
- " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
- psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
- psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri,
- psw->eaba);
- boot_printk("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
- boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
- boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
- boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
- print_stacktrace(get_lowcore()->gpregs_save_area[15]);
- boot_printk("Last Breaking-Event-Address:\n");
- boot_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break,
- (void *)get_lowcore()->pgm_last_break);
-}
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
index 7617aa2d2f7e..45e3d057cfaa 100644
--- a/arch/s390/boot/physmem_info.c
+++ b/arch/s390/boot/physmem_info.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "physmem: " fmt
#include <linux/processor.h>
#include <linux/errno.h>
#include <linux/init.h>
@@ -28,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n)
return &physmem_info.online[n];
if (unlikely(!physmem_info.online_extended)) {
physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
- RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
+ RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
physmem_alloc_pos, true);
}
return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
@@ -58,36 +59,22 @@ void add_physmem_online_range(u64 start, u64 end)
static int __diag260(unsigned long rx1, unsigned long rx2)
{
- unsigned long reg1, reg2, ry;
union register_pair rx;
int cc, exception;
- psw_t old;
+ unsigned long ry;
rx.even = rx1;
rx.odd = rx2;
ry = 0x10; /* storage configuration */
exception = 1;
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ asm_inline volatile(
" diag %[rx],%[ry],0x260\n"
- " lhi %[exc],0\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
+ "0: lhi %[exc],0\n"
+ "1:\n"
CC_IPM(cc)
- : CC_OUT(cc, cc),
- [exc] "+d" (exception),
- [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [ry] "+&d" (ry),
- "+Q" (get_lowcore()->program_new_psw),
- "=Q" (old)
- : [rx] "d" (rx.pair),
- [psw_old] "a" (&old),
- [psw_pgm] "a" (&get_lowcore()->program_new_psw)
+ EX_TABLE(0b, 1b)
+ : CC_OUT(cc, cc), [exc] "+d" (exception), [ry] "+d" (ry)
+ : [rx] "d" (rx.pair)
: CC_CLOBBER_LIST("memory"));
cc = exception ? -1 : CC_TRANSFORM(cc);
return cc == 0 ? ry : -1;
@@ -117,29 +104,15 @@ static int diag260(void)
static int diag500_storage_limit(unsigned long *max_physmem_end)
{
unsigned long storage_limit;
- unsigned long reg1, reg2;
- psw_t old;
-
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
- " lghi 1,%[subcode]\n"
- " lghi 2,0\n"
- " diag 2,4,0x500\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- " lgr %[slimit],2\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [slimit] "=d" (storage_limit),
- "=Q" (get_lowcore()->program_new_psw),
- "=Q" (old)
- : [psw_old] "a" (&old),
- [psw_pgm] "a" (&get_lowcore()->program_new_psw),
- [subcode] "i" (DIAG500_SC_STOR_LIMIT)
+
+ asm_inline volatile(
+ " lghi %%r1,%[subcode]\n"
+ " lghi %%r2,0\n"
+ " diag %%r2,%%r4,0x500\n"
+ "0: lgr %[slimit],%%r2\n"
+ EX_TABLE(0b, 0b)
+ : [slimit] "=d" (storage_limit)
+ : [subcode] "i" (DIAG500_SC_STOR_LIMIT)
: "memory", "1", "2");
if (!storage_limit)
return -EINVAL;
@@ -150,31 +123,17 @@ static int diag500_storage_limit(unsigned long *max_physmem_end)
static int tprot(unsigned long addr)
{
- unsigned long reg1, reg2;
int cc, exception;
- psw_t old;
exception = 1;
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ asm_inline volatile(
" tprot 0(%[addr]),0\n"
- " lhi %[exc],0\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
+ "0: lhi %[exc],0\n"
+ "1:\n"
CC_IPM(cc)
- : CC_OUT(cc, cc),
- [exc] "+d" (exception),
- [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- "=Q" (get_lowcore()->program_new_psw.addr),
- "=Q" (old)
- : [psw_old] "a" (&old),
- [psw_pgm] "a" (&get_lowcore()->program_new_psw),
- [addr] "a" (addr)
+ EX_TABLE(0b, 1b)
+ : CC_OUT(cc, cc), [exc] "+d" (exception)
+ : [addr] "a" (addr)
: CC_CLOBBER_LIST("memory"));
cc = exception ? -EFAULT : CC_TRANSFORM(cc);
return cc;
@@ -207,11 +166,16 @@ unsigned long detect_max_physmem_end(void)
max_physmem_end = search_mem_end();
physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
}
+ boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end,
+ get_physmem_info_source());
return max_physmem_end;
}
void detect_physmem_online_ranges(unsigned long max_physmem_end)
{
+ unsigned long start, end;
+ int i;
+
if (!sclp_early_read_storage_info()) {
physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
} else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) {
@@ -226,12 +190,16 @@ void detect_physmem_online_ranges(unsigned long max_physmem_end)
} else if (max_physmem_end) {
add_physmem_online_range(0, max_physmem_end);
}
+ boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source());
+ for_each_physmem_online_range(i, &start, &end)
+ boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end);
}
void physmem_set_usable_limit(unsigned long limit)
{
physmem_info.usable = limit;
physmem_alloc_pos = limit;
+ boot_debug("Usable memory limit: 0x%016lx\n", limit);
}
static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
@@ -241,38 +209,47 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min,
enum reserved_range_type t;
int i;
- boot_printk("Linux version %s\n", kernel_version);
+ boot_emerg("Linux version %s\n", kernel_version);
if (!is_prot_virt_guest() && early_command_line[0])
- boot_printk("Kernel command line: %s\n", early_command_line);
- boot_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
- size, align, min, max);
- boot_printk("Reserved memory ranges:\n");
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n",
+ size, align, min, max);
+ boot_emerg("Reserved memory ranges:\n");
for_each_physmem_reserved_range(t, range, &start, &end) {
- boot_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
+ boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
total_reserved_mem += end - start;
}
- boot_printk("Usable online memory ranges (info source: %s [%x]):\n",
- get_physmem_info_source(), physmem_info.info_source);
+ boot_emerg("Usable online memory ranges (info source: %s [%d]):\n",
+ get_physmem_info_source(), physmem_info.info_source);
for_each_physmem_usable_range(i, &start, &end) {
- boot_printk("%016lx %016lx\n", start, end);
+ boot_emerg("%016lx %016lx\n", start, end);
total_mem += end - start;
}
- boot_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
- total_mem, total_reserved_mem,
- total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
+ boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n",
+ total_mem, total_reserved_mem,
+ total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
print_stacktrace(current_frame_address());
- boot_printk("\n\n -- System halted\n");
+ boot_emerg(" -- System halted\n");
disabled_wait();
}
-void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
{
physmem_info.reserved[type].start = addr;
physmem_info.reserved[type].end = addr + size;
}
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size,
+ get_rr_type_name(type));
+}
+
void physmem_free(enum reserved_range_type type)
{
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start,
+ physmem_info.reserved[type].end, get_rr_type_name(type));
physmem_info.reserved[type].start = 0;
physmem_info.reserved[type].end = 0;
}
@@ -339,41 +316,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s
max = min(max, physmem_alloc_pos);
addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
if (addr)
- physmem_reserve(type, addr, size);
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size,
+ get_rr_type_name(type));
return addr;
}
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align)
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom)
{
struct reserved_range *range = &physmem_info.reserved[type];
- struct reserved_range *new_range;
+ struct reserved_range *new_range = NULL;
unsigned int ranges_left;
unsigned long addr;
addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
- &ranges_left, true);
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
/* if not a consecutive allocation of the same type or first allocation */
if (range->start != addr + size) {
if (range->end) {
- physmem_alloc_pos = __physmem_alloc_range(
- sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
- physmem_alloc_ranges, &ranges_left, true);
- new_range = (struct reserved_range *)physmem_alloc_pos;
+ addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0,
+ physmem_alloc_pos, physmem_alloc_ranges,
+ &ranges_left, true);
+ new_range = (struct reserved_range *)addr;
+ addr = __physmem_alloc_range(size, align, 0, addr, ranges_left,
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
*new_range = *range;
range->chain = new_range;
- addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
- ranges_left, &ranges_left, true);
}
range->end = addr + size;
}
+ if (type != RR_VMEM) {
+ boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:",
+ addr, addr + size, get_rr_type_name(type), align, !!new_range);
+ }
range->start = addr;
physmem_alloc_pos = addr;
physmem_alloc_ranges = ranges_left;
return addr;
}
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align)
+{
+ return physmem_alloc(type, size, align, true);
+}
+
unsigned long get_physmem_alloc_pos(void)
{
return physmem_alloc_pos;
}
+
+void dump_physmem_reserved(void)
+{
+ struct reserved_range *range;
+ enum reserved_range_type t;
+ unsigned long start, end;
+
+ boot_debug("Reserved memory ranges:\n");
+ for_each_physmem_reserved_range(t, range, &start, &end) {
+ if (end) {
+ boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n",
+ get_rr_type_name(t), start, end, (unsigned long)range,
+ (unsigned long)range->chain);
+ }
+ }
+}
diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c
index 35f18f2b936e..4bb6bc95704e 100644
--- a/arch/s390/boot/printk.c
+++ b/arch/s390/boot/printk.c
@@ -5,21 +5,113 @@
#include <linux/ctype.h>
#include <asm/stacktrace.h>
#include <asm/boot_data.h>
+#include <asm/sections.h>
#include <asm/lowcore.h>
#include <asm/setup.h>
+#include <asm/timex.h>
#include <asm/sclp.h>
#include <asm/uv.h>
#include "boot.h"
+int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT;
+bool boot_ignore_loglevel;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+static void boot_rb_add(const char *str, size_t len)
+{
+ /* leave double '\0' in the end */
+ size_t avail = sizeof(boot_rb) - boot_rb_off - 1;
+
+ /* store strings separated by '\0' */
+ if (len + 1 > avail)
+ boot_rb_off = 0;
+ avail = sizeof(boot_rb) - boot_rb_off - 1;
+ strscpy(boot_rb + boot_rb_off, str, avail);
+ boot_rb_off += len + 1;
+}
+
+static void print_rb_entry(const char *str)
+{
+ sclp_early_printk(printk_skip_level(str));
+}
+
+static bool debug_messages_printed(void)
+{
+ return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG);
+}
+
+void boot_rb_dump(void)
+{
+ if (debug_messages_printed())
+ return;
+ sclp_early_printk("Boot messages ring buffer:\n");
+ boot_rb_foreach(print_rb_entry);
+}
+
const char hex_asc[] = "0123456789abcdef";
static char *as_hex(char *dst, unsigned long val, int pad)
{
- char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
+ char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
- for (*p-- = 0; p >= dst; val >>= 4)
+ for (*p-- = '\0'; p >= dst; val >>= 4)
*p-- = hex_asc[val & 0x0f];
- return end;
+ return dst;
+}
+
+#define MAX_NUMLEN 21
+static char *as_dec(char *buf, unsigned long val, bool is_signed)
+{
+ bool negative = false;
+ char *p = buf + MAX_NUMLEN;
+
+ if (is_signed && (long)val < 0) {
+ val = (val == LONG_MIN ? LONG_MIN : -(long)val);
+ negative = true;
+ }
+
+ *--p = '\0';
+ do {
+ *--p = '0' + (val % 10);
+ val /= 10;
+ } while (val);
+
+ if (negative)
+ *--p = '-';
+ return p;
+}
+
+static ssize_t strpad(char *dst, size_t dst_size, const char *src,
+ int _pad, bool zero_pad, bool decimal)
+{
+ ssize_t len = strlen(src), pad = _pad;
+ char *p = dst;
+
+ if (max(len, abs(pad)) >= dst_size)
+ return -E2BIG;
+
+ if (pad > len) {
+ if (decimal && zero_pad && *src == '-') {
+ *p++ = '-';
+ src++;
+ len--;
+ pad--;
+ }
+ memset(p, zero_pad ? '0' : ' ', pad - len);
+ p += pad - len;
+ }
+ memcpy(p, src, len);
+ p += len;
+ if (pad < 0 && -pad > len) {
+ memset(p, ' ', -pad - len);
+ p += -pad - len;
+ }
+ *p = '\0';
+ return p - dst;
}
static char *symstart(char *p)
@@ -58,35 +150,93 @@ static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned sh
return NULL;
}
-static noinline char *strsym(void *ip)
+#define MAX_SYMLEN 64
+static noinline char *strsym(char *buf, void *ip)
{
- static char buf[64];
unsigned short off;
unsigned short len;
char *p;
p = findsym((unsigned long)ip, &off, &len);
if (p) {
- strncpy(buf, p, sizeof(buf));
+ strscpy(buf, p, MAX_SYMLEN);
/* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
- p = buf + strnlen(buf, sizeof(buf) - 15);
- strcpy(p, "+0x");
- p = as_hex(p + 3, off, 0);
- strcpy(p, "/0x");
- as_hex(p + 3, len, 0);
+ p = buf + strnlen(buf, MAX_SYMLEN - 15);
+ strscpy(p, "+0x", MAX_SYMLEN - (p - buf));
+ as_hex(p + 3, off, 0);
+ strcat(p, "/0x");
+ as_hex(p + strlen(p), len, 0);
} else {
as_hex(buf, (unsigned long)ip, 16);
}
return buf;
}
-void boot_printk(const char *fmt, ...)
+static inline int printk_loglevel(const char *buf)
+{
+ if (buf[0] == KERN_SOH_ASCII && buf[1]) {
+ switch (buf[1]) {
+ case '0' ... '7':
+ return buf[1] - '0';
+ }
+ }
+ return MESSAGE_LOGLEVEL_DEFAULT;
+}
+
+static void boot_console_earlyprintk(const char *buf)
+{
+ int level = printk_loglevel(buf);
+
+ /* always print emergency messages */
+ if (level > LOGLEVEL_EMERG && !boot_earlyprintk)
+ return;
+ buf = printk_skip_level(buf);
+ /* print debug messages only when bootdebug is enabled */
+ if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf))))
+ return;
+ if (boot_ignore_loglevel || level < boot_console_loglevel)
+ sclp_early_printk(buf);
+}
+
+static char *add_timestamp(char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+ unsigned long ns = tod_to_ns(__get_tod_clock_monotonic());
+ char ts[MAX_NUMLEN];
+
+ *buf++ = '[';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0);
+ *buf++ = '.';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0);
+ *buf++ = ']';
+ *buf++ = ' ';
+#endif
+ return buf;
+}
+
+#define va_arg_len_type(args, lenmod, typemod) \
+ ((lenmod == 'l') ? va_arg(args, typemod long) : \
+ (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \
+ (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \
+ (lenmod == 'z') ? va_arg(args, typemod long) : \
+ va_arg(args, typemod int))
+
+int boot_printk(const char *fmt, ...)
{
char buf[1024] = { 0 };
char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
- unsigned long pad;
- char *p = buf;
+ bool zero_pad, decimal;
+ char *strval, *p = buf;
+ char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)];
va_list args;
+ char lenmod;
+ ssize_t len;
+ int pad;
+
+ *p++ = KERN_SOH_ASCII;
+ *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT;
+ p = add_timestamp(p);
+ fmt = printk_skip_level(fmt);
va_start(args, fmt);
for (; p < end && *fmt; fmt++) {
@@ -94,31 +244,56 @@ void boot_printk(const char *fmt, ...)
*p++ = *fmt;
continue;
}
- pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0;
+ if (*++fmt == '%') {
+ *p++ = '%';
+ continue;
+ }
+ zero_pad = (*fmt == '0');
+ pad = simple_strtol(fmt, (char **)&fmt, 10);
+ lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0;
+ if (lenmod == 'h' && *fmt == 'h') {
+ lenmod = 'H';
+ fmt++;
+ }
+ decimal = false;
switch (*fmt) {
case 's':
- p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf));
+ if (lenmod)
+ goto out;
+ strval = va_arg(args, char *);
+ zero_pad = false;
break;
case 'p':
- if (*++fmt != 'S')
+ if (*++fmt != 'S' || lenmod)
goto out;
- p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf));
+ strval = strsym(valbuf, va_arg(args, void *));
+ zero_pad = false;
break;
- case 'l':
- if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned long), pad);
+ case 'd':
+ case 'i':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1);
+ decimal = true;
+ break;
+ case 'u':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
break;
case 'x':
- if (end - p <= max(sizeof(int) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned int), pad);
+ strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
break;
default:
goto out;
}
+ len = strpad(p, end - p, strval, pad, zero_pad, decimal);
+ if (len == -E2BIG)
+ break;
+ p += len;
}
out:
va_end(args);
- sclp_early_printk(buf);
+ len = strlen(buf);
+ if (len) {
+ boot_rb_add(buf, len);
+ boot_console_earlyprintk(buf);
+ }
+ return len;
}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index e6b06692ddc8..93684a775716 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,13 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "startup: " fmt
#include <linux/string.h>
#include <linux/elf.h>
#include <asm/page-states.h>
#include <asm/boot_data.h>
#include <asm/extmem.h>
#include <asm/sections.h>
+#include <asm/diag288.h>
#include <asm/maccess.h>
+#include <asm/machine.h>
+#include <asm/sysinfo.h>
#include <asm/cpu_mf.h>
#include <asm/setup.h>
+#include <asm/timex.h>
#include <asm/kasan.h>
#include <asm/kexec.h>
#include <asm/sclp.h>
@@ -33,63 +38,128 @@ unsigned long __bootdata_preserved(max_mappable);
unsigned long __bootdata_preserved(page_noexec_mask);
unsigned long __bootdata_preserved(segment_noexec_mask);
unsigned long __bootdata_preserved(region_noexec_mask);
-int __bootdata_preserved(relocate_lowcore);
+union tod_clock __bootdata_preserved(tod_clock_base);
+u64 __bootdata_preserved(clock_comparator_max) = -1UL;
u64 __bootdata_preserved(stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
-struct machine_info machine;
-
void error(char *x)
{
- boot_printk("\n\n%s\n\n -- System halted", x);
+ boot_emerg("%s\n", x);
+ boot_emerg(" -- System halted\n");
disabled_wait();
}
+static char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+static void detect_machine_type(void)
+{
+ struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
+
+ /* Check current-configuration-level */
+ if (stsi(NULL, 0, 0, 0) <= 2) {
+ set_machine_feature(MFEATURE_LPAR);
+ return;
+ }
+ /* Get virtual-machine cpu information. */
+ if (stsi(vmms, 3, 2, 2) || !vmms->count)
+ return;
+ /* Detect known hypervisors */
+ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
+ set_machine_feature(MFEATURE_KVM);
+ else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
+ set_machine_feature(MFEATURE_VM);
+}
+
+static void detect_diag288(void)
+{
+ /* "BEGIN" in EBCDIC character set */
+ static const char cmd[] = "\xc2\xc5\xc7\xc9\xd5";
+ unsigned long action, len;
+
+ action = machine_is_vm() ? (unsigned long)cmd : LPARWDT_RESTART;
+ len = machine_is_vm() ? sizeof(cmd) : 0;
+ if (__diag288(WDT_FUNC_INIT, MIN_INTERVAL, action, len))
+ return;
+ __diag288(WDT_FUNC_CANCEL, 0, 0, 0);
+ set_machine_feature(MFEATURE_DIAG288);
+}
+
+static void detect_diag9c(void)
+{
+ unsigned int cpu;
+ int rc = 1;
+
+ cpu = stap();
+ asm_inline volatile(
+ " diag %[cpu],%%r0,0x9c\n"
+ "0: lhi %[rc],0\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [rc] "+d" (rc)
+ : [cpu] "d" (cpu)
+ : "cc", "memory");
+ if (!rc)
+ set_machine_feature(MFEATURE_DIAG9C);
+}
+
+static void reset_tod_clock(void)
+{
+ union tod_clock clk;
+
+ if (store_tod_clock_ext_cc(&clk) == 0)
+ return;
+ /* TOD clock not running. Set the clock to Unix Epoch. */
+ if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk))
+ disabled_wait();
+ memset(&tod_clock_base, 0, sizeof(tod_clock_base));
+ tod_clock_base.tod = TOD_UNIX_EPOCH;
+ get_lowcore()->last_update_clock = TOD_UNIX_EPOCH;
+}
+
static void detect_facilities(void)
{
- if (test_facility(8)) {
- machine.has_edat1 = 1;
+ if (cpu_has_edat1())
local_ctl_set_bit(0, CR0_EDAT_BIT);
- }
- if (test_facility(78))
- machine.has_edat2 = 1;
page_noexec_mask = -1UL;
segment_noexec_mask = -1UL;
region_noexec_mask = -1UL;
- if (!test_facility(130)) {
+ if (!cpu_has_nx()) {
page_noexec_mask &= ~_PAGE_NOEXEC;
segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC;
region_noexec_mask &= ~_REGION_ENTRY_NOEXEC;
}
+ if (IS_ENABLED(CONFIG_PCI) && test_facility(153))
+ set_machine_feature(MFEATURE_PCI_MIO);
+ reset_tod_clock();
+ if (test_facility(139) && (tod_clock_base.tod >> 63)) {
+ /* Enable signed clock comparator comparisons */
+ set_machine_feature(MFEATURE_SCC);
+ clock_comparator_max = -1UL >> 1;
+ local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT);
+ }
+ if (test_facility(50) && test_facility(73)) {
+ set_machine_feature(MFEATURE_TX);
+ local_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT);
+ }
+ if (cpu_has_vx())
+ local_ctl_set_bit(0, CR0_VECTOR_BIT);
}
static int cmma_test_essa(void)
{
- unsigned long reg1, reg2, tmp = 0;
+ unsigned long tmp = 0;
int rc = 1;
- psw_t old;
/* Test ESSA_GET_STATE */
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ asm_inline volatile(
" .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
- " la %[rc],0\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [rc] "+&d" (rc),
- [tmp] "=&d" (tmp),
- "+Q" (get_lowcore()->program_new_psw),
- "=Q" (old)
- : [psw_old] "a" (&old),
- [psw_pgm] "a" (&get_lowcore()->program_new_psw),
- [cmd] "i" (ESSA_GET_STATE)
+ "0: lhi %[rc],0\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [rc] "+d" (rc), [tmp] "+d" (tmp)
+ : [cmd] "i" (ESSA_GET_STATE)
: "cc", "memory");
return rc;
}
@@ -143,7 +213,7 @@ static void rescue_initrd(unsigned long min, unsigned long max)
return;
old_addr = addr;
physmem_free(RR_INITRD);
- addr = physmem_alloc_top_down(RR_INITRD, size, 0);
+ addr = physmem_alloc_or_die(RR_INITRD, size, 0);
memmove((void *)addr, (void *)old_addr, size);
}
@@ -222,12 +292,16 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
if (oldmem_data.start) {
__kaslr_enabled = 0;
ident_map_size = min(ident_map_size, oldmem_data.size);
+ boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size);
} else if (ipl_block_valid && is_ipl_block_dump()) {
__kaslr_enabled = 0;
- if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
+ if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) {
ident_map_size = min(ident_map_size, hsa_size);
+ boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size);
+ }
}
#endif
+ boot_debug("Identity map size: 0x%016lx\n", ident_map_size);
}
#define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore))
@@ -267,6 +341,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
+ boot_debug("vmem size estimated: 0x%016lx\n", vsize);
if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE ||
(vsize > _REGION2_SIZE && kaslr_enabled())) {
asce_limit = _REGION1_SIZE;
@@ -290,8 +365,10 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
* otherwise asce_limit and rte_size would have been adjusted.
*/
vmax = adjust_to_uv_max(asce_limit);
+ boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax);
#ifdef CONFIG_KASAN
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START);
+ boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END);
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
#endif
@@ -305,19 +382,27 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
pos = 0;
kernel_end = vmax - pos * THREAD_SIZE;
kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE);
+ boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start,
+ kernel_start + kernel_size);
} else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) {
kernel_start = round_down(vmax - kernel_size, THREAD_SIZE);
- boot_printk("The kernel base address is forced to %lx\n", kernel_start);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start,
+ kernel_start + kernel_size);
} else {
kernel_start = __NO_KASLR_START_KERNEL;
+ boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start,
+ kernel_start + kernel_size);
}
__kaslr_offset = kernel_start;
+ boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset);
MODULES_END = round_down(kernel_start, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
if (IS_ENABLED(CONFIG_KMSAN))
VMALLOC_END -= MODULES_LEN * 2;
+ boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END);
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
vsize = (VMALLOC_END - FIXMAP_SIZE) / 2;
@@ -329,10 +414,15 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
VMALLOC_END -= vmalloc_size * 2;
}
VMALLOC_START = VMALLOC_END - vmalloc_size;
+ boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END);
__memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE);
+ boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area,
+ __memcpy_real_area + MEMCPY_REAL_SIZE);
__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
sizeof(struct lowcore));
+ boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore,
+ __abs_lowcore + ABS_LOWCORE_MAP_SIZE);
/* split remaining virtual space between 1:1 mapping & vmemmap array */
pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page));
@@ -352,8 +442,11 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
max_mappable = min(max_mappable, vmemmap_start);
- if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE))
- __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
+ __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+#endif
+ boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base,
+ __identity_base + ident_map_size);
return asce_limit;
}
@@ -412,6 +505,10 @@ void startup_kernel(void)
psw_t psw;
setup_lpp();
+ store_ipl_parmblock();
+ uv_query_info();
+ setup_boot_command_line();
+ parse_boot_command_line();
/*
* Non-randomized kernel physical start address must be _SEGMENT_SIZE
@@ -431,13 +528,14 @@ void startup_kernel(void)
oldmem_data.start = parmarea.oldmem_base;
oldmem_data.size = parmarea.oldmem_size;
- store_ipl_parmblock();
read_ipl_report();
- uv_query_info();
sclp_early_read_info();
- setup_boot_command_line();
- parse_boot_command_line();
+ sclp_early_detect_machine_features();
detect_facilities();
+ detect_diag9c();
+ detect_machine_type();
+ /* detect_diag288() needs machine type */
+ detect_diag288();
cmma_init();
sanitize_prot_virt_host();
max_physmem_end = detect_max_physmem_end();
@@ -526,6 +624,7 @@ void startup_kernel(void)
__kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset);
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
+ dump_physmem_reserved();
copy_bootdata();
__apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions,
(struct alt_instr *)_vmlinux_info.alt_instructions_end,
@@ -542,5 +641,6 @@ void startup_kernel(void)
*/
psw.addr = __kaslr_offset + vmlinux.entry;
psw.mask = PSW_KERNEL_BITS;
- __load_psw(psw);
+ boot_debug("Starting kernel at: 0x%016lx\n", psw.addr);
+ jump_to_kernel(&psw);
}
diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c
index f6b9b1df48a8..bd68161434a6 100644
--- a/arch/s390/boot/string.c
+++ b/arch/s390/boot/string.c
@@ -29,6 +29,18 @@ int strncmp(const char *cs, const char *ct, size_t count)
return 0;
}
+ssize_t sized_strscpy(char *dst, const char *src, size_t count)
+{
+ size_t len;
+
+ if (count == 0)
+ return -E2BIG;
+ len = strnlen(src, count - 1);
+ memcpy(dst, src, len);
+ dst[len] = '\0';
+ return src[len] ? -E2BIG : len;
+}
+
void *memset64(uint64_t *s, uint64_t v, size_t count)
{
uint64_t *xs = s;
diff --git a/arch/s390/boot/trampoline.S b/arch/s390/boot/trampoline.S
new file mode 100644
index 000000000000..1cb5adf005ea
--- /dev/null
+++ b/arch/s390/boot/trampoline.S
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+
+# This function is identical to __load_psw(), but the lx-symbols GDB command
+# puts a breakpoint on it, so it needs to be kept separate.
+SYM_CODE_START(jump_to_kernel)
+ lpswe 0(%r2)
+SYM_CODE_END(jump_to_kernel)
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 881a1ece422f..cea3de4dce8c 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "vmem: " fmt
+#include <linux/cpufeature.h>
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
@@ -9,10 +11,12 @@
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
+#include <asm/machine.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
struct ctlreg __bootdata_preserved(s390_invalid_asce);
#ifdef CONFIG_PROC_FS
@@ -31,12 +35,42 @@ enum populate_mode {
POPULATE_IDENTITY,
POPULATE_KERNEL,
#ifdef CONFIG_KASAN
+ /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
POPULATE_KASAN_MAP_SHADOW,
POPULATE_KASAN_ZERO_SHADOW,
POPULATE_KASAN_SHALLOW
#endif
};
+#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
+static inline const char *get_populate_mode_name(enum populate_mode t)
+{
+ switch (t) {
+ POPULATE_MODE_NAME(NONE);
+ POPULATE_MODE_NAME(DIRECT);
+ POPULATE_MODE_NAME(LOWCORE);
+ POPULATE_MODE_NAME(ABS_LOWCORE);
+ POPULATE_MODE_NAME(IDENTITY);
+ POPULATE_MODE_NAME(KERNEL);
+#ifdef CONFIG_KASAN
+ POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
+ POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
+ POPULATE_MODE_NAME(KASAN_SHALLOW);
+#endif
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static bool is_kasan_populate_mode(enum populate_mode mode)
+{
+#ifdef CONFIG_KASAN
+ return mode >= POPULATE_KASAN_MAP_SHADOW;
+#else
+ return false;
+#endif
+}
+
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);
#ifdef CONFIG_KASAN
@@ -52,9 +86,12 @@ static pte_t pte_z;
static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
- start = PAGE_ALIGN_DOWN(__sha(start));
- end = PAGE_ALIGN(__sha(end));
- pgtable_populate(start, end, mode);
+ unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
+ unsigned long sha_end = PAGE_ALIGN(__sha(end));
+
+ boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
+ start, end, sha_start, sha_end);
+ pgtable_populate(sha_start, sha_end, mode);
}
static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
@@ -200,7 +237,7 @@ static void *boot_crst_alloc(unsigned long val)
unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
unsigned long *table;
- table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
+ table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
crst_table_init(table, val);
__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
@@ -216,7 +253,7 @@ static pte_t *boot_pte_alloc(void)
* during POPULATE_KASAN_MAP_SHADOW when EDAT is off
*/
if (!pte_leftover) {
- pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
+ pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
__arch_set_page_dat(pte, 1);
} else {
@@ -228,11 +265,12 @@ static pte_t *boot_pte_alloc(void)
return pte;
}
-static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode)
+static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
+ enum populate_mode mode)
{
switch (mode) {
case POPULATE_NONE:
- return -1;
+ return INVALID_PHYS_ADDR;
case POPULATE_DIRECT:
return addr;
case POPULATE_LOWCORE:
@@ -245,38 +283,64 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
return __identity_pa(addr);
#ifdef CONFIG_KASAN
case POPULATE_KASAN_MAP_SHADOW:
- addr = physmem_alloc_top_down(RR_VMEM, size, size);
- memset((void *)addr, 0, size);
- return addr;
+ /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */
+ addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
+ if (addr) {
+ memset((void *)addr, 0, size);
+ return addr;
+ }
+ return INVALID_PHYS_ADDR;
#endif
default:
- return -1;
+ return INVALID_PHYS_ADDR;
}
}
-static bool large_allowed(enum populate_mode mode)
+static bool large_page_mapping_allowed(enum populate_mode mode)
{
- return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL);
+ switch (mode) {
+ case POPULATE_DIRECT:
+ case POPULATE_IDENTITY:
+ case POPULATE_KERNEL:
+#ifdef CONFIG_KASAN
+ case POPULATE_KASAN_MAP_SHADOW:
+#endif
+ return true;
+ default:
+ return false;
+ }
}
-static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end,
- enum populate_mode mode)
+static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- unsigned long size = end - addr;
+ unsigned long pa, size = end - addr;
+
+ if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PUD_SIZE))
+ return INVALID_PHYS_ADDR;
- return machine.has_edat2 && large_allowed(mode) &&
- IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) &&
- IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE);
+ return pa;
}
-static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end,
- enum populate_mode mode)
+static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- unsigned long size = end - addr;
+ unsigned long pa, size = end - addr;
- return machine.has_edat1 && large_allowed(mode) &&
- IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) &&
- IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE);
+ if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PMD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ return pa;
}
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
@@ -290,7 +354,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
if (pte_none(*pte)) {
if (kasan_pte_populate_zero_shadow(pte, mode))
continue;
- entry = __pte(_pa(addr, PAGE_SIZE, mode));
+ entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
entry = set_pte_bit(entry, PAGE_KERNEL);
set_pte(pte, entry);
pages++;
@@ -303,7 +367,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pmd_t *pmd, entry;
pte_t *pte;
@@ -313,8 +377,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
if (pmd_none(*pmd)) {
if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
continue;
- if (can_large_pmd(pmd, addr, next, mode)) {
- entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
+ pa = try_get_large_pmd_pa(pmd, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pmd(pa);
entry = set_pmd_bit(entry, SEGMENT_KERNEL);
set_pmd(pmd, entry);
pages++;
@@ -334,7 +399,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pud_t *pud, entry;
pmd_t *pmd;
@@ -344,8 +409,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
if (pud_none(*pud)) {
if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
continue;
- if (can_large_pud(pud, addr, next, mode)) {
- entry = __pud(_pa(addr, _REGION3_SIZE, mode));
+ pa = try_get_large_pud_pa(pud, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pud(pa);
entry = set_pud_bit(entry, REGION3_KERNEL);
set_pud(pud, entry);
pages++;
@@ -388,6 +454,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
pgd_t *pgd;
p4d_t *p4d;
+ if (!is_kasan_populate_mode(mode)) {
+ boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
+ get_populate_mode_name(mode), addr, end,
+ resolve_pa_may_alloc(addr, 0, mode),
+ resolve_pa_may_alloc(end - 1, 0, mode) + 1);
+ }
+
pgd = pgd_offset(&init_mm, addr);
for (; addr < end; addr = next, pgd++) {
next = pgd_addr_end(addr, end);
@@ -445,7 +518,7 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
- if (relocate_lowcore)
+ if (machine_has_relocated_lowcore())
lowcore_address = LOWCORE_ALT_ADDRESS;
/*
@@ -457,6 +530,9 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
lowcore_address + sizeof(struct lowcore),
POPULATE_LOWCORE);
for_each_physmem_usable_range(i, &start, &end) {
+ /* Do not map lowcore with identity mapping */
+ if (!start)
+ start = sizeof(struct lowcore);
pgtable_populate((unsigned long)__identity_va(start),
(unsigned long)__identity_va(end),
POPULATE_IDENTITY);
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index 66670212a361..50988022f9ea 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -40,6 +40,7 @@ SECTIONS
*(.rodata.*)
_erodata = . ;
}
+ EXCEPTION_TABLE(16)
.got : {
*(.got)
}
@@ -165,7 +166,6 @@ SECTIONS
/DISCARD/ : {
COMMON_DISCARDS
*(.eh_frame)
- *(__ex_table)
*(*__ksymtab*)
*(___kcrctab*)
}
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index d6beec5292a0..5e616bc988ac 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -5,6 +5,7 @@ CONFIG_WATCH_QUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_POSIX_AUX_CLOCKS=y
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y
@@ -19,6 +20,7 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_SCHED_PROXY_EXEC=y
CONFIG_NUMA_BALANCING=y
CONFIG_MEMCG=y
CONFIG_BLK_CGROUP=y
@@ -38,11 +40,11 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
+CONFIG_CRASH_DM_CRYPT=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
@@ -92,7 +94,6 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
-CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
CONFIG_SLAB_BUCKETS=y
CONFIG_SLUB_STATS=y
@@ -107,6 +108,7 @@ CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_ZONE_DEVICE=y
CONFIG_PERCPU_STATS=y
CONFIG_GUP_TEST=y
CONFIG_ANON_VMA_NAME=y
@@ -225,17 +227,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
CONFIG_NETFILTER_XT_TARGET_CT=m
CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NETFILTER_XT_TARGET_NETMAP=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
CONFIG_NETFILTER_XT_TARGET_SECMARK=m
CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -321,16 +325,8 @@ CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
@@ -343,15 +339,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_MH=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP6_NF_SECURITY=m
-CONFIG_IP6_NF_NAT=m
-CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_IP_SCTP=m
CONFIG_RDS=m
CONFIG_RDS_RDMA=m
CONFIG_RDS_TCP=m
@@ -386,6 +376,7 @@ CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
CONFIG_NET_SCH_ETS=m
+CONFIG_NET_SCH_DUALPI2=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
@@ -395,6 +386,9 @@ CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
@@ -405,6 +399,9 @@ CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GATE=m
CONFIG_NET_TC_SKB_EXT=y
CONFIG_DNS_RESOLVER=y
@@ -469,6 +466,7 @@ CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_BITMAP_FILE is not set
+CONFIG_MD_LINEAR=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -500,6 +498,7 @@ CONFIG_DM_VDO=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
+CONFIG_OVPN=m
CONFIG_EQUALIZER=m
CONFIG_IFB=m
CONFIG_MACVLAN=m
@@ -627,8 +626,17 @@ CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_MEM=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_VDPA=m
+CONFIG_VDPA_SIM=m
+CONFIG_VDPA_SIM_NET=m
+CONFIG_VDPA_SIM_BLOCK=m
+CONFIG_VDPA_USER=m
+CONFIG_MLX5_VDPA_NET=m
+CONFIG_VP_VDPA=m
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+CONFIG_VHOST_VDPA=m
+CONFIG_DEV_DAX=m
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -723,11 +731,10 @@ CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_UNICODE=y
CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
-CONFIG_HARDENED_USERCOPY=y
-CONFIG_FORTIFY_SOURCE=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_LOCKDOWN_LSM=y
@@ -740,13 +747,16 @@ CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
-CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_HARDENED_USERCOPY=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
+CONFIG_CRYPTO_SELFTESTS_FULL=y
+CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
@@ -756,7 +766,6 @@ CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
@@ -773,7 +782,6 @@ CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_MD4=m
@@ -793,15 +801,11 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_HMAC_S390=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
@@ -810,14 +814,13 @@ CONFIG_PKEY_EP11=m
CONFIG_PKEY_PCKMO=m
CONFIG_PKEY_UV=m
CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_PHMAC_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_CRYPTO_KRB5=m
+CONFIG_CRYPTO_KRB5_SELFTESTS=y
CONFIG_CORDIC=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
-CONFIG_CRC4=m
-CONFIG_CRC7=m
-CONFIG_CRC8=m
+CONFIG_TRACE_MMIO_ACCESS=y
CONFIG_RANDOM32_SELFTEST=y
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_DMA_CMA=y
@@ -875,6 +878,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300
CONFIG_LATENCYTOP=y
CONFIG_BOOTTIME_TRACING=y
CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FUNCTION_GRAPH_RETADDR=y
CONFIG_FPROBE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
@@ -887,12 +891,14 @@ CONFIG_USER_EVENTS=y
CONFIG_HIST_TRIGGERS=y
CONFIG_FTRACE_STARTUP_TEST=y
# CONFIG_EVENT_TRACE_STARTUP_TEST is not set
+CONFIG_FTRACE_SORT_STARTUP_TEST=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_TRACE_PRINTK=m
CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
CONFIG_SAMPLE_FTRACE_OPS=m
CONFIG_DEBUG_ENTRY=y
+CONFIG_STRICT_MM_TYPECHECKS=y
CONFIG_CIO_INJECT=y
CONFIG_KUNIT=m
CONFIG_KUNIT_DEBUGFS=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 8cfbfb10bba8..094599cdaf4d 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -4,6 +4,7 @@ CONFIG_WATCH_QUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_POSIX_AUX_CLOCKS=y
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y
@@ -17,6 +18,7 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_SCHED_PROXY_EXEC=y
CONFIG_NUMA_BALANCING=y
CONFIG_MEMCG=y
CONFIG_BLK_CGROUP=y
@@ -36,16 +38,16 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
+CONFIG_CRASH_DM_CRYPT=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
-CONFIG_HZ_100=y
+CONFIG_HZ_1000=y
CONFIG_CERT_STORE=y
CONFIG_EXPOLINE=y
CONFIG_EXPOLINE_AUTO=y
@@ -86,7 +88,6 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BINFMT_MISC=m
CONFIG_ZSWAP=y
-CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y
CONFIG_ZSMALLOC_STAT=y
CONFIG_SLAB_BUCKETS=y
# CONFIG_COMPAT_BRK is not set
@@ -99,6 +100,7 @@ CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_ZONE_DEVICE=y
CONFIG_PERCPU_STATS=y
CONFIG_ANON_VMA_NAME=y
CONFIG_USERFAULTFD=y
@@ -216,17 +218,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
CONFIG_NETFILTER_XT_TARGET_CT=m
CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NETFILTER_XT_TARGET_NETMAP=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
CONFIG_NETFILTER_XT_TARGET_SECMARK=m
CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
@@ -312,16 +316,8 @@ CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
@@ -334,15 +330,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_MH=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP6_NF_SECURITY=m
-CONFIG_IP6_NF_NAT=m
-CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_IP_SCTP=m
CONFIG_RDS=m
CONFIG_RDS_RDMA=m
CONFIG_RDS_TCP=m
@@ -376,6 +366,7 @@ CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
CONFIG_NET_SCH_ETS=m
+CONFIG_NET_SCH_DUALPI2=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
@@ -385,6 +376,9 @@ CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_EMATCH=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
@@ -395,6 +389,9 @@ CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GATE=m
CONFIG_NET_TC_SKB_EXT=y
CONFIG_DNS_RESOLVER=y
@@ -459,6 +456,7 @@ CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_BITMAP_FILE is not set
+CONFIG_MD_LINEAR=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -490,6 +488,7 @@ CONFIG_DM_VDO=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
+CONFIG_OVPN=m
CONFIG_EQUALIZER=m
CONFIG_IFB=m
CONFIG_MACVLAN=m
@@ -617,8 +616,17 @@ CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_MEM=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_VDPA=m
+CONFIG_VDPA_SIM=m
+CONFIG_VDPA_SIM_NET=m
+CONFIG_VDPA_SIM_BLOCK=m
+CONFIG_VDPA_USER=m
+CONFIG_MLX5_VDPA_NET=m
+CONFIG_VP_VDPA=m
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+CONFIG_VHOST_VDPA=m
+CONFIG_DEV_DAX=m
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -672,7 +680,6 @@ CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS_INODE64=y
CONFIG_TMPFS_QUOTA=y
CONFIG_HUGETLBFS=y
-CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
@@ -710,6 +717,7 @@ CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_UNICODE=y
CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
@@ -725,14 +733,14 @@ CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
-CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
+CONFIG_CRYPTO_NULL=y
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECDSA=m
@@ -742,7 +750,6 @@ CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
@@ -759,7 +766,6 @@ CONFIG_CRYPTO_HCTR2=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_MD4=m
@@ -780,15 +786,11 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_HMAC_S390=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
@@ -797,15 +799,13 @@ CONFIG_PKEY_EP11=m
CONFIG_PKEY_PCKMO=m
CONFIG_PKEY_UV=m
CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_PHMAC_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_CRYPTO_KRB5=m
+CONFIG_CRYPTO_KRB5_SELFTESTS=y
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
-CONFIG_CRC4=m
-CONFIG_CRC7=m
-CONFIG_CRC8=m
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
@@ -826,6 +826,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
CONFIG_BOOTTIME_TRACING=y
CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FUNCTION_GRAPH_RETADDR=y
CONFIG_FPROBE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
diff --git a/arch/s390/configs/mmtypes.config b/arch/s390/configs/mmtypes.config
new file mode 100644
index 000000000000..fe32b442d789
--- /dev/null
+++ b/arch/s390/configs/mmtypes.config
@@ -0,0 +1,2 @@
+# Help: Enable strict memory management typechecks
+CONFIG_STRICT_MM_TYPECHECKS=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index bcbaa069de96..ed0b137353ad 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -1,5 +1,6 @@
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_POSIX_AUX_CLOCKS=y
CONFIG_BPF_SYSCALL=y
# CONFIG_CPU_ISOLATION is not set
# CONFIG_UTS_NS is not set
@@ -11,7 +12,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_KEXEC=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=2
-CONFIG_HZ_100=y
+CONFIG_HZ_1000=y
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
# CONFIG_AP is not set
@@ -62,7 +63,6 @@ CONFIG_ZFCP=y
# CONFIG_INOTIFY_USER is not set
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_LSM="yama,loadpin,safesetid,integrity"
# CONFIG_ZLIB_DFLTCC is not set
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y
@@ -71,7 +71,6 @@ CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_FS=y
CONFIG_PANIC_ON_OOPS=y
-# CONFIG_SCHED_DEBUG is not set
CONFIG_RCU_CPU_STALL_TIMEOUT=60
# CONFIG_RCU_TRACE is not set
# CONFIG_FTRACE is not set
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
index b760232537f1..03f73fbd38b6 100644
--- a/arch/s390/crypto/Kconfig
+++ b/arch/s390/crypto/Kconfig
@@ -2,42 +2,8 @@
menu "Accelerated Cryptographic Algorithms for CPU (s390)"
-config CRYPTO_SHA512_S390
- tristate "Hash functions: SHA-384 and SHA-512"
- depends on S390
- select CRYPTO_HASH
- help
- SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
-
- Architecture: s390
-
- It is available as of z10.
-
-config CRYPTO_SHA1_S390
- tristate "Hash functions: SHA-1"
- depends on S390
- select CRYPTO_HASH
- help
- SHA-1 secure hash algorithm (FIPS 180)
-
- Architecture: s390
-
- It is available as of z990.
-
-config CRYPTO_SHA256_S390
- tristate "Hash functions: SHA-224 and SHA-256"
- depends on S390
- select CRYPTO_HASH
- help
- SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
- Architecture: s390
-
- It is available as of z9.
-
config CRYPTO_SHA3_256_S390
tristate "Hash functions: SHA3-224 and SHA3-256"
- depends on S390
select CRYPTO_HASH
help
SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202)
@@ -48,7 +14,6 @@ config CRYPTO_SHA3_256_S390
config CRYPTO_SHA3_512_S390
tristate "Hash functions: SHA3-384 and SHA3-512"
- depends on S390
select CRYPTO_HASH
help
SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202)
@@ -59,7 +24,6 @@ config CRYPTO_SHA3_512_S390
config CRYPTO_GHASH_S390
tristate "Hash functions: GHASH"
- depends on S390
select CRYPTO_HASH
help
GCM GHASH hash function (NIST SP800-38D)
@@ -70,7 +34,6 @@ config CRYPTO_GHASH_S390
config CRYPTO_AES_S390
tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM"
- depends on S390
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
help
@@ -92,7 +55,6 @@ config CRYPTO_AES_S390
config CRYPTO_DES_S390
tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR"
- depends on S390
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
select CRYPTO_LIB_DES
@@ -107,22 +69,8 @@ config CRYPTO_DES_S390
As of z990 the ECB and CBC mode are hardware accelerated.
As of z196 the CTR mode is hardware accelerated.
-config CRYPTO_CHACHA_S390
- tristate "Ciphers: ChaCha20"
- depends on S390
- select CRYPTO_SKCIPHER
- select CRYPTO_LIB_CHACHA_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- help
- Length-preserving cipher: ChaCha20 stream cipher (RFC 7539)
-
- Architecture: s390
-
- It is available as of z13.
-
config CRYPTO_HMAC_S390
tristate "Keyed-hash message authentication code: HMAC"
- depends on S390
select CRYPTO_HASH
help
s390 specific HMAC hardware support for SHA224, SHA256, SHA384 and
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 14dafadbcbed..998f4b656b18 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -3,18 +3,13 @@
# Cryptographic API
#
-obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
-obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o
+obj-$(CONFIG_CRYPTO_PHMAC_S390) += phmac_s390.o
obj-y += arch_random.o
-
-chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 9c46b1b630b1..5d36f4020dfa 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -66,7 +66,6 @@ struct s390_xts_ctx {
struct gcm_sg_walk {
struct scatter_walk walk;
unsigned int walk_bytes;
- u8 *walk_ptr;
unsigned int walk_bytes_remain;
u8 buf[AES_BLOCK_SIZE];
unsigned int buf_bytes;
@@ -787,29 +786,20 @@ static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg,
static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw)
{
- struct scatterlist *nextsg;
-
- gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain);
- while (!gw->walk_bytes) {
- nextsg = sg_next(gw->walk.sg);
- if (!nextsg)
- return 0;
- scatterwalk_start(&gw->walk, nextsg);
- gw->walk_bytes = scatterwalk_clamp(&gw->walk,
- gw->walk_bytes_remain);
- }
- gw->walk_ptr = scatterwalk_map(&gw->walk);
+ if (gw->walk_bytes_remain == 0)
+ return 0;
+ gw->walk_bytes = scatterwalk_next(&gw->walk, gw->walk_bytes_remain);
return gw->walk_bytes;
}
static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw,
- unsigned int nbytes)
+ unsigned int nbytes, bool out)
{
gw->walk_bytes_remain -= nbytes;
- scatterwalk_unmap(gw->walk_ptr);
- scatterwalk_advance(&gw->walk, nbytes);
- scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain);
- gw->walk_ptr = NULL;
+ if (out)
+ scatterwalk_done_dst(&gw->walk, nbytes);
+ else
+ scatterwalk_done_src(&gw->walk, nbytes);
}
static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
@@ -835,16 +825,16 @@ static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
}
if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) {
- gw->ptr = gw->walk_ptr;
+ gw->ptr = gw->walk.addr;
gw->nbytes = gw->walk_bytes;
goto out;
}
while (1) {
n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes);
- memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n);
+ memcpy(gw->buf + gw->buf_bytes, gw->walk.addr, n);
gw->buf_bytes += n;
- _gcm_sg_unmap_and_advance(gw, n);
+ _gcm_sg_unmap_and_advance(gw, n, false);
if (gw->buf_bytes >= minbytesneeded) {
gw->ptr = gw->buf;
gw->nbytes = gw->buf_bytes;
@@ -876,13 +866,12 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
}
if (gw->walk_bytes >= minbytesneeded) {
- gw->ptr = gw->walk_ptr;
+ gw->ptr = gw->walk.addr;
gw->nbytes = gw->walk_bytes;
goto out;
}
- scatterwalk_unmap(gw->walk_ptr);
- gw->walk_ptr = NULL;
+ scatterwalk_unmap(&gw->walk);
gw->ptr = gw->buf;
gw->nbytes = sizeof(gw->buf);
@@ -904,7 +893,7 @@ static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
} else
gw->buf_bytes = 0;
} else
- _gcm_sg_unmap_and_advance(gw, bytesdone);
+ _gcm_sg_unmap_and_advance(gw, bytesdone, false);
return bytesdone;
}
@@ -921,11 +910,11 @@ static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
if (!_gcm_sg_clamp_and_map(gw))
return i;
n = min(gw->walk_bytes, bytesdone - i);
- memcpy(gw->walk_ptr, gw->buf + i, n);
- _gcm_sg_unmap_and_advance(gw, n);
+ memcpy(gw->walk.addr, gw->buf + i, n);
+ _gcm_sg_unmap_and_advance(gw, n, true);
}
} else
- _gcm_sg_unmap_and_advance(gw, bytesdone);
+ _gcm_sg_unmap_and_advance(gw, bytesdone, true);
return bytesdone;
}
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index a8a2407381af..083e8d5eada2 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -6,6 +6,7 @@
* Author(s): Harald Freudenberger
*/
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/random.h>
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
deleted file mode 100644
index f8b0c52e77a4..000000000000
--- a/arch/s390/crypto/chacha-glue.c
+++ /dev/null
@@ -1,130 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * s390 ChaCha stream cipher.
- *
- * Copyright IBM Corp. 2021
- */
-
-#define KMSG_COMPONENT "chacha_s390"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/algapi.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <asm/fpu.h>
-#include "chacha-s390.h"
-
-static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
- unsigned int nbytes, const u32 *key,
- u32 *counter)
-{
- DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
-
- kernel_fpu_begin(&vxstate, KERNEL_VXR);
- chacha20_vx(dst, src, nbytes, key, counter);
- kernel_fpu_end(&vxstate, KERNEL_VXR);
-
- *counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
-}
-
-static int chacha20_s390(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 state[CHACHA_STATE_WORDS] __aligned(16);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int rc;
-
- rc = skcipher_walk_virt(&walk, req, false);
- chacha_init_generic(state, ctx->key, req->iv);
-
- while (walk.nbytes > 0) {
- nbytes = walk.nbytes;
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- if (nbytes <= CHACHA_BLOCK_SIZE) {
- chacha_crypt_generic(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- ctx->nrounds);
- } else {
- chacha20_crypt_s390(state, walk.dst.virt.addr,
- walk.src.virt.addr, nbytes,
- &state[4], &state[12]);
- }
- rc = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
- return rc;
-}
-
-void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
-{
- /* TODO: implement hchacha_block_arch() in assembly */
- hchacha_block_generic(state, stream, nrounds);
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
-{
- chacha_init_generic(state, key, iv);
-}
-EXPORT_SYMBOL(chacha_init_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- /* s390 chacha20 implementation has 20 rounds hard-coded,
- * it cannot handle a block of data or less, but otherwise
- * it can handle data of arbitrary size
- */
- if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx())
- chacha_crypt_generic(state, dst, src, bytes, nrounds);
- else
- chacha20_crypt_s390(state, dst, src, bytes,
- &state[4], &state[12]);
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-static struct skcipher_alg chacha_algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-s390",
- .base.cra_priority = 900,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .setkey = chacha20_setkey,
- .encrypt = chacha20_s390,
- .decrypt = chacha20_s390,
- }
-};
-
-static int __init chacha_mod_init(void)
-{
- return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
- crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0;
-}
-
-static void __exit chacha_mod_fini(void)
-{
- if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
- crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init);
-module_exit(chacha_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha20 stream cipher");
-MODULE_LICENSE("GPL v2");
-
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S
deleted file mode 100644
index 63f3102678c0..000000000000
--- a/arch/s390/crypto/chacha-s390.S
+++ /dev/null
@@ -1,908 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Original implementation written by Andy Polyakov, @dot-asm.
- * This is an adaptation of the original code for kernel use.
- *
- * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/nospec-insn.h>
-#include <asm/fpu-insn.h>
-
-#define SP %r15
-#define FRAME (16 * 8 + 4 * 8)
-
- .data
- .balign 32
-
-SYM_DATA_START_LOCAL(sigma)
- .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
- .long 1,0,0,0
- .long 2,0,0,0
- .long 3,0,0,0
- .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
-
- .long 0,1,2,3
- .long 0x61707865,0x61707865,0x61707865,0x61707865 # smashed sigma
- .long 0x3320646e,0x3320646e,0x3320646e,0x3320646e
- .long 0x79622d32,0x79622d32,0x79622d32,0x79622d32
- .long 0x6b206574,0x6b206574,0x6b206574,0x6b206574
-SYM_DATA_END(sigma)
-
- .previous
-
- GEN_BR_THUNK %r14
-
- .text
-
-#############################################################################
-# void chacha20_vx_4x(u8 *out, counst u8 *inp, size_t len,
-# counst u32 *key, const u32 *counter)
-
-#define OUT %r2
-#define INP %r3
-#define LEN %r4
-#define KEY %r5
-#define COUNTER %r6
-
-#define BEPERM %v31
-#define CTR %v26
-
-#define K0 %v16
-#define K1 %v17
-#define K2 %v18
-#define K3 %v19
-
-#define XA0 %v0
-#define XA1 %v1
-#define XA2 %v2
-#define XA3 %v3
-
-#define XB0 %v4
-#define XB1 %v5
-#define XB2 %v6
-#define XB3 %v7
-
-#define XC0 %v8
-#define XC1 %v9
-#define XC2 %v10
-#define XC3 %v11
-
-#define XD0 %v12
-#define XD1 %v13
-#define XD2 %v14
-#define XD3 %v15
-
-#define XT0 %v27
-#define XT1 %v28
-#define XT2 %v29
-#define XT3 %v30
-
-SYM_FUNC_START(chacha20_vx_4x)
- stmg %r6,%r7,6*8(SP)
-
- larl %r7,sigma
- lhi %r0,10
- lhi %r1,0
-
- VL K0,0,,%r7 # load sigma
- VL K1,0,,KEY # load key
- VL K2,16,,KEY
- VL K3,0,,COUNTER # load counter
-
- VL BEPERM,0x40,,%r7
- VL CTR,0x50,,%r7
-
- VLM XA0,XA3,0x60,%r7,4 # load [smashed] sigma
-
- VREPF XB0,K1,0 # smash the key
- VREPF XB1,K1,1
- VREPF XB2,K1,2
- VREPF XB3,K1,3
-
- VREPF XD0,K3,0
- VREPF XD1,K3,1
- VREPF XD2,K3,2
- VREPF XD3,K3,3
- VAF XD0,XD0,CTR
-
- VREPF XC0,K2,0
- VREPF XC1,K2,1
- VREPF XC2,K2,2
- VREPF XC3,K2,3
-
-.Loop_4x:
- VAF XA0,XA0,XB0
- VX XD0,XD0,XA0
- VERLLF XD0,XD0,16
-
- VAF XA1,XA1,XB1
- VX XD1,XD1,XA1
- VERLLF XD1,XD1,16
-
- VAF XA2,XA2,XB2
- VX XD2,XD2,XA2
- VERLLF XD2,XD2,16
-
- VAF XA3,XA3,XB3
- VX XD3,XD3,XA3
- VERLLF XD3,XD3,16
-
- VAF XC0,XC0,XD0
- VX XB0,XB0,XC0
- VERLLF XB0,XB0,12
-
- VAF XC1,XC1,XD1
- VX XB1,XB1,XC1
- VERLLF XB1,XB1,12
-
- VAF XC2,XC2,XD2
- VX XB2,XB2,XC2
- VERLLF XB2,XB2,12
-
- VAF XC3,XC3,XD3
- VX XB3,XB3,XC3
- VERLLF XB3,XB3,12
-
- VAF XA0,XA0,XB0
- VX XD0,XD0,XA0
- VERLLF XD0,XD0,8
-
- VAF XA1,XA1,XB1
- VX XD1,XD1,XA1
- VERLLF XD1,XD1,8
-
- VAF XA2,XA2,XB2
- VX XD2,XD2,XA2
- VERLLF XD2,XD2,8
-
- VAF XA3,XA3,XB3
- VX XD3,XD3,XA3
- VERLLF XD3,XD3,8
-
- VAF XC0,XC0,XD0
- VX XB0,XB0,XC0
- VERLLF XB0,XB0,7
-
- VAF XC1,XC1,XD1
- VX XB1,XB1,XC1
- VERLLF XB1,XB1,7
-
- VAF XC2,XC2,XD2
- VX XB2,XB2,XC2
- VERLLF XB2,XB2,7
-
- VAF XC3,XC3,XD3
- VX XB3,XB3,XC3
- VERLLF XB3,XB3,7
-
- VAF XA0,XA0,XB1
- VX XD3,XD3,XA0
- VERLLF XD3,XD3,16
-
- VAF XA1,XA1,XB2
- VX XD0,XD0,XA1
- VERLLF XD0,XD0,16
-
- VAF XA2,XA2,XB3
- VX XD1,XD1,XA2
- VERLLF XD1,XD1,16
-
- VAF XA3,XA3,XB0
- VX XD2,XD2,XA3
- VERLLF XD2,XD2,16
-
- VAF XC2,XC2,XD3
- VX XB1,XB1,XC2
- VERLLF XB1,XB1,12
-
- VAF XC3,XC3,XD0
- VX XB2,XB2,XC3
- VERLLF XB2,XB2,12
-
- VAF XC0,XC0,XD1
- VX XB3,XB3,XC0
- VERLLF XB3,XB3,12
-
- VAF XC1,XC1,XD2
- VX XB0,XB0,XC1
- VERLLF XB0,XB0,12
-
- VAF XA0,XA0,XB1
- VX XD3,XD3,XA0
- VERLLF XD3,XD3,8
-
- VAF XA1,XA1,XB2
- VX XD0,XD0,XA1
- VERLLF XD0,XD0,8
-
- VAF XA2,XA2,XB3
- VX XD1,XD1,XA2
- VERLLF XD1,XD1,8
-
- VAF XA3,XA3,XB0
- VX XD2,XD2,XA3
- VERLLF XD2,XD2,8
-
- VAF XC2,XC2,XD3
- VX XB1,XB1,XC2
- VERLLF XB1,XB1,7
-
- VAF XC3,XC3,XD0
- VX XB2,XB2,XC3
- VERLLF XB2,XB2,7
-
- VAF XC0,XC0,XD1
- VX XB3,XB3,XC0
- VERLLF XB3,XB3,7
-
- VAF XC1,XC1,XD2
- VX XB0,XB0,XC1
- VERLLF XB0,XB0,7
- brct %r0,.Loop_4x
-
- VAF XD0,XD0,CTR
-
- VMRHF XT0,XA0,XA1 # transpose data
- VMRHF XT1,XA2,XA3
- VMRLF XT2,XA0,XA1
- VMRLF XT3,XA2,XA3
- VPDI XA0,XT0,XT1,0b0000
- VPDI XA1,XT0,XT1,0b0101
- VPDI XA2,XT2,XT3,0b0000
- VPDI XA3,XT2,XT3,0b0101
-
- VMRHF XT0,XB0,XB1
- VMRHF XT1,XB2,XB3
- VMRLF XT2,XB0,XB1
- VMRLF XT3,XB2,XB3
- VPDI XB0,XT0,XT1,0b0000
- VPDI XB1,XT0,XT1,0b0101
- VPDI XB2,XT2,XT3,0b0000
- VPDI XB3,XT2,XT3,0b0101
-
- VMRHF XT0,XC0,XC1
- VMRHF XT1,XC2,XC3
- VMRLF XT2,XC0,XC1
- VMRLF XT3,XC2,XC3
- VPDI XC0,XT0,XT1,0b0000
- VPDI XC1,XT0,XT1,0b0101
- VPDI XC2,XT2,XT3,0b0000
- VPDI XC3,XT2,XT3,0b0101
-
- VMRHF XT0,XD0,XD1
- VMRHF XT1,XD2,XD3
- VMRLF XT2,XD0,XD1
- VMRLF XT3,XD2,XD3
- VPDI XD0,XT0,XT1,0b0000
- VPDI XD1,XT0,XT1,0b0101
- VPDI XD2,XT2,XT3,0b0000
- VPDI XD3,XT2,XT3,0b0101
-
- VAF XA0,XA0,K0
- VAF XB0,XB0,K1
- VAF XC0,XC0,K2
- VAF XD0,XD0,K3
-
- VPERM XA0,XA0,XA0,BEPERM
- VPERM XB0,XB0,XB0,BEPERM
- VPERM XC0,XC0,XC0,BEPERM
- VPERM XD0,XD0,XD0,BEPERM
-
- VLM XT0,XT3,0,INP,0
-
- VX XT0,XT0,XA0
- VX XT1,XT1,XB0
- VX XT2,XT2,XC0
- VX XT3,XT3,XD0
-
- VSTM XT0,XT3,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
-
- VAF XA0,XA1,K0
- VAF XB0,XB1,K1
- VAF XC0,XC1,K2
- VAF XD0,XD1,K3
-
- VPERM XA0,XA0,XA0,BEPERM
- VPERM XB0,XB0,XB0,BEPERM
- VPERM XC0,XC0,XC0,BEPERM
- VPERM XD0,XD0,XD0,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_4x
-
- VLM XT0,XT3,0,INP,0
-
- VX XT0,XT0,XA0
- VX XT1,XT1,XB0
- VX XT2,XT2,XC0
- VX XT3,XT3,XD0
-
- VSTM XT0,XT3,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_4x
-
- VAF XA0,XA2,K0
- VAF XB0,XB2,K1
- VAF XC0,XC2,K2
- VAF XD0,XD2,K3
-
- VPERM XA0,XA0,XA0,BEPERM
- VPERM XB0,XB0,XB0,BEPERM
- VPERM XC0,XC0,XC0,BEPERM
- VPERM XD0,XD0,XD0,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_4x
-
- VLM XT0,XT3,0,INP,0
-
- VX XT0,XT0,XA0
- VX XT1,XT1,XB0
- VX XT2,XT2,XC0
- VX XT3,XT3,XD0
-
- VSTM XT0,XT3,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_4x
-
- VAF XA0,XA3,K0
- VAF XB0,XB3,K1
- VAF XC0,XC3,K2
- VAF XD0,XD3,K3
-
- VPERM XA0,XA0,XA0,BEPERM
- VPERM XB0,XB0,XB0,BEPERM
- VPERM XC0,XC0,XC0,BEPERM
- VPERM XD0,XD0,XD0,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_4x
-
- VLM XT0,XT3,0,INP,0
-
- VX XT0,XT0,XA0
- VX XT1,XT1,XB0
- VX XT2,XT2,XC0
- VX XT3,XT3,XD0
-
- VSTM XT0,XT3,0,OUT,0
-
-.Ldone_4x:
- lmg %r6,%r7,6*8(SP)
- BR_EX %r14
-
-.Ltail_4x:
- VLR XT0,XC0
- VLR XT1,XD0
-
- VST XA0,8*8+0x00,,SP
- VST XB0,8*8+0x10,,SP
- VST XT0,8*8+0x20,,SP
- VST XT1,8*8+0x30,,SP
-
- lghi %r1,0
-
-.Loop_tail_4x:
- llgc %r5,0(%r1,INP)
- llgc %r6,8*8(%r1,SP)
- xr %r6,%r5
- stc %r6,0(%r1,OUT)
- la %r1,1(%r1)
- brct LEN,.Loop_tail_4x
-
- lmg %r6,%r7,6*8(SP)
- BR_EX %r14
-SYM_FUNC_END(chacha20_vx_4x)
-
-#undef OUT
-#undef INP
-#undef LEN
-#undef KEY
-#undef COUNTER
-
-#undef BEPERM
-
-#undef K0
-#undef K1
-#undef K2
-#undef K3
-
-
-#############################################################################
-# void chacha20_vx(u8 *out, counst u8 *inp, size_t len,
-# counst u32 *key, const u32 *counter)
-
-#define OUT %r2
-#define INP %r3
-#define LEN %r4
-#define KEY %r5
-#define COUNTER %r6
-
-#define BEPERM %v31
-
-#define K0 %v27
-#define K1 %v24
-#define K2 %v25
-#define K3 %v26
-
-#define A0 %v0
-#define B0 %v1
-#define C0 %v2
-#define D0 %v3
-
-#define A1 %v4
-#define B1 %v5
-#define C1 %v6
-#define D1 %v7
-
-#define A2 %v8
-#define B2 %v9
-#define C2 %v10
-#define D2 %v11
-
-#define A3 %v12
-#define B3 %v13
-#define C3 %v14
-#define D3 %v15
-
-#define A4 %v16
-#define B4 %v17
-#define C4 %v18
-#define D4 %v19
-
-#define A5 %v20
-#define B5 %v21
-#define C5 %v22
-#define D5 %v23
-
-#define T0 %v27
-#define T1 %v28
-#define T2 %v29
-#define T3 %v30
-
-SYM_FUNC_START(chacha20_vx)
- clgfi LEN,256
- jle chacha20_vx_4x
- stmg %r6,%r7,6*8(SP)
-
- lghi %r1,-FRAME
- lgr %r0,SP
- la SP,0(%r1,SP)
- stg %r0,0(SP) # back-chain
-
- larl %r7,sigma
- lhi %r0,10
-
- VLM K1,K2,0,KEY,0 # load key
- VL K3,0,,COUNTER # load counter
-
- VLM K0,BEPERM,0,%r7,4 # load sigma, increments, ...
-
-.Loop_outer_vx:
- VLR A0,K0
- VLR B0,K1
- VLR A1,K0
- VLR B1,K1
- VLR A2,K0
- VLR B2,K1
- VLR A3,K0
- VLR B3,K1
- VLR A4,K0
- VLR B4,K1
- VLR A5,K0
- VLR B5,K1
-
- VLR D0,K3
- VAF D1,K3,T1 # K[3]+1
- VAF D2,K3,T2 # K[3]+2
- VAF D3,K3,T3 # K[3]+3
- VAF D4,D2,T2 # K[3]+4
- VAF D5,D2,T3 # K[3]+5
-
- VLR C0,K2
- VLR C1,K2
- VLR C2,K2
- VLR C3,K2
- VLR C4,K2
- VLR C5,K2
-
- VLR T1,D1
- VLR T2,D2
- VLR T3,D3
-
-.Loop_vx:
- VAF A0,A0,B0
- VAF A1,A1,B1
- VAF A2,A2,B2
- VAF A3,A3,B3
- VAF A4,A4,B4
- VAF A5,A5,B5
- VX D0,D0,A0
- VX D1,D1,A1
- VX D2,D2,A2
- VX D3,D3,A3
- VX D4,D4,A4
- VX D5,D5,A5
- VERLLF D0,D0,16
- VERLLF D1,D1,16
- VERLLF D2,D2,16
- VERLLF D3,D3,16
- VERLLF D4,D4,16
- VERLLF D5,D5,16
-
- VAF C0,C0,D0
- VAF C1,C1,D1
- VAF C2,C2,D2
- VAF C3,C3,D3
- VAF C4,C4,D4
- VAF C5,C5,D5
- VX B0,B0,C0
- VX B1,B1,C1
- VX B2,B2,C2
- VX B3,B3,C3
- VX B4,B4,C4
- VX B5,B5,C5
- VERLLF B0,B0,12
- VERLLF B1,B1,12
- VERLLF B2,B2,12
- VERLLF B3,B3,12
- VERLLF B4,B4,12
- VERLLF B5,B5,12
-
- VAF A0,A0,B0
- VAF A1,A1,B1
- VAF A2,A2,B2
- VAF A3,A3,B3
- VAF A4,A4,B4
- VAF A5,A5,B5
- VX D0,D0,A0
- VX D1,D1,A1
- VX D2,D2,A2
- VX D3,D3,A3
- VX D4,D4,A4
- VX D5,D5,A5
- VERLLF D0,D0,8
- VERLLF D1,D1,8
- VERLLF D2,D2,8
- VERLLF D3,D3,8
- VERLLF D4,D4,8
- VERLLF D5,D5,8
-
- VAF C0,C0,D0
- VAF C1,C1,D1
- VAF C2,C2,D2
- VAF C3,C3,D3
- VAF C4,C4,D4
- VAF C5,C5,D5
- VX B0,B0,C0
- VX B1,B1,C1
- VX B2,B2,C2
- VX B3,B3,C3
- VX B4,B4,C4
- VX B5,B5,C5
- VERLLF B0,B0,7
- VERLLF B1,B1,7
- VERLLF B2,B2,7
- VERLLF B3,B3,7
- VERLLF B4,B4,7
- VERLLF B5,B5,7
-
- VSLDB C0,C0,C0,8
- VSLDB C1,C1,C1,8
- VSLDB C2,C2,C2,8
- VSLDB C3,C3,C3,8
- VSLDB C4,C4,C4,8
- VSLDB C5,C5,C5,8
- VSLDB B0,B0,B0,4
- VSLDB B1,B1,B1,4
- VSLDB B2,B2,B2,4
- VSLDB B3,B3,B3,4
- VSLDB B4,B4,B4,4
- VSLDB B5,B5,B5,4
- VSLDB D0,D0,D0,12
- VSLDB D1,D1,D1,12
- VSLDB D2,D2,D2,12
- VSLDB D3,D3,D3,12
- VSLDB D4,D4,D4,12
- VSLDB D5,D5,D5,12
-
- VAF A0,A0,B0
- VAF A1,A1,B1
- VAF A2,A2,B2
- VAF A3,A3,B3
- VAF A4,A4,B4
- VAF A5,A5,B5
- VX D0,D0,A0
- VX D1,D1,A1
- VX D2,D2,A2
- VX D3,D3,A3
- VX D4,D4,A4
- VX D5,D5,A5
- VERLLF D0,D0,16
- VERLLF D1,D1,16
- VERLLF D2,D2,16
- VERLLF D3,D3,16
- VERLLF D4,D4,16
- VERLLF D5,D5,16
-
- VAF C0,C0,D0
- VAF C1,C1,D1
- VAF C2,C2,D2
- VAF C3,C3,D3
- VAF C4,C4,D4
- VAF C5,C5,D5
- VX B0,B0,C0
- VX B1,B1,C1
- VX B2,B2,C2
- VX B3,B3,C3
- VX B4,B4,C4
- VX B5,B5,C5
- VERLLF B0,B0,12
- VERLLF B1,B1,12
- VERLLF B2,B2,12
- VERLLF B3,B3,12
- VERLLF B4,B4,12
- VERLLF B5,B5,12
-
- VAF A0,A0,B0
- VAF A1,A1,B1
- VAF A2,A2,B2
- VAF A3,A3,B3
- VAF A4,A4,B4
- VAF A5,A5,B5
- VX D0,D0,A0
- VX D1,D1,A1
- VX D2,D2,A2
- VX D3,D3,A3
- VX D4,D4,A4
- VX D5,D5,A5
- VERLLF D0,D0,8
- VERLLF D1,D1,8
- VERLLF D2,D2,8
- VERLLF D3,D3,8
- VERLLF D4,D4,8
- VERLLF D5,D5,8
-
- VAF C0,C0,D0
- VAF C1,C1,D1
- VAF C2,C2,D2
- VAF C3,C3,D3
- VAF C4,C4,D4
- VAF C5,C5,D5
- VX B0,B0,C0
- VX B1,B1,C1
- VX B2,B2,C2
- VX B3,B3,C3
- VX B4,B4,C4
- VX B5,B5,C5
- VERLLF B0,B0,7
- VERLLF B1,B1,7
- VERLLF B2,B2,7
- VERLLF B3,B3,7
- VERLLF B4,B4,7
- VERLLF B5,B5,7
-
- VSLDB C0,C0,C0,8
- VSLDB C1,C1,C1,8
- VSLDB C2,C2,C2,8
- VSLDB C3,C3,C3,8
- VSLDB C4,C4,C4,8
- VSLDB C5,C5,C5,8
- VSLDB B0,B0,B0,12
- VSLDB B1,B1,B1,12
- VSLDB B2,B2,B2,12
- VSLDB B3,B3,B3,12
- VSLDB B4,B4,B4,12
- VSLDB B5,B5,B5,12
- VSLDB D0,D0,D0,4
- VSLDB D1,D1,D1,4
- VSLDB D2,D2,D2,4
- VSLDB D3,D3,D3,4
- VSLDB D4,D4,D4,4
- VSLDB D5,D5,D5,4
- brct %r0,.Loop_vx
-
- VAF A0,A0,K0
- VAF B0,B0,K1
- VAF C0,C0,K2
- VAF D0,D0,K3
- VAF A1,A1,K0
- VAF D1,D1,T1 # +K[3]+1
-
- VPERM A0,A0,A0,BEPERM
- VPERM B0,B0,B0,BEPERM
- VPERM C0,C0,C0,BEPERM
- VPERM D0,D0,D0,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VAF D2,D2,T2 # +K[3]+2
- VAF D3,D3,T3 # +K[3]+3
- VLM T0,T3,0,INP,0
-
- VX A0,A0,T0
- VX B0,B0,T1
- VX C0,C0,T2
- VX D0,D0,T3
-
- VLM K0,T3,0,%r7,4 # re-load sigma and increments
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF B1,B1,K1
- VAF C1,C1,K2
-
- VPERM A0,A1,A1,BEPERM
- VPERM B0,B1,B1,BEPERM
- VPERM C0,C1,C1,BEPERM
- VPERM D0,D1,D1,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF A2,A2,K0
- VAF B2,B2,K1
- VAF C2,C2,K2
-
- VPERM A0,A2,A2,BEPERM
- VPERM B0,B2,B2,BEPERM
- VPERM C0,C2,C2,BEPERM
- VPERM D0,D2,D2,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF A3,A3,K0
- VAF B3,B3,K1
- VAF C3,C3,K2
- VAF D2,K3,T3 # K[3]+3
-
- VPERM A0,A3,A3,BEPERM
- VPERM B0,B3,B3,BEPERM
- VPERM C0,C3,C3,BEPERM
- VPERM D0,D3,D3,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VAF D3,D2,T1 # K[3]+4
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF A4,A4,K0
- VAF B4,B4,K1
- VAF C4,C4,K2
- VAF D4,D4,D3 # +K[3]+4
- VAF D3,D3,T1 # K[3]+5
- VAF K3,D2,T3 # K[3]+=6
-
- VPERM A0,A4,A4,BEPERM
- VPERM B0,B4,B4,BEPERM
- VPERM C0,C4,C4,BEPERM
- VPERM D0,D4,D4,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- aghi LEN,-0x40
- je .Ldone_vx
-
- VAF A5,A5,K0
- VAF B5,B5,K1
- VAF C5,C5,K2
- VAF D5,D5,D3 # +K[3]+5
-
- VPERM A0,A5,A5,BEPERM
- VPERM B0,B5,B5,BEPERM
- VPERM C0,C5,C5,BEPERM
- VPERM D0,D5,D5,BEPERM
-
- clgfi LEN,0x40
- jl .Ltail_vx
-
- VLM A1,D1,0,INP,0
-
- VX A0,A0,A1
- VX B0,B0,B1
- VX C0,C0,C1
- VX D0,D0,D1
-
- VSTM A0,D0,0,OUT,0
-
- la INP,0x40(INP)
- la OUT,0x40(OUT)
- lhi %r0,10
- aghi LEN,-0x40
- jne .Loop_outer_vx
-
-.Ldone_vx:
- lmg %r6,%r7,FRAME+6*8(SP)
- la SP,FRAME(SP)
- BR_EX %r14
-
-.Ltail_vx:
- VSTM A0,D0,8*8,SP,3
- lghi %r1,0
-
-.Loop_tail_vx:
- llgc %r5,0(%r1,INP)
- llgc %r6,8*8(%r1,SP)
- xr %r6,%r5
- stc %r6,0(%r1,OUT)
- la %r1,1(%r1)
- brct LEN,.Loop_tail_vx
-
- lmg %r6,%r7,FRAME+6*8(SP)
- la SP,FRAME(SP)
- BR_EX %r14
-SYM_FUNC_END(chacha20_vx)
-
-.previous
diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/crypto/chacha-s390.h
deleted file mode 100644
index 733744ce30f5..000000000000
--- a/arch/s390/crypto/chacha-s390.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * s390 ChaCha stream cipher.
- *
- * Copyright IBM Corp. 2021
- */
-
-#ifndef _CHACHA_S390_H
-#define _CHACHA_S390_H
-
-void chacha20_vx(u8 *out, const u8 *inp, size_t len, const u32 *key,
- const u32 *counter);
-
-#endif /* _CHACHA_S390_H */
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 0800a2a5799f..dcbcee37cb63 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -8,29 +8,28 @@
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
+#include <asm/cpacf.h>
+#include <crypto/ghash.h>
#include <crypto/internal/hash.h>
-#include <linux/module.h>
#include <linux/cpufeature.h>
-#include <asm/cpacf.h>
-
-#define GHASH_BLOCK_SIZE 16
-#define GHASH_DIGEST_SIZE 16
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
-struct ghash_ctx {
+struct s390_ghash_ctx {
u8 key[GHASH_BLOCK_SIZE];
};
-struct ghash_desc_ctx {
+struct s390_ghash_desc_ctx {
u8 icv[GHASH_BLOCK_SIZE];
u8 key[GHASH_BLOCK_SIZE];
- u8 buffer[GHASH_BLOCK_SIZE];
- u32 bytes;
};
static int ghash_init(struct shash_desc *desc)
{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
memset(dctx, 0, sizeof(*dctx));
memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
@@ -41,7 +40,7 @@ static int ghash_init(struct shash_desc *desc)
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
- struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+ struct s390_ghash_ctx *ctx = crypto_shash_ctx(tfm);
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
@@ -54,80 +53,71 @@ static int ghash_setkey(struct crypto_shash *tfm,
static int ghash_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
unsigned int n;
- u8 *buf = dctx->buffer;
-
- if (dctx->bytes) {
- u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
- n = min(srclen, dctx->bytes);
- dctx->bytes -= n;
- srclen -= n;
-
- memcpy(pos, src, n);
- src += n;
+ n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+ cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
+ return srclen - n;
+}
- if (!dctx->bytes) {
- cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
- GHASH_BLOCK_SIZE);
- }
- }
+static void ghash_flush(struct s390_ghash_desc_ctx *dctx, const u8 *src,
+ unsigned int len)
+{
+ if (len) {
+ u8 buf[GHASH_BLOCK_SIZE] = {};
- n = srclen & ~(GHASH_BLOCK_SIZE - 1);
- if (n) {
- cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
- src += n;
- srclen -= n;
+ memcpy(buf, src, len);
+ cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
+ memzero_explicit(buf, sizeof(buf));
}
+}
- if (srclen) {
- dctx->bytes = GHASH_BLOCK_SIZE - srclen;
- memcpy(buf, src, srclen);
- }
+static int ghash_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *dst)
+{
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ ghash_flush(dctx, src, len);
+ memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
return 0;
}
-static int ghash_flush(struct ghash_desc_ctx *dctx)
+static int ghash_export(struct shash_desc *desc, void *out)
{
- u8 *buf = dctx->buffer;
-
- if (dctx->bytes) {
- u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
-
- memset(pos, 0, dctx->bytes);
- cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
- dctx->bytes = 0;
- }
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ memcpy(out, dctx->icv, GHASH_DIGEST_SIZE);
return 0;
}
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_import(struct shash_desc *desc, const void *in)
{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- int ret;
+ struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- ret = ghash_flush(dctx);
- if (!ret)
- memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
- return ret;
+ memcpy(dctx->icv, in, GHASH_DIGEST_SIZE);
+ memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
+ return 0;
}
static struct shash_alg ghash_alg = {
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
.update = ghash_update,
- .final = ghash_final,
+ .finup = ghash_finup,
.setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
+ .export = ghash_export,
+ .import = ghash_import,
+ .statesize = sizeof(struct ghash_desc_ctx),
+ .descsize = sizeof(struct s390_ghash_desc_ctx),
.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ghash_ctx),
+ .cra_ctxsize = sizeof(struct s390_ghash_ctx),
.cra_module = THIS_MODULE,
},
};
diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c
index bba9a818dfdc..58444da9b004 100644
--- a/arch/s390/crypto/hmac_s390.c
+++ b/arch/s390/crypto/hmac_s390.c
@@ -9,10 +9,14 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <asm/cpacf.h>
-#include <crypto/sha2.h>
#include <crypto/internal/hash.h>
+#include <crypto/hmac.h>
+#include <crypto/sha2.h>
#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/string.h>
/*
* KMAC param block layout for sha2 function codes:
@@ -71,32 +75,31 @@ union s390_kmac_gr0 {
struct s390_kmac_sha2_ctx {
u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + MAX_BLOCK_SIZE];
union s390_kmac_gr0 gr0;
- u8 buf[MAX_BLOCK_SIZE];
- unsigned int buflen;
+ u64 buflen[2];
};
/*
* kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize
*/
-static inline void kmac_sha2_set_imbl(u8 *param, unsigned int buflen,
- unsigned int blocksize)
+static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo,
+ u64 buflen_hi, unsigned int blocksize)
{
u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize);
switch (blocksize) {
case SHA256_BLOCK_SIZE:
- *(u64 *)imbl = (u64)buflen * BITS_PER_BYTE;
+ *(u64 *)imbl = buflen_lo * BITS_PER_BYTE;
break;
case SHA512_BLOCK_SIZE:
- *(u128 *)imbl = (u128)buflen * BITS_PER_BYTE;
+ *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3;
break;
default:
break;
}
}
-static int hash_key(const u8 *in, unsigned int inlen,
- u8 *digest, unsigned int digestsize)
+static int hash_data(const u8 *in, unsigned int inlen,
+ u8 *digest, unsigned int digestsize, bool final)
{
unsigned long func;
union {
@@ -123,19 +126,23 @@ static int hash_key(const u8 *in, unsigned int inlen,
switch (digestsize) {
case SHA224_DIGEST_SIZE:
- func = CPACF_KLMD_SHA_256;
+ func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256;
PARAM_INIT(256, 224, inlen * 8);
+ if (!final)
+ digestsize = SHA256_DIGEST_SIZE;
break;
case SHA256_DIGEST_SIZE:
- func = CPACF_KLMD_SHA_256;
+ func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256;
PARAM_INIT(256, 256, inlen * 8);
break;
case SHA384_DIGEST_SIZE:
- func = CPACF_KLMD_SHA_512;
+ func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512;
PARAM_INIT(512, 384, inlen * 8);
+ if (!final)
+ digestsize = SHA512_DIGEST_SIZE;
break;
case SHA512_DIGEST_SIZE:
- func = CPACF_KLMD_SHA_512;
+ func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512;
PARAM_INIT(512, 512, inlen * 8);
break;
default:
@@ -151,6 +158,12 @@ static int hash_key(const u8 *in, unsigned int inlen,
return 0;
}
+static int hash_key(const u8 *in, unsigned int inlen,
+ u8 *digest, unsigned int digestsize)
+{
+ return hash_data(in, inlen, digest, digestsize, true);
+}
+
static int s390_hmac_sha2_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
@@ -176,7 +189,8 @@ static int s390_hmac_sha2_init(struct shash_desc *desc)
memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
tfm_ctx->key, bs);
- ctx->buflen = 0;
+ ctx->buflen[0] = 0;
+ ctx->buflen[1] = 0;
ctx->gr0.reg = 0;
switch (crypto_shash_digestsize(desc->tfm)) {
case SHA224_DIGEST_SIZE:
@@ -203,48 +217,31 @@ static int s390_hmac_sha2_update(struct shash_desc *desc,
{
struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
unsigned int bs = crypto_shash_blocksize(desc->tfm);
- unsigned int offset, n;
-
- /* check current buffer */
- offset = ctx->buflen % bs;
- ctx->buflen += len;
- if (offset + len < bs)
- goto store;
-
- /* process one stored block */
- if (offset) {
- n = bs - offset;
- memcpy(ctx->buf + offset, data, n);
- ctx->gr0.iimp = 1;
- _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs);
- data += n;
- len -= n;
- offset = 0;
- }
- /* process as many blocks as possible */
- if (len >= bs) {
- n = (len / bs) * bs;
- ctx->gr0.iimp = 1;
- _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n);
- data += n;
- len -= n;
- }
-store:
- /* store incomplete block in buffer */
- if (len)
- memcpy(ctx->buf + offset, data, len);
+ unsigned int n = round_down(len, bs);
- return 0;
+ ctx->buflen[0] += n;
+ if (ctx->buflen[0] < n)
+ ctx->buflen[1]++;
+
+ /* process as many blocks as possible */
+ ctx->gr0.iimp = 1;
+ _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n);
+ return len - n;
}
-static int s390_hmac_sha2_final(struct shash_desc *desc, u8 *out)
+static int s390_hmac_sha2_finup(struct shash_desc *desc, const u8 *src,
+ unsigned int len, u8 *out)
{
struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ ctx->buflen[0] += len;
+ if (ctx->buflen[0] < len)
+ ctx->buflen[1]++;
+
ctx->gr0.iimp = 0;
- kmac_sha2_set_imbl(ctx->param, ctx->buflen, bs);
- _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, ctx->buflen % bs);
+ kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs);
+ _cpacf_kmac(&ctx->gr0.reg, ctx->param, src, len);
memcpy(out, ctx->param, crypto_shash_digestsize(desc->tfm));
return 0;
@@ -262,7 +259,7 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc,
return rc;
ctx->gr0.iimp = 0;
- kmac_sha2_set_imbl(ctx->param, len,
+ kmac_sha2_set_imbl(ctx->param, len, 0,
crypto_shash_blocksize(desc->tfm));
_cpacf_kmac(&ctx->gr0.reg, ctx->param, data, len);
memcpy(out, ctx->param, ds);
@@ -270,22 +267,93 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc,
return 0;
}
-#define S390_HMAC_SHA2_ALG(x) { \
+static int s390_hmac_export_zero(struct shash_desc *desc, void *out)
+{
+ struct crypto_shash *tfm = desc->tfm;
+ u8 ipad[SHA512_BLOCK_SIZE];
+ struct s390_hmac_ctx *ctx;
+ unsigned int bs;
+ int err, i;
+
+ ctx = crypto_shash_ctx(tfm);
+ bs = crypto_shash_blocksize(tfm);
+ for (i = 0; i < bs; i++)
+ ipad[i] = ctx->key[i] ^ HMAC_IPAD_VALUE;
+
+ err = hash_data(ipad, bs, out, crypto_shash_digestsize(tfm), false);
+ memzero_explicit(ipad, sizeof(ipad));
+ return err;
+}
+
+static int s390_hmac_export(struct shash_desc *desc, void *out)
+{
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ unsigned int ds = bs / 2;
+ u64 lo = ctx->buflen[0];
+ union {
+ u8 *u8;
+ u64 *u64;
+ } p = { .u8 = out };
+ int err = 0;
+
+ if (!ctx->gr0.ikp)
+ err = s390_hmac_export_zero(desc, out);
+ else
+ memcpy(p.u8, ctx->param, ds);
+ p.u8 += ds;
+ lo += bs;
+ put_unaligned(lo, p.u64++);
+ if (ds == SHA512_DIGEST_SIZE)
+ put_unaligned(ctx->buflen[1] + (lo < bs), p.u64);
+ return err;
+}
+
+static int s390_hmac_import(struct shash_desc *desc, const void *in)
+{
+ struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int bs = crypto_shash_blocksize(desc->tfm);
+ unsigned int ds = bs / 2;
+ union {
+ const u8 *u8;
+ const u64 *u64;
+ } p = { .u8 = in };
+ u64 lo;
+ int err;
+
+ err = s390_hmac_sha2_init(desc);
+ memcpy(ctx->param, p.u8, ds);
+ p.u8 += ds;
+ lo = get_unaligned(p.u64++);
+ ctx->buflen[0] = lo - bs;
+ if (ds == SHA512_DIGEST_SIZE)
+ ctx->buflen[1] = get_unaligned(p.u64) - (lo < bs);
+ if (ctx->buflen[0] | ctx->buflen[1])
+ ctx->gr0.ikp = 1;
+ return err;
+}
+
+#define S390_HMAC_SHA2_ALG(x, ss) { \
.fc = CPACF_KMAC_HMAC_SHA_##x, \
.alg = { \
.init = s390_hmac_sha2_init, \
.update = s390_hmac_sha2_update, \
- .final = s390_hmac_sha2_final, \
+ .finup = s390_hmac_sha2_finup, \
.digest = s390_hmac_sha2_digest, \
.setkey = s390_hmac_sha2_setkey, \
+ .export = s390_hmac_export, \
+ .import = s390_hmac_import, \
.descsize = sizeof(struct s390_kmac_sha2_ctx), \
.halg = { \
+ .statesize = ss, \
.digestsize = SHA##x##_DIGEST_SIZE, \
.base = { \
.cra_name = "hmac(sha" #x ")", \
.cra_driver_name = "hmac_s390_sha" #x, \
.cra_blocksize = SHA##x##_BLOCK_SIZE, \
.cra_priority = 400, \
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | \
+ CRYPTO_AHASH_ALG_FINUP_MAX, \
.cra_ctxsize = sizeof(struct s390_hmac_ctx), \
.cra_module = THIS_MODULE, \
}, \
@@ -298,10 +366,10 @@ static struct s390_hmac_alg {
unsigned int fc;
struct shash_alg alg;
} s390_hmac_algs[] = {
- S390_HMAC_SHA2_ALG(224),
- S390_HMAC_SHA2_ALG(256),
- S390_HMAC_SHA2_ALG(384),
- S390_HMAC_SHA2_ALG(512),
+ S390_HMAC_SHA2_ALG(224, sizeof(struct crypto_sha256_state)),
+ S390_HMAC_SHA2_ALG(256, sizeof(struct crypto_sha256_state)),
+ S390_HMAC_SHA2_ALG(384, SHA512_STATE_SIZE),
+ S390_HMAC_SHA2_ALG(512, SHA512_STATE_SIZE),
};
static __always_inline void _s390_hmac_algs_unregister(void)
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 511093713a6f..a624a43a2b54 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -5,7 +5,7 @@
* s390 implementation of the AES Cipher Algorithm with protected keys.
*
* s390 Version:
- * Copyright IBM Corp. 2017, 2023
+ * Copyright IBM Corp. 2017, 2025
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
*/
@@ -13,16 +13,18 @@
#define KMSG_COMPONENT "paes_s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <linux/bug.h>
-#include <linux/err.h>
-#include <linux/module.h>
+#include <linux/atomic.h>
#include <linux/cpufeature.h>
+#include <linux/delay.h>
+#include <linux/err.h>
#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
-#include <linux/delay.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/engine.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <asm/cpacf.h>
@@ -44,23 +46,61 @@ static DEFINE_MUTEX(ctrblk_lock);
static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+static struct crypto_engine *paes_crypto_engine;
+#define MAX_QLEN 10
+
+/*
+ * protected key specific stuff
+ */
+
struct paes_protkey {
u32 type;
u32 len;
u8 protkey[PXTS_256_PROTKEY_SIZE];
};
-struct key_blob {
- /*
- * Small keys will be stored in the keybuf. Larger keys are
- * stored in extra allocated memory. In both cases does
- * key point to the memory where the key is stored.
- * The code distinguishes by checking keylen against
- * sizeof(keybuf). See the two following helper functions.
- */
- u8 *key;
- u8 keybuf[128];
+#define PK_STATE_NO_KEY 0
+#define PK_STATE_CONVERT_IN_PROGRESS 1
+#define PK_STATE_VALID 2
+
+struct s390_paes_ctx {
+ /* source key material used to derive a protected key from */
+ u8 keybuf[PAES_MAX_KEYSIZE];
+ unsigned int keylen;
+
+ /* cpacf function code to use with this protected key type */
+ long fc;
+
+ /* nr of requests enqueued via crypto engine which use this tfm ctx */
+ atomic_t via_engine_ctr;
+
+ /* spinlock to atomic read/update all the following fields */
+ spinlock_t pk_lock;
+
+ /* see PK_STATE* defines above, < 0 holds convert failure rc */
+ int pk_state;
+ /* if state is valid, pk holds the protected key */
+ struct paes_protkey pk;
+};
+
+struct s390_pxts_ctx {
+ /* source key material used to derive a protected key from */
+ u8 keybuf[2 * PAES_MAX_KEYSIZE];
unsigned int keylen;
+
+ /* cpacf function code to use with this protected key type */
+ long fc;
+
+ /* nr of requests enqueued via crypto engine which use this tfm ctx */
+ atomic_t via_engine_ctr;
+
+ /* spinlock to atomic read/update all the following fields */
+ spinlock_t pk_lock;
+
+ /* see PK_STATE* defines above, < 0 holds convert failure rc */
+ int pk_state;
+ /* if state is valid, pk[] hold(s) the protected key(s) */
+ struct paes_protkey pk[2];
};
/*
@@ -89,214 +129,370 @@ static inline u32 make_clrkey_token(const u8 *ck, size_t cklen, u8 *dest)
return sizeof(*token) + cklen;
}
-static inline int _key_to_kb(struct key_blob *kb,
- const u8 *key,
- unsigned int keylen)
+/*
+ * paes_ctx_setkey() - Set key value into context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int paes_ctx_setkey(struct s390_paes_ctx *ctx,
+ const u8 *key, unsigned int keylen)
{
+ if (keylen > sizeof(ctx->keybuf))
+ return -EINVAL;
+
switch (keylen) {
case 16:
case 24:
case 32:
/* clear key value, prepare pkey clear key token in keybuf */
- memset(kb->keybuf, 0, sizeof(kb->keybuf));
- kb->keylen = make_clrkey_token(key, keylen, kb->keybuf);
- kb->key = kb->keybuf;
+ memset(ctx->keybuf, 0, sizeof(ctx->keybuf));
+ ctx->keylen = make_clrkey_token(key, keylen, ctx->keybuf);
break;
default:
/* other key material, let pkey handle this */
- if (keylen <= sizeof(kb->keybuf))
- kb->key = kb->keybuf;
- else {
- kb->key = kmalloc(keylen, GFP_KERNEL);
- if (!kb->key)
- return -ENOMEM;
- }
- memcpy(kb->key, key, keylen);
- kb->keylen = keylen;
+ memcpy(ctx->keybuf, key, keylen);
+ ctx->keylen = keylen;
break;
}
return 0;
}
-static inline int _xts_key_to_kb(struct key_blob *kb,
- const u8 *key,
- unsigned int keylen)
+/*
+ * pxts_ctx_setkey() - Set key value into context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int pxts_ctx_setkey(struct s390_pxts_ctx *ctx,
+ const u8 *key, unsigned int keylen)
{
size_t cklen = keylen / 2;
- memset(kb->keybuf, 0, sizeof(kb->keybuf));
+ if (keylen > sizeof(ctx->keybuf))
+ return -EINVAL;
switch (keylen) {
case 32:
case 64:
/* clear key value, prepare pkey clear key tokens in keybuf */
- kb->key = kb->keybuf;
- kb->keylen = make_clrkey_token(key, cklen, kb->key);
- kb->keylen += make_clrkey_token(key + cklen, cklen,
- kb->key + kb->keylen);
+ memset(ctx->keybuf, 0, sizeof(ctx->keybuf));
+ ctx->keylen = make_clrkey_token(key, cklen, ctx->keybuf);
+ ctx->keylen += make_clrkey_token(key + cklen, cklen,
+ ctx->keybuf + ctx->keylen);
break;
default:
/* other key material, let pkey handle this */
- if (keylen <= sizeof(kb->keybuf)) {
- kb->key = kb->keybuf;
- } else {
- kb->key = kmalloc(keylen, GFP_KERNEL);
- if (!kb->key)
- return -ENOMEM;
- }
- memcpy(kb->key, key, keylen);
- kb->keylen = keylen;
+ memcpy(ctx->keybuf, key, keylen);
+ ctx->keylen = keylen;
break;
}
return 0;
}
-static inline void _free_kb_keybuf(struct key_blob *kb)
+/*
+ * Convert the raw key material into a protected key via PKEY api.
+ * This function may sleep - don't call in non-sleeping context.
+ */
+static inline int convert_key(const u8 *key, unsigned int keylen,
+ struct paes_protkey *pk)
{
- if (kb->key && kb->key != kb->keybuf
- && kb->keylen > sizeof(kb->keybuf)) {
- kfree_sensitive(kb->key);
- kb->key = NULL;
+ int rc, i;
+
+ pk->len = sizeof(pk->protkey);
+
+ /*
+ * In case of a busy card retry with increasing delay
+ * of 200, 400, 800 and 1600 ms - in total 3 s.
+ */
+ for (rc = -EIO, i = 0; rc && i < 5; i++) {
+ if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) {
+ rc = -EINTR;
+ goto out;
+ }
+ rc = pkey_key2protkey(key, keylen,
+ pk->protkey, &pk->len, &pk->type,
+ PKEY_XFLAG_NOMEMALLOC);
}
- memzero_explicit(kb->keybuf, sizeof(kb->keybuf));
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-struct s390_paes_ctx {
- struct key_blob kb;
+/*
+ * (Re-)Convert the raw key material from the ctx into a protected key
+ * via convert_key() function. Update the pk_state, pk_type, pk_len
+ * and the protected key in the tfm context.
+ * Please note this function may be invoked concurrently with the very
+ * same tfm context. The pk_lock spinlock in the context ensures an
+ * atomic update of the pk and the pk state but does not guarantee any
+ * order of update. So a fresh converted valid protected key may get
+ * updated with an 'old' expired key value. As the cpacf instructions
+ * detect this, refuse to operate with an invalid key and the calling
+ * code triggers a (re-)conversion this does no harm. This may lead to
+ * unnecessary additional conversion but never to invalid data on en-
+ * or decrypt operations.
+ */
+static int paes_convert_key(struct s390_paes_ctx *ctx)
+{
struct paes_protkey pk;
- spinlock_t pk_lock;
- unsigned long fc;
-};
+ int rc;
-struct s390_pxts_ctx {
- struct key_blob kb;
- struct paes_protkey pk[2];
- spinlock_t pk_lock;
- unsigned long fc;
-};
+ spin_lock_bh(&ctx->pk_lock);
+ ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+ spin_unlock_bh(&ctx->pk_lock);
-static inline int __paes_keyblob2pkey(const u8 *key, unsigned int keylen,
- struct paes_protkey *pk)
-{
- int i, rc = -EIO;
+ rc = convert_key(ctx->keybuf, ctx->keylen, &pk);
- /* try three times in case of busy card */
- for (i = 0; rc && i < 3; i++) {
- if (rc == -EBUSY && in_task()) {
- if (msleep_interruptible(1000))
- return -EINTR;
- }
- rc = pkey_key2protkey(key, keylen, pk->protkey, &pk->len,
- &pk->type);
+ /* update context */
+ spin_lock_bh(&ctx->pk_lock);
+ if (rc) {
+ ctx->pk_state = rc;
+ } else {
+ ctx->pk_state = PK_STATE_VALID;
+ ctx->pk = pk;
}
+ spin_unlock_bh(&ctx->pk_lock);
+ memzero_explicit(&pk, sizeof(pk));
+ pr_debug("rc=%d\n", rc);
return rc;
}
-static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
+/*
+ * (Re-)Convert the raw xts key material from the ctx into a
+ * protected key via convert_key() function. Update the pk_state,
+ * pk_type, pk_len and the protected key in the tfm context.
+ * See also comments on function paes_convert_key.
+ */
+static int pxts_convert_key(struct s390_pxts_ctx *ctx)
{
- struct paes_protkey pk;
+ struct paes_protkey pk0, pk1;
+ size_t split_keylen;
int rc;
- pk.len = sizeof(pk.protkey);
- rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk);
+ spin_lock_bh(&ctx->pk_lock);
+ ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+ spin_unlock_bh(&ctx->pk_lock);
+
+ rc = convert_key(ctx->keybuf, ctx->keylen, &pk0);
if (rc)
- return rc;
+ goto out;
+
+ switch (pk0.type) {
+ case PKEY_KEYTYPE_AES_128:
+ case PKEY_KEYTYPE_AES_256:
+ /* second keytoken required */
+ if (ctx->keylen % 2) {
+ rc = -EINVAL;
+ goto out;
+ }
+ split_keylen = ctx->keylen / 2;
+ rc = convert_key(ctx->keybuf + split_keylen,
+ split_keylen, &pk1);
+ if (rc)
+ goto out;
+ if (pk0.type != pk1.type) {
+ rc = -EINVAL;
+ goto out;
+ }
+ break;
+ case PKEY_KEYTYPE_AES_XTS_128:
+ case PKEY_KEYTYPE_AES_XTS_256:
+ /* single key */
+ pk1.type = 0;
+ break;
+ default:
+ /* unsupported protected keytype */
+ rc = -EINVAL;
+ goto out;
+ }
+out:
+ /* update context */
spin_lock_bh(&ctx->pk_lock);
- memcpy(&ctx->pk, &pk, sizeof(pk));
+ if (rc) {
+ ctx->pk_state = rc;
+ } else {
+ ctx->pk_state = PK_STATE_VALID;
+ ctx->pk[0] = pk0;
+ ctx->pk[1] = pk1;
+ }
spin_unlock_bh(&ctx->pk_lock);
- return 0;
+ memzero_explicit(&pk0, sizeof(pk0));
+ memzero_explicit(&pk1, sizeof(pk1));
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int ecb_paes_init(struct crypto_skcipher *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+/*
+ * PAES ECB implementation
+ */
- ctx->kb.key = NULL;
- spin_lock_init(&ctx->pk_lock);
+struct ecb_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
- return 0;
-}
+struct s390_pecb_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ struct ecb_param param;
+};
-static void ecb_paes_exit(struct crypto_skcipher *tfm)
+static int ecb_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- _free_kb_keybuf(&ctx->kb);
-}
-
-static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx)
-{
- unsigned long fc;
+ long fc;
int rc;
- rc = __paes_convert_key(ctx);
+ /* set raw key into context */
+ rc = paes_ctx_setkey(ctx, in_key, key_len);
if (rc)
- return rc;
+ goto out;
- /* Pick the correct function code based on the protected key type */
- fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0;
+ /* convert key into protected key */
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
- /* Check if the function code is available */
+ /* Pick the correct function code based on the protected key type */
+ switch (ctx->pk.type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KM_PAES_128;
+ break;
+ case PKEY_KEYTYPE_AES_192:
+ fc = CPACF_KM_PAES_192;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KM_PAES_256;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
- return ctx->fc ? 0 : -EINVAL;
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int ecb_paes_do_crypt(struct s390_paes_ctx *ctx,
+ struct s390_pecb_req_ctx *req_ctx,
+ bool maysleep)
{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- int rc;
-
- _free_kb_keybuf(&ctx->kb);
- rc = _key_to_kb(&ctx->kb, in_key, key_len);
+ struct ecb_param *param = &req_ctx->param;
+ struct skcipher_walk *walk = &req_ctx->walk;
+ unsigned int nbytes, n, k;
+ int pk_state, rc = 0;
+
+ if (!req_ctx->param_init_done) {
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ req_ctx->param_init_done = true;
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ }
if (rc)
- return rc;
+ goto out;
- return __ecb_paes_set_key(ctx);
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
+ /* only use complete blocks */
+ n = nbytes & ~(AES_BLOCK_SIZE - 1);
+ k = cpacf_km(ctx->fc | req_ctx->modifier, param,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
+ if (k)
+ rc = skcipher_walk_done(walk, nbytes - k);
+ if (k < n) {
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
+ struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct {
- u8 key[PAES_256_PROTKEY_SIZE];
- } param;
- struct skcipher_walk walk;
- unsigned int nbytes, n, k;
+ struct skcipher_walk *walk = &req_ctx->walk;
int rc;
- rc = skcipher_walk_virt(&walk, req, false);
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
+
+ rc = skcipher_walk_virt(walk, req, false);
if (rc)
- return rc;
+ goto out;
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
+ req_ctx->modifier = modifier;
+ req_ctx->param_init_done = false;
- while ((nbytes = walk.nbytes) != 0) {
- /* only use complete blocks */
- n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_km(ctx->fc | modifier, &param,
- walk.dst.virt.addr, walk.src.virt.addr, n);
- if (k)
- rc = skcipher_walk_done(&walk, nbytes - k);
- if (k < n) {
- if (__paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
- }
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = ecb_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
}
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
return rc;
}
@@ -310,112 +506,256 @@ static int ecb_paes_decrypt(struct skcipher_request *req)
return ecb_paes_crypt(req, CPACF_DECRYPT);
}
-static struct skcipher_alg ecb_paes_alg = {
- .base.cra_name = "ecb(paes)",
- .base.cra_driver_name = "ecb-paes-s390",
- .base.cra_priority = 401, /* combo: aes + ecb + 1 */
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
- .init = ecb_paes_init,
- .exit = ecb_paes_exit,
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .setkey = ecb_paes_set_key,
- .encrypt = ecb_paes_encrypt,
- .decrypt = ecb_paes_decrypt,
-};
-
-static int cbc_paes_init(struct crypto_skcipher *tfm)
+static int ecb_paes_init(struct crypto_skcipher *tfm)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- ctx->kb.key = NULL;
+ memset(ctx, 0, sizeof(*ctx));
spin_lock_init(&ctx->pk_lock);
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pecb_req_ctx));
+
return 0;
}
-static void cbc_paes_exit(struct crypto_skcipher *tfm)
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- _free_kb_keybuf(&ctx->kb);
+ memzero_explicit(ctx, sizeof(*ctx));
}
-static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
+static int ecb_paes_do_one_request(struct crypto_engine *engine, void *areq)
{
- unsigned long fc;
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
int rc;
- rc = __paes_convert_key(ctx);
- if (rc)
- return rc;
+ /* walk has already been prepared */
+
+ rc = ecb_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue is full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
- /* Pick the correct function code based on the protected key type */
- fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0;
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
- /* Check if the function code is available */
- ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+static struct skcipher_engine_alg ecb_paes_alg = {
+ .base = {
+ .base.cra_name = "ecb(paes)",
+ .base.cra_driver_name = "ecb-paes-s390",
+ .base.cra_priority = 401, /* combo: aes + ecb + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.base.cra_list),
+ .init = ecb_paes_init,
+ .exit = ecb_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .setkey = ecb_paes_setkey,
+ .encrypt = ecb_paes_encrypt,
+ .decrypt = ecb_paes_decrypt,
+ },
+ .op = {
+ .do_one_request = ecb_paes_do_one_request,
+ },
+};
- return ctx->fc ? 0 : -EINVAL;
-}
+/*
+ * PAES CBC implementation
+ */
+
+struct cbc_param {
+ u8 iv[AES_BLOCK_SIZE];
+ u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
+
+struct s390_pcbc_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ struct cbc_param param;
+};
-static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int cbc_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ long fc;
int rc;
- _free_kb_keybuf(&ctx->kb);
- rc = _key_to_kb(&ctx->kb, in_key, key_len);
+ /* set raw key into context */
+ rc = paes_ctx_setkey(ctx, in_key, key_len);
if (rc)
- return rc;
+ goto out;
- return __cbc_paes_set_key(ctx);
+ /* convert raw key into protected key */
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
+
+ /* Pick the correct function code based on the protected key type */
+ switch (ctx->pk.type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KMC_PAES_128;
+ break;
+ case PKEY_KEYTYPE_AES_192:
+ fc = CPACF_KMC_PAES_192;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KMC_PAES_256;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
+ ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+static int cbc_paes_do_crypt(struct s390_paes_ctx *ctx,
+ struct s390_pcbc_req_ctx *req_ctx,
+ bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct {
- u8 iv[AES_BLOCK_SIZE];
- u8 key[PAES_256_PROTKEY_SIZE];
- } param;
- struct skcipher_walk walk;
+ struct cbc_param *param = &req_ctx->param;
+ struct skcipher_walk *walk = &req_ctx->walk;
unsigned int nbytes, n, k;
- int rc;
-
- rc = skcipher_walk_virt(&walk, req, false);
+ int pk_state, rc = 0;
+
+ if (!req_ctx->param_init_done) {
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ req_ctx->param_init_done = true;
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ }
if (rc)
- return rc;
+ goto out;
- memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
+ memcpy(param->iv, walk->iv, AES_BLOCK_SIZE);
- while ((nbytes = walk.nbytes) != 0) {
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_kmc(ctx->fc | modifier, &param,
- walk.dst.virt.addr, walk.src.virt.addr, n);
+ k = cpacf_kmc(ctx->fc | req_ctx->modifier, param,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
if (k) {
- memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
- rc = skcipher_walk_done(&walk, nbytes - k);
+ memcpy(walk->iv, param->iv, AES_BLOCK_SIZE);
+ rc = skcipher_walk_done(walk, nbytes - k);
}
if (k < n) {
- if (__paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
spin_unlock_bh(&ctx->pk_lock);
}
}
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+ struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
+
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
+
+ rc = skcipher_walk_virt(walk, req, false);
+ if (rc)
+ goto out;
+
+ req_ctx->modifier = modifier;
+ req_ctx->param_init_done = false;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = cbc_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
return rc;
}
@@ -429,496 +769,882 @@ static int cbc_paes_decrypt(struct skcipher_request *req)
return cbc_paes_crypt(req, CPACF_DECRYPT);
}
-static struct skcipher_alg cbc_paes_alg = {
- .base.cra_name = "cbc(paes)",
- .base.cra_driver_name = "cbc-paes-s390",
- .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
- .init = cbc_paes_init,
- .exit = cbc_paes_exit,
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = cbc_paes_set_key,
- .encrypt = cbc_paes_encrypt,
- .decrypt = cbc_paes_decrypt,
-};
-
-static int xts_paes_init(struct crypto_skcipher *tfm)
+static int cbc_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- ctx->kb.key = NULL;
+ memset(ctx, 0, sizeof(*ctx));
spin_lock_init(&ctx->pk_lock);
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pcbc_req_ctx));
+
return 0;
}
-static void xts_paes_exit(struct crypto_skcipher *tfm)
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- _free_kb_keybuf(&ctx->kb);
+ memzero_explicit(ctx, sizeof(*ctx));
}
-static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx)
+static int cbc_paes_do_one_request(struct crypto_engine *engine, void *areq)
{
- struct paes_protkey pk0, pk1;
- size_t split_keylen;
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
int rc;
- pk0.len = sizeof(pk0.protkey);
- pk1.len = sizeof(pk1.protkey);
-
- rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk0);
- if (rc)
- return rc;
+ /* walk has already been prepared */
+
+ rc = cbc_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue is full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
- switch (pk0.type) {
- case PKEY_KEYTYPE_AES_128:
- case PKEY_KEYTYPE_AES_256:
- /* second keytoken required */
- if (ctx->kb.keylen % 2)
- return -EINVAL;
- split_keylen = ctx->kb.keylen / 2;
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
- rc = __paes_keyblob2pkey(ctx->kb.key + split_keylen,
- split_keylen, &pk1);
- if (rc)
- return rc;
+static struct skcipher_engine_alg cbc_paes_alg = {
+ .base = {
+ .base.cra_name = "cbc(paes)",
+ .base.cra_driver_name = "cbc-paes-s390",
+ .base.cra_priority = 402, /* cbc-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.base.cra_list),
+ .init = cbc_paes_init,
+ .exit = cbc_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_paes_setkey,
+ .encrypt = cbc_paes_encrypt,
+ .decrypt = cbc_paes_decrypt,
+ },
+ .op = {
+ .do_one_request = cbc_paes_do_one_request,
+ },
+};
- if (pk0.type != pk1.type)
- return -EINVAL;
- break;
- case PKEY_KEYTYPE_AES_XTS_128:
- case PKEY_KEYTYPE_AES_XTS_256:
- /* single key */
- pk1.type = 0;
- break;
- default:
- /* unsupported protected keytype */
- return -EINVAL;
- }
+/*
+ * PAES CTR implementation
+ */
- spin_lock_bh(&ctx->pk_lock);
- ctx->pk[0] = pk0;
- ctx->pk[1] = pk1;
- spin_unlock_bh(&ctx->pk_lock);
+struct ctr_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
- return 0;
-}
+struct s390_pctr_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ struct ctr_param param;
+};
-static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
+static int ctr_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
- unsigned long fc;
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ long fc;
int rc;
- rc = __xts_paes_convert_key(ctx);
+ /* set raw key into context */
+ rc = paes_ctx_setkey(ctx, in_key, key_len);
if (rc)
- return rc;
+ goto out;
+
+ /* convert raw key into protected key */
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
/* Pick the correct function code based on the protected key type */
- switch (ctx->pk[0].type) {
+ switch (ctx->pk.type) {
case PKEY_KEYTYPE_AES_128:
- fc = CPACF_KM_PXTS_128;
- break;
- case PKEY_KEYTYPE_AES_256:
- fc = CPACF_KM_PXTS_256;
+ fc = CPACF_KMCTR_PAES_128;
break;
- case PKEY_KEYTYPE_AES_XTS_128:
- fc = CPACF_KM_PXTS_128_FULL;
+ case PKEY_KEYTYPE_AES_192:
+ fc = CPACF_KMCTR_PAES_192;
break;
- case PKEY_KEYTYPE_AES_XTS_256:
- fc = CPACF_KM_PXTS_256_FULL;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KMCTR_PAES_256;
break;
default:
fc = 0;
break;
}
+ ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
- /* Check if the function code is available */
- ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static inline unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+{
+ unsigned int i, n;
+
+ /* only use complete blocks, max. PAGE_SIZE */
+ memcpy(ctrptr, iv, AES_BLOCK_SIZE);
+ n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+ for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+ memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+ crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ ctrptr += AES_BLOCK_SIZE;
+ }
+ return n;
+}
+
+static int ctr_paes_do_crypt(struct s390_paes_ctx *ctx,
+ struct s390_pctr_req_ctx *req_ctx,
+ bool maysleep)
+{
+ struct ctr_param *param = &req_ctx->param;
+ struct skcipher_walk *walk = &req_ctx->walk;
+ u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ unsigned int nbytes, n, k;
+ int pk_state, locked, rc = 0;
+
+ if (!req_ctx->param_init_done) {
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ req_ctx->param_init_done = true;
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ if (rc)
+ goto out;
+
+ locked = mutex_trylock(&ctrblk_lock);
+
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ n = AES_BLOCK_SIZE;
+ if (nbytes >= 2 * AES_BLOCK_SIZE && locked)
+ n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+ k = cpacf_kmctr(ctx->fc, param, walk->dst.virt.addr,
+ walk->src.virt.addr, n, ctrptr);
+ if (k) {
+ if (ctrptr == ctrblk)
+ memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ rc = skcipher_walk_done(walk, nbytes - k);
+ }
+ if (k < n) {
+ if (!maysleep) {
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc) {
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+ goto out;
+ }
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ }
+ if (locked)
+ mutex_unlock(&ctrblk_lock);
+
+ /* final block may be < AES_BLOCK_SIZE, copy only nbytes */
+ if (nbytes) {
+ memset(buf, 0, AES_BLOCK_SIZE);
+ memcpy(buf, walk->src.virt.addr, nbytes);
+ while (1) {
+ if (cpacf_kmctr(ctx->fc, param, buf,
+ buf, AES_BLOCK_SIZE,
+ walk->iv) == AES_BLOCK_SIZE)
+ break;
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = paes_convert_key(ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+ spin_unlock_bh(&ctx->pk_lock);
+ }
+ memcpy(walk->dst.virt.addr, buf, nbytes);
+ crypto_inc(walk->iv, AES_BLOCK_SIZE);
+ rc = skcipher_walk_done(walk, 0);
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int ctr_paes_crypt(struct skcipher_request *req)
+{
+ struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
+
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
+
+ rc = skcipher_walk_virt(walk, req, false);
+ if (rc)
+ goto out;
+
+ req_ctx->param_init_done = false;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = ctr_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int ctr_paes_init(struct crypto_skcipher *tfm)
+{
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ memset(ctx, 0, sizeof(*ctx));
+ spin_lock_init(&ctx->pk_lock);
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pctr_req_ctx));
+
+ return 0;
+}
+
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
+{
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ memzero_explicit(ctx, sizeof(*ctx));
+}
+
+static int ctr_paes_do_one_request(struct crypto_engine *engine, void *areq)
+{
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
- return ctx->fc ? 0 : -EINVAL;
+ /* walk has already been prepared */
+
+ rc = ctr_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue is full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
+ }
+
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
}
-static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int in_keylen)
+static struct skcipher_engine_alg ctr_paes_alg = {
+ .base = {
+ .base.cra_name = "ctr(paes)",
+ .base.cra_driver_name = "ctr-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.base.cra_list),
+ .init = ctr_paes_init,
+ .exit = ctr_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_paes_setkey,
+ .encrypt = ctr_paes_crypt,
+ .decrypt = ctr_paes_crypt,
+ .chunksize = AES_BLOCK_SIZE,
+ },
+ .op = {
+ .do_one_request = ctr_paes_do_one_request,
+ },
+};
+
+/*
+ * PAES XTS implementation
+ */
+
+struct xts_full_km_param {
+ u8 key[64];
+ u8 tweak[16];
+ u8 nap[16];
+ u8 wkvp[32];
+} __packed;
+
+struct xts_km_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+ u8 init[16];
+} __packed;
+
+struct xts_pcc_param {
+ u8 key[PAES_256_PROTKEY_SIZE];
+ u8 tweak[16];
+ u8 block[16];
+ u8 bit[16];
+ u8 xts[16];
+} __packed;
+
+struct s390_pxts_req_ctx {
+ unsigned long modifier;
+ struct skcipher_walk walk;
+ bool param_init_done;
+ union {
+ struct xts_full_km_param full_km_param;
+ struct xts_km_param km_param;
+ } param;
+};
+
+static int xts_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int in_keylen)
{
struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 ckey[2 * AES_MAX_KEY_SIZE];
unsigned int ckey_len;
+ long fc;
int rc;
if ((in_keylen == 32 || in_keylen == 64) &&
xts_verify_key(tfm, in_key, in_keylen))
return -EINVAL;
- _free_kb_keybuf(&ctx->kb);
- rc = _xts_key_to_kb(&ctx->kb, in_key, in_keylen);
+ /* set raw key into context */
+ rc = pxts_ctx_setkey(ctx, in_key, in_keylen);
if (rc)
- return rc;
+ goto out;
- rc = __xts_paes_set_key(ctx);
+ /* convert raw key(s) into protected key(s) */
+ rc = pxts_convert_key(ctx);
if (rc)
- return rc;
+ goto out;
/*
- * It is not possible on a single protected key (e.g. full AES-XTS) to
- * check, if k1 and k2 are the same.
- */
- if (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128 ||
- ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_256)
- return 0;
- /*
* xts_verify_key verifies the key length is not odd and makes
* sure that the two keys are not the same. This can be done
- * on the two protected keys as well
+ * on the two protected keys as well - but not for full xts keys.
*/
- ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
- AES_KEYSIZE_128 : AES_KEYSIZE_256;
- memcpy(ckey, ctx->pk[0].protkey, ckey_len);
- memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
- return xts_verify_key(tfm, ckey, 2*ckey_len);
+ if (ctx->pk[0].type == PKEY_KEYTYPE_AES_128 ||
+ ctx->pk[0].type == PKEY_KEYTYPE_AES_256) {
+ ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
+ AES_KEYSIZE_128 : AES_KEYSIZE_256;
+ memcpy(ckey, ctx->pk[0].protkey, ckey_len);
+ memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
+ rc = xts_verify_key(tfm, ckey, 2 * ckey_len);
+ memzero_explicit(ckey, sizeof(ckey));
+ if (rc)
+ goto out;
+ }
+
+ /* Pick the correct function code based on the protected key type */
+ switch (ctx->pk[0].type) {
+ case PKEY_KEYTYPE_AES_128:
+ fc = CPACF_KM_PXTS_128;
+ break;
+ case PKEY_KEYTYPE_AES_256:
+ fc = CPACF_KM_PXTS_256;
+ break;
+ case PKEY_KEYTYPE_AES_XTS_128:
+ fc = CPACF_KM_PXTS_128_FULL;
+ break;
+ case PKEY_KEYTYPE_AES_XTS_256:
+ fc = CPACF_KM_PXTS_256_FULL;
+ break;
+ default:
+ fc = 0;
+ break;
+ }
+ ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+
+ rc = fc ? 0 : -EINVAL;
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int paes_xts_crypt_full(struct skcipher_request *req,
- unsigned long modifier)
+static int xts_paes_do_crypt_fullkey(struct s390_pxts_ctx *ctx,
+ struct s390_pxts_req_ctx *req_ctx,
+ bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct xts_full_km_param *param = &req_ctx->param.full_km_param;
+ struct skcipher_walk *walk = &req_ctx->walk;
unsigned int keylen, offset, nbytes, n, k;
- struct {
- u8 key[64];
- u8 tweak[16];
- u8 nap[16];
- u8 wkvp[32];
- } fxts_param = {
- .nap = {0},
- };
- struct skcipher_walk walk;
- int rc;
+ int rc = 0;
- rc = skcipher_walk_virt(&walk, req, false);
- if (rc)
- return rc;
+ /*
+ * The calling function xts_paes_do_crypt() ensures the
+ * protected key state is always PK_STATE_VALID when this
+ * function is invoked.
+ */
keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 64;
offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 0;
- spin_lock_bh(&ctx->pk_lock);
- memcpy(fxts_param.key + offset, ctx->pk[0].protkey, keylen);
- memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen,
- sizeof(fxts_param.wkvp));
- spin_unlock_bh(&ctx->pk_lock);
- memcpy(fxts_param.tweak, walk.iv, sizeof(fxts_param.tweak));
- fxts_param.nap[0] = 0x01; /* initial alpha power (1, little-endian) */
+ if (!req_ctx->param_init_done) {
+ memset(param, 0, sizeof(*param));
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp));
+ spin_unlock_bh(&ctx->pk_lock);
+ memcpy(param->tweak, walk->iv, sizeof(param->tweak));
+ param->nap[0] = 0x01; /* initial alpha power (1, little-endian) */
+ req_ctx->param_init_done = true;
+ }
- while ((nbytes = walk.nbytes) != 0) {
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_km(ctx->fc | modifier, fxts_param.key + offset,
- walk.dst.virt.addr, walk.src.virt.addr, n);
+ k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
if (k)
- rc = skcipher_walk_done(&walk, nbytes - k);
+ rc = skcipher_walk_done(walk, nbytes - k);
if (k < n) {
- if (__xts_paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = pxts_convert_key(ctx);
+ if (rc)
+ goto out;
spin_lock_bh(&ctx->pk_lock);
- memcpy(fxts_param.key + offset, ctx->pk[0].protkey,
- keylen);
- memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen,
- sizeof(fxts_param.wkvp));
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp));
spin_unlock_bh(&ctx->pk_lock);
}
}
+out:
+ pr_debug("rc=%d\n", rc);
return rc;
}
-static int paes_xts_crypt(struct skcipher_request *req, unsigned long modifier)
+static inline int __xts_2keys_prep_param(struct s390_pxts_ctx *ctx,
+ struct xts_km_param *param,
+ struct skcipher_walk *walk,
+ unsigned int keylen,
+ unsigned int offset, bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct xts_pcc_param pcc_param;
+ unsigned long cc = 1;
+ int rc = 0;
+
+ while (cc) {
+ memset(&pcc_param, 0, sizeof(pcc_param));
+ memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+ spin_unlock_bh(&ctx->pk_lock);
+ cc = cpacf_pcc(ctx->fc, pcc_param.key + offset);
+ if (cc) {
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ break;
+ }
+ rc = pxts_convert_key(ctx);
+ if (rc)
+ break;
+ continue;
+ }
+ memcpy(param->init, pcc_param.xts, 16);
+ }
+
+ memzero_explicit(pcc_param.key, sizeof(pcc_param.key));
+ return rc;
+}
+
+static int xts_paes_do_crypt_2keys(struct s390_pxts_ctx *ctx,
+ struct s390_pxts_req_ctx *req_ctx,
+ bool maysleep)
+{
+ struct xts_km_param *param = &req_ctx->param.km_param;
+ struct skcipher_walk *walk = &req_ctx->walk;
unsigned int keylen, offset, nbytes, n, k;
- struct {
- u8 key[PAES_256_PROTKEY_SIZE];
- u8 tweak[16];
- u8 block[16];
- u8 bit[16];
- u8 xts[16];
- } pcc_param;
- struct {
- u8 key[PAES_256_PROTKEY_SIZE];
- u8 init[16];
- } xts_param;
- struct skcipher_walk walk;
- int rc;
+ int rc = 0;
- rc = skcipher_walk_virt(&walk, req, false);
- if (rc)
- return rc;
+ /*
+ * The calling function xts_paes_do_crypt() ensures the
+ * protected key state is always PK_STATE_VALID when this
+ * function is invoked.
+ */
keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
- memset(&pcc_param, 0, sizeof(pcc_param));
- memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
- spin_lock_bh(&ctx->pk_lock);
- memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
- memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
- spin_unlock_bh(&ctx->pk_lock);
- cpacf_pcc(ctx->fc, pcc_param.key + offset);
- memcpy(xts_param.init, pcc_param.xts, 16);
+ if (!req_ctx->param_init_done) {
+ rc = __xts_2keys_prep_param(ctx, param, walk,
+ keylen, offset, maysleep);
+ if (rc)
+ goto out;
+ req_ctx->param_init_done = true;
+ }
- while ((nbytes = walk.nbytes) != 0) {
+ /*
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+ while ((nbytes = walk->nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
- walk.dst.virt.addr, walk.src.virt.addr, n);
+ k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset,
+ walk->dst.virt.addr, walk->src.virt.addr, n);
if (k)
- rc = skcipher_walk_done(&walk, nbytes - k);
+ rc = skcipher_walk_done(walk, nbytes - k);
if (k < n) {
- if (__xts_paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = pxts_convert_key(ctx);
+ if (rc)
+ goto out;
spin_lock_bh(&ctx->pk_lock);
- memcpy(xts_param.key + offset,
- ctx->pk[0].protkey, keylen);
+ memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
spin_unlock_bh(&ctx->pk_lock);
}
}
+out:
+ pr_debug("rc=%d\n", rc);
return rc;
}
-static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+static int xts_paes_do_crypt(struct s390_pxts_ctx *ctx,
+ struct s390_pxts_req_ctx *req_ctx,
+ bool maysleep)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int pk_state, rc = 0;
+
+ /* fetch and check protected key state */
+ spin_lock_bh(&ctx->pk_lock);
+ pk_state = ctx->pk_state;
+ switch (pk_state) {
+ case PK_STATE_NO_KEY:
+ rc = -ENOKEY;
+ break;
+ case PK_STATE_CONVERT_IN_PROGRESS:
+ rc = -EKEYEXPIRED;
+ break;
+ case PK_STATE_VALID:
+ break;
+ default:
+ rc = pk_state < 0 ? pk_state : -EIO;
+ break;
+ }
+ spin_unlock_bh(&ctx->pk_lock);
+ if (rc)
+ goto out;
+ /* Call the 'real' crypt function based on the xts prot key type. */
switch (ctx->fc) {
case CPACF_KM_PXTS_128:
case CPACF_KM_PXTS_256:
- return paes_xts_crypt(req, modifier);
+ rc = xts_paes_do_crypt_2keys(ctx, req_ctx, maysleep);
+ break;
case CPACF_KM_PXTS_128_FULL:
case CPACF_KM_PXTS_256_FULL:
- return paes_xts_crypt_full(req, modifier);
+ rc = xts_paes_do_crypt_fullkey(ctx, req_ctx, maysleep);
+ break;
default:
- return -EINVAL;
+ rc = -EINVAL;
}
-}
-static int xts_paes_encrypt(struct skcipher_request *req)
-{
- return xts_paes_crypt(req, 0);
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static int xts_paes_decrypt(struct skcipher_request *req)
+static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- return xts_paes_crypt(req, CPACF_DECRYPT);
-}
+ struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
-static struct skcipher_alg xts_paes_alg = {
- .base.cra_name = "xts(paes)",
- .base.cra_driver_name = "xts-paes-s390",
- .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct s390_pxts_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
- .init = xts_paes_init,
- .exit = xts_paes_exit,
- .min_keysize = 2 * PAES_MIN_KEYSIZE,
- .max_keysize = 2 * PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_paes_set_key,
- .encrypt = xts_paes_encrypt,
- .decrypt = xts_paes_decrypt,
-};
+ /*
+ * Attempt synchronous encryption first. If it fails, schedule the request
+ * asynchronously via the crypto engine. To preserve execution order,
+ * once a request is queued to the engine, further requests using the same
+ * tfm will also be routed through the engine.
+ */
-static int ctr_paes_init(struct crypto_skcipher *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ rc = skcipher_walk_virt(walk, req, false);
+ if (rc)
+ goto out;
- ctx->kb.key = NULL;
- spin_lock_init(&ctx->pk_lock);
+ req_ctx->modifier = modifier;
+ req_ctx->param_init_done = false;
- return 0;
-}
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&ctx->via_engine_ctr)) {
+ rc = xts_paes_do_crypt(ctx, req_ctx, false);
+ if (rc == 0)
+ goto out;
+ }
-static void ctr_paes_exit(struct crypto_skcipher *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&ctx->via_engine_ctr);
+ rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ skcipher_walk_done(walk, rc);
- _free_kb_keybuf(&ctx->kb);
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("rc=%d\n", rc);
+ return rc;
}
-static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
+static int xts_paes_encrypt(struct skcipher_request *req)
{
- unsigned long fc;
- int rc;
-
- rc = __paes_convert_key(ctx);
- if (rc)
- return rc;
-
- /* Pick the correct function code based on the protected key type */
- fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 :
- (ctx->pk.type == PKEY_KEYTYPE_AES_256) ?
- CPACF_KMCTR_PAES_256 : 0;
-
- /* Check if the function code is available */
- ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+ return xts_paes_crypt(req, 0);
+}
- return ctx->fc ? 0 : -EINVAL;
+static int xts_paes_decrypt(struct skcipher_request *req)
+{
+ return xts_paes_crypt(req, CPACF_DECRYPT);
}
-static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int xts_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- int rc;
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
- _free_kb_keybuf(&ctx->kb);
- rc = _key_to_kb(&ctx->kb, in_key, key_len);
- if (rc)
- return rc;
+ memset(ctx, 0, sizeof(*ctx));
+ spin_lock_init(&ctx->pk_lock);
- return __ctr_paes_set_key(ctx);
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pxts_req_ctx));
+
+ return 0;
}
-static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+static void xts_paes_exit(struct crypto_skcipher *tfm)
{
- unsigned int i, n;
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
- /* only use complete blocks, max. PAGE_SIZE */
- memcpy(ctrptr, iv, AES_BLOCK_SIZE);
- n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
- for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
- memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
- crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
- ctrptr += AES_BLOCK_SIZE;
- }
- return n;
+ memzero_explicit(ctx, sizeof(*ctx));
}
-static int ctr_paes_crypt(struct skcipher_request *req)
+static int xts_paes_do_one_request(struct crypto_engine *engine, void *areq)
{
+ struct skcipher_request *req = skcipher_request_cast(areq);
+ struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req);
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
- u8 buf[AES_BLOCK_SIZE], *ctrptr;
- struct {
- u8 key[PAES_256_PROTKEY_SIZE];
- } param;
- struct skcipher_walk walk;
- unsigned int nbytes, n, k;
- int rc, locked;
-
- rc = skcipher_walk_virt(&walk, req, false);
- if (rc)
- return rc;
-
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
-
- locked = mutex_trylock(&ctrblk_lock);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk *walk = &req_ctx->walk;
+ int rc;
- while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- n = AES_BLOCK_SIZE;
- if (nbytes >= 2*AES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk.iv, nbytes);
- ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
- k = cpacf_kmctr(ctx->fc, &param, walk.dst.virt.addr,
- walk.src.virt.addr, n, ctrptr);
- if (k) {
- if (ctrptr == ctrblk)
- memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
- AES_BLOCK_SIZE);
- crypto_inc(walk.iv, AES_BLOCK_SIZE);
- rc = skcipher_walk_done(&walk, nbytes - k);
- }
- if (k < n) {
- if (__paes_convert_key(ctx)) {
- if (locked)
- mutex_unlock(&ctrblk_lock);
- return skcipher_walk_done(&walk, -EIO);
- }
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
- }
- }
- if (locked)
- mutex_unlock(&ctrblk_lock);
- /*
- * final block may be < AES_BLOCK_SIZE, copy only nbytes
- */
- if (nbytes) {
- memset(buf, 0, AES_BLOCK_SIZE);
- memcpy(buf, walk.src.virt.addr, nbytes);
- while (1) {
- if (cpacf_kmctr(ctx->fc, &param, buf,
- buf, AES_BLOCK_SIZE,
- walk.iv) == AES_BLOCK_SIZE)
- break;
- if (__paes_convert_key(ctx))
- return skcipher_walk_done(&walk, -EIO);
- spin_lock_bh(&ctx->pk_lock);
- memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE);
- spin_unlock_bh(&ctx->pk_lock);
- }
- memcpy(walk.dst.virt.addr, buf, nbytes);
- crypto_inc(walk.iv, AES_BLOCK_SIZE);
- rc = skcipher_walk_done(&walk, nbytes);
+ /* walk has already been prepared */
+
+ rc = xts_paes_do_crypt(ctx, req_ctx, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue is full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell the scheduler to voluntarily give up the CPU here.
+ */
+ cond_resched();
+ pr_debug("rescheduling request\n");
+ return -ENOSPC;
+ } else if (rc) {
+ skcipher_walk_done(walk, rc);
}
+ memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&ctx->via_engine_ctr);
+ crypto_finalize_skcipher_request(engine, req, rc);
+ local_bh_enable();
return rc;
}
-static struct skcipher_alg ctr_paes_alg = {
- .base.cra_name = "ctr(paes)",
- .base.cra_driver_name = "ctr-paes-s390",
- .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
- .base.cra_module = THIS_MODULE,
- .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
- .init = ctr_paes_init,
- .exit = ctr_paes_exit,
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ctr_paes_set_key,
- .encrypt = ctr_paes_crypt,
- .decrypt = ctr_paes_crypt,
- .chunksize = AES_BLOCK_SIZE,
+static struct skcipher_engine_alg xts_paes_alg = {
+ .base = {
+ .base.cra_name = "xts(paes)",
+ .base.cra_driver_name = "xts-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_pxts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.base.cra_list),
+ .init = xts_paes_init,
+ .exit = xts_paes_exit,
+ .min_keysize = 2 * PAES_MIN_KEYSIZE,
+ .max_keysize = 2 * PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_paes_setkey,
+ .encrypt = xts_paes_encrypt,
+ .decrypt = xts_paes_decrypt,
+ },
+ .op = {
+ .do_one_request = xts_paes_do_one_request,
+ },
};
-static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
+/*
+ * alg register, unregister, module init, exit
+ */
+
+static struct miscdevice paes_dev = {
+ .name = "paes",
+ .minor = MISC_DYNAMIC_MINOR,
+};
+
+static inline void __crypto_unregister_skcipher(struct skcipher_engine_alg *alg)
{
- if (!list_empty(&alg->base.cra_list))
- crypto_unregister_skcipher(alg);
+ if (!list_empty(&alg->base.base.cra_list))
+ crypto_engine_unregister_skcipher(alg);
}
static void paes_s390_fini(void)
{
+ if (paes_crypto_engine) {
+ crypto_engine_stop(paes_crypto_engine);
+ crypto_engine_exit(paes_crypto_engine);
+ }
__crypto_unregister_skcipher(&ctr_paes_alg);
__crypto_unregister_skcipher(&xts_paes_alg);
__crypto_unregister_skcipher(&cbc_paes_alg);
__crypto_unregister_skcipher(&ecb_paes_alg);
if (ctrblk)
- free_page((unsigned long) ctrblk);
+ free_page((unsigned long)ctrblk);
+ misc_deregister(&paes_dev);
}
static int __init paes_s390_init(void)
{
int rc;
+ /* register a simple paes pseudo misc device */
+ rc = misc_register(&paes_dev);
+ if (rc)
+ return rc;
+
+ /* with this pseudo devie alloc and start a crypto engine */
+ paes_crypto_engine =
+ crypto_engine_alloc_init_and_set(paes_dev.this_device,
+ true, false, MAX_QLEN);
+ if (!paes_crypto_engine) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ rc = crypto_engine_start(paes_crypto_engine);
+ if (rc) {
+ crypto_engine_exit(paes_crypto_engine);
+ paes_crypto_engine = NULL;
+ goto out_err;
+ }
+
/* Query available functions for KM, KMC and KMCTR */
cpacf_query(CPACF_KM, &km_functions);
cpacf_query(CPACF_KMC, &kmc_functions);
@@ -927,40 +1653,45 @@ static int __init paes_s390_init(void)
if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
- rc = crypto_register_skcipher(&ecb_paes_alg);
+ rc = crypto_engine_register_skcipher(&ecb_paes_alg);
if (rc)
goto out_err;
+ pr_debug("%s registered\n", ecb_paes_alg.base.base.cra_driver_name);
}
if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
- rc = crypto_register_skcipher(&cbc_paes_alg);
+ rc = crypto_engine_register_skcipher(&cbc_paes_alg);
if (rc)
goto out_err;
+ pr_debug("%s registered\n", cbc_paes_alg.base.base.cra_driver_name);
}
if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
- rc = crypto_register_skcipher(&xts_paes_alg);
+ rc = crypto_engine_register_skcipher(&xts_paes_alg);
if (rc)
goto out_err;
+ pr_debug("%s registered\n", xts_paes_alg.base.base.cra_driver_name);
}
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
- ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+ ctrblk = (u8 *)__get_free_page(GFP_KERNEL);
if (!ctrblk) {
rc = -ENOMEM;
goto out_err;
}
- rc = crypto_register_skcipher(&ctr_paes_alg);
+ rc = crypto_engine_register_skcipher(&ctr_paes_alg);
if (rc)
goto out_err;
+ pr_debug("%s registered\n", ctr_paes_alg.base.base.cra_driver_name);
}
return 0;
+
out_err:
paes_s390_fini();
return rc;
diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c
new file mode 100644
index 000000000000..7ecfdc4fba2d
--- /dev/null
+++ b/arch/s390/crypto/phmac_s390.c
@@ -0,0 +1,1048 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright IBM Corp. 2025
+ *
+ * s390 specific HMAC support for protected keys.
+ */
+
+#define KMSG_COMPONENT "phmac_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <asm/cpacf.h>
+#include <asm/pkey.h>
+#include <crypto/engine.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha2.h>
+#include <linux/atomic.h>
+#include <linux/cpufeature.h>
+#include <linux/delay.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+static struct crypto_engine *phmac_crypto_engine;
+#define MAX_QLEN 10
+
+/*
+ * A simple hash walk helper
+ */
+
+struct hash_walk_helper {
+ struct crypto_hash_walk walk;
+ const u8 *walkaddr;
+ int walkbytes;
+};
+
+/*
+ * Prepare hash walk helper.
+ * Set up the base hash walk, fill walkaddr and walkbytes.
+ * Returns 0 on success or negative value on error.
+ */
+static inline int hwh_prepare(struct ahash_request *req,
+ struct hash_walk_helper *hwh)
+{
+ hwh->walkbytes = crypto_hash_walk_first(req, &hwh->walk);
+ if (hwh->walkbytes < 0)
+ return hwh->walkbytes;
+ hwh->walkaddr = hwh->walk.data;
+ return 0;
+}
+
+/*
+ * Advance hash walk helper by n bytes.
+ * Progress the walkbytes and walkaddr fields by n bytes.
+ * If walkbytes is then 0, pull next hunk from hash walk
+ * and update walkbytes and walkaddr.
+ * If n is negative, unmap hash walk and return error.
+ * Returns 0 on success or negative value on error.
+ */
+static inline int hwh_advance(struct hash_walk_helper *hwh, int n)
+{
+ if (n < 0)
+ return crypto_hash_walk_done(&hwh->walk, n);
+
+ hwh->walkbytes -= n;
+ hwh->walkaddr += n;
+ if (hwh->walkbytes > 0)
+ return 0;
+
+ hwh->walkbytes = crypto_hash_walk_done(&hwh->walk, 0);
+ if (hwh->walkbytes < 0)
+ return hwh->walkbytes;
+
+ hwh->walkaddr = hwh->walk.data;
+ return 0;
+}
+
+/*
+ * KMAC param block layout for sha2 function codes:
+ * The layout of the param block for the KMAC instruction depends on the
+ * blocksize of the used hashing sha2-algorithm function codes. The param block
+ * contains the hash chaining value (cv), the input message bit-length (imbl)
+ * and the hmac-secret (key). To prevent code duplication, the sizes of all
+ * these are calculated based on the blocksize.
+ *
+ * param-block:
+ * +-------+
+ * | cv |
+ * +-------+
+ * | imbl |
+ * +-------+
+ * | key |
+ * +-------+
+ *
+ * sizes:
+ * part | sh2-alg | calculation | size | type
+ * -----+---------+-------------+------+--------
+ * cv | 224/256 | blocksize/2 | 32 | u64[8]
+ * | 384/512 | | 64 | u128[8]
+ * imbl | 224/256 | blocksize/8 | 8 | u64
+ * | 384/512 | | 16 | u128
+ * key | 224/256 | blocksize | 96 | u8[96]
+ * | 384/512 | | 160 | u8[160]
+ */
+
+#define MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
+#define MAX_IMBL_SIZE sizeof(u128)
+#define MAX_BLOCK_SIZE SHA512_BLOCK_SIZE
+
+#define SHA2_CV_SIZE(bs) ((bs) >> 1)
+#define SHA2_IMBL_SIZE(bs) ((bs) >> 3)
+
+#define SHA2_IMBL_OFFSET(bs) (SHA2_CV_SIZE(bs))
+#define SHA2_KEY_OFFSET(bs) (SHA2_CV_SIZE(bs) + SHA2_IMBL_SIZE(bs))
+
+#define PHMAC_MAX_KEYSIZE 256
+#define PHMAC_SHA256_PK_SIZE (SHA256_BLOCK_SIZE + 32)
+#define PHMAC_SHA512_PK_SIZE (SHA512_BLOCK_SIZE + 32)
+#define PHMAC_MAX_PK_SIZE PHMAC_SHA512_PK_SIZE
+
+/* phmac protected key struct */
+struct phmac_protkey {
+ u32 type;
+ u32 len;
+ u8 protkey[PHMAC_MAX_PK_SIZE];
+};
+
+#define PK_STATE_NO_KEY 0
+#define PK_STATE_CONVERT_IN_PROGRESS 1
+#define PK_STATE_VALID 2
+
+/* phmac tfm context */
+struct phmac_tfm_ctx {
+ /* source key material used to derive a protected key from */
+ u8 keybuf[PHMAC_MAX_KEYSIZE];
+ unsigned int keylen;
+
+ /* cpacf function code to use with this protected key type */
+ long fc;
+
+ /* nr of requests enqueued via crypto engine which use this tfm ctx */
+ atomic_t via_engine_ctr;
+
+ /* spinlock to atomic read/update all the following fields */
+ spinlock_t pk_lock;
+
+ /* see PK_STATE* defines above, < 0 holds convert failure rc */
+ int pk_state;
+ /* if state is valid, pk holds the protected key */
+ struct phmac_protkey pk;
+};
+
+union kmac_gr0 {
+ unsigned long reg;
+ struct {
+ unsigned long : 48;
+ unsigned long ikp : 1;
+ unsigned long iimp : 1;
+ unsigned long ccup : 1;
+ unsigned long : 6;
+ unsigned long fc : 7;
+ };
+};
+
+struct kmac_sha2_ctx {
+ u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + PHMAC_MAX_PK_SIZE];
+ union kmac_gr0 gr0;
+ u8 buf[MAX_BLOCK_SIZE];
+ u64 buflen[2];
+};
+
+/* phmac request context */
+struct phmac_req_ctx {
+ struct hash_walk_helper hwh;
+ struct kmac_sha2_ctx kmac_ctx;
+ bool final;
+};
+
+/*
+ * Pkey 'token' struct used to derive a protected key value from a clear key.
+ */
+struct hmac_clrkey_token {
+ u8 type;
+ u8 res0[3];
+ u8 version;
+ u8 res1[3];
+ u32 keytype;
+ u32 len;
+ u8 key[];
+} __packed;
+
+static int hash_key(const u8 *in, unsigned int inlen,
+ u8 *digest, unsigned int digestsize)
+{
+ unsigned long func;
+ union {
+ struct sha256_paramblock {
+ u32 h[8];
+ u64 mbl;
+ } sha256;
+ struct sha512_paramblock {
+ u64 h[8];
+ u128 mbl;
+ } sha512;
+ } __packed param;
+
+#define PARAM_INIT(x, y, z) \
+ param.sha##x.h[0] = SHA##y ## _H0; \
+ param.sha##x.h[1] = SHA##y ## _H1; \
+ param.sha##x.h[2] = SHA##y ## _H2; \
+ param.sha##x.h[3] = SHA##y ## _H3; \
+ param.sha##x.h[4] = SHA##y ## _H4; \
+ param.sha##x.h[5] = SHA##y ## _H5; \
+ param.sha##x.h[6] = SHA##y ## _H6; \
+ param.sha##x.h[7] = SHA##y ## _H7; \
+ param.sha##x.mbl = (z)
+
+ switch (digestsize) {
+ case SHA224_DIGEST_SIZE:
+ func = CPACF_KLMD_SHA_256;
+ PARAM_INIT(256, 224, inlen * 8);
+ break;
+ case SHA256_DIGEST_SIZE:
+ func = CPACF_KLMD_SHA_256;
+ PARAM_INIT(256, 256, inlen * 8);
+ break;
+ case SHA384_DIGEST_SIZE:
+ func = CPACF_KLMD_SHA_512;
+ PARAM_INIT(512, 384, inlen * 8);
+ break;
+ case SHA512_DIGEST_SIZE:
+ func = CPACF_KLMD_SHA_512;
+ PARAM_INIT(512, 512, inlen * 8);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+#undef PARAM_INIT
+
+ cpacf_klmd(func, &param, in, inlen);
+
+ memcpy(digest, &param, digestsize);
+
+ return 0;
+}
+
+/*
+ * make_clrkey_token() - wrap the clear key into a pkey clearkey token.
+ */
+static inline int make_clrkey_token(const u8 *clrkey, size_t clrkeylen,
+ unsigned int digestsize, u8 *dest)
+{
+ struct hmac_clrkey_token *token = (struct hmac_clrkey_token *)dest;
+ unsigned int blocksize;
+ int rc;
+
+ token->type = 0x00;
+ token->version = 0x02;
+ switch (digestsize) {
+ case SHA224_DIGEST_SIZE:
+ case SHA256_DIGEST_SIZE:
+ token->keytype = PKEY_KEYTYPE_HMAC_512;
+ blocksize = 64;
+ break;
+ case SHA384_DIGEST_SIZE:
+ case SHA512_DIGEST_SIZE:
+ token->keytype = PKEY_KEYTYPE_HMAC_1024;
+ blocksize = 128;
+ break;
+ default:
+ return -EINVAL;
+ }
+ token->len = blocksize;
+
+ if (clrkeylen > blocksize) {
+ rc = hash_key(clrkey, clrkeylen, token->key, digestsize);
+ if (rc)
+ return rc;
+ } else {
+ memcpy(token->key, clrkey, clrkeylen);
+ }
+
+ return 0;
+}
+
+/*
+ * phmac_tfm_ctx_setkey() - Set key value into tfm context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int phmac_tfm_ctx_setkey(struct phmac_tfm_ctx *tfm_ctx,
+ const u8 *key, unsigned int keylen)
+{
+ if (keylen > sizeof(tfm_ctx->keybuf))
+ return -EINVAL;
+
+ memcpy(tfm_ctx->keybuf, key, keylen);
+ tfm_ctx->keylen = keylen;
+
+ return 0;
+}
+
+/*
+ * Convert the raw key material into a protected key via PKEY api.
+ * This function may sleep - don't call in non-sleeping context.
+ */
+static inline int convert_key(const u8 *key, unsigned int keylen,
+ struct phmac_protkey *pk)
+{
+ int rc, i;
+
+ pk->len = sizeof(pk->protkey);
+
+ /*
+ * In case of a busy card retry with increasing delay
+ * of 200, 400, 800 and 1600 ms - in total 3 s.
+ */
+ for (rc = -EIO, i = 0; rc && i < 5; i++) {
+ if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) {
+ rc = -EINTR;
+ goto out;
+ }
+ rc = pkey_key2protkey(key, keylen,
+ pk->protkey, &pk->len, &pk->type,
+ PKEY_XFLAG_NOMEMALLOC);
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+/*
+ * (Re-)Convert the raw key material from the tfm ctx into a protected
+ * key via convert_key() function. Update the pk_state, pk_type, pk_len
+ * and the protected key in the tfm context.
+ * Please note this function may be invoked concurrently with the very
+ * same tfm context. The pk_lock spinlock in the context ensures an
+ * atomic update of the pk and the pk state but does not guarantee any
+ * order of update. So a fresh converted valid protected key may get
+ * updated with an 'old' expired key value. As the cpacf instructions
+ * detect this, refuse to operate with an invalid key and the calling
+ * code triggers a (re-)conversion this does no harm. This may lead to
+ * unnecessary additional conversion but never to invalid data on the
+ * hash operation.
+ */
+static int phmac_convert_key(struct phmac_tfm_ctx *tfm_ctx)
+{
+ struct phmac_protkey pk;
+ int rc;
+
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ tfm_ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+
+ rc = convert_key(tfm_ctx->keybuf, tfm_ctx->keylen, &pk);
+
+ /* update context */
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ if (rc) {
+ tfm_ctx->pk_state = rc;
+ } else {
+ tfm_ctx->pk_state = PK_STATE_VALID;
+ tfm_ctx->pk = pk;
+ }
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+
+ memzero_explicit(&pk, sizeof(pk));
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+/*
+ * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize
+ */
+static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo,
+ u64 buflen_hi, unsigned int blocksize)
+{
+ u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize);
+
+ switch (blocksize) {
+ case SHA256_BLOCK_SIZE:
+ *(u64 *)imbl = buflen_lo * BITS_PER_BYTE;
+ break;
+ case SHA512_BLOCK_SIZE:
+ *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3;
+ break;
+ default:
+ break;
+ }
+}
+
+static int phmac_kmac_update(struct ahash_request *req, bool maysleep)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+ struct hash_walk_helper *hwh = &req_ctx->hwh;
+ unsigned int bs = crypto_ahash_blocksize(tfm);
+ unsigned int offset, k, n;
+ int rc = 0;
+
+ /*
+ * The walk is always mapped when this function is called.
+ * Note that in case of partial processing or failure the walk
+ * is NOT unmapped here. So a follow up task may reuse the walk
+ * or in case of unrecoverable failure needs to unmap it.
+ */
+
+ while (hwh->walkbytes > 0) {
+ /* check sha2 context buffer */
+ offset = ctx->buflen[0] % bs;
+ if (offset + hwh->walkbytes < bs)
+ goto store;
+
+ if (offset) {
+ /* fill ctx buffer up to blocksize and process this block */
+ n = bs - offset;
+ memcpy(ctx->buf + offset, hwh->walkaddr, n);
+ ctx->gr0.iimp = 1;
+ for (;;) {
+ k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs);
+ if (likely(k == bs))
+ break;
+ if (unlikely(k > 0)) {
+ /*
+ * Can't deal with hunks smaller than blocksize.
+ * And kmac should always return the nr of
+ * processed bytes as 0 or a multiple of the
+ * blocksize.
+ */
+ rc = -EIO;
+ goto out;
+ }
+ /* protected key is invalid and needs re-conversion */
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = phmac_convert_key(tfm_ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+ tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+ }
+ ctx->buflen[0] += n;
+ if (ctx->buflen[0] < n)
+ ctx->buflen[1]++;
+ rc = hwh_advance(hwh, n);
+ if (unlikely(rc))
+ goto out;
+ offset = 0;
+ }
+
+ /* process as many blocks as possible from the walk */
+ while (hwh->walkbytes >= bs) {
+ n = (hwh->walkbytes / bs) * bs;
+ ctx->gr0.iimp = 1;
+ k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, hwh->walkaddr, n);
+ if (likely(k > 0)) {
+ ctx->buflen[0] += k;
+ if (ctx->buflen[0] < k)
+ ctx->buflen[1]++;
+ rc = hwh_advance(hwh, k);
+ if (unlikely(rc))
+ goto out;
+ }
+ if (unlikely(k < n)) {
+ /* protected key is invalid and needs re-conversion */
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = phmac_convert_key(tfm_ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+ tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+ }
+ }
+
+store:
+ /* store incomplete block in context buffer */
+ if (hwh->walkbytes) {
+ memcpy(ctx->buf + offset, hwh->walkaddr, hwh->walkbytes);
+ ctx->buflen[0] += hwh->walkbytes;
+ if (ctx->buflen[0] < hwh->walkbytes)
+ ctx->buflen[1]++;
+ rc = hwh_advance(hwh, hwh->walkbytes);
+ if (unlikely(rc))
+ goto out;
+ }
+
+ } /* end of while (hwh->walkbytes > 0) */
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_kmac_final(struct ahash_request *req, bool maysleep)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+ unsigned int ds = crypto_ahash_digestsize(tfm);
+ unsigned int bs = crypto_ahash_blocksize(tfm);
+ unsigned int k, n;
+ int rc = 0;
+
+ n = ctx->buflen[0] % bs;
+ ctx->gr0.iimp = 0;
+ kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs);
+ for (;;) {
+ k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, n);
+ if (likely(k == n))
+ break;
+ if (unlikely(k > 0)) {
+ /* Can't deal with hunks smaller than blocksize. */
+ rc = -EIO;
+ goto out;
+ }
+ /* protected key is invalid and needs re-conversion */
+ if (!maysleep) {
+ rc = -EKEYEXPIRED;
+ goto out;
+ }
+ rc = phmac_convert_key(tfm_ctx);
+ if (rc)
+ goto out;
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+ tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+ }
+
+ memcpy(req->result, ctx->param, ds);
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_init(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ unsigned int bs = crypto_ahash_blocksize(tfm);
+ int rc = 0;
+
+ /* zero request context (includes the kmac sha2 context) */
+ memset(req_ctx, 0, sizeof(*req_ctx));
+
+ /*
+ * setkey() should have set a valid fc into the tfm context.
+ * Copy this function code into the gr0 field of the kmac context.
+ */
+ if (!tfm_ctx->fc) {
+ rc = -ENOKEY;
+ goto out;
+ }
+ kmac_ctx->gr0.fc = tfm_ctx->fc;
+
+ /*
+ * Copy the pk from tfm ctx into kmac ctx. The protected key
+ * may be outdated but update() and final() will handle this.
+ */
+ spin_lock_bh(&tfm_ctx->pk_lock);
+ memcpy(kmac_ctx->param + SHA2_KEY_OFFSET(bs),
+ tfm_ctx->pk.protkey, tfm_ctx->pk.len);
+ spin_unlock_bh(&tfm_ctx->pk_lock);
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_update(struct ahash_request *req)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ struct hash_walk_helper *hwh = &req_ctx->hwh;
+ int rc;
+
+ /* prep the walk in the request context */
+ rc = hwh_prepare(req, hwh);
+ if (rc)
+ goto out;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&tfm_ctx->via_engine_ctr)) {
+ rc = phmac_kmac_update(req, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ atomic_inc(&tfm_ctx->via_engine_ctr);
+ rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&tfm_ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS) {
+ hwh_advance(hwh, rc);
+ memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+ }
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_final(struct ahash_request *req)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ int rc = 0;
+
+ /* Try synchronous operation if no active engine usage */
+ if (!atomic_read(&tfm_ctx->via_engine_ctr)) {
+ rc = phmac_kmac_final(req, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ req->nbytes = 0;
+ req_ctx->final = true;
+ atomic_inc(&tfm_ctx->via_engine_ctr);
+ rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&tfm_ctx->via_engine_ctr);
+ }
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_finup(struct ahash_request *req)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ struct hash_walk_helper *hwh = &req_ctx->hwh;
+ int rc;
+
+ /* prep the walk in the request context */
+ rc = hwh_prepare(req, hwh);
+ if (rc)
+ goto out;
+
+ /* Try synchronous operations if no active engine usage */
+ if (!atomic_read(&tfm_ctx->via_engine_ctr)) {
+ rc = phmac_kmac_update(req, false);
+ if (rc == 0)
+ req->nbytes = 0;
+ }
+ if (!rc && !req->nbytes && !atomic_read(&tfm_ctx->via_engine_ctr)) {
+ rc = phmac_kmac_final(req, false);
+ if (rc == 0)
+ goto out;
+ }
+
+ /*
+ * If sync operation failed or key expired or there are already
+ * requests enqueued via engine, fallback to async. Mark tfm as
+ * using engine to serialize requests.
+ */
+ if (rc == 0 || rc == -EKEYEXPIRED) {
+ req_ctx->final = true;
+ atomic_inc(&tfm_ctx->via_engine_ctr);
+ rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req);
+ if (rc != -EINPROGRESS)
+ atomic_dec(&tfm_ctx->via_engine_ctr);
+ }
+
+ if (rc != -EINPROGRESS)
+ hwh_advance(hwh, rc);
+
+out:
+ if (rc != -EINPROGRESS)
+ memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_digest(struct ahash_request *req)
+{
+ int rc;
+
+ rc = phmac_init(req);
+ if (rc)
+ goto out;
+
+ rc = phmac_finup(req);
+
+out:
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_setkey(struct crypto_ahash *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ unsigned int ds = crypto_ahash_digestsize(tfm);
+ unsigned int bs = crypto_ahash_blocksize(tfm);
+ unsigned int tmpkeylen;
+ u8 *tmpkey = NULL;
+ int rc = 0;
+
+ if (!crypto_ahash_tested(tfm)) {
+ /*
+ * selftest running: key is a raw hmac clear key and needs
+ * to get embedded into a 'clear key token' in order to have
+ * it correctly processed by the pkey module.
+ */
+ tmpkeylen = sizeof(struct hmac_clrkey_token) + bs;
+ tmpkey = kzalloc(tmpkeylen, GFP_KERNEL);
+ if (!tmpkey) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ rc = make_clrkey_token(key, keylen, ds, tmpkey);
+ if (rc)
+ goto out;
+ keylen = tmpkeylen;
+ key = tmpkey;
+ }
+
+ /* copy raw key into tfm context */
+ rc = phmac_tfm_ctx_setkey(tfm_ctx, key, keylen);
+ if (rc)
+ goto out;
+
+ /* convert raw key into protected key */
+ rc = phmac_convert_key(tfm_ctx);
+ if (rc)
+ goto out;
+
+ /* set function code in tfm context, check for valid pk type */
+ switch (ds) {
+ case SHA224_DIGEST_SIZE:
+ if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512)
+ rc = -EINVAL;
+ else
+ tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_224;
+ break;
+ case SHA256_DIGEST_SIZE:
+ if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512)
+ rc = -EINVAL;
+ else
+ tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_256;
+ break;
+ case SHA384_DIGEST_SIZE:
+ if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024)
+ rc = -EINVAL;
+ else
+ tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_384;
+ break;
+ case SHA512_DIGEST_SIZE:
+ if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024)
+ rc = -EINVAL;
+ else
+ tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_512;
+ break;
+ default:
+ tfm_ctx->fc = 0;
+ rc = -EINVAL;
+ }
+
+out:
+ kfree(tmpkey);
+ pr_debug("rc=%d\n", rc);
+ return rc;
+}
+
+static int phmac_export(struct ahash_request *req, void *out)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+
+ memcpy(out, ctx, sizeof(*ctx));
+
+ return 0;
+}
+
+static int phmac_import(struct ahash_request *req, const void *in)
+{
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+
+ memset(req_ctx, 0, sizeof(*req_ctx));
+ memcpy(ctx, in, sizeof(*ctx));
+
+ return 0;
+}
+
+static int phmac_init_tfm(struct crypto_ahash *tfm)
+{
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+
+ memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+ spin_lock_init(&tfm_ctx->pk_lock);
+
+ crypto_ahash_set_reqsize(tfm, sizeof(struct phmac_req_ctx));
+
+ return 0;
+}
+
+static void phmac_exit_tfm(struct crypto_ahash *tfm)
+{
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+
+ memzero_explicit(tfm_ctx->keybuf, sizeof(tfm_ctx->keybuf));
+ memzero_explicit(&tfm_ctx->pk, sizeof(tfm_ctx->pk));
+}
+
+static int phmac_do_one_request(struct crypto_engine *engine, void *areq)
+{
+ struct ahash_request *req = ahash_request_cast(areq);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+ struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+ struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+ struct hash_walk_helper *hwh = &req_ctx->hwh;
+ int rc = -EINVAL;
+
+ /*
+ * Three kinds of requests come in here:
+ * update when req->nbytes > 0 and req_ctx->final is false
+ * final when req->nbytes = 0 and req_ctx->final is true
+ * finup when req->nbytes > 0 and req_ctx->final is true
+ * For update and finup the hwh walk needs to be prepared and
+ * up to date but the actual nr of bytes in req->nbytes may be
+ * any non zero number. For final there is no hwh walk needed.
+ */
+
+ if (req->nbytes) {
+ rc = phmac_kmac_update(req, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell scheduler to voluntarily give up the CPU here.
+ */
+ pr_debug("rescheduling request\n");
+ cond_resched();
+ return -ENOSPC;
+ } else if (rc) {
+ hwh_advance(hwh, rc);
+ goto out;
+ }
+ req->nbytes = 0;
+ }
+
+ if (req_ctx->final) {
+ rc = phmac_kmac_final(req, true);
+ if (rc == -EKEYEXPIRED) {
+ /*
+ * Protected key expired, conversion is in process.
+ * Trigger a re-schedule of this request by returning
+ * -ENOSPC ("hardware queue full") to the crypto engine.
+ * To avoid immediately re-invocation of this callback,
+ * tell scheduler to voluntarily give up the CPU here.
+ */
+ pr_debug("rescheduling request\n");
+ cond_resched();
+ return -ENOSPC;
+ }
+ }
+
+out:
+ if (rc || req_ctx->final)
+ memzero_explicit(kmac_ctx, sizeof(*kmac_ctx));
+ pr_debug("request complete with rc=%d\n", rc);
+ local_bh_disable();
+ atomic_dec(&tfm_ctx->via_engine_ctr);
+ crypto_finalize_hash_request(engine, req, rc);
+ local_bh_enable();
+ return rc;
+}
+
+#define S390_ASYNC_PHMAC_ALG(x) \
+{ \
+ .base = { \
+ .init = phmac_init, \
+ .update = phmac_update, \
+ .final = phmac_final, \
+ .finup = phmac_finup, \
+ .digest = phmac_digest, \
+ .setkey = phmac_setkey, \
+ .import = phmac_import, \
+ .export = phmac_export, \
+ .init_tfm = phmac_init_tfm, \
+ .exit_tfm = phmac_exit_tfm, \
+ .halg = { \
+ .digestsize = SHA##x##_DIGEST_SIZE, \
+ .statesize = sizeof(struct kmac_sha2_ctx), \
+ .base = { \
+ .cra_name = "phmac(sha" #x ")", \
+ .cra_driver_name = "phmac_s390_sha" #x, \
+ .cra_blocksize = SHA##x##_BLOCK_SIZE, \
+ .cra_priority = 400, \
+ .cra_flags = CRYPTO_ALG_ASYNC | \
+ CRYPTO_ALG_NO_FALLBACK, \
+ .cra_ctxsize = sizeof(struct phmac_tfm_ctx), \
+ .cra_module = THIS_MODULE, \
+ }, \
+ }, \
+ }, \
+ .op = { \
+ .do_one_request = phmac_do_one_request, \
+ }, \
+}
+
+static struct phmac_alg {
+ unsigned int fc;
+ struct ahash_engine_alg alg;
+ bool registered;
+} phmac_algs[] = {
+ {
+ .fc = CPACF_KMAC_PHMAC_SHA_224,
+ .alg = S390_ASYNC_PHMAC_ALG(224),
+ }, {
+ .fc = CPACF_KMAC_PHMAC_SHA_256,
+ .alg = S390_ASYNC_PHMAC_ALG(256),
+ }, {
+ .fc = CPACF_KMAC_PHMAC_SHA_384,
+ .alg = S390_ASYNC_PHMAC_ALG(384),
+ }, {
+ .fc = CPACF_KMAC_PHMAC_SHA_512,
+ .alg = S390_ASYNC_PHMAC_ALG(512),
+ }
+};
+
+static struct miscdevice phmac_dev = {
+ .name = "phmac",
+ .minor = MISC_DYNAMIC_MINOR,
+};
+
+static void s390_phmac_exit(void)
+{
+ struct phmac_alg *phmac;
+ int i;
+
+ if (phmac_crypto_engine) {
+ crypto_engine_stop(phmac_crypto_engine);
+ crypto_engine_exit(phmac_crypto_engine);
+ }
+
+ for (i = ARRAY_SIZE(phmac_algs) - 1; i >= 0; i--) {
+ phmac = &phmac_algs[i];
+ if (phmac->registered)
+ crypto_engine_unregister_ahash(&phmac->alg);
+ }
+
+ misc_deregister(&phmac_dev);
+}
+
+static int __init s390_phmac_init(void)
+{
+ struct phmac_alg *phmac;
+ int i, rc;
+
+ /* for selftest cpacf klmd subfunction is needed */
+ if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_256))
+ return -ENODEV;
+ if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_512))
+ return -ENODEV;
+
+ /* register a simple phmac pseudo misc device */
+ rc = misc_register(&phmac_dev);
+ if (rc)
+ return rc;
+
+ /* with this pseudo device alloc and start a crypto engine */
+ phmac_crypto_engine =
+ crypto_engine_alloc_init_and_set(phmac_dev.this_device,
+ true, false, MAX_QLEN);
+ if (!phmac_crypto_engine) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ rc = crypto_engine_start(phmac_crypto_engine);
+ if (rc) {
+ crypto_engine_exit(phmac_crypto_engine);
+ phmac_crypto_engine = NULL;
+ goto out_err;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(phmac_algs); i++) {
+ phmac = &phmac_algs[i];
+ if (!cpacf_query_func(CPACF_KMAC, phmac->fc))
+ continue;
+ rc = crypto_engine_register_ahash(&phmac->alg);
+ if (rc)
+ goto out_err;
+ phmac->registered = true;
+ pr_debug("%s registered\n", phmac->alg.base.halg.base.cra_name);
+ }
+
+ return 0;
+
+out_err:
+ s390_phmac_exit();
+ return rc;
+}
+
+module_init(s390_phmac_init);
+module_exit(s390_phmac_exit);
+
+MODULE_ALIAS_CRYPTO("phmac(sha224)");
+MODULE_ALIAS_CRYPTO("phmac(sha256)");
+MODULE_ALIAS_CRYPTO("phmac(sha384)");
+MODULE_ALIAS_CRYPTO("phmac(sha512)");
+
+MODULE_DESCRIPTION("S390 HMAC driver for protected keys");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index 2bb22db54c31..cadb4b13622a 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -10,27 +10,36 @@
#ifndef _CRYPTO_ARCH_S390_SHA_H
#define _CRYPTO_ARCH_S390_SHA_H
-#include <linux/crypto.h>
-#include <crypto/sha1.h>
#include <crypto/sha2.h>
#include <crypto/sha3.h>
+#include <linux/types.h>
/* must be big enough for the largest SHA variant */
-#define SHA3_STATE_SIZE 200
#define CPACF_MAX_PARMBLOCK_SIZE SHA3_STATE_SIZE
#define SHA_MAX_BLOCK_SIZE SHA3_224_BLOCK_SIZE
+#define S390_SHA_CTX_SIZE sizeof(struct s390_sha_ctx)
struct s390_sha_ctx {
u64 count; /* message length in bytes */
- u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)];
- u8 buf[SHA_MAX_BLOCK_SIZE];
+ union {
+ u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)];
+ struct {
+ u64 state[SHA512_DIGEST_SIZE / sizeof(u64)];
+ u64 count_hi;
+ } sha512;
+ struct {
+ __le64 state[SHA3_STATE_SIZE / sizeof(u64)];
+ } sha3;
+ };
int func; /* KIMD function to use */
- int first_message_part;
+ bool first_message_part;
};
struct shash_desc;
-int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len);
-int s390_sha_final(struct shash_desc *desc, u8 *out);
+int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
+ u8 *out);
#endif
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
deleted file mode 100644
index bc3a22704e09..000000000000
--- a/arch/s390/crypto/sha1_s390.c
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA1 Secure Hash Algorithm.
- *
- * Derived from cryptoapi implementation, adapted for in-place
- * scatterlist interface. Originally based on the public domain
- * implementation written by Steve Reid.
- *
- * s390 Version:
- * Copyright IBM Corp. 2003, 2007
- * Author(s): Thomas Spatzier
- * Jan Glauber (jan.glauber@de.ibm.com)
- *
- * Derived from "crypto/sha1_generic.c"
- * Copyright (c) Alan Smithee.
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <crypto/sha1.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int s390_sha1_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA1_H0;
- sctx->state[1] = SHA1_H1;
- sctx->state[2] = SHA1_H2;
- sctx->state[3] = SHA1_H3;
- sctx->state[4] = SHA1_H4;
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA_1;
-
- return 0;
-}
-
-static int s390_sha1_export(struct shash_desc *desc, void *out)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha1_state *octx = out;
-
- octx->count = sctx->count;
- memcpy(octx->state, sctx->state, sizeof(octx->state));
- memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer));
- return 0;
-}
-
-static int s390_sha1_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha1_state *ictx = in;
-
- sctx->count = ictx->count;
- memcpy(sctx->state, ictx->state, sizeof(ictx->state));
- memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer));
- sctx->func = CPACF_KIMD_SHA_1;
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = SHA1_DIGEST_SIZE,
- .init = s390_sha1_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = s390_sha1_export,
- .import = s390_sha1_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha1_state),
- .base = {
- .cra_name = "sha1",
- .cra_driver_name= "sha1-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init sha1_s390_init(void)
-{
- if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
- return -ENODEV;
- return crypto_register_shash(&alg);
-}
-
-static void __exit sha1_s390_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init);
-module_exit(sha1_s390_fini);
-
-MODULE_ALIAS_CRYPTO("sha1");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
deleted file mode 100644
index 6f1ccdf93d3e..000000000000
--- a/arch/s390/crypto/sha256_s390.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
- *
- * s390 Version:
- * Copyright IBM Corp. 2005, 2011
- * Author(s): Jan Glauber (jang@de.ibm.com)
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <crypto/sha2.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int s390_sha256_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA256_H0;
- sctx->state[1] = SHA256_H1;
- sctx->state[2] = SHA256_H2;
- sctx->state[3] = SHA256_H3;
- sctx->state[4] = SHA256_H4;
- sctx->state[5] = SHA256_H5;
- sctx->state[6] = SHA256_H6;
- sctx->state[7] = SHA256_H7;
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA_256;
-
- return 0;
-}
-
-static int sha256_export(struct shash_desc *desc, void *out)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha256_state *octx = out;
-
- octx->count = sctx->count;
- memcpy(octx->state, sctx->state, sizeof(octx->state));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
- return 0;
-}
-
-static int sha256_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha256_state *ictx = in;
-
- sctx->count = ictx->count;
- memcpy(sctx->state, ictx->state, sizeof(ictx->state));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = CPACF_KIMD_SHA_256;
- return 0;
-}
-
-static struct shash_alg sha256_alg = {
- .digestsize = SHA256_DIGEST_SIZE,
- .init = s390_sha256_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha256_export,
- .import = sha256_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name= "sha256-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int s390_sha224_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA224_H0;
- sctx->state[1] = SHA224_H1;
- sctx->state[2] = SHA224_H2;
- sctx->state[3] = SHA224_H3;
- sctx->state[4] = SHA224_H4;
- sctx->state[5] = SHA224_H5;
- sctx->state[6] = SHA224_H6;
- sctx->state[7] = SHA224_H7;
- sctx->count = 0;
- sctx->func = CPACF_KIMD_SHA_256;
-
- return 0;
-}
-
-static struct shash_alg sha224_alg = {
- .digestsize = SHA224_DIGEST_SIZE,
- .init = s390_sha224_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha256_export,
- .import = sha256_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha256_state),
- .base = {
- .cra_name = "sha224",
- .cra_driver_name= "sha224-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA224_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init sha256_s390_init(void)
-{
- int ret;
-
- if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
- return -ENODEV;
- ret = crypto_register_shash(&sha256_alg);
- if (ret < 0)
- goto out;
- ret = crypto_register_shash(&sha224_alg);
- if (ret < 0)
- crypto_unregister_shash(&sha256_alg);
-out:
- return ret;
-}
-
-static void __exit sha256_s390_fini(void)
-{
- crypto_unregister_shash(&sha224_alg);
- crypto_unregister_shash(&sha256_alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init);
-module_exit(sha256_s390_fini);
-
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
index a84ef692f572..03bb4f4bab70 100644
--- a/arch/s390/crypto/sha3_256_s390.c
+++ b/arch/s390/crypto/sha3_256_s390.c
@@ -8,12 +8,14 @@
* Copyright IBM Corp. 2019
* Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
*/
+#include <asm/cpacf.h>
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
#include <crypto/sha3.h>
-#include <asm/cpacf.h>
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
#include "sha.h"
@@ -21,11 +23,11 @@ static int sha3_256_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- if (!test_facility(86)) /* msa 12 */
+ sctx->first_message_part = test_facility(86);
+ if (!sctx->first_message_part)
memset(sctx->state, 0, sizeof(sctx->state));
sctx->count = 0;
sctx->func = CPACF_KIMD_SHA3_256;
- sctx->first_message_part = 1;
return 0;
}
@@ -33,25 +35,34 @@ static int sha3_256_init(struct shash_desc *desc)
static int sha3_256_export(struct shash_desc *desc, void *out)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha3_state *octx = out;
-
- octx->rsiz = sctx->count;
- memcpy(octx->st, sctx->state, sizeof(octx->st));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
- octx->partial = sctx->first_message_part;
-
+ union {
+ u8 *u8;
+ u64 *u64;
+ } p = { .u8 = out };
+ int i;
+
+ if (sctx->first_message_part) {
+ memset(out, 0, SHA3_STATE_SIZE);
+ return 0;
+ }
+ for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
+ put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++);
return 0;
}
static int sha3_256_import(struct shash_desc *desc, const void *in)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
-
- sctx->count = ictx->rsiz;
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->first_message_part = ictx->partial;
+ union {
+ const u8 *u8;
+ const u64 *u64;
+ } p = { .u8 = in };
+ int i;
+
+ for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
+ sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++));
+ sctx->count = 0;
+ sctx->first_message_part = 0;
sctx->func = CPACF_KIMD_SHA3_256;
return 0;
@@ -60,30 +71,26 @@ static int sha3_256_import(struct shash_desc *desc, const void *in)
static int sha3_224_import(struct shash_desc *desc, const void *in)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
- sctx->count = ictx->rsiz;
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->first_message_part = ictx->partial;
+ sha3_256_import(desc, in);
sctx->func = CPACF_KIMD_SHA3_224;
-
return 0;
}
static struct shash_alg sha3_256_alg = {
.digestsize = SHA3_256_DIGEST_SIZE, /* = 32 */
.init = sha3_256_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha3_256_export,
.import = sha3_256_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
+ .descsize = S390_SHA_CTX_SIZE,
+ .statesize = SHA3_STATE_SIZE,
.base = {
.cra_name = "sha3-256",
.cra_driver_name = "sha3-256-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA3_256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -93,28 +100,25 @@ static int sha3_224_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- if (!test_facility(86)) /* msa 12 */
- memset(sctx->state, 0, sizeof(sctx->state));
- sctx->count = 0;
+ sha3_256_init(desc);
sctx->func = CPACF_KIMD_SHA3_224;
- sctx->first_message_part = 1;
-
return 0;
}
static struct shash_alg sha3_224_alg = {
.digestsize = SHA3_224_DIGEST_SIZE,
.init = sha3_224_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha3_256_export, /* same as for 256 */
.import = sha3_224_import, /* function code different! */
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
+ .descsize = S390_SHA_CTX_SIZE,
+ .statesize = SHA3_STATE_SIZE,
.base = {
.cra_name = "sha3-224",
.cra_driver_name = "sha3-224-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA3_224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
index 07528fc98ff7..a5c9690eecb1 100644
--- a/arch/s390/crypto/sha3_512_s390.c
+++ b/arch/s390/crypto/sha3_512_s390.c
@@ -7,12 +7,14 @@
* Copyright IBM Corp. 2019
* Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
*/
+#include <asm/cpacf.h>
#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
#include <crypto/sha3.h>
-#include <asm/cpacf.h>
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
#include "sha.h"
@@ -20,11 +22,11 @@ static int sha3_512_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- if (!test_facility(86)) /* msa 12 */
+ sctx->first_message_part = test_facility(86);
+ if (!sctx->first_message_part)
memset(sctx->state, 0, sizeof(sctx->state));
sctx->count = 0;
sctx->func = CPACF_KIMD_SHA3_512;
- sctx->first_message_part = 1;
return 0;
}
@@ -32,30 +34,34 @@ static int sha3_512_init(struct shash_desc *desc)
static int sha3_512_export(struct shash_desc *desc, void *out)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha3_state *octx = out;
-
- octx->rsiz = sctx->count;
- octx->rsizw = sctx->count >> 32;
-
- memcpy(octx->st, sctx->state, sizeof(octx->st));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
- octx->partial = sctx->first_message_part;
-
+ union {
+ u8 *u8;
+ u64 *u64;
+ } p = { .u8 = out };
+ int i;
+
+ if (sctx->first_message_part) {
+ memset(out, 0, SHA3_STATE_SIZE);
+ return 0;
+ }
+ for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
+ put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++);
return 0;
}
static int sha3_512_import(struct shash_desc *desc, const void *in)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
-
- if (unlikely(ictx->rsizw))
- return -ERANGE;
- sctx->count = ictx->rsiz;
-
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->first_message_part = ictx->partial;
+ union {
+ const u8 *u8;
+ const u64 *u64;
+ } p = { .u8 = in };
+ int i;
+
+ for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
+ sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++));
+ sctx->count = 0;
+ sctx->first_message_part = 0;
sctx->func = CPACF_KIMD_SHA3_512;
return 0;
@@ -64,33 +70,26 @@ static int sha3_512_import(struct shash_desc *desc, const void *in)
static int sha3_384_import(struct shash_desc *desc, const void *in)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha3_state *ictx = in;
-
- if (unlikely(ictx->rsizw))
- return -ERANGE;
- sctx->count = ictx->rsiz;
- memcpy(sctx->state, ictx->st, sizeof(ictx->st));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->first_message_part = ictx->partial;
+ sha3_512_import(desc, in);
sctx->func = CPACF_KIMD_SHA3_384;
-
return 0;
}
static struct shash_alg sha3_512_alg = {
.digestsize = SHA3_512_DIGEST_SIZE,
.init = sha3_512_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha3_512_export,
.import = sha3_512_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
+ .descsize = S390_SHA_CTX_SIZE,
+ .statesize = SHA3_STATE_SIZE,
.base = {
.cra_name = "sha3-512",
.cra_driver_name = "sha3-512-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA3_512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -102,28 +101,25 @@ static int sha3_384_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- if (!test_facility(86)) /* msa 12 */
- memset(sctx->state, 0, sizeof(sctx->state));
- sctx->count = 0;
+ sha3_512_init(desc);
sctx->func = CPACF_KIMD_SHA3_384;
- sctx->first_message_part = 1;
-
return 0;
}
static struct shash_alg sha3_384_alg = {
.digestsize = SHA3_384_DIGEST_SIZE,
.init = sha3_384_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
+ .update = s390_sha_update_blocks,
+ .finup = s390_sha_finup,
.export = sha3_512_export, /* same as for 512 */
.import = sha3_384_import, /* function code different! */
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha3_state),
+ .descsize = S390_SHA_CTX_SIZE,
+ .statesize = SHA3_STATE_SIZE,
.base = {
.cra_name = "sha3-384",
.cra_driver_name = "sha3-384-s390",
.cra_priority = 300,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
.cra_blocksize = SHA3_384_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_sha_ctx),
.cra_module = THIS_MODULE,
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
deleted file mode 100644
index 04f11c407763..000000000000
--- a/arch/s390/crypto/sha512_s390.c
+++ /dev/null
@@ -1,149 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA512 and SHA38 Secure Hash Algorithm.
- *
- * Copyright IBM Corp. 2007
- * Author(s): Jan Glauber (jang@de.ibm.com)
- */
-#include <crypto/internal/hash.h>
-#include <crypto/sha2.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int sha512_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
-
- *(__u64 *)&ctx->state[0] = SHA512_H0;
- *(__u64 *)&ctx->state[2] = SHA512_H1;
- *(__u64 *)&ctx->state[4] = SHA512_H2;
- *(__u64 *)&ctx->state[6] = SHA512_H3;
- *(__u64 *)&ctx->state[8] = SHA512_H4;
- *(__u64 *)&ctx->state[10] = SHA512_H5;
- *(__u64 *)&ctx->state[12] = SHA512_H6;
- *(__u64 *)&ctx->state[14] = SHA512_H7;
- ctx->count = 0;
- ctx->func = CPACF_KIMD_SHA_512;
-
- return 0;
-}
-
-static int sha512_export(struct shash_desc *desc, void *out)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- struct sha512_state *octx = out;
-
- octx->count[0] = sctx->count;
- octx->count[1] = 0;
- memcpy(octx->state, sctx->state, sizeof(octx->state));
- memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
- return 0;
-}
-
-static int sha512_import(struct shash_desc *desc, const void *in)
-{
- struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
- const struct sha512_state *ictx = in;
-
- if (unlikely(ictx->count[1]))
- return -ERANGE;
- sctx->count = ictx->count[0];
-
- memcpy(sctx->state, ictx->state, sizeof(ictx->state));
- memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = CPACF_KIMD_SHA_512;
- return 0;
-}
-
-static struct shash_alg sha512_alg = {
- .digestsize = SHA512_DIGEST_SIZE,
- .init = sha512_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha512_export,
- .import = sha512_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha512_state),
- .base = {
- .cra_name = "sha512",
- .cra_driver_name= "sha512-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-MODULE_ALIAS_CRYPTO("sha512");
-
-static int sha384_init(struct shash_desc *desc)
-{
- struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
-
- *(__u64 *)&ctx->state[0] = SHA384_H0;
- *(__u64 *)&ctx->state[2] = SHA384_H1;
- *(__u64 *)&ctx->state[4] = SHA384_H2;
- *(__u64 *)&ctx->state[6] = SHA384_H3;
- *(__u64 *)&ctx->state[8] = SHA384_H4;
- *(__u64 *)&ctx->state[10] = SHA384_H5;
- *(__u64 *)&ctx->state[12] = SHA384_H6;
- *(__u64 *)&ctx->state[14] = SHA384_H7;
- ctx->count = 0;
- ctx->func = CPACF_KIMD_SHA_512;
-
- return 0;
-}
-
-static struct shash_alg sha384_alg = {
- .digestsize = SHA384_DIGEST_SIZE,
- .init = sha384_init,
- .update = s390_sha_update,
- .final = s390_sha_final,
- .export = sha512_export,
- .import = sha512_import,
- .descsize = sizeof(struct s390_sha_ctx),
- .statesize = sizeof(struct sha512_state),
- .base = {
- .cra_name = "sha384",
- .cra_driver_name= "sha384-s390",
- .cra_priority = 300,
- .cra_blocksize = SHA384_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_sha_ctx),
- .cra_module = THIS_MODULE,
- }
-};
-
-MODULE_ALIAS_CRYPTO("sha384");
-
-static int __init init(void)
-{
- int ret;
-
- if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
- return -ENODEV;
- if ((ret = crypto_register_shash(&sha512_alg)) < 0)
- goto out;
- if ((ret = crypto_register_shash(&sha384_alg)) < 0)
- crypto_unregister_shash(&sha512_alg);
-out:
- return ret;
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_shash(&sha512_alg);
- crypto_unregister_shash(&sha384_alg);
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index 961d7d522af1..d6f839618794 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -9,54 +9,38 @@
*/
#include <crypto/internal/hash.h>
+#include <linux/export.h>
#include <linux/module.h>
#include <asm/cpacf.h>
#include "sha.h"
-int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
+int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
unsigned int bsize = crypto_shash_blocksize(desc->tfm);
- unsigned int index, n;
+ struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int n;
int fc;
- /* how much is already in the buffer? */
- index = ctx->count % bsize;
- ctx->count += len;
-
- if ((index + len) < bsize)
- goto store;
-
fc = ctx->func;
if (ctx->first_message_part)
- fc |= test_facility(86) ? CPACF_KIMD_NIP : 0;
-
- /* process one stored block */
- if (index) {
- memcpy(ctx->buf + index, data, bsize - index);
- cpacf_kimd(fc, ctx->state, ctx->buf, bsize);
- ctx->first_message_part = 0;
- fc &= ~CPACF_KIMD_NIP;
- data += bsize - index;
- len -= bsize - index;
- index = 0;
- }
+ fc |= CPACF_KIMD_NIP;
/* process as many blocks as possible */
- if (len >= bsize) {
- n = (len / bsize) * bsize;
- cpacf_kimd(fc, ctx->state, data, n);
- ctx->first_message_part = 0;
- data += n;
- len -= n;
+ n = (len / bsize) * bsize;
+ ctx->count += n;
+ switch (ctx->func) {
+ case CPACF_KLMD_SHA_512:
+ case CPACF_KLMD_SHA3_384:
+ if (ctx->count < n)
+ ctx->sha512.count_hi++;
+ break;
}
-store:
- if (len)
- memcpy(ctx->buf + index , data, len);
-
- return 0;
+ cpacf_kimd(fc, ctx->state, data, n);
+ ctx->first_message_part = 0;
+ return len - n;
}
-EXPORT_SYMBOL_GPL(s390_sha_update);
+EXPORT_SYMBOL_GPL(s390_sha_update_blocks);
static int s390_crypto_shash_parmsize(int func)
{
@@ -77,15 +61,15 @@ static int s390_crypto_shash_parmsize(int func)
}
}
-int s390_sha_final(struct shash_desc *desc, u8 *out)
+int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
+ u8 *out)
{
struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
- unsigned int bsize = crypto_shash_blocksize(desc->tfm);
- u64 bits;
- unsigned int n;
int mbl_offset, fc;
+ u64 bits;
+
+ ctx->count += len;
- n = ctx->count % bsize;
bits = ctx->count * 8;
mbl_offset = s390_crypto_shash_parmsize(ctx->func);
if (mbl_offset < 0)
@@ -95,17 +79,16 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
/* set total msg bit length (mbl) in CPACF parmblock */
switch (ctx->func) {
- case CPACF_KLMD_SHA_1:
- case CPACF_KLMD_SHA_256:
- memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
- break;
case CPACF_KLMD_SHA_512:
- /*
- * the SHA512 parmblock has a 128-bit mbl field, clear
- * high-order u64 field, copy bits to low-order u64 field
- */
- memset(ctx->state + mbl_offset, 0x00, sizeof(bits));
+ /* The SHA512 parmblock has a 128-bit mbl field. */
+ if (ctx->count < len)
+ ctx->sha512.count_hi++;
+ ctx->sha512.count_hi <<= 3;
+ ctx->sha512.count_hi |= ctx->count >> 61;
mbl_offset += sizeof(u64) / sizeof(u32);
+ fallthrough;
+ case CPACF_KLMD_SHA_1:
+ case CPACF_KLMD_SHA_256:
memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
break;
case CPACF_KLMD_SHA3_224:
@@ -121,16 +104,14 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
fc |= test_facility(86) ? CPACF_KLMD_DUFOP : 0;
if (ctx->first_message_part)
fc |= CPACF_KLMD_NIP;
- cpacf_klmd(fc, ctx->state, ctx->buf, n);
+ cpacf_klmd(fc, ctx->state, src, len);
/* copy digest to out */
memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
- /* wipe context */
- memset(ctx, 0, sizeof *ctx);
return 0;
}
-EXPORT_SYMBOL_GPL(s390_sha_final);
+EXPORT_SYMBOL_GPL(s390_sha_finup);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("s390 SHA cipher common functions");
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index 83ebf54cca6b..4dc2e068e0ff 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -48,7 +48,7 @@ void hypfs_sprp_exit(void);
int __hypfs_fs_init(void);
-static inline int hypfs_fs_init(void)
+static __always_inline int hypfs_fs_init(void)
{
if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
return __hypfs_fs_init();
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index 5d9effb0867c..41a0d2066fa0 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -6,6 +6,7 @@
* Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
+#include <linux/security.h>
#include <linux/slab.h>
#include "hypfs.h"
@@ -66,23 +67,27 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
long rc;
mutex_lock(&df->lock);
- if (df->unlocked_ioctl)
- rc = df->unlocked_ioctl(file, cmd, arg);
- else
- rc = -ENOTTY;
+ rc = df->unlocked_ioctl(file, cmd, arg);
mutex_unlock(&df->lock);
return rc;
}
-static const struct file_operations dbfs_ops = {
+static const struct file_operations dbfs_ops_ioctl = {
.read = dbfs_read,
.unlocked_ioctl = dbfs_ioctl,
};
+static const struct file_operations dbfs_ops = {
+ .read = dbfs_read,
+};
+
void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
{
- df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
- &dbfs_ops);
+ const struct file_operations *fops = &dbfs_ops;
+
+ if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS))
+ fops = &dbfs_ops_ioctl;
+ df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops);
mutex_init(&df->lock);
}
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
index 7090eff27fef..b5218135b8fe 100644
--- a/arch/s390/hypfs/hypfs_diag.h
+++ b/arch/s390/hypfs/hypfs_diag.h
@@ -19,7 +19,7 @@ int diag204_store(void *buf, int pages);
int __hypfs_diag_fs_init(void);
void __hypfs_diag_fs_exit(void);
-static inline int hypfs_diag_fs_init(void)
+static __always_inline int hypfs_diag_fs_init(void)
{
if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
return __hypfs_diag_fs_init();
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index 4131f0daa5ea..61220e717af0 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -9,6 +9,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/hypfs.h>
#include "hypfs.h"
@@ -107,7 +108,7 @@ static struct hypfs_dbfs_file dbfs_file_0c = {
*/
int __init hypfs_diag0c_init(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 0;
hypfs_dbfs_create_file(&dbfs_file_0c);
return 0;
@@ -118,7 +119,7 @@ int __init hypfs_diag0c_init(void)
*/
void hypfs_diag0c_exit(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return;
hypfs_dbfs_remove_file(&dbfs_file_0c);
}
diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c
index 00a6d370a280..ede951dc0085 100644
--- a/arch/s390/hypfs/hypfs_diag_fs.c
+++ b/arch/s390/hypfs/hypfs_diag_fs.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include "hypfs_diag.h"
@@ -208,6 +209,8 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(),
cpu_info));
cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+ if (IS_ERR(cpu_dir))
+ return PTR_ERR(cpu_dir);
rc = hypfs_create_u64(cpu_dir, "mgmtime",
cpu_info__acc_time(diag204_get_info_type(), cpu_info) -
cpu_info__lp_time(diag204_get_info_type(), cpu_info));
@@ -382,7 +385,7 @@ static void diag224_delete_name_table(void)
int __init __hypfs_diag_fs_init(void)
{
- if (MACHINE_IS_LPAR)
+ if (machine_is_lpar())
return diag224_get_name_table();
return 0;
}
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index 3db40ad853e0..4db2895e4da3 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <asm/extable.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/timex.h>
@@ -121,7 +122,7 @@ static struct hypfs_dbfs_file dbfs_file_2fc = {
int hypfs_vm_init(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 0;
if (diag2fc(0, all_guests, NULL) > 0)
diag2fc_guest_query = all_guests;
@@ -135,7 +136,7 @@ int hypfs_vm_init(void)
void hypfs_vm_exit(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return;
hypfs_dbfs_remove_file(&dbfs_file_2fc);
}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index d428635abf08..96409573c75d 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -24,6 +24,7 @@
#include <linux/kobject.h>
#include <linux/seq_file.h>
#include <linux/uio.h>
+#include <asm/machine.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
@@ -184,7 +185,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
hypfs_delete_tree(sb->s_root);
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
rc = hypfs_vm_create_files(sb->s_root);
else
rc = hypfs_diag_create_files(sb->s_root);
@@ -273,7 +274,7 @@ static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_root = root_dentry = d_make_root(root_inode);
if (!root_dentry)
return -ENOMEM;
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
rc = hypfs_vm_create_files(root_dentry);
else
rc = hypfs_diag_create_files(root_dentry);
@@ -341,7 +342,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
struct inode *inode;
inode_lock(d_inode(parent));
- dentry = lookup_one_len(name, parent, strlen(name));
+ dentry = lookup_noperm(&QSTR(name), parent);
if (IS_ERR(dentry)) {
dentry = ERR_PTR(-ENOMEM);
goto fail;
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
index 004d17ea05cf..317c07c09ae4 100644
--- a/arch/s390/include/asm/abs_lowcore.h
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -25,11 +25,4 @@ static inline void put_abs_lowcore(struct lowcore *lc)
put_cpu();
}
-extern int relocate_lowcore;
-
-static inline int have_relocated_lowcore(void)
-{
- return relocate_lowcore;
-}
-
#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index 73e781b56bfe..1c56480def9e 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -32,8 +32,8 @@
#define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE)
#define ALT_TYPE_FACILITY 0
-#define ALT_TYPE_SPEC 1
-#define ALT_TYPE_LOWCORE 2
+#define ALT_TYPE_FEATURE 1
+#define ALT_TYPE_SPEC 2
#define ALT_DATA_SHIFT 0
#define ALT_TYPE_SHIFT 20
@@ -43,14 +43,15 @@
ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \
(facility) << ALT_DATA_SHIFT)
+#define ALT_FEATURE(feature) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
+ ALT_TYPE_FEATURE << ALT_TYPE_SHIFT | \
+ (feature) << ALT_DATA_SHIFT)
+
#define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \
ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \
(facility) << ALT_DATA_SHIFT)
-#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
- ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT)
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/stddef.h>
@@ -182,7 +183,7 @@ static inline void apply_alternatives(struct alt_instr *start, struct alt_instr
/* Use this macro if clobbers are needed without inputs. */
#define ASM_NO_INPUT_CLOBBER(clobber...) : clobber
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
/*
* Issue one struct alt_instr descriptor entry (need to put it into
@@ -232,6 +233,6 @@ static inline void apply_alternatives(struct alt_instr *start, struct alt_instr
.popsection
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_ALTERNATIVE_H */
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 395b02d6a133..352108727d7e 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -103,7 +103,7 @@ struct ap_tapq_hwinfo {
unsigned int accel : 1; /* A */
unsigned int ep11 : 1; /* X */
unsigned int apxa : 1; /* APXA */
- unsigned int : 1;
+ unsigned int slcf : 1; /* Cmd filtering avail. */
unsigned int class : 8;
unsigned int bs : 2; /* SE bind/assoc */
unsigned int : 14;
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index a92ebbc7aa7a..99b2902c10fd 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -9,6 +9,7 @@
#define _ASM_S390_APPLDATA_H
#include <linux/io.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#define APPLDATA_START_INTERVAL_REC 0x80
@@ -48,7 +49,7 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list,
{
int ry;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -EOPNOTSUPP;
parm_list->diag = 0xdc;
parm_list->function = fn;
diff --git a/arch/s390/include/asm/asce.h b/arch/s390/include/asm/asce.h
new file mode 100644
index 000000000000..f6dfaaba735a
--- /dev/null
+++ b/arch/s390/include/asm/asce.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ASCE_H
+#define _ASM_S390_ASCE_H
+
+#include <linux/thread_info.h>
+#include <linux/irqflags.h>
+#include <asm/lowcore.h>
+#include <asm/ctlreg.h>
+
+static inline bool enable_sacf_uaccess(void)
+{
+ unsigned long flags;
+
+ if (test_thread_flag(TIF_ASCE_PRIMARY))
+ return true;
+ local_irq_save(flags);
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ set_thread_flag(TIF_ASCE_PRIMARY);
+ local_irq_restore(flags);
+ return false;
+}
+
+static inline void disable_sacf_uaccess(bool previous)
+{
+ unsigned long flags;
+
+ if (previous)
+ return;
+ local_irq_save(flags);
+ local_ctl_load(1, &get_lowcore()->user_asce);
+ clear_thread_flag(TIF_ASCE_PRIMARY);
+ local_irq_restore(flags);
+}
+
+#endif /* _ASM_S390_ASCE_H */
diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h
index 11f615eb0066..1cfffad9eea0 100644
--- a/arch/s390/include/asm/asm-const.h
+++ b/arch/s390/include/asm/asm-const.h
@@ -2,7 +2,7 @@
#ifndef _ASM_S390_ASM_CONST_H
#define _ASM_S390_ASM_CONST_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define stringify_in_c(...) __VA_ARGS__
#else
/* This version of stringify will deal with commas... */
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index 4a6b0a8b6412..d23ea0c94e4e 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -9,11 +9,13 @@
#define EX_TYPE_NONE 0
#define EX_TYPE_FIXUP 1
#define EX_TYPE_BPF 2
-#define EX_TYPE_UA_STORE 3
-#define EX_TYPE_UA_LOAD_MEM 4
+#define EX_TYPE_UA_FAULT 3
#define EX_TYPE_UA_LOAD_REG 5
#define EX_TYPE_UA_LOAD_REGPAIR 6
#define EX_TYPE_ZEROPAD 7
+#define EX_TYPE_FPC 8
+#define EX_TYPE_UA_MVCOS_TO 9
+#define EX_TYPE_UA_MVCOS_FROM 10
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(3, 0)
@@ -69,11 +71,8 @@
#define EX_TABLE_AMODE31(_fault, _target) \
__EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0)
-#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \
- __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0)
-
-#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \
- __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len)
+#define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0)
#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
@@ -84,4 +83,13 @@
#define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0)
+#define EX_TABLE_FPC(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0)
+
+#define EX_TABLE_UA_MVCOS_TO(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_TO, __stringify(%%r0), __stringify(%%r0), 0)
+
+#define EX_TABLE_UA_MVCOS_FROM(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_FROM, __stringify(%%r0), __stringify(%%r0), 0)
+
#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h
index ec011b94af2a..e9062b01e2a2 100644
--- a/arch/s390/include/asm/asm.h
+++ b/arch/s390/include/asm/asm.h
@@ -28,7 +28,7 @@
* [var] also contains the program mask. CC_TRANSFORM() moves the condition
* code to the two least significant bits and sets all other bits to zero.
*/
-#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_GCC_ASM_FLAG_OUTPUT_BROKEN))
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN))
#define __HAVE_ASM_FLAG_OUTPUTS__
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 585678bbcd7a..21c26d842832 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -163,10 +163,10 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#undef __ATOMIC64_OPS
-#define __atomic_add_const(val, ptr) __atomic_add(val, ptr)
-#define __atomic_add_const_barrier(val, ptr) __atomic_add(val, ptr)
-#define __atomic64_add_const(val, ptr) __atomic64_add(val, ptr)
-#define __atomic64_add_const_barrier(val, ptr) __atomic64_add(val, ptr)
+#define __atomic_add_const(val, ptr) ((void)__atomic_add(val, ptr))
+#define __atomic_add_const_barrier(val, ptr) ((void)__atomic_add(val, ptr))
+#define __atomic64_add_const(val, ptr) ((void)__atomic64_add(val, ptr))
+#define __atomic64_add_const_barrier(val, ptr) ((void)__atomic64_add(val, ptr))
#endif /* MARCH_HAS_Z196_FEATURES */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 15aa64e3020e..a5ca0a947691 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -53,14 +53,18 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig
unsigned long mask;
int cc;
- if (__builtin_constant_p(nr)) {
+ /*
+ * With CONFIG_PROFILE_ALL_BRANCHES enabled gcc fails to
+ * handle __builtin_constant_p() in some cases.
+ */
+ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && __builtin_constant_p(nr)) {
addr = (const volatile unsigned char *)ptr;
addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE;
mask = 1UL << (nr & (BITS_PER_BYTE - 1));
asm volatile(
" tm %[addr],%[mask]\n"
: "=@cc" (cc)
- : [addr] "R" (*addr), [mask] "I" (mask)
+ : [addr] "Q" (*addr), [mask] "I" (mask)
);
return cc == 3;
}
diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h
index f7eed27b3220..f55f8227058e 100644
--- a/arch/s390/include/asm/boot_data.h
+++ b/arch/s390/include/asm/boot_data.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_BOOT_DATA_H
+#include <linux/string.h>
#include <asm/setup.h>
#include <asm/ipl.h>
@@ -15,4 +16,54 @@ extern unsigned long ipl_cert_list_size;
extern unsigned long early_ipl_comp_list_addr;
extern unsigned long early_ipl_comp_list_size;
+extern char boot_rb[PAGE_SIZE * 2];
+extern bool boot_earlyprintk;
+extern size_t boot_rb_off;
+extern char bootdebug_filter[128];
+extern bool bootdebug;
+
+#define boot_rb_foreach(cb) \
+ do { \
+ size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1; \
+ size_t len; \
+ for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \
+ cb(boot_rb + off); \
+ for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \
+ cb(boot_rb + off); \
+ } while (0)
+
+/*
+ * bootdebug_filter is a comma separated list of strings,
+ * where each string can be a prefix of the message.
+ */
+static inline bool bootdebug_filter_match(const char *buf)
+{
+ char *p = bootdebug_filter, *s;
+ char *end;
+
+ if (!*p)
+ return true;
+
+ end = p + strlen(p);
+ while (p < end) {
+ p = skip_spaces(p);
+ s = memscan(p, ',', end - p);
+ if (!strncmp(p, buf, s - p))
+ return true;
+ p = s + 1;
+ }
+ return false;
+}
+
+static inline const char *skip_timestamp(const char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+ const char *p = memchr(buf, ']', strlen(buf));
+
+ if (p && p[1] == ' ')
+ return p + 2;
+#endif
+ return buf;
+}
+
#endif /* _ASM_S390_BOOT_DATA_H */
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 59ab1192e2d5..4bc5317fbb12 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -129,6 +129,10 @@
#define CPACF_KMAC_HMAC_SHA_256 0x71
#define CPACF_KMAC_HMAC_SHA_384 0x72
#define CPACF_KMAC_HMAC_SHA_512 0x73
+#define CPACF_KMAC_PHMAC_SHA_224 0x78
+#define CPACF_KMAC_PHMAC_SHA_256 0x79
+#define CPACF_KMAC_PHMAC_SHA_384 0x7a
+#define CPACF_KMAC_PHMAC_SHA_512 0x7b
/*
* Function codes for the PCKMO (PERFORM CRYPTOGRAPHIC KEY MANAGEMENT)
@@ -649,18 +653,30 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
* instruction
* @func: the function code passed to PCC; see CPACF_KM_xxx defines
* @param: address of parameter block; see POP for details on each func
+ *
+ * Returns the condition code, this is
+ * 0 - cc code 0 (normal completion)
+ * 1 - cc code 1 (protected key wkvp mismatch or src operand out of range)
+ * 2 - cc code 2 (something invalid, scalar multiply infinity, ...)
+ * Condition code 3 (partial completion) is handled within the asm code
+ * and never returned.
*/
-static inline void cpacf_pcc(unsigned long func, void *param)
+static inline int cpacf_pcc(unsigned long func, void *param)
{
+ int cc;
+
asm volatile(
" lgr 0,%[fc]\n"
" lgr 1,%[pba]\n"
"0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */
" brc 1,0b\n" /* handle partial completion */
- :
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
: [fc] "d" (func), [pba] "d" ((unsigned long)param),
[opc] "i" (CPACF_PCC)
- : "cc", "memory", "0", "1");
+ : CC_CLOBBER_LIST("memory", "0", "1"));
+
+ return CC_TRANSFORM(cc);
}
/**
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index 26c710cd3485..5672e3fab52b 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -9,7 +9,7 @@
#ifndef _ASM_S390_CPU_H
#define _ASM_S390_CPU_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/jump_label.h>
@@ -24,5 +24,5 @@ struct cpuid
DECLARE_STATIC_KEY_FALSE(cpu_has_bear);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/cpu_mf-insn.h b/arch/s390/include/asm/cpu_mf-insn.h
index a68b362e0964..941663939cc7 100644
--- a/arch/s390/include/asm/cpu_mf-insn.h
+++ b/arch/s390/include/asm/cpu_mf-insn.h
@@ -8,7 +8,7 @@
#ifndef _ASM_S390_CPU_MF_INSN_H
#define _ASM_S390_CPU_MF_INSN_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* Macro to generate the STCCTM instruction with a customized
* M3 field designating the counter set.
@@ -17,6 +17,6 @@
.insn rsy,0xeb0000000017,\r1,\m3 & 0xf,\db2
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index e1a279e0d6a6..1798fbd59068 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -171,7 +171,7 @@ static inline int qctri(struct cpumf_ctr_info *info)
{
int rc = -EINVAL;
- asm volatile (
+ asm_inline volatile (
"0: qctri %1\n"
"1: lhi %0,0\n"
"2:\n"
@@ -185,7 +185,7 @@ static inline int lcctl(u64 ctl)
{
int cc;
- asm volatile (
+ asm_inline volatile (
" lcctl %[ctl]\n"
CC_IPM(cc)
: CC_OUT(cc, cc)
@@ -200,7 +200,7 @@ static inline int __ecctr(u64 ctr, u64 *content)
u64 _content;
int cc;
- asm volatile (
+ asm_inline volatile (
" ecctr %[_content],%[ctr]\n"
CC_IPM(cc)
: CC_OUT(cc, cc), [_content] "=d" (_content)
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
index 931204613753..6c6a99660e78 100644
--- a/arch/s390/include/asm/cpufeature.h
+++ b/arch/s390/include/asm/cpufeature.h
@@ -9,10 +9,13 @@
#ifndef __ASM_S390_CPUFEATURE_H
#define __ASM_S390_CPUFEATURE_H
+#include <asm/facility.h>
+
enum {
S390_CPU_FEATURE_MSA,
S390_CPU_FEATURE_VXRS,
S390_CPU_FEATURE_UV,
+ S390_CPU_FEATURE_D288,
MAX_CPU_FEATURES
};
@@ -20,4 +23,16 @@ enum {
int cpu_have_feature(unsigned int nr);
+#define cpu_has_bear() test_facility(193)
+#define cpu_has_edat1() test_facility(8)
+#define cpu_has_edat2() test_facility(78)
+#define cpu_has_gs() test_facility(133)
+#define cpu_has_idte() test_facility(3)
+#define cpu_has_nx() test_facility(130)
+#define cpu_has_rdp() test_facility(194)
+#define cpu_has_seq_insn() test_facility(85)
+#define cpu_has_tlb_lc() test_facility(51)
+#define cpu_has_topology() test_facility(11)
+#define cpu_has_vx() test_facility(129)
+
#endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/arch/s390/include/asm/ctlreg.h b/arch/s390/include/asm/ctlreg.h
index e6527f51ad0b..e93cc240a1ed 100644
--- a/arch/s390/include/asm/ctlreg.h
+++ b/arch/s390/include/asm/ctlreg.h
@@ -80,7 +80,7 @@
#define CR14_EXTERNAL_DAMAGE_SUBMASK BIT(CR14_EXTERNAL_DAMAGE_SUBMASK_BIT)
#define CR14_WARNING_SUBMASK BIT(CR14_WARNING_SUBMASK_BIT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bug.h>
@@ -252,5 +252,5 @@ union ctlreg15 {
};
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_S390_CTLREG_H */
diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h
index d03a922c641e..f9529f7cf62c 100644
--- a/arch/s390/include/asm/current.h
+++ b/arch/s390/include/asm/current.h
@@ -11,9 +11,25 @@
#define _S390_CURRENT_H
#include <asm/lowcore.h>
+#include <asm/machine.h>
struct task_struct;
-#define current ((struct task_struct *const)get_lowcore()->current_task)
+static __always_inline struct task_struct *get_current(void)
+{
+ unsigned long ptr, lc_current;
+
+ lc_current = offsetof(struct lowcore, current_task);
+ asm_inline(
+ ALTERNATIVE(" lg %[ptr],%[offzero](%%r0)\n",
+ " lg %[ptr],%[offalt](%%r0)\n",
+ ALT_FEATURE(MFEATURE_LOWCORE))
+ : [ptr] "=d" (ptr)
+ : [offzero] "i" (lc_current),
+ [offalt] "i" (lc_current + LOWCORE_ALT_ADDRESS));
+ return (struct task_struct *)ptr;
+}
+
+#define current get_current()
#endif /* !(_S390_CURRENT_H) */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 5790630e31f0..8db8db3b1018 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -66,7 +66,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
end_addr = pfn_to_phys(start_pfn + num_pfn - 1);
diag_stat_inc(DIAG_STAT_X010);
- asm volatile(
+ asm_inline volatile(
"0: diag %0,%1,0x10\n"
"1: nopr %%r7\n"
EX_TABLE(0b, 1b)
diff --git a/arch/s390/include/asm/diag288.h b/arch/s390/include/asm/diag288.h
new file mode 100644
index 000000000000..5e1b43cea9d6
--- /dev/null
+++ b/arch/s390/include/asm/diag288.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_DIAG288_H
+#define _ASM_S390_DIAG288_H
+
+#include <asm/asm-extable.h>
+#include <asm/types.h>
+
+#define MIN_INTERVAL 15 /* Minimal time supported by diag288 */
+#define MAX_INTERVAL 3600 /* One hour should be enough - pure estimation */
+
+#define WDT_DEFAULT_TIMEOUT 30
+
+/* Function codes - init, change, cancel */
+#define WDT_FUNC_INIT 0
+#define WDT_FUNC_CHANGE 1
+#define WDT_FUNC_CANCEL 2
+#define WDT_FUNC_CONCEAL 0x80000000
+
+/* Action codes for LPAR watchdog */
+#define LPARWDT_RESTART 0
+
+static inline int __diag288(unsigned int func, unsigned int timeout,
+ unsigned long action, unsigned int len)
+{
+ union register_pair r1 = { .even = func, .odd = timeout, };
+ union register_pair r3 = { .even = action, .odd = len, };
+ int rc = -EINVAL;
+
+ asm volatile(
+ " diag %[r1],%[r3],0x288\n"
+ "0: lhi %[rc],0\n"
+ "1:"
+ EX_TABLE(0b, 1b)
+ : [rc] "+d" (rc)
+ : [r1] "d" (r1.pair), [r3] "d" (r3.pair)
+ : "cc", "memory");
+ return rc;
+}
+
+#endif /* _ASM_S390_DIAG288_H */
diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h
index 390906b8e386..e3ad6798d0cd 100644
--- a/arch/s390/include/asm/dwarf.h
+++ b/arch/s390/include/asm/dwarf.h
@@ -2,7 +2,7 @@
#ifndef _ASM_S390_DWARF_H
#define _ASM_S390_DWARF_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define CFI_STARTPROC .cfi_startproc
#define CFI_ENDPROC .cfi_endproc
@@ -33,6 +33,6 @@
.cfi_sections .eh_frame, .debug_frame
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_DWARF_H */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 8f2c23cc52b6..a03df312081e 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -158,9 +158,6 @@ enum {
#define ELF_DATA ELFDATA2MSB
#define ELF_ARCH EM_S390
-/* s390 specific phdr types */
-#define PT_S390_PGSTE 0x70000000
-
/*
* ELF register definitions..
*/
@@ -191,35 +188,6 @@ typedef s390_compat_regs compat_elf_gregset_t;
&& (x)->e_ident[EI_CLASS] == ELF_CLASS)
#define compat_start_thread start_thread31
-struct arch_elf_state {
- int rc;
-};
-
-#define INIT_ARCH_ELF_STATE { .rc = 0 }
-
-#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0)
-#ifdef CONFIG_PGSTE
-#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
-({ \
- struct arch_elf_state *_state = state; \
- if ((phdr)->p_type == PT_S390_PGSTE && \
- !page_table_allocate_pgste && \
- !test_thread_flag(TIF_PGSTE) && \
- !current->mm->context.alloc_pgste) { \
- set_thread_flag(TIF_PGSTE); \
- set_pt_regs_flag(task_pt_regs(current), \
- PIF_EXECVE_PGSTE_RESTART); \
- _state->rc = -EAGAIN; \
- } \
- _state->rc; \
-})
-#else
-#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \
-({ \
- (state)->rc; \
-})
-#endif
-
/* For SVR4/S390 the function pointer to be registered with `atexit` is
passed in R14. */
#define ELF_PLAT_INIT(_r, load_addr) \
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 35555c944630..979af986a8fe 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -59,4 +59,14 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
+static __always_inline bool arch_in_rcu_eqs(void)
+{
+ if (IS_ENABLED(CONFIG_KVM))
+ return current->flags & PF_VCPU;
+
+ return false;
+}
+
+#define arch_in_rcu_eqs arch_in_rcu_eqs
+
#endif
diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h
index e0a06060afdd..225ee89c3f5e 100644
--- a/arch/s390/include/asm/extmem.h
+++ b/arch/s390/include/asm/extmem.h
@@ -6,7 +6,7 @@
#ifndef _ASM_S390X_DCSS_H
#define _ASM_S390X_DCSS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* DCSS segment is defined as a contiguous range of pages using DEFSEG command.
diff --git a/arch/s390/include/asm/fpu-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h
index d296322be4bc..cc0468fdf2d0 100644
--- a/arch/s390/include/asm/fpu-insn-asm.h
+++ b/arch/s390/include/asm/fpu-insn-asm.h
@@ -16,7 +16,7 @@
#error only <asm/fpu-insn.h> can be included directly
#endif
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
/* Macros to generate vector instruction byte code */
@@ -750,5 +750,5 @@
MRXBOPC 0, 0x77, v1, v2, v3
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_S390_FPU_INSN_ASM_H */
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
index de510c9f6efa..135bb89c0a89 100644
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -9,7 +9,7 @@
#include <asm/fpu-insn-asm.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/instrumented.h>
#include <asm/asm-extable.h>
@@ -100,19 +100,12 @@ static __always_inline void fpu_lfpc(unsigned int *fpc)
*/
static inline void fpu_lfpc_safe(unsigned int *fpc)
{
- u32 tmp;
-
instrument_read(fpc, sizeof(*fpc));
asm_inline volatile(
- "0: lfpc %[fpc]\n"
- "1: nopr %%r7\n"
- ".pushsection .fixup, \"ax\"\n"
- "2: lghi %[tmp],0\n"
- " sfpc %[tmp]\n"
- " jg 1b\n"
- ".popsection\n"
- EX_TABLE(1b, 2b)
- : [tmp] "=d" (tmp)
+ " lfpc %[fpc]\n"
+ "0: nopr %%r7\n"
+ EX_TABLE_FPC(0b, 0b)
+ :
: [fpc] "Q" (*fpc)
: "memory");
}
@@ -183,7 +176,19 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3)
: "memory");
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vl(u8 v1, const void *vxr)
+{
+ instrument_read(vxr, sizeof(__vector128));
+ asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n"
+ :
+ : [vxr] "Q" (*(__vector128 *)vxr),
+ [v1] "I" (v1)
+ : "memory");
+}
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
@@ -197,19 +202,7 @@ static __always_inline void fpu_vl(u8 v1, const void *vxr)
: "memory", "1");
}
-#else /* CONFIG_CC_IS_CLANG */
-
-static __always_inline void fpu_vl(u8 v1, const void *vxr)
-{
- instrument_read(vxr, sizeof(__vector128));
- asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n"
- :
- : [vxr] "Q" (*(__vector128 *)vxr),
- [v1] "I" (v1)
- : "memory");
-}
-
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vleib(u8 v, s16 val, u8 index)
{
@@ -238,7 +231,7 @@ static __always_inline u64 fpu_vlgvf(u8 v, u16 index)
return val;
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
@@ -246,17 +239,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
- asm volatile(
- " la 1,%[vxr]\n"
- " VLL %[v1],%[index],0,1\n"
- :
- : [vxr] "R" (*(u8 *)vxr),
- [index] "d" (index),
- [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ :
+ : [vxr] "Q" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory");
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
@@ -264,17 +255,19 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
- asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n"
- :
- : [vxr] "Q" (*(u8 *)vxr),
- [index] "d" (index),
- [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VLL %[v1],%[index],0,1\n"
+ :
+ : [vxr] "R" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory", "1");
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
#define fpu_vlm(_v1, _v3, _vxrs) \
({ \
@@ -284,17 +277,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
- asm volatile( \
- " la 1,%[vxrs]\n" \
- " VLM %[v1],%[v3],0,1\n" \
- : \
- : [vxrs] "R" (*_v), \
- [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory", "1"); \
+ asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ : \
+ : [vxrs] "Q" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
(_v3) - (_v1) + 1; \
})
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
#define fpu_vlm(_v1, _v3, _vxrs) \
({ \
@@ -304,15 +295,17 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
- asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
- : \
- : [vxrs] "Q" (*_v), \
- [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory"); \
+ asm volatile( \
+ " la 1,%[vxrs]\n" \
+ " VLM %[v1],%[v3],0,1\n" \
+ : \
+ : [vxrs] "R" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
(_v3) - (_v1) + 1; \
})
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vlr(u8 v1, u8 v2)
{
@@ -362,7 +355,18 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3)
: "memory");
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+ instrument_write(vxr, sizeof(__vector128));
+ asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
+ : [vxr] "=Q" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory");
+}
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vst(u8 v1, const void *vxr)
{
@@ -375,20 +379,23 @@ static __always_inline void fpu_vst(u8 v1, const void *vxr)
: "memory", "1");
}
-#else /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-static __always_inline void fpu_vst(u8 v1, const void *vxr)
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
{
- instrument_write(vxr, sizeof(__vector128));
- asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
- : [vxr] "=Q" (*(__vector128 *)vxr)
- : [v1] "I" (v1)
+ unsigned int size;
+
+ size = min(index + 1, sizeof(__vector128));
+ instrument_write(vxr, size);
+ asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ : [vxr] "=Q" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
: "memory");
}
-#endif /* CONFIG_CC_IS_CLANG */
-
-#ifdef CONFIG_CC_IS_CLANG
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
{
@@ -404,23 +411,9 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
: "memory", "1");
}
-#else /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
-{
- unsigned int size;
-
- size = min(index + 1, sizeof(__vector128));
- instrument_write(vxr, size);
- asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
- : [vxr] "=Q" (*(u8 *)vxr)
- : [index] "d" (index), [v1] "I" (v1)
- : "memory");
-}
-
-#endif /* CONFIG_CC_IS_CLANG */
-
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
#define fpu_vstm(_v1, _v3, _vxrs) \
({ \
@@ -430,16 +423,14 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
- asm volatile( \
- " la 1,%[vxrs]\n" \
- " VSTM %[v1],%[v3],0,1\n" \
- : [vxrs] "=R" (*_v) \
- : [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory", "1"); \
+ asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ : [vxrs] "=Q" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
(_v3) - (_v1) + 1; \
})
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
#define fpu_vstm(_v1, _v3, _vxrs) \
({ \
@@ -449,14 +440,16 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
- asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
- : [vxrs] "=Q" (*_v) \
- : [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory"); \
+ asm volatile( \
+ " la 1,%[vxrs]\n" \
+ " VSTM %[v1],%[v3],0,1\n" \
+ : [vxrs] "=R" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
(_v3) - (_v1) + 1; \
})
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vupllf(u8 v1, u8 v2)
{
@@ -482,5 +475,5 @@ static __always_inline void fpu_vzero(u8 v)
: "memory");
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_S390_FPU_INSN_H */
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h
index c84cb33913e2..960c6c67ad6c 100644
--- a/arch/s390/include/asm/fpu.h
+++ b/arch/s390/include/asm/fpu.h
@@ -44,6 +44,7 @@
#ifndef _ASM_S390_FPU_H
#define _ASM_S390_FPU_H
+#include <linux/cpufeature.h>
#include <linux/processor.h>
#include <linux/preempt.h>
#include <linux/string.h>
@@ -51,12 +52,6 @@
#include <asm/sigcontext.h>
#include <asm/fpu-types.h>
#include <asm/fpu-insn.h>
-#include <asm/facility.h>
-
-static inline bool cpu_has_vx(void)
-{
- return likely(test_facility(129));
-}
enum {
KERNEL_FPC_BIT = 0,
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index a3b73a4f626e..bee2d16c2951 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -5,7 +5,7 @@
#define ARCH_SUPPORTS_FTRACE_OPS 1
#define MCOUNT_INSN_SIZE 6
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/stacktrace.h>
static __always_inline unsigned long return_address(unsigned int n)
@@ -51,6 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
}
+#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip))
#include <linux/ftrace_regs.h>
@@ -133,7 +134,7 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
#define ftrace_graph_func ftrace_graph_func
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 752a2310f0d6..942f21c39697 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -2,80 +2,101 @@
#ifndef _ASM_S390_FUTEX_H
#define _ASM_S390_FUTEX_H
+#include <linux/instrumented.h>
#include <linux/uaccess.h>
#include <linux/futex.h>
#include <asm/asm-extable.h>
#include <asm/mmu_context.h>
#include <asm/errno.h>
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
- asm volatile( \
- " sacf 256\n" \
- "0: l %1,0(%6)\n" \
- "1:"insn \
- "2: cs %1,%2,0(%6)\n" \
- "3: jl 1b\n" \
- " lhi %0,0\n" \
- "4: sacf 768\n" \
- EX_TABLE(0b,4b) EX_TABLE(1b,4b) \
- EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
- : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
- "=m" (*uaddr) \
- : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
- "m" (*uaddr) : "cc");
+#define FUTEX_OP_FUNC(name, insn) \
+static uaccess_kmsan_or_inline int \
+__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
+{ \
+ bool sacf_flag; \
+ int rc, new; \
+ \
+ instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \
+ sacf_flag = enable_sacf_uaccess(); \
+ asm_inline volatile( \
+ " sacf 256\n" \
+ "0: l %[old],%[uaddr]\n" \
+ "1:"insn \
+ "2: cs %[old],%[new],%[uaddr]\n" \
+ "3: jl 1b\n" \
+ " lhi %[rc],0\n" \
+ "4: sacf 768\n" \
+ EX_TABLE_UA_FAULT(0b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(2b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(3b, 4b, %[rc]) \
+ : [rc] "=d" (rc), [old] "=&d" (*old), \
+ [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \
+ : [oparg] "d" (oparg) \
+ : "cc"); \
+ disable_sacf_uaccess(sacf_flag); \
+ if (!rc) \
+ instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \
+ return rc; \
+}
+
+FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n")
+FUTEX_OP_FUNC(or, "lr %[new],%[old]\n or %[new],%[oparg]\n")
+FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n")
-static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
- u32 __user *uaddr)
+static inline
+int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int oldval = 0, newval, ret;
+ int old, rc;
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op("lr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_set(oparg, &old, uaddr);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op("lr %2,%1\nar %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_add(oparg, &old, uaddr);
break;
case FUTEX_OP_OR:
- __futex_atomic_op("lr %2,%1\nor %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_or(oparg, &old, uaddr);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
- ret, oldval, newval, uaddr, ~oparg);
+ rc = __futex_atomic_and(~oparg, &old, uaddr);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_xor(oparg, &old, uaddr);
break;
default:
- ret = -ENOSYS;
+ rc = -ENOSYS;
}
-
- if (!ret)
- *oval = oldval;
-
- return ret;
+ if (!rc)
+ *oval = old;
+ return rc;
}
-static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
+static uaccess_kmsan_or_inline
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval)
{
- int ret;
+ bool sacf_flag;
+ int rc;
- asm volatile(
- " sacf 256\n"
- "0: cs %1,%4,0(%5)\n"
- "1: la %0,0\n"
- "2: sacf 768\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
- : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
- : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+ instrument_copy_from_user_before(uval, uaddr, sizeof(*uval));
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ " sacf 256\n"
+ "0: cs %[old],%[new],%[uaddr]\n"
+ "1: lhi %[rc],0\n"
+ "2: sacf 768\n"
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc])
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc])
+ : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr)
+ : [new] "d" (newval)
: "cc", "memory");
+ disable_sacf_uaccess(sacf_flag);
*uval = oldval;
- return ret;
+ instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0);
+ return rc;
}
#endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 13f51a6a5bb1..66c5808fd011 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -23,7 +23,6 @@
/**
* struct gmap_struct - guest address space
* @list: list head for the mm->context gmap list
- * @crst_list: list of all crst tables used in the guest address space
* @mm: pointer to the parent mm_struct
* @guest_to_host: radix tree with guest to host address translation
* @host_to_guest: radix tree with pointer to segment table entries
@@ -35,7 +34,6 @@
* @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
- * @pt_list: list of all page tables used in the shadow guest address space
* @shadow_lock: spinlock to protect the shadow gmap list
* @parent: pointer to the parent gmap for shadow guest address spaces
* @orig_asce: ASCE for which the shadow page table has been created
@@ -45,7 +43,6 @@
*/
struct gmap {
struct list_head list;
- struct list_head crst_list;
struct mm_struct *mm;
struct radix_tree_root guest_to_host;
struct radix_tree_root host_to_guest;
@@ -61,7 +58,6 @@ struct gmap {
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
- struct list_head pt_list;
spinlock_t shadow_lock;
struct gmap *parent;
unsigned long orig_asce;
@@ -106,23 +102,20 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
void gmap_remove(struct gmap *gmap);
struct gmap *gmap_get(struct gmap *gmap);
void gmap_put(struct gmap *gmap);
+void gmap_free(struct gmap *gmap);
+struct gmap *gmap_alloc(unsigned long limit);
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len);
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
-unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
void __gmap_zap(struct gmap *, unsigned long gaddr);
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
- int edat_level);
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+void gmap_unshadow(struct gmap *sg);
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
int fake);
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
@@ -131,24 +124,20 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
int fake);
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
int fake);
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
- unsigned long *pgt, int *dat_protection, int *fake);
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
void gmap_register_pte_notifier(struct gmap_notifier *);
void gmap_unregister_pte_notifier(struct gmap_notifier *);
-int gmap_mprotect_notify(struct gmap *, unsigned long start,
- unsigned long len, int prot);
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits);
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
-int s390_disable_cow_sharing(void);
-void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
unsigned long end, bool interruptible);
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);
/**
* s390_uv_destroy_range - Destroy a range of pages in the given mm.
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
new file mode 100644
index 000000000000..5356446a61c4
--- /dev/null
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Helper functions for KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#ifndef _ASM_S390_GMAP_HELPERS_H
+#define _ASM_S390_GMAP_HELPERS_H
+
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+int gmap_helper_disable_cow_sharing(void);
+
+#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 7c52acaf9f82..931fcc413598 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -9,12 +9,13 @@
#ifndef _ASM_S390_HUGETLB_H
#define _ASM_S390_HUGETLB_H
+#include <linux/cpufeature.h>
#include <linux/pgtable.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <asm/page.h>
-#define hugepages_supported() (MACHINE_HAS_EDAT1)
+#define hugepages_supported() cpu_has_edat1()
#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -25,8 +26,16 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
#define __HAVE_ARCH_HUGE_PTEP_GET
pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep);
+
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned long sz)
+{
+ return __huge_ptep_get_and_clear(mm, addr, ptep);
+}
static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
@@ -48,7 +57,7 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- return huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+ return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
}
#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
@@ -59,7 +68,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte);
if (changed) {
- huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ __huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
return changed;
@@ -69,7 +78,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+ pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep);
__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index fc9933a743d6..faddb9aef3b8 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -33,9 +33,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL)
#define ioremap_wc(addr, size) \
- ioremap_prot((addr), (size), pgprot_val(pgprot_writecombine(PAGE_KERNEL)))
-#define ioremap_wt(addr, size) \
- ioremap_prot((addr), (size), pgprot_val(pgprot_writethrough(PAGE_KERNEL)))
+ ioremap_prot((addr), (size), pgprot_writecombine(PAGE_KERNEL))
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index d9e705f4a697..697497e7d13e 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -25,7 +25,7 @@
#define EXT_IRQ_CP_SERVICE 0x2603
#define EXT_IRQ_IUCV 0x4000
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/hardirq.h>
#include <linux/percpu.h>
@@ -54,7 +54,6 @@ enum interruption_class {
IRQIO_C70,
IRQIO_TAP,
IRQIO_VMR,
- IRQIO_LCS,
IRQIO_CTC,
IRQIO_ADM,
IRQIO_CSC,
@@ -121,6 +120,6 @@ void irq_subclass_unregister(enum irq_subclass subclass);
#define irq_canonicalize(irq) (irq)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index bf78cf381dfc..d9cbc18f6b2e 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -4,7 +4,7 @@
#define HAVE_JUMP_LABEL_BATCH
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/stringify.h>
@@ -51,5 +51,5 @@ label:
return true;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h
index e47fd8cbe701..e95e35eb8a3f 100644
--- a/arch/s390/include/asm/kfence.h
+++ b/arch/s390/include/asm/kfence.h
@@ -12,27 +12,16 @@ void __kernel_map_pages(struct page *page, int numpages, int enable);
static __always_inline bool arch_kfence_init_pool(void)
{
- return true;
-}
-
-#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK)
-
-/*
- * Do not split kfence pool to 4k mapping with arch_kfence_init_pool(),
- * but earlier where page table allocations still happen with memblock.
- * Reason is that arch_kfence_init_pool() gets called when the system
- * is still in a limbo state - disabling and enabling bottom halves is
- * not yet allowed, but that is what our page_table_alloc() would do.
- */
-static __always_inline void kfence_split_mapping(void)
-{
#ifdef CONFIG_KFENCE
unsigned long pool_pages = KFENCE_POOL_SIZE >> PAGE_SHIFT;
set_memory_4k((unsigned long)__kfence_pool, pool_pages);
#endif
+ return true;
}
+#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK)
+
static inline bool kfence_protect_page(unsigned long addr, bool protect)
{
__kernel_map_pages(virt_to_page((void *)addr), 1, !protect);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 97c7c8127543..f870d09515cc 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -20,16 +20,17 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/mmu_notifier.h>
+#include <asm/kvm_host_types.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu.h>
#include <asm/isc.h>
#include <asm/guarded_storage.h>
-#define KVM_S390_BSCA_CPU_SLOTS 64
-#define KVM_S390_ESCA_CPU_SLOTS 248
#define KVM_MAX_VCPUS 255
+#define KVM_INTERNAL_MEM_SLOTS 1
+
/*
* These seem to be used for allocating ->chip in the routing table, which we
* don't use. 1 is as small as we can get to reduce the needed memory. If we
@@ -49,342 +50,6 @@
#define KVM_REQ_REFRESH_GUEST_PREFIX \
KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define SIGP_CTRL_C 0x80
-#define SIGP_CTRL_SCN_MASK 0x3f
-
-union bsca_sigp_ctrl {
- __u8 value;
- struct {
- __u8 c : 1;
- __u8 r : 1;
- __u8 scn : 6;
- };
-};
-
-union esca_sigp_ctrl {
- __u16 value;
- struct {
- __u8 c : 1;
- __u8 reserved: 7;
- __u8 scn;
- };
-};
-
-struct esca_entry {
- union esca_sigp_ctrl sigp_ctrl;
- __u16 reserved1[3];
- __u64 sda;
- __u64 reserved2[6];
-};
-
-struct bsca_entry {
- __u8 reserved0;
- union bsca_sigp_ctrl sigp_ctrl;
- __u16 reserved[3];
- __u64 sda;
- __u64 reserved2[2];
-};
-
-union ipte_control {
- unsigned long val;
- struct {
- unsigned long k : 1;
- unsigned long kh : 31;
- unsigned long kg : 32;
- };
-};
-
-/*
- * Utility is defined as two bytes but having it four bytes wide
- * generates more efficient code. Since the following bytes are
- * reserved this makes no functional difference.
- */
-union sca_utility {
- __u32 val;
- struct {
- __u32 mtcr : 1;
- __u32 : 31;
- };
-};
-
-struct bsca_block {
- union ipte_control ipte_control;
- __u64 reserved[5];
- __u64 mcn;
- union sca_utility utility;
- __u8 reserved2[4];
- struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
-};
-
-struct esca_block {
- union ipte_control ipte_control;
- __u64 reserved1[6];
- union sca_utility utility;
- __u8 reserved2[4];
- __u64 mcn[4];
- __u64 reserved3[20];
- struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
-};
-
-/*
- * This struct is used to store some machine check info from lowcore
- * for machine checks that happen while the guest is running.
- * This info in host's lowcore might be overwritten by a second machine
- * check from host when host is in the machine check's high-level handling.
- * The size is 24 bytes.
- */
-struct mcck_volatile_info {
- __u64 mcic;
- __u64 failing_storage_address;
- __u32 ext_damage_code;
- __u32 reserved;
-};
-
-#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
- CR0_MEASUREMENT_ALERT_SUBMASK)
-#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
- CR14_EXTERNAL_DAMAGE_SUBMASK)
-
-#define SIDAD_SIZE_MASK 0xff
-#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK)
-#define sida_size(sie_block) \
- ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
-
-#define CPUSTAT_STOPPED 0x80000000
-#define CPUSTAT_WAIT 0x10000000
-#define CPUSTAT_ECALL_PEND 0x08000000
-#define CPUSTAT_STOP_INT 0x04000000
-#define CPUSTAT_IO_INT 0x02000000
-#define CPUSTAT_EXT_INT 0x01000000
-#define CPUSTAT_RUNNING 0x00800000
-#define CPUSTAT_RETAINED 0x00400000
-#define CPUSTAT_TIMING_SUB 0x00020000
-#define CPUSTAT_SIE_SUB 0x00010000
-#define CPUSTAT_RRF 0x00008000
-#define CPUSTAT_SLSV 0x00004000
-#define CPUSTAT_SLSR 0x00002000
-#define CPUSTAT_ZARCH 0x00000800
-#define CPUSTAT_MCDS 0x00000100
-#define CPUSTAT_KSS 0x00000200
-#define CPUSTAT_SM 0x00000080
-#define CPUSTAT_IBS 0x00000040
-#define CPUSTAT_GED2 0x00000010
-#define CPUSTAT_G 0x00000008
-#define CPUSTAT_GED 0x00000004
-#define CPUSTAT_J 0x00000002
-#define CPUSTAT_P 0x00000001
-
-struct kvm_s390_sie_block {
- atomic_t cpuflags; /* 0x0000 */
- __u32 : 1; /* 0x0004 */
- __u32 prefix : 18;
- __u32 : 1;
- __u32 ibc : 12;
- __u8 reserved08[4]; /* 0x0008 */
-#define PROG_IN_SIE (1<<0)
- __u32 prog0c; /* 0x000c */
- union {
- __u8 reserved10[16]; /* 0x0010 */
- struct {
- __u64 pv_handle_cpu;
- __u64 pv_handle_config;
- };
- };
-#define PROG_BLOCK_SIE (1<<0)
-#define PROG_REQUEST (1<<1)
- atomic_t prog20; /* 0x0020 */
- __u8 reserved24[4]; /* 0x0024 */
- __u64 cputm; /* 0x0028 */
- __u64 ckc; /* 0x0030 */
- __u64 epoch; /* 0x0038 */
- __u32 svcc; /* 0x0040 */
-#define LCTL_CR0 0x8000
-#define LCTL_CR6 0x0200
-#define LCTL_CR9 0x0040
-#define LCTL_CR10 0x0020
-#define LCTL_CR11 0x0010
-#define LCTL_CR14 0x0002
- __u16 lctl; /* 0x0044 */
- __s16 icpua; /* 0x0046 */
-#define ICTL_OPEREXC 0x80000000
-#define ICTL_PINT 0x20000000
-#define ICTL_LPSW 0x00400000
-#define ICTL_STCTL 0x00040000
-#define ICTL_ISKE 0x00004000
-#define ICTL_SSKE 0x00002000
-#define ICTL_RRBE 0x00001000
-#define ICTL_TPROT 0x00000200
- __u32 ictl; /* 0x0048 */
-#define ECA_CEI 0x80000000
-#define ECA_IB 0x40000000
-#define ECA_SIGPI 0x10000000
-#define ECA_MVPGI 0x01000000
-#define ECA_AIV 0x00200000
-#define ECA_VX 0x00020000
-#define ECA_PROTEXCI 0x00002000
-#define ECA_APIE 0x00000008
-#define ECA_SII 0x00000001
- __u32 eca; /* 0x004c */
-#define ICPT_INST 0x04
-#define ICPT_PROGI 0x08
-#define ICPT_INSTPROGI 0x0C
-#define ICPT_EXTREQ 0x10
-#define ICPT_EXTINT 0x14
-#define ICPT_IOREQ 0x18
-#define ICPT_WAIT 0x1c
-#define ICPT_VALIDITY 0x20
-#define ICPT_STOP 0x28
-#define ICPT_OPEREXC 0x2C
-#define ICPT_PARTEXEC 0x38
-#define ICPT_IOINST 0x40
-#define ICPT_KSS 0x5c
-#define ICPT_MCHKREQ 0x60
-#define ICPT_INT_ENABLE 0x64
-#define ICPT_PV_INSTR 0x68
-#define ICPT_PV_NOTIFY 0x6c
-#define ICPT_PV_PREF 0x70
- __u8 icptcode; /* 0x0050 */
- __u8 icptstatus; /* 0x0051 */
- __u16 ihcpu; /* 0x0052 */
- __u8 reserved54; /* 0x0054 */
-#define IICTL_CODE_NONE 0x00
-#define IICTL_CODE_MCHK 0x01
-#define IICTL_CODE_EXT 0x02
-#define IICTL_CODE_IO 0x03
-#define IICTL_CODE_RESTART 0x04
-#define IICTL_CODE_SPECIFICATION 0x10
-#define IICTL_CODE_OPERAND 0x11
- __u8 iictl; /* 0x0055 */
- __u16 ipa; /* 0x0056 */
- __u32 ipb; /* 0x0058 */
- __u32 scaoh; /* 0x005c */
-#define FPF_BPBC 0x20
- __u8 fpf; /* 0x0060 */
-#define ECB_GS 0x40
-#define ECB_TE 0x10
-#define ECB_SPECI 0x08
-#define ECB_SRSI 0x04
-#define ECB_HOSTPROTINT 0x02
-#define ECB_PTF 0x01
- __u8 ecb; /* 0x0061 */
-#define ECB2_CMMA 0x80
-#define ECB2_IEP 0x20
-#define ECB2_PFMFI 0x08
-#define ECB2_ESCA 0x04
-#define ECB2_ZPCI_LSI 0x02
- __u8 ecb2; /* 0x0062 */
-#define ECB3_AISI 0x20
-#define ECB3_AISII 0x10
-#define ECB3_DEA 0x08
-#define ECB3_AES 0x04
-#define ECB3_RI 0x01
- __u8 ecb3; /* 0x0063 */
-#define ESCA_SCAOL_MASK ~0x3fU
- __u32 scaol; /* 0x0064 */
- __u8 sdf; /* 0x0068 */
- __u8 epdx; /* 0x0069 */
- __u8 cpnc; /* 0x006a */
- __u8 reserved6b; /* 0x006b */
- __u32 todpr; /* 0x006c */
-#define GISA_FORMAT1 0x00000001
- __u32 gd; /* 0x0070 */
- __u8 reserved74[12]; /* 0x0074 */
- __u64 mso; /* 0x0080 */
- __u64 msl; /* 0x0088 */
- psw_t gpsw; /* 0x0090 */
- __u64 gg14; /* 0x00a0 */
- __u64 gg15; /* 0x00a8 */
- __u8 reservedb0[8]; /* 0x00b0 */
-#define HPID_KVM 0x4
-#define HPID_VSIE 0x5
- __u8 hpid; /* 0x00b8 */
- __u8 reservedb9[7]; /* 0x00b9 */
- union {
- struct {
- __u32 eiparams; /* 0x00c0 */
- __u16 extcpuaddr; /* 0x00c4 */
- __u16 eic; /* 0x00c6 */
- };
- __u64 mcic; /* 0x00c0 */
- } __packed;
- __u32 reservedc8; /* 0x00c8 */
- union {
- struct {
- __u16 pgmilc; /* 0x00cc */
- __u16 iprcc; /* 0x00ce */
- };
- __u32 edc; /* 0x00cc */
- } __packed;
- union {
- struct {
- __u32 dxc; /* 0x00d0 */
- __u16 mcn; /* 0x00d4 */
- __u8 perc; /* 0x00d6 */
- __u8 peratmid; /* 0x00d7 */
- };
- __u64 faddr; /* 0x00d0 */
- } __packed;
- __u64 peraddr; /* 0x00d8 */
- __u8 eai; /* 0x00e0 */
- __u8 peraid; /* 0x00e1 */
- __u8 oai; /* 0x00e2 */
- __u8 armid; /* 0x00e3 */
- __u8 reservede4[4]; /* 0x00e4 */
- union {
- __u64 tecmc; /* 0x00e8 */
- struct {
- __u16 subchannel_id; /* 0x00e8 */
- __u16 subchannel_nr; /* 0x00ea */
- __u32 io_int_parm; /* 0x00ec */
- __u32 io_int_word; /* 0x00f0 */
- };
- } __packed;
- __u8 reservedf4[8]; /* 0x00f4 */
-#define CRYCB_FORMAT_MASK 0x00000003
-#define CRYCB_FORMAT0 0x00000000
-#define CRYCB_FORMAT1 0x00000001
-#define CRYCB_FORMAT2 0x00000003
- __u32 crycbd; /* 0x00fc */
- __u64 gcr[16]; /* 0x0100 */
- union {
- __u64 gbea; /* 0x0180 */
- __u64 sidad;
- };
- __u8 reserved188[8]; /* 0x0188 */
- __u64 sdnxo; /* 0x0190 */
- __u8 reserved198[8]; /* 0x0198 */
- __u32 fac; /* 0x01a0 */
- __u8 reserved1a4[20]; /* 0x01a4 */
- __u64 cbrlo; /* 0x01b8 */
- __u8 reserved1c0[8]; /* 0x01c0 */
-#define ECD_HOSTREGMGMT 0x20000000
-#define ECD_MEF 0x08000000
-#define ECD_ETOKENF 0x02000000
-#define ECD_ECC 0x00200000
-#define ECD_HMAC 0x00004000
- __u32 ecd; /* 0x01c8 */
- __u8 reserved1cc[18]; /* 0x01cc */
- __u64 pp; /* 0x01de */
- __u8 reserved1e6[2]; /* 0x01e6 */
- __u64 itdba; /* 0x01e8 */
- __u64 riccbd; /* 0x01f0 */
- __u64 gvrd; /* 0x01f8 */
-} __packed __aligned(512);
-
-struct kvm_s390_itdb {
- __u8 data[256];
-};
-
-struct sie_page {
- struct kvm_s390_sie_block sie_block;
- struct mcck_volatile_info mcck_info; /* 0x0200 */
- __u8 reserved218[360]; /* 0x0218 */
- __u64 pv_grregs[16]; /* 0x0380 */
- __u8 reserved400[512]; /* 0x0400 */
- struct kvm_s390_itdb itdb; /* 0x0600 */
- __u8 reserved700[2304]; /* 0x0700 */
-};
-
struct kvm_vcpu_stat {
struct kvm_vcpu_stat_generic generic;
u64 exit_userspace;
@@ -931,12 +596,14 @@ struct sie_page2 {
u8 reserved928[0x1000 - 0x928]; /* 0x0928 */
};
+struct vsie_page;
+
struct kvm_s390_vsie {
struct mutex mutex;
struct radix_tree_root addr_to_page;
int page_count;
int next;
- struct page *pages[KVM_MAX_VCPUS];
+ struct vsie_page *pages[KVM_MAX_VCPUS];
};
struct kvm_s390_gisa_iam {
@@ -1049,10 +716,12 @@ extern char sie_exit;
bool kvm_s390_pv_is_protected(struct kvm *kvm);
bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
+extern int kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
+ u64 *gprs, unsigned long gasce);
+
extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
diff --git a/arch/s390/include/asm/kvm_host_types.h b/arch/s390/include/asm/kvm_host_types.h
new file mode 100644
index 000000000000..1394d3fb648f
--- /dev/null
+++ b/arch/s390/include/asm/kvm_host_types.h
@@ -0,0 +1,348 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_KVM_HOST_TYPES_H
+#define _ASM_KVM_HOST_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/types.h>
+
+#define KVM_S390_BSCA_CPU_SLOTS 64
+#define KVM_S390_ESCA_CPU_SLOTS 248
+
+#define SIGP_CTRL_C 0x80
+#define SIGP_CTRL_SCN_MASK 0x3f
+
+union bsca_sigp_ctrl {
+ __u8 value;
+ struct {
+ __u8 c : 1;
+ __u8 r : 1;
+ __u8 scn : 6;
+ };
+};
+
+union esca_sigp_ctrl {
+ __u16 value;
+ struct {
+ __u8 c : 1;
+ __u8 reserved: 7;
+ __u8 scn;
+ };
+};
+
+struct esca_entry {
+ union esca_sigp_ctrl sigp_ctrl;
+ __u16 reserved1[3];
+ __u64 sda;
+ __u64 reserved2[6];
+};
+
+struct bsca_entry {
+ __u8 reserved0;
+ union bsca_sigp_ctrl sigp_ctrl;
+ __u16 reserved[3];
+ __u64 sda;
+ __u64 reserved2[2];
+};
+
+union ipte_control {
+ unsigned long val;
+ struct {
+ unsigned long k : 1;
+ unsigned long kh : 31;
+ unsigned long kg : 32;
+ };
+};
+
+/*
+ * Utility is defined as two bytes but having it four bytes wide
+ * generates more efficient code. Since the following bytes are
+ * reserved this makes no functional difference.
+ */
+union sca_utility {
+ __u32 val;
+ struct {
+ __u32 mtcr : 1;
+ __u32 : 31;
+ };
+};
+
+struct bsca_block {
+ union ipte_control ipte_control;
+ __u64 reserved[5];
+ __u64 mcn;
+ union sca_utility utility;
+ __u8 reserved2[4];
+ struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
+};
+
+struct esca_block {
+ union ipte_control ipte_control;
+ __u64 reserved1[6];
+ union sca_utility utility;
+ __u8 reserved2[4];
+ __u64 mcn[4];
+ __u64 reserved3[20];
+ struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
+};
+
+/*
+ * This struct is used to store some machine check info from lowcore
+ * for machine checks that happen while the guest is running.
+ * This info in host's lowcore might be overwritten by a second machine
+ * check from host when host is in the machine check's high-level handling.
+ * The size is 24 bytes.
+ */
+struct mcck_volatile_info {
+ __u64 mcic;
+ __u64 failing_storage_address;
+ __u32 ext_damage_code;
+ __u32 reserved;
+};
+
+#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
+ CR0_MEASUREMENT_ALERT_SUBMASK)
+#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
+ CR14_EXTERNAL_DAMAGE_SUBMASK)
+
+#define SIDAD_SIZE_MASK 0xff
+#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
+#define CPUSTAT_STOPPED 0x80000000
+#define CPUSTAT_WAIT 0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT 0x04000000
+#define CPUSTAT_IO_INT 0x02000000
+#define CPUSTAT_EXT_INT 0x01000000
+#define CPUSTAT_RUNNING 0x00800000
+#define CPUSTAT_RETAINED 0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB 0x00010000
+#define CPUSTAT_RRF 0x00008000
+#define CPUSTAT_SLSV 0x00004000
+#define CPUSTAT_SLSR 0x00002000
+#define CPUSTAT_ZARCH 0x00000800
+#define CPUSTAT_MCDS 0x00000100
+#define CPUSTAT_KSS 0x00000200
+#define CPUSTAT_SM 0x00000080
+#define CPUSTAT_IBS 0x00000040
+#define CPUSTAT_GED2 0x00000010
+#define CPUSTAT_G 0x00000008
+#define CPUSTAT_GED 0x00000004
+#define CPUSTAT_J 0x00000002
+#define CPUSTAT_P 0x00000001
+
+struct kvm_s390_sie_block {
+ atomic_t cpuflags; /* 0x0000 */
+ __u32 : 1; /* 0x0004 */
+ __u32 prefix : 18;
+ __u32 : 1;
+ __u32 ibc : 12;
+ __u8 reserved08[4]; /* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+ __u32 prog0c; /* 0x000c */
+ union {
+ __u8 reserved10[16]; /* 0x0010 */
+ struct {
+ __u64 pv_handle_cpu;
+ __u64 pv_handle_config;
+ };
+ };
+#define PROG_BLOCK_SIE (1<<0)
+#define PROG_REQUEST (1<<1)
+ atomic_t prog20; /* 0x0020 */
+ __u8 reserved24[4]; /* 0x0024 */
+ __u64 cputm; /* 0x0028 */
+ __u64 ckc; /* 0x0030 */
+ __u64 epoch; /* 0x0038 */
+ __u32 svcc; /* 0x0040 */
+#define LCTL_CR0 0x8000
+#define LCTL_CR6 0x0200
+#define LCTL_CR9 0x0040
+#define LCTL_CR10 0x0020
+#define LCTL_CR11 0x0010
+#define LCTL_CR14 0x0002
+ __u16 lctl; /* 0x0044 */
+ __s16 icpua; /* 0x0046 */
+#define ICTL_OPEREXC 0x80000000
+#define ICTL_PINT 0x20000000
+#define ICTL_LPSW 0x00400000
+#define ICTL_STCTL 0x00040000
+#define ICTL_ISKE 0x00004000
+#define ICTL_SSKE 0x00002000
+#define ICTL_RRBE 0x00001000
+#define ICTL_TPROT 0x00000200
+ __u32 ictl; /* 0x0048 */
+#define ECA_CEI 0x80000000
+#define ECA_IB 0x40000000
+#define ECA_SIGPI 0x10000000
+#define ECA_MVPGI 0x01000000
+#define ECA_AIV 0x00200000
+#define ECA_VX 0x00020000
+#define ECA_PROTEXCI 0x00002000
+#define ECA_APIE 0x00000008
+#define ECA_SII 0x00000001
+ __u32 eca; /* 0x004c */
+#define ICPT_INST 0x04
+#define ICPT_PROGI 0x08
+#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ 0x10
+#define ICPT_EXTINT 0x14
+#define ICPT_IOREQ 0x18
+#define ICPT_WAIT 0x1c
+#define ICPT_VALIDITY 0x20
+#define ICPT_STOP 0x28
+#define ICPT_OPEREXC 0x2C
+#define ICPT_PARTEXEC 0x38
+#define ICPT_IOINST 0x40
+#define ICPT_KSS 0x5c
+#define ICPT_MCHKREQ 0x60
+#define ICPT_INT_ENABLE 0x64
+#define ICPT_PV_INSTR 0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF 0x70
+ __u8 icptcode; /* 0x0050 */
+ __u8 icptstatus; /* 0x0051 */
+ __u16 ihcpu; /* 0x0052 */
+ __u8 reserved54; /* 0x0054 */
+#define IICTL_CODE_NONE 0x00
+#define IICTL_CODE_MCHK 0x01
+#define IICTL_CODE_EXT 0x02
+#define IICTL_CODE_IO 0x03
+#define IICTL_CODE_RESTART 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND 0x11
+ __u8 iictl; /* 0x0055 */
+ __u16 ipa; /* 0x0056 */
+ __u32 ipb; /* 0x0058 */
+ __u32 scaoh; /* 0x005c */
+#define FPF_BPBC 0x20
+ __u8 fpf; /* 0x0060 */
+#define ECB_GS 0x40
+#define ECB_TE 0x10
+#define ECB_SPECI 0x08
+#define ECB_SRSI 0x04
+#define ECB_HOSTPROTINT 0x02
+#define ECB_PTF 0x01
+ __u8 ecb; /* 0x0061 */
+#define ECB2_CMMA 0x80
+#define ECB2_IEP 0x20
+#define ECB2_PFMFI 0x08
+#define ECB2_ESCA 0x04
+#define ECB2_ZPCI_LSI 0x02
+ __u8 ecb2; /* 0x0062 */
+#define ECB3_AISI 0x20
+#define ECB3_AISII 0x10
+#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI 0x01
+ __u8 ecb3; /* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
+ __u32 scaol; /* 0x0064 */
+ __u8 sdf; /* 0x0068 */
+ __u8 epdx; /* 0x0069 */
+ __u8 cpnc; /* 0x006a */
+ __u8 reserved6b; /* 0x006b */
+ __u32 todpr; /* 0x006c */
+#define GISA_FORMAT1 0x00000001
+ __u32 gd; /* 0x0070 */
+ __u8 reserved74[12]; /* 0x0074 */
+ __u64 mso; /* 0x0080 */
+ __u64 msl; /* 0x0088 */
+ psw_t gpsw; /* 0x0090 */
+ __u64 gg14; /* 0x00a0 */
+ __u64 gg15; /* 0x00a8 */
+ __u8 reservedb0[8]; /* 0x00b0 */
+#define HPID_KVM 0x4
+#define HPID_VSIE 0x5
+ __u8 hpid; /* 0x00b8 */
+ __u8 reservedb9[7]; /* 0x00b9 */
+ union {
+ struct {
+ __u32 eiparams; /* 0x00c0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ };
+ __u64 mcic; /* 0x00c0 */
+ } __packed;
+ __u32 reservedc8; /* 0x00c8 */
+ union {
+ struct {
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ };
+ __u32 edc; /* 0x00cc */
+ } __packed;
+ union {
+ struct {
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ };
+ __u64 faddr; /* 0x00d0 */
+ } __packed;
+ __u64 peraddr; /* 0x00d8 */
+ __u8 eai; /* 0x00e0 */
+ __u8 peraid; /* 0x00e1 */
+ __u8 oai; /* 0x00e2 */
+ __u8 armid; /* 0x00e3 */
+ __u8 reservede4[4]; /* 0x00e4 */
+ union {
+ __u64 tecmc; /* 0x00e8 */
+ struct {
+ __u16 subchannel_id; /* 0x00e8 */
+ __u16 subchannel_nr; /* 0x00ea */
+ __u32 io_int_parm; /* 0x00ec */
+ __u32 io_int_word; /* 0x00f0 */
+ };
+ } __packed;
+ __u8 reservedf4[8]; /* 0x00f4 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
+#define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
+ __u32 crycbd; /* 0x00fc */
+ __u64 gcr[16]; /* 0x0100 */
+ union {
+ __u64 gbea; /* 0x0180 */
+ __u64 sidad;
+ };
+ __u8 reserved188[8]; /* 0x0188 */
+ __u64 sdnxo; /* 0x0190 */
+ __u8 reserved198[8]; /* 0x0198 */
+ __u32 fac; /* 0x01a0 */
+ __u8 reserved1a4[20]; /* 0x01a4 */
+ __u64 cbrlo; /* 0x01b8 */
+ __u8 reserved1c0[8]; /* 0x01c0 */
+#define ECD_HOSTREGMGMT 0x20000000
+#define ECD_MEF 0x08000000
+#define ECD_ETOKENF 0x02000000
+#define ECD_ECC 0x00200000
+#define ECD_HMAC 0x00004000
+ __u32 ecd; /* 0x01c8 */
+ __u8 reserved1cc[18]; /* 0x01cc */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
+ __u64 itdba; /* 0x01e8 */
+ __u64 riccbd; /* 0x01f0 */
+ __u64 gvrd; /* 0x01f8 */
+} __packed __aligned(512);
+
+struct kvm_s390_itdb {
+ __u8 data[256];
+};
+
+struct sie_page {
+ struct kvm_s390_sie_block sie_block;
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
+ __u8 reserved218[360]; /* 0x0218 */
+ __u64 pv_grregs[16]; /* 0x0380 */
+ __u8 reserved400[512]; /* 0x0400 */
+ struct kvm_s390_itdb itdb; /* 0x0600 */
+ __u8 reserved700[2304]; /* 0x0700 */
+};
+
+#endif /* _ASM_KVM_HOST_TYPES_H */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 42a092fa1029..d9c853db9a40 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -10,6 +10,7 @@
#define _ASM_S390_LOWCORE_H
#include <linux/types.h>
+#include <asm/machine.h>
#include <asm/ptrace.h>
#include <asm/ctlreg.h>
#include <asm/cpu.h>
@@ -21,7 +22,7 @@
#define LOWCORE_ALT_ADDRESS _AC(0x70000, UL)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct pgm_tdb {
u64 data[32];
@@ -126,7 +127,7 @@ struct lowcore {
__u64 int_clock; /* 0x0318 */
__u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */
__u64 clock_comparator; /* 0x0328 */
- __u64 boot_clock[2]; /* 0x0330 */
+ __u8 pad_0x0330[0x0340-0x0330]; /* 0x0330 */
/* Current process. */
__u64 current_task; /* 0x0340 */
@@ -163,9 +164,7 @@ struct lowcore {
__u32 spinlock_index; /* 0x03b0 */
__u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */
__u64 percpu_offset; /* 0x03b8 */
- __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */
- __u64 machine_flags; /* 0x03c8 */
- __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */
+ __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */
__u32 return_lpswe; /* 0x0400 */
__u32 return_mcck_lpswe; /* 0x0404 */
@@ -222,9 +221,12 @@ static __always_inline struct lowcore *get_lowcore(void)
if (__is_defined(__DECOMPRESSOR))
return NULL;
- asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE)
- : [lc] "=d" (lc)
- : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16));
+ asm_inline(
+ ALTERNATIVE(" lghi %[lc],0",
+ " llilh %[lc],%[alt]",
+ ALT_FEATURE(MFEATURE_LOWCORE))
+ : [lc] "=d" (lc)
+ : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16));
return lc;
}
@@ -235,19 +237,19 @@ static inline void set_prefix(__u32 address)
asm volatile("spx %0" : : "Q" (address) : "memory");
}
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
.macro GET_LC reg
- ALTERNATIVE "llilh \reg,0", \
+ ALTERNATIVE "lghi \reg,0", \
__stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \
- ALT_LOWCORE
+ ALT_FEATURE(MFEATURE_LOWCORE)
.endm
.macro STMG_LC start, end, savearea
ALTERNATIVE "stmg \start, \end, \savearea", \
__stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \
- ALT_LOWCORE
+ ALT_FEATURE(MFEATURE_LOWCORE)
.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_LOWCORE_H */
diff --git a/arch/s390/include/asm/machine.h b/arch/s390/include/asm/machine.h
new file mode 100644
index 000000000000..9bd4a9dc7778
--- /dev/null
+++ b/arch/s390/include/asm/machine.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#ifndef __ASM_S390_MACHINE_H
+#define __ASM_S390_MACHINE_H
+
+#include <linux/const.h>
+
+#define MFEATURE_LOWCORE 0
+#define MFEATURE_PCI_MIO 1
+#define MFEATURE_SCC 2
+#define MFEATURE_TLB_GUEST 3
+#define MFEATURE_TX 4
+#define MFEATURE_ESOP 5
+#define MFEATURE_DIAG9C 6
+#define MFEATURE_VM 7
+#define MFEATURE_KVM 8
+#define MFEATURE_LPAR 9
+#define MFEATURE_DIAG288 10
+
+#ifndef __ASSEMBLER__
+
+#include <linux/bitops.h>
+#include <asm/alternative.h>
+
+extern unsigned long machine_features[1];
+
+#define MAX_MFEATURE_BIT (sizeof(machine_features) * BITS_PER_BYTE)
+
+static inline void __set_machine_feature(unsigned int nr, unsigned long *mfeatures)
+{
+ if (nr >= MAX_MFEATURE_BIT)
+ return;
+ __set_bit(nr, mfeatures);
+}
+
+static inline void set_machine_feature(unsigned int nr)
+{
+ __set_machine_feature(nr, machine_features);
+}
+
+static inline void __clear_machine_feature(unsigned int nr, unsigned long *mfeatures)
+{
+ if (nr >= MAX_MFEATURE_BIT)
+ return;
+ __clear_bit(nr, mfeatures);
+}
+
+static inline void clear_machine_feature(unsigned int nr)
+{
+ __clear_machine_feature(nr, machine_features);
+}
+
+static bool __test_machine_feature(unsigned int nr, unsigned long *mfeatures)
+{
+ if (nr >= MAX_MFEATURE_BIT)
+ return false;
+ return test_bit(nr, mfeatures);
+}
+
+static bool test_machine_feature(unsigned int nr)
+{
+ return __test_machine_feature(nr, machine_features);
+}
+
+static __always_inline bool __test_machine_feature_constant(unsigned int nr)
+{
+ asm goto(
+ ALTERNATIVE("brcl 15,%l[l_no]", "brcl 0,0", ALT_FEATURE(%[nr]))
+ :
+ : [nr] "i" (nr)
+ :
+ : l_no);
+ return true;
+l_no:
+ return false;
+}
+
+#define DEFINE_MACHINE_HAS_FEATURE(name, feature) \
+static __always_inline bool machine_has_##name(void) \
+{ \
+ if (!__is_defined(__DECOMPRESSOR) && __builtin_constant_p(feature)) \
+ return __test_machine_feature_constant(feature); \
+ return test_machine_feature(feature); \
+}
+
+DEFINE_MACHINE_HAS_FEATURE(relocated_lowcore, MFEATURE_LOWCORE)
+DEFINE_MACHINE_HAS_FEATURE(scc, MFEATURE_SCC)
+DEFINE_MACHINE_HAS_FEATURE(tlb_guest, MFEATURE_TLB_GUEST)
+DEFINE_MACHINE_HAS_FEATURE(tx, MFEATURE_TX)
+DEFINE_MACHINE_HAS_FEATURE(esop, MFEATURE_ESOP)
+DEFINE_MACHINE_HAS_FEATURE(diag9c, MFEATURE_DIAG9C)
+DEFINE_MACHINE_HAS_FEATURE(vm, MFEATURE_VM)
+DEFINE_MACHINE_HAS_FEATURE(kvm, MFEATURE_KVM)
+DEFINE_MACHINE_HAS_FEATURE(lpar, MFEATURE_LPAR)
+
+#define machine_is_vm machine_has_vm
+#define machine_is_kvm machine_has_kvm
+#define machine_is_lpar machine_has_lpar
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_S390_MACHINE_H */
diff --git a/arch/s390/include/asm/march.h b/arch/s390/include/asm/march.h
index fd9eef3be44c..11a71bd14954 100644
--- a/arch/s390/include/asm/march.h
+++ b/arch/s390/include/asm/march.h
@@ -33,6 +33,10 @@
#define MARCH_HAS_Z16_FEATURES 1
#endif
+#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES
+#define MARCH_HAS_Z17_FEATURES 1
+#endif
+
#endif /* __DECOMPRESSOR */
#endif /* __ASM_S390_MARCH_H */
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
index b85e13505a0f..28c83ec1f243 100644
--- a/arch/s390/include/asm/mem_encrypt.h
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -2,11 +2,11 @@
#ifndef S390_MEM_ENCRYPT_H__
#define S390_MEM_ENCRYPT_H__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
int set_memory_encrypted(unsigned long vaddr, int numpages);
int set_memory_decrypted(unsigned long vaddr, int numpages);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* S390_MEM_ENCRYPT_H__ */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 4c2dc7abc285..f07e49b419ab 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -22,10 +22,7 @@ typedef struct {
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
* written once, they can be read without a lock.
- *
- * The mmu context allocates 4K page tables.
*/
- unsigned int alloc_pgste:1;
/* The mmu context uses extended page tables. */
unsigned int has_pgste:1;
/* The mmu context uses storage keys. */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index d56eb0a1f37b..d9b8501bc93d 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
#include <linux/mm_types.h>
#include <asm/tlbflush.h>
#include <asm/ctlreg.h>
+#include <asm/asce.h>
#include <asm-generic/mm_hooks.h>
#define init_new_context init_new_context
@@ -29,9 +30,6 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
#ifdef CONFIG_PGSTE
- mm->context.alloc_pgste = page_table_allocate_pgste ||
- test_thread_flag(TIF_PGSTE) ||
- (current->mm && current->mm->context.alloc_pgste);
mm->context.has_pgste = 0;
mm->context.uses_skeys = 0;
mm->context.uses_cmm = 0;
@@ -80,7 +78,8 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *
else
get_lowcore()->user_asce.val = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
- /* Clear previous user-ASCE from CR7 */
+ /* Clear previous user-ASCE from CR1 and CR7 */
+ local_ctl_load(1, &s390_invalid_asce);
local_ctl_load(7, &s390_invalid_asce);
if (prev != next)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
@@ -102,6 +101,7 @@ static inline void finish_arch_post_lock_switch(void)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
+ unsigned long flags;
if (mm) {
preempt_disable();
@@ -111,15 +111,25 @@ static inline void finish_arch_post_lock_switch(void)
__tlb_flush_mm_lazy(mm);
preempt_enable();
}
+ local_irq_save(flags);
+ if (test_thread_flag(TIF_ASCE_PRIMARY))
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ else
+ local_ctl_load(1, &get_lowcore()->user_asce);
local_ctl_load(7, &get_lowcore()->user_asce);
+ local_irq_restore(flags);
}
#define activate_mm activate_mm
static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
- switch_mm(prev, next, current);
+ switch_mm_irqs_off(prev, next, current);
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ if (test_thread_flag(TIF_ASCE_PRIMARY))
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ else
+ local_ctl_load(1, &get_lowcore()->user_asce);
local_ctl_load(7, &get_lowcore()->user_asce);
}
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 227466ce9e41..6454c1531854 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -33,7 +33,7 @@
#define MCCK_CODE_FC_VALID BIT(63 - 43)
#define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
union mci {
unsigned long val;
@@ -104,5 +104,5 @@ void nmi_free_mcesa(u64 *mcesad);
void s390_handle_mcck(void);
void s390_do_machine_check(struct pt_regs *regs);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 192835a3e24d..81c4813cff18 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -2,7 +2,7 @@
#ifndef _ASM_S390_EXPOLINE_H
#define _ASM_S390_EXPOLINE_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/facility.h>
@@ -26,8 +26,6 @@ static inline bool nospec_uses_trampoline(void)
return __is_defined(CC_USING_EXPOLINE) && !nospec_disable;
}
-#ifdef CONFIG_EXPOLINE_EXTERN
-
void __s390_indirect_jump_r1(void);
void __s390_indirect_jump_r2(void);
void __s390_indirect_jump_r3(void);
@@ -44,8 +42,6 @@ void __s390_indirect_jump_r13(void);
void __s390_indirect_jump_r14(void);
void __s390_indirect_jump_r15(void);
-#endif
-
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index cb15dd25bf21..6ce6b56e282b 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -3,9 +3,10 @@
#define _ASM_S390_NOSPEC_ASM_H
#include <linux/linkage.h>
+#include <linux/export.h>
#include <asm/dwarf.h>
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#ifdef CC_USING_EXPOLINE
@@ -128,6 +129,6 @@
.endm
#endif /* CC_USING_EXPOLINE */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_NOSPEC_ASM_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 4f43cdd9835b..9240a363c893 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -33,7 +33,7 @@
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#include <asm/setup.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
void __storage_key_init_range(unsigned long start, unsigned long end);
@@ -71,9 +71,11 @@ static inline void copy_page(void *to, void *from)
#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr)
-/*
- * These are used to make use of C type-checking..
- */
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+#define STRICT_MM_TYPECHECKS
+#endif
+
+#ifdef STRICT_MM_TYPECHECKS
typedef struct { unsigned long pgprot; } pgprot_t;
typedef struct { unsigned long pgste; } pgste_t;
@@ -82,52 +84,65 @@ typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long p4d; } p4d_t;
typedef struct { unsigned long pgd; } pgd_t;
-typedef pte_t *pgtable_t;
-#define pgprot_val(x) ((x).pgprot)
-#define pgste_val(x) ((x).pgste)
-
-static inline unsigned long pte_val(pte_t pte)
-{
- return pte.pte;
+#define DEFINE_PGVAL_FUNC(name) \
+static __always_inline unsigned long name ## _val(name ## _t name) \
+{ \
+ return name.name; \
}
-static inline unsigned long pmd_val(pmd_t pmd)
-{
- return pmd.pmd;
-}
+#else /* STRICT_MM_TYPECHECKS */
-static inline unsigned long pud_val(pud_t pud)
-{
- return pud.pud;
-}
+typedef unsigned long pgprot_t;
+typedef unsigned long pgste_t;
+typedef unsigned long pte_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
+typedef unsigned long p4d_t;
+typedef unsigned long pgd_t;
-static inline unsigned long p4d_val(p4d_t p4d)
-{
- return p4d.p4d;
+#define DEFINE_PGVAL_FUNC(name) \
+static __always_inline unsigned long name ## _val(name ## _t name) \
+{ \
+ return name; \
}
-static inline unsigned long pgd_val(pgd_t pgd)
-{
- return pgd.pgd;
-}
+#endif /* STRICT_MM_TYPECHECKS */
+
+DEFINE_PGVAL_FUNC(pgprot)
+DEFINE_PGVAL_FUNC(pgste)
+DEFINE_PGVAL_FUNC(pte)
+DEFINE_PGVAL_FUNC(pmd)
+DEFINE_PGVAL_FUNC(pud)
+DEFINE_PGVAL_FUNC(p4d)
+DEFINE_PGVAL_FUNC(pgd)
+
+typedef pte_t *pgtable_t;
+#define __pgprot(x) ((pgprot_t) { (x) } )
#define __pgste(x) ((pgste_t) { (x) } )
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pud(x) ((pud_t) { (x) } )
#define __p4d(x) ((p4d_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
-#define __pgprot(x) ((pgprot_t) { (x) } )
static inline void page_set_storage_key(unsigned long addr,
unsigned char skey, int mapped)
{
- if (!mapped)
- asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0"
- : : "d" (skey), "a" (addr));
- else
- asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+ if (!mapped) {
+ asm volatile(
+ " .insn rrf,0xb22b0000,%[skey],%[addr],8,0"
+ :
+ : [skey] "d" (skey), [addr] "a" (addr)
+ : "memory");
+ } else {
+ asm volatile(
+ " sske %[skey],%[addr]"
+ :
+ : [skey] "d" (skey), [addr] "a" (addr)
+ : "memory");
+ }
}
static inline unsigned char page_get_storage_key(unsigned long addr)
@@ -184,7 +199,11 @@ extern struct vm_layout vm_layout;
#define __kaslr_offset vm_layout.kaslr_offset
#define __kaslr_offset_phys vm_layout.kaslr_offset_phys
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
#define __identity_base vm_layout.identity_base
+#else
+#define __identity_base 0UL
+#endif
#define ident_map_size vm_layout.identity_size
static inline unsigned long kaslr_offset(void)
@@ -263,7 +282,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 474e1f8d1d3c..41f900f693d9 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -11,6 +11,9 @@
#include <asm/pci_insn.h>
#include <asm/sclp.h>
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
+#define arch_can_pci_mmap_wc() 1
+
#define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM 0x10000000
@@ -144,7 +147,7 @@ struct zpci_dev {
u8 util_str_avail : 1;
u8 irqs_registered : 1;
u8 tid_avail : 1;
- u8 reserved : 1;
+ u8 rtr_avail : 1; /* Relaxed translation allowed */
unsigned int devfn; /* DEVFN part of the RID*/
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
@@ -217,6 +220,7 @@ extern struct airq_iv *zpci_aif_sbv;
struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
int zpci_add_device(struct zpci_dev *zdev);
int zpci_enable_device(struct zpci_dev *);
+int zpci_reenable_device(struct zpci_dev *zdev);
int zpci_disable_device(struct zpci_dev *);
int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh);
int zpci_deconfigure_device(struct zpci_dev *zdev);
@@ -245,6 +249,7 @@ void update_uid_checking(bool new);
/* IOMMU Interface */
int zpci_init_iommu(struct zpci_dev *zdev);
void zpci_destroy_iommu(struct zpci_dev *zdev);
+int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status);
#ifdef CONFIG_PCI
static inline bool zpci_use_mio(struct zpci_dev *zdev)
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index 3fff2f7095c8..7ebff39c84b3 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -156,7 +156,9 @@ struct clp_rsp_query_pci_grp {
u16 : 4;
u16 noi : 12; /* number of interrupts */
u8 version;
- u8 : 6;
+ u8 : 2;
+ u8 rtr : 1; /* Relaxed translation requirement */
+ u8 : 3;
u8 frame : 1;
u8 refresh : 1; /* TLB refresh mode */
u16 : 3;
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 42d7cc4262ca..d12e17201661 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -25,6 +25,7 @@ enum zpci_ioat_dtype {
#define ZPCI_KEY (PAGE_DEFAULT_KEY << 5)
#define ZPCI_TABLE_SIZE_RT (1UL << 42)
+#define ZPCI_TABLE_SIZE_RS (1UL << 53)
#define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
#define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
@@ -55,6 +56,8 @@ enum zpci_ioat_dtype {
#define ZPCI_PT_BITS 8
#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT)
#define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RS_SHIFT (ZPCI_RT_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RF_SHIFT (ZPCI_RS_SHIFT + ZPCI_TABLE_BITS)
#define ZPCI_RTE_FLAG_MASK 0x3fffUL
#define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK)
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 84f6b8357b45..96af7d964014 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -16,10 +16,9 @@
* For 64 bit module code, the module may be more than 4G above the
* per cpu area, use weak definitions to force the compiler to
* generate external references.
+ * Therefore, we have enabled CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU
+ * in the Kconfig.
*/
-#if defined(MODULE)
-#define ARCH_NEEDS_WEAK_PER_CPU
-#endif
/*
* We use a compare-and-swap loop since that uses less cpu cycles than
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index b19b6ed2ab53..5345398df653 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -26,7 +26,6 @@ unsigned long *page_table_alloc(struct mm_struct *);
struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_pgste(struct ptdesc *ptdesc);
-extern int page_table_allocate_pgste;
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
{
@@ -98,7 +97,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
if (!table)
return NULL;
crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
- if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) {
+ if (!pagetable_pmd_ctor(mm, virt_to_ptdesc(table))) {
crst_table_free(mm, table);
return NULL;
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index a3b51056a177..c1a7a92f0575 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -14,6 +14,7 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
+#include <linux/cpufeature.h>
#include <linux/page-flags.h>
#include <linux/radix-tree.h>
#include <linux/atomic.h>
@@ -420,9 +421,10 @@ void setup_protection_map(void);
#define PGSTE_HC_BIT 0x0020000000000000UL
#define PGSTE_GR_BIT 0x0004000000000000UL
#define PGSTE_GC_BIT 0x0002000000000000UL
-#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
-#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
-#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */
+#define PGSTE_ST2_MASK 0x0000ffff00000000UL
+#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */
+#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */
/* Guest Page State used for virtualization */
#define _PGSTE_GPS_ZERO 0x0000000080000000UL
@@ -582,13 +584,14 @@ static inline int mm_is_protected(struct mm_struct *mm)
return 0;
}
-static inline int mm_alloc_pgste(struct mm_struct *mm)
+static inline pgste_t clear_pgste_bit(pgste_t pgste, unsigned long mask)
{
-#ifdef CONFIG_PGSTE
- if (unlikely(mm->context.alloc_pgste))
- return 1;
-#endif
- return 0;
+ return __pgste(pgste_val(pgste) & ~mask);
+}
+
+static inline pgste_t set_pgste_bit(pgste_t pgste, unsigned long mask)
+{
+ return __pgste(pgste_val(pgste) | mask);
}
static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
@@ -912,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
}
@@ -960,6 +963,12 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY));
}
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_soft_dirty(pmd) pmd_soft_dirty(pmd)
+#define pmd_swp_mksoft_dirty(pmd) pmd_mksoft_dirty(pmd)
+#define pmd_swp_clear_soft_dirty(pmd) pmd_clear_soft_dirty(pmd)
+#endif
+
/*
* query functions pte_write/pte_dirty/pte_young only work if
* pte_present() is true. Undefined behaviour if not..
@@ -1338,7 +1347,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
* PTE does not have _PAGE_PROTECT set, to avoid unnecessary overhead.
* A local RDP can be used to do the flush.
*/
- if (MACHINE_HAS_RDP && !(pte_val(*ptep) & _PAGE_PROTECT))
+ if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT))
__ptep_rdp(address, ptep, 0, 0, 1);
}
#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
@@ -1353,7 +1362,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
{
if (pte_same(*ptep, entry))
return 0;
- if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
+ if (cpu_has_rdp() && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
else
ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
@@ -1401,9 +1410,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
#define pgprot_writecombine pgprot_writecombine
pgprot_t pgprot_writecombine(pgprot_t prot);
-#define pgprot_writethrough pgprot_writethrough
-pgprot_t pgprot_writethrough(pgprot_t prot);
-
#define PFN_PTE_SHIFT PAGE_SHIFT
/*
@@ -1448,16 +1454,6 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
return pte_mkyoung(__pte);
}
-static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
-{
- unsigned long physpage = page_to_phys(page);
- pte_t __pte = mk_pte_phys(physpage, pgprot);
-
- if (pte_write(__pte) && PageDirty(page))
- __pte = pte_mkdirty(__pte);
- return __pte;
-}
-
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
@@ -1879,7 +1875,6 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
#define pmdp_collapse_flush pmdp_collapse_flush
#define pfn_pmd(pfn, pgprot) mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot))
-#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
static inline int pmd_trans_huge(pmd_t pmd)
{
@@ -1889,7 +1884,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
- return MACHINE_HAS_EDAT1 ? 1 : 0;
+ return cpu_has_edat1() ? 1 : 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -1990,6 +1985,45 @@ static inline unsigned long __swp_offset_rste(swp_entry_t entry)
#define __rste_to_swp_entry(rste) ((swp_entry_t) { rste })
+/*
+ * s390 has different layout for PTE and region / segment table entries (RSTE).
+ * This is also true for swap entries, and their swap type and offset encoding.
+ * For hugetlbfs PTE_MARKER support, s390 has internal __swp_type_rste() and
+ * __swp_offset_rste() helpers to correctly handle RSTE swap entries.
+ *
+ * But common swap code does not know about this difference, and only uses
+ * __swp_type(), __swp_offset() and __swp_entry() helpers for conversion between
+ * arch-dependent and arch-independent representation of swp_entry_t for all
+ * pagetable levels. On s390, those helpers only work for PTE swap entries.
+ *
+ * Therefore, implement __pmd_to_swp_entry() to build a fake PTE swap entry
+ * and return the arch-dependent representation of that. Correspondingly,
+ * implement __swp_entry_to_pmd() to convert that into a proper PMD swap
+ * entry again. With this, the arch-dependent swp_entry_t representation will
+ * always look like a PTE swap entry in common code.
+ *
+ * This is somewhat similar to fake PTEs in hugetlbfs code for s390, but only
+ * requires conversion of the swap type and offset, and not all the possible
+ * PTE bits.
+ */
+static inline swp_entry_t __pmd_to_swp_entry(pmd_t pmd)
+{
+ swp_entry_t arch_entry;
+ pte_t pte;
+
+ arch_entry = __rste_to_swp_entry(pmd_val(pmd));
+ pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry));
+ return __pte_to_swp_entry(pte);
+}
+
+static inline pmd_t __swp_entry_to_pmd(swp_entry_t arch_entry)
+{
+ pmd_t pmd;
+
+ pmd = __pmd(mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry)));
+ return pmd;
+}
+
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern void vmem_remove_mapping(unsigned long start, unsigned long size);
extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
@@ -2007,4 +2041,18 @@ extern void s390_reset_cmma(struct mm_struct *mm);
#define pmd_pgtable(pmd) \
((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
+static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
+{
+ unsigned long *pgstes, res;
+
+ pgstes = pgt + _PAGE_ENTRIES;
+
+ res = (pgstes[0] & PGSTE_ST2_MASK) << 16;
+ res |= pgstes[1] & PGSTE_ST2_MASK;
+ res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16;
+ res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32;
+
+ return res;
+}
+
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index 51b68a43e195..7ef3bbec98b0 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -26,7 +26,7 @@ enum reserved_range_type {
RR_AMODE31,
RR_IPLREPORT,
RR_CERT_COMP_LIST,
- RR_MEM_DETECT_EXTENDED,
+ RR_MEM_DETECT_EXT,
RR_VMEM,
RR_MAX
};
@@ -128,7 +128,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t)
RR_TYPE_NAME(AMODE31);
RR_TYPE_NAME(IPLREPORT);
RR_TYPE_NAME(CERT_COMP_LIST);
- RR_TYPE_NAME(MEM_DETECT_EXTENDED);
+ RR_TYPE_NAME(MEM_DETECT_EXT);
RR_TYPE_NAME(VMEM);
default:
return "UNKNOWN";
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index 5dca1a46a9f6..b7b59faf16f4 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -20,9 +20,22 @@
* @param key pointer to a buffer containing the key blob
* @param keylen size of the key blob in bytes
* @param protkey pointer to buffer receiving the protected key
+ * @param xflags additional execution flags (see PKEY_XFLAG_* definitions below)
+ * As of now the only supported flag is PKEY_XFLAG_NOMEMALLOC.
* @return 0 on success, negative errno value on failure
*/
int pkey_key2protkey(const u8 *key, u32 keylen,
- u8 *protkey, u32 *protkeylen, u32 *protkeytype);
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+ u32 xflags);
+
+/*
+ * If this flag is given in the xflags parameter, the pkey implementation
+ * is not allowed to allocate memory but instead should fall back to use
+ * preallocated memory or simple fail with -ENOMEM.
+ * This flag is for protected key derive within a cipher or similar
+ * which must not allocate memory which would cause io operations - see
+ * also the CRYPTO_ALG_ALLOCATES_MEMORY flag in crypto.h.
+ */
+#define PKEY_XFLAG_NOMEMALLOC 0x0001
#endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 4f8d5592c298..6a9c08b80eda 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -26,11 +26,12 @@
#define RESTART_FLAG_CTLREGS _AC(1 << 0, U)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cpumask.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
+#include <linux/bitops.h>
#include <asm/fpu-types.h>
#include <asm/cpu.h>
#include <asm/page.h>
@@ -62,33 +63,27 @@ static __always_inline struct pcpu *this_pcpu(void)
static __always_inline void set_cpu_flag(int flag)
{
- this_pcpu()->flags |= (1UL << flag);
+ set_bit(flag, &this_pcpu()->flags);
}
static __always_inline void clear_cpu_flag(int flag)
{
- this_pcpu()->flags &= ~(1UL << flag);
+ clear_bit(flag, &this_pcpu()->flags);
}
static __always_inline bool test_cpu_flag(int flag)
{
- return this_pcpu()->flags & (1UL << flag);
+ return test_bit(flag, &this_pcpu()->flags);
}
static __always_inline bool test_and_set_cpu_flag(int flag)
{
- if (test_cpu_flag(flag))
- return true;
- set_cpu_flag(flag);
- return false;
+ return test_and_set_bit(flag, &this_pcpu()->flags);
}
static __always_inline bool test_and_clear_cpu_flag(int flag)
{
- if (!test_cpu_flag(flag))
- return false;
- clear_cpu_flag(flag);
- return true;
+ return test_and_clear_bit(flag, &this_pcpu()->flags);
}
/*
@@ -97,7 +92,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag)
*/
static __always_inline bool test_cpu_flag_of(int flag, int cpu)
{
- return per_cpu(pcpu_devices, cpu).flags & (1UL << flag);
+ return test_bit(flag, &per_cpu(pcpu_devices, cpu).flags);
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
@@ -416,9 +411,13 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
static __always_inline void bpon(void)
{
- asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)));
+ asm_inline volatile(
+ ALTERNATIVE(" nop\n",
+ " .insn rrf,0xb2e80000,0,0,13,0\n",
+ ALT_SPEC(82))
+ );
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 788bc4467445..dfa770b15fad 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -9,16 +9,15 @@
#include <linux/bits.h>
#include <uapi/asm/ptrace.h>
+#include <asm/thread_info.h>
#include <asm/tpi.h>
#define PIF_SYSCALL 0 /* inside a system call */
-#define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */
#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */
#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */
#define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */
#define _PIF_SYSCALL BIT(PIF_SYSCALL)
-#define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART)
#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET)
#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT)
#define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS)
@@ -55,7 +54,7 @@
PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct psw_bits {
unsigned long : 1;
@@ -128,7 +127,6 @@ struct pt_regs {
struct tpi_info tpi_info;
};
unsigned long flags;
- unsigned long cr1;
unsigned long last_break;
};
@@ -231,8 +229,44 @@ static inline void instruction_pointer_set(struct pt_regs *regs,
int regs_query_register_offset(const char *name);
const char *regs_query_register_name(unsigned int offset);
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+ return regs->gprs[15];
+}
+
+static __always_inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+ if (offset >= NUM_GPRS)
+ return 0;
+ return regs->gprs[offset];
+}
+
+static __always_inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ unsigned long ksp = kernel_stack_pointer(regs);
+
+ return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:pt_regs which contains kernel stack pointer.
+ * @n:stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long addr;
+
+ addr = kernel_stack_pointer(regs) + n * sizeof(long);
+ if (!regs_within_kernel_stack(regs, addr))
+ return 0;
+ return READ_ONCE_NOCHECK(*(unsigned long *)addr);
+}
/**
* regs_get_kernel_argument() - get Nth function argument in kernel
@@ -253,15 +287,10 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
return regs_get_kernel_stack_nth(regs, argoffset + n);
}
-static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
-{
- return regs->gprs[15];
-}
-
static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
{
regs->gprs[2] = rc;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h
index e297bcfc476f..4c7a43bc43a1 100644
--- a/arch/s390/include/asm/purgatory.h
+++ b/arch/s390/include/asm/purgatory.h
@@ -7,11 +7,11 @@
#ifndef _S390_PURGATORY_H_
#define _S390_PURGATORY_H_
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/purgatory.h>
int verify_sha256_digest(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _S390_PURGATORY_H_ */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 4da3b2956285..0f184dbdbe5e 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -21,7 +21,7 @@
#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2
#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/uio.h>
#include <asm/chpid.h>
#include <asm/cpu.h>
@@ -168,10 +168,12 @@ int sclp_early_read_storage_info(void);
int sclp_early_get_core_info(struct sclp_core_info *info);
void sclp_early_get_ipl_info(struct sclp_ipl_info *info);
void sclp_early_detect(void);
+void sclp_early_detect_machine_features(void);
void sclp_early_printk(const char *s);
void __sclp_early_printk(const char *s, unsigned int len);
void sclp_emergency_printk(const char *s);
+int sclp_init(void);
int sclp_early_get_memsize(unsigned long *mem);
int sclp_early_get_hsa_size(unsigned long *hsa_size);
int _sclp_get_core_info(struct sclp_core_info *info);
@@ -197,5 +199,5 @@ static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
return _sclp_get_core_info(info);
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 70b920b32827..7c57ac968bf6 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -13,28 +13,6 @@
#define PARMAREA 0x10400
#define COMMAND_LINE_SIZE CONFIG_COMMAND_LINE_SIZE
-/*
- * Machine features detected in early.c
- */
-
-#define MACHINE_FLAG_VM BIT(0)
-#define MACHINE_FLAG_KVM BIT(1)
-#define MACHINE_FLAG_LPAR BIT(2)
-#define MACHINE_FLAG_DIAG9C BIT(3)
-#define MACHINE_FLAG_ESOP BIT(4)
-#define MACHINE_FLAG_IDTE BIT(5)
-#define MACHINE_FLAG_EDAT1 BIT(7)
-#define MACHINE_FLAG_EDAT2 BIT(8)
-#define MACHINE_FLAG_TOPOLOGY BIT(10)
-#define MACHINE_FLAG_TE BIT(11)
-#define MACHINE_FLAG_TLB_LC BIT(12)
-#define MACHINE_FLAG_TLB_GUEST BIT(14)
-#define MACHINE_FLAG_NX BIT(15)
-#define MACHINE_FLAG_GS BIT(16)
-#define MACHINE_FLAG_SCC BIT(17)
-#define MACHINE_FLAG_PCI_MIO BIT(18)
-#define MACHINE_FLAG_RDP BIT(19)
-#define MACHINE_FLAG_SEQ_INSN BIT(20)
#define LPP_MAGIC BIT(31)
#define LPP_PID_MASK _AC(0xffffffff, UL)
@@ -46,7 +24,7 @@
#define LEGACY_COMMAND_LINE_SIZE 896
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/lowcore.h>
#include <asm/types.h>
@@ -63,6 +41,8 @@ struct parmarea {
char command_line[COMMAND_LINE_SIZE]; /* 0x10480 */
};
+extern char arch_hw_string[128];
+
extern struct parmarea parmarea;
extern unsigned int zlib_dfltcc_support;
@@ -78,26 +58,6 @@ extern unsigned long max_mappable;
/* The Write Back bit position in the physaddr is given by the SLPC PCI */
extern unsigned long mio_wb_bit_mask;
-#define MACHINE_IS_VM (get_lowcore()->machine_flags & MACHINE_FLAG_VM)
-#define MACHINE_IS_KVM (get_lowcore()->machine_flags & MACHINE_FLAG_KVM)
-#define MACHINE_IS_LPAR (get_lowcore()->machine_flags & MACHINE_FLAG_LPAR)
-
-#define MACHINE_HAS_DIAG9C (get_lowcore()->machine_flags & MACHINE_FLAG_DIAG9C)
-#define MACHINE_HAS_ESOP (get_lowcore()->machine_flags & MACHINE_FLAG_ESOP)
-#define MACHINE_HAS_IDTE (get_lowcore()->machine_flags & MACHINE_FLAG_IDTE)
-#define MACHINE_HAS_EDAT1 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT1)
-#define MACHINE_HAS_EDAT2 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT2)
-#define MACHINE_HAS_TOPOLOGY (get_lowcore()->machine_flags & MACHINE_FLAG_TOPOLOGY)
-#define MACHINE_HAS_TE (get_lowcore()->machine_flags & MACHINE_FLAG_TE)
-#define MACHINE_HAS_TLB_LC (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_LC)
-#define MACHINE_HAS_TLB_GUEST (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_GUEST)
-#define MACHINE_HAS_NX (get_lowcore()->machine_flags & MACHINE_FLAG_NX)
-#define MACHINE_HAS_GS (get_lowcore()->machine_flags & MACHINE_FLAG_GS)
-#define MACHINE_HAS_SCC (get_lowcore()->machine_flags & MACHINE_FLAG_SCC)
-#define MACHINE_HAS_PCI_MIO (get_lowcore()->machine_flags & MACHINE_FLAG_PCI_MIO)
-#define MACHINE_HAS_RDP (get_lowcore()->machine_flags & MACHINE_FLAG_RDP)
-#define MACHINE_HAS_SEQ_INSN (get_lowcore()->machine_flags & MACHINE_FLAG_SEQ_INSN)
-
/*
* Console mode. Override with conmode=
*/
@@ -142,5 +102,5 @@ static __always_inline u32 gen_lpswe(unsigned long addr)
BUILD_BUG_ON(addr > 0xfff);
return 0xb2b20000 | addr;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_SETUP_H */
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 472943b77066..97d77868f83c 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -36,7 +36,7 @@
#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL
#define SIGP_STATUS_NOT_RUNNING 0x00000400UL
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/asm.h>
@@ -68,6 +68,6 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
return cc;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/skey.h b/arch/s390/include/asm/skey.h
new file mode 100644
index 000000000000..84e7cf28b712
--- /dev/null
+++ b/arch/s390/include/asm/skey.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SKEY_H
+#define __ASM_SKEY_H
+
+#include <asm/rwonce.h>
+
+struct skey_region {
+ unsigned long start;
+ unsigned long end;
+};
+
+#define SKEY_REGION(_start, _end) \
+ stringify_in_c(.section .skey_region,"a";) \
+ stringify_in_c(.balign 8;) \
+ stringify_in_c(.quad (_start);) \
+ stringify_in_c(.quad (_end);) \
+ stringify_in_c(.previous)
+
+extern int skey_regions_initialized;
+extern struct skey_region __skey_region_start[];
+extern struct skey_region __skey_region_end[];
+
+void __skey_regions_initialize(void);
+
+static inline void skey_regions_initialize(void)
+{
+ if (READ_ONCE(skey_regions_initialized))
+ return;
+ __skey_regions_initialize();
+}
+
+#endif /* __ASM_SKEY_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 7feca96c48c6..03f4d01664f8 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -7,11 +7,29 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
-#include <asm/sigp.h>
-#include <asm/lowcore.h>
#include <asm/processor.h>
+#include <asm/lowcore.h>
+#include <asm/machine.h>
+#include <asm/sigp.h>
+
+static __always_inline unsigned int raw_smp_processor_id(void)
+{
+ unsigned long lc_cpu_nr;
+ unsigned int cpu;
+
+ BUILD_BUG_ON(sizeof_field(struct lowcore, cpu_nr) != sizeof(cpu));
+ lc_cpu_nr = offsetof(struct lowcore, cpu_nr);
+ asm_inline(
+ ALTERNATIVE(" ly %[cpu],%[offzero](%%r0)\n",
+ " ly %[cpu],%[offalt](%%r0)\n",
+ ALT_FEATURE(MFEATURE_LOWCORE))
+ : [cpu] "=d" (cpu)
+ : [offzero] "i" (lc_cpu_nr),
+ [offalt] "i" (lc_cpu_nr + LOWCORE_ALT_ADDRESS),
+ "m" (((struct lowcore *)0)->cpu_nr));
+ return cpu;
+}
-#define raw_smp_processor_id() (get_lowcore()->cpu_nr)
#define arch_scale_cpu_capacity smp_cpu_get_capacity
extern struct mutex smp_cpu_state_mutex;
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index f87dd0a84855..f9935db9fd76 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -16,7 +16,23 @@
#include <asm/processor.h>
#include <asm/alternative.h>
-#define SPINLOCK_LOCKVAL (get_lowcore()->spinlock_lockval)
+static __always_inline unsigned int spinlock_lockval(void)
+{
+ unsigned long lc_lockval;
+ unsigned int lockval;
+
+ BUILD_BUG_ON(sizeof_field(struct lowcore, spinlock_lockval) != sizeof(lockval));
+ lc_lockval = offsetof(struct lowcore, spinlock_lockval);
+ asm_inline(
+ ALTERNATIVE(" ly %[lockval],%[offzero](%%r0)\n",
+ " ly %[lockval],%[offalt](%%r0)\n",
+ ALT_FEATURE(MFEATURE_LOWCORE))
+ : [lockval] "=d" (lockval)
+ : [offzero] "i" (lc_lockval),
+ [offalt] "i" (lc_lockval + LOWCORE_ALT_ADDRESS),
+ "m" (((struct lowcore *)0)->spinlock_lockval));
+ return lockval;
+}
extern int spin_retry;
@@ -60,7 +76,7 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
int old = 0;
barrier();
- return likely(arch_try_cmpxchg(&lp->lock, &old, SPINLOCK_LOCKVAL));
+ return likely(arch_try_cmpxchg(&lp->lock, &old, spinlock_lockval()));
}
static inline void arch_spin_lock(arch_spinlock_t *lp)
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 2ab868cbae6c..f8f68f4ef255 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -26,11 +26,9 @@ void *memmove(void *dest, const void *src, size_t n);
#define __HAVE_ARCH_MEMSCAN /* inline & arch function */
#define __HAVE_ARCH_STRCAT /* inline & arch function */
#define __HAVE_ARCH_STRCMP /* arch function */
-#define __HAVE_ARCH_STRCPY /* inline & arch function */
#define __HAVE_ARCH_STRLCAT /* arch function */
#define __HAVE_ARCH_STRLEN /* inline & arch function */
#define __HAVE_ARCH_STRNCAT /* arch function */
-#define __HAVE_ARCH_STRNCPY /* arch function */
#define __HAVE_ARCH_STRNLEN /* inline & arch function */
#define __HAVE_ARCH_STRSTR /* arch function */
#define __HAVE_ARCH_MEMSET16 /* arch function */
@@ -42,7 +40,6 @@ int memcmp(const void *s1, const void *s2, size_t n);
int strcmp(const char *s1, const char *s2);
size_t strlcat(char *dest, const char *src, size_t n);
char *strncat(char *dest, const char *src, size_t n);
-char *strncpy(char *dest, const char *src, size_t n);
char *strstr(const char *s1, const char *s2);
#endif /* !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN) */
@@ -155,22 +152,6 @@ static inline char *strcat(char *dst, const char *src)
}
#endif
-#ifdef __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dst, const char *src)
-{
- char *ret = dst;
-
- asm volatile(
- " lghi 0,0\n"
- "0: mvst %[dst],%[src]\n"
- " jo 0b"
- : [dst] "+&a" (dst), [src] "+&a" (src)
- :
- : "cc", "memory", "0");
- return ret;
-}
-#endif
-
#if defined(__HAVE_ARCH_STRLEN) || (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__))
static inline size_t __no_sanitize_prefix_strfunc(strlen)(const char *s)
{
@@ -208,7 +189,6 @@ static inline size_t strnlen(const char * s, size_t n)
void *memchr(const void * s, int c, size_t n);
void *memscan(void *s, int c, size_t n);
char *strcat(char *dst, const char *src);
-char *strcpy(char *dst, const char *src);
size_t strlen(const char *s);
size_t strnlen(const char * s, size_t n);
#endif /* !IN_ARCH_STRING_C */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 27e3d804b311..bd4cb00ccd5e 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -24,6 +24,18 @@ static inline long syscall_get_nr(struct task_struct *task,
(regs->int_code & 0xffff) : -1;
}
+static inline void syscall_set_nr(struct task_struct *task,
+ struct pt_regs *regs,
+ int nr)
+{
+ /*
+ * Unlike syscall_get_nr(), syscall_set_nr() can be called only when
+ * the target task is stopped for tracing on entering syscall, so
+ * there is no need to have the same check syscall_get_nr() has.
+ */
+ regs->int_code = (regs->int_code & ~0xffff) | (nr & 0xffff);
+}
+
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
@@ -65,19 +77,26 @@ static inline void syscall_get_arguments(struct task_struct *task,
unsigned long *args)
{
unsigned long mask = -1UL;
- unsigned int n = 6;
#ifdef CONFIG_COMPAT
if (test_tsk_thread_flag(task, TIF_31BIT))
mask = 0xffffffff;
#endif
- while (n-- > 0)
- if (n > 0)
- args[n] = regs->gprs[2 + n] & mask;
+ for (int i = 1; i < 6; i++)
+ args[i] = regs->gprs[2 + i] & mask;
args[0] = regs->orig_gpr2 & mask;
}
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ const unsigned long *args)
+{
+ regs->orig_gpr2 = args[0];
+ for (int n = 1; n < 6; n++)
+ regs->gprs[2 + n] = args[n];
+}
+
static inline int syscall_get_arch(struct task_struct *task)
{
#ifdef CONFIG_COMPAT
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index edca5a751df4..9088c5267f35 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -11,8 +11,34 @@
#ifndef __ASM_S390_SYSINFO_H
#define __ASM_S390_SYSINFO_H
-#include <asm/bitsperlong.h>
#include <linux/uuid.h>
+#include <asm/bitsperlong.h>
+#include <asm/asm.h>
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+ int r0 = (fc << 28) | sel1;
+ int cc;
+
+ asm volatile(
+ " lr %%r0,%[r0]\n"
+ " lr %%r1,%[r1]\n"
+ " stsi %[sysinfo]\n"
+ " lr %[r0],%%r0\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [r0] "+d" (r0), [sysinfo] "=Q" (*(char *)sysinfo)
+ : [r1] "d" (sel2)
+ : CC_CLOBBER_LIST("0", "1", "memory"));
+ if (cc == 3)
+ return -EOPNOTSUPP;
+ return fc ? 0 : (unsigned int)r0 >> 28;
+}
struct sysinfo_1_1_1 {
unsigned char p:1;
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index c33f7144d1b9..f6ed2c8192c8 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -9,9 +9,7 @@
#define _ASM_THREAD_INFO_H
#include <linux/bits.h>
-#ifndef ASM_OFFSETS_C
-#include <asm/asm-offsets.h>
-#endif
+#include <vdso/page.h>
/*
* General size of kernel stacks
@@ -26,9 +24,7 @@
#define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
-#ifndef __ASSEMBLY__
-#include <asm/lowcore.h>
-#include <asm/page.h>
+#ifndef __ASSEMBLER__
/*
* low level task data that entry.S needs immediate access to
@@ -67,7 +63,7 @@ void arch_setup_new_exec(void);
#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */
#define TIF_UPROBE 4 /* breakpointed or single-stepping */
#define TIF_PATCH_PENDING 5 /* pending live patching update */
-#define TIF_PGSTE 6 /* New mm's will use 4K page tables */
+#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */
#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
#define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */
#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
@@ -89,7 +85,7 @@ void arch_setup_new_exec(void);
#define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY)
#define _TIF_UPROBE BIT(TIF_UPROBE)
#define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING)
-#define _TIF_PGSTE BIT(TIF_PGSTE)
+#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY)
#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL)
#define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE)
#define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST)
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index a9460bd6555b..59dfb8780f62 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -13,6 +13,7 @@
#include <linux/preempt.h>
#include <linux/time64.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
#include <asm/asm.h>
/* The value of the TOD clock for 1.1.1970. */
@@ -195,13 +196,6 @@ static inline unsigned long get_tod_clock_fast(void)
asm volatile("stckf %0" : "=Q" (clk) : : "cc");
return clk;
}
-
-static inline cycles_t get_cycles(void)
-{
- return (cycles_t) get_tod_clock() >> 2;
-}
-#define get_cycles get_cycles
-
int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);
@@ -229,6 +223,12 @@ static inline unsigned long get_tod_clock_monotonic(void)
return tod;
}
+static inline cycles_t get_cycles(void)
+{
+ return (cycles_t)get_tod_clock_monotonic() >> 2;
+}
+#define get_cycles get_cycles
+
/**
* tod_to_ns - convert a TOD format value to nanoseconds
* @todval: to be converted TOD format value
@@ -267,7 +267,7 @@ static __always_inline u128 eitod_to_ns(u128 todval)
*/
static inline int tod_after(unsigned long a, unsigned long b)
{
- if (MACHINE_HAS_SCC)
+ if (machine_has_scc())
return (long) a > (long) b;
return a > b;
}
@@ -281,7 +281,7 @@ static inline int tod_after(unsigned long a, unsigned long b)
*/
static inline int tod_after_eq(unsigned long a, unsigned long b)
{
- if (MACHINE_HAS_SCC)
+ if (machine_has_scc())
return (long) a >= (long) b;
return a >= b;
}
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 72655fd2d867..1e50f6f1ad9d 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -36,11 +36,12 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
#include <asm/tlbflush.h>
#include <asm-generic/tlb.h>
+#include <asm/gmap.h>
/*
* Release the page cache reference for a pte removed by
* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
- * has already been freed, so just do free_page_and_swap_cache.
+ * has already been freed, so just do free_folio_and_swap_cache.
*
* s390 doesn't delay rmap removal.
*/
@@ -49,7 +50,7 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
{
VM_WARN_ON_ONCE(delay_rmap);
- free_page_and_swap_cache(page);
+ free_folio_and_swap_cache(page_folio(page));
return false;
}
@@ -84,7 +85,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_pmds = 1;
- if (mm_alloc_pgste(tlb->mm))
+ if (mm_has_pgste(tlb->mm))
gmap_unlink(tlb->mm, (unsigned long *)pte, address);
tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte));
}
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 9dfd46dd03c6..75491baa2197 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -2,9 +2,11 @@
#ifndef _S390_TLBFLUSH_H
#define _S390_TLBFLUSH_H
+#include <linux/cpufeature.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/processor.h>
+#include <asm/machine.h>
/*
* Flush all TLB entries on the local CPU.
@@ -22,7 +24,7 @@ static inline void __tlb_flush_idte(unsigned long asce)
unsigned long opt;
opt = IDTE_PTOA;
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
opt |= IDTE_GUEST_ASCE;
/* Global TLB flush for the mm */
asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc");
@@ -52,7 +54,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm)
cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
barrier();
gmap_asce = READ_ONCE(mm->context.gmap_asce);
- if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
+ if (cpu_has_idte() && gmap_asce != -1UL) {
if (gmap_asce)
__tlb_flush_idte(gmap_asce);
__tlb_flush_idte(mm->context.asce);
@@ -66,7 +68,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm)
static inline void __tlb_flush_kernel(void)
{
- if (MACHINE_HAS_IDTE)
+ if (cpu_has_idte())
__tlb_flush_idte(init_mm.context.asce);
else
__tlb_flush_global();
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index cef06bffad80..44110847342a 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -61,6 +61,12 @@ static inline void topology_expect_change(void) { }
#endif /* CONFIG_SCHED_TOPOLOGY */
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+ return smp_get_base_cpu(cpu) == cpu;
+}
+#define topology_is_primary_thread topology_is_primary_thread
+
#define POLARIZATION_UNKNOWN (-1)
#define POLARIZATION_HRZ (0)
#define POLARIZATION_VL (1)
diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h
index f76e5fdff23a..71c8b6f76cdd 100644
--- a/arch/s390/include/asm/tpi.h
+++ b/arch/s390/include/asm/tpi.h
@@ -5,7 +5,7 @@
#include <linux/types.h>
#include <uapi/asm/schid.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* I/O-Interruption Code as stored by TEST PENDING INTERRUPTION (TPI). */
struct tpi_info {
@@ -32,6 +32,6 @@ struct tpi_adapter_info {
u32 :27;
} __packed __aligned(4);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_TPI_H */
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
index 0b5d550a0478..53695b2196f7 100644
--- a/arch/s390/include/asm/types.h
+++ b/arch/s390/include/asm/types.h
@@ -5,7 +5,7 @@
#include <uapi/asm/types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
union register_pair {
unsigned __int128 pair;
@@ -15,5 +15,5 @@ union register_pair {
};
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_S390_TYPES_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index a81f897a81ce..3e5b8b677057 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -13,25 +13,81 @@
/*
* User space memory access functions
*/
+#include <linux/pgtable.h>
#include <asm/asm-extable.h>
#include <asm/processor.h>
#include <asm/extable.h>
#include <asm/facility.h>
#include <asm-generic/access_ok.h>
+#include <asm/asce.h>
#include <linux/instrumented.h>
void debug_user_asce(int exit);
-unsigned long __must_check
-raw_copy_from_user(void *to, const void __user *from, unsigned long n);
-
-unsigned long __must_check
-raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+#ifdef CONFIG_KMSAN
+#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory
+#else
+#define uaccess_kmsan_or_inline __always_inline
+#endif
-#ifndef CONFIG_KASAN
#define INLINE_COPY_FROM_USER
#define INLINE_COPY_TO_USER
-#endif
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long size)
+{
+ unsigned long osize;
+ int cc;
+
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " lhi %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_FROM(0b, 0b)
+ EX_TABLE_UA_MVCOS_FROM(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to)
+ : [spec] "I" (0x81), [from] "Q" (*(const char __user *)from)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (__builtin_constant_p(osize) && osize <= 4096)
+ return osize - size;
+ if (likely(CC_TRANSFORM(cc) == 0))
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ from += 4096;
+ }
+}
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long size)
+{
+ unsigned long osize;
+ int cc;
+
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " llilh %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_TO(0b, 0b)
+ EX_TABLE_UA_MVCOS_TO(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+ : [spec] "I" (0x81), [from] "Q" (*(const char *)from)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (__builtin_constant_p(osize) && osize <= 4096)
+ return osize - size;
+ if (likely(CC_TRANSFORM(cc) == 0))
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ from += 4096;
+ }
+}
unsigned long __must_check
_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key);
@@ -55,63 +111,65 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo
return n;
}
-union oac {
- unsigned int val;
- struct {
- struct {
- unsigned short key : 4;
- unsigned short : 4;
- unsigned short as : 2;
- unsigned short : 4;
- unsigned short k : 1;
- unsigned short a : 1;
- } oac1;
- struct {
- unsigned short key : 4;
- unsigned short : 4;
- unsigned short as : 2;
- unsigned short : 4;
- unsigned short k : 1;
- unsigned short a : 1;
- } oac2;
- };
-};
-
int __noreturn __put_user_bad(void);
-#ifdef CONFIG_KMSAN
-#define get_put_user_noinstr_attributes \
- noinline __maybe_unused __no_sanitize_memory
-#else
-#define get_put_user_noinstr_attributes __always_inline
-#endif
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
-#define DEFINE_PUT_USER(type) \
-static get_put_user_noinstr_attributes int \
+#define DEFINE_PUT_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__put_user_##type##_noinstr(unsigned type __user *to, \
+ unsigned type *from, \
+ unsigned long size) \
+{ \
+ asm goto( \
+ " llilh %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[Efault]) \
+ EX_TABLE(1b, %l[Efault]) \
+ : [to] "+Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
+ : "cc", "0" \
+ : Efault \
+ ); \
+ return 0; \
+Efault: \
+ return -EFAULT; \
+}
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+#define DEFINE_PUT_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
__put_user_##type##_noinstr(unsigned type __user *to, \
unsigned type *from, \
unsigned long size) \
{ \
- union oac __oac_spec = { \
- .oac1.as = PSW_BITS_AS_SECONDARY, \
- .oac1.a = 1, \
- }; \
int rc; \
\
- asm volatile( \
- " lr 0,%[spec]\n" \
- "0: mvcos %[_to],%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
+ asm_inline volatile( \
+ " llilh %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: lhi %[rc],0\n" \
"2:\n" \
- EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
- EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
- : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [spec] "d" (__oac_spec.val) \
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \
+ : [rc] "=d" (rc), [to] "+Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
: "cc", "0"); \
return rc; \
-} \
- \
+}
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+DEFINE_PUT_USER_NOINSTR(char);
+DEFINE_PUT_USER_NOINSTR(short);
+DEFINE_PUT_USER_NOINSTR(int);
+DEFINE_PUT_USER_NOINSTR(long);
+
+#define DEFINE_PUT_USER(type) \
static __always_inline int \
__put_user_##type(unsigned type __user *to, unsigned type *from, \
unsigned long size) \
@@ -128,69 +186,111 @@ DEFINE_PUT_USER(short);
DEFINE_PUT_USER(int);
DEFINE_PUT_USER(long);
-static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
-{
- int rc;
+#define __put_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) __x = (x); \
+ int __prc; \
+ \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __prc = __put_user_char((unsigned char __user *)(ptr), \
+ (unsigned char *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ case 2: \
+ __prc = __put_user_short((unsigned short __user *)(ptr),\
+ (unsigned short *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ case 4: \
+ __prc = __put_user_int((unsigned int __user *)(ptr), \
+ (unsigned int *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ case 8: \
+ __prc = __put_user_long((unsigned long __user *)(ptr), \
+ (unsigned long *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ default: \
+ __prc = __put_user_bad(); \
+ break; \
+ } \
+ __builtin_expect(__prc, 0); \
+})
- switch (size) {
- case 1:
- rc = __put_user_char((unsigned char __user *)ptr,
- (unsigned char *)x,
- size);
- break;
- case 2:
- rc = __put_user_short((unsigned short __user *)ptr,
- (unsigned short *)x,
- size);
- break;
- case 4:
- rc = __put_user_int((unsigned int __user *)ptr,
- (unsigned int *)x,
- size);
- break;
- case 8:
- rc = __put_user_long((unsigned long __user *)ptr,
- (unsigned long *)x,
- size);
- break;
- default:
- __put_user_bad();
- break;
- }
- return rc;
-}
+#define put_user(x, ptr) \
+({ \
+ might_fault(); \
+ __put_user(x, ptr); \
+})
int __noreturn __get_user_bad(void);
-#define DEFINE_GET_USER(type) \
-static get_put_user_noinstr_attributes int \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define DEFINE_GET_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
__get_user_##type##_noinstr(unsigned type *to, \
- unsigned type __user *from, \
+ const unsigned type __user *from, \
+ unsigned long size) \
+{ \
+ asm goto( \
+ " lhi %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[Efault]) \
+ EX_TABLE(1b, %l[Efault]) \
+ : [to] "=Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
+ : "cc", "0" \
+ : Efault \
+ ); \
+ return 0; \
+Efault: \
+ *to = 0; \
+ return -EFAULT; \
+}
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+#define DEFINE_GET_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__get_user_##type##_noinstr(unsigned type *to, \
+ const unsigned type __user *from, \
unsigned long size) \
{ \
- union oac __oac_spec = { \
- .oac2.as = PSW_BITS_AS_SECONDARY, \
- .oac2.a = 1, \
- }; \
int rc; \
\
- asm volatile( \
- " lr 0,%[spec]\n" \
- "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
+ asm_inline volatile( \
+ " lhi %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: lhi %[rc],0\n" \
"2:\n" \
- EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \
- EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \
- : [rc] "=&d" (rc), "=Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [spec] "d" (__oac_spec.val), [_to] "a" (to), \
- [_ksize] "K" (size) \
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \
+ : [rc] "=d" (rc), [to] "=Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
: "cc", "0"); \
+ if (likely(!rc)) \
+ return 0; \
+ *to = 0; \
return rc; \
-} \
- \
+}
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+DEFINE_GET_USER_NOINSTR(char);
+DEFINE_GET_USER_NOINSTR(short);
+DEFINE_GET_USER_NOINSTR(int);
+DEFINE_GET_USER_NOINSTR(long);
+
+#define DEFINE_GET_USER(type) \
static __always_inline int \
-__get_user_##type(unsigned type *to, unsigned type __user *from, \
+__get_user_##type(unsigned type *to, const unsigned type __user *from, \
unsigned long size) \
{ \
int rc; \
@@ -205,107 +305,50 @@ DEFINE_GET_USER(short);
DEFINE_GET_USER(int);
DEFINE_GET_USER(long);
-static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
-{
- int rc;
-
- switch (size) {
- case 1:
- rc = __get_user_char((unsigned char *)x,
- (unsigned char __user *)ptr,
- size);
- break;
- case 2:
- rc = __get_user_short((unsigned short *)x,
- (unsigned short __user *)ptr,
- size);
- break;
- case 4:
- rc = __get_user_int((unsigned int *)x,
- (unsigned int __user *)ptr,
- size);
- break;
- case 8:
- rc = __get_user_long((unsigned long *)x,
- (unsigned long __user *)ptr,
- size);
- break;
- default:
- __get_user_bad();
- break;
- }
- return rc;
-}
-
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- */
-#define __put_user(x, ptr) \
-({ \
- __typeof__(*(ptr)) __x = (x); \
- int __pu_err = -EFAULT; \
- \
- __chk_user_ptr(ptr); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- case 2: \
- case 4: \
- case 8: \
- __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \
- break; \
- default: \
- __put_user_bad(); \
- break; \
- } \
- __builtin_expect(__pu_err, 0); \
-})
-
-#define put_user(x, ptr) \
-({ \
- might_fault(); \
- __put_user(x, ptr); \
-})
-
#define __get_user(x, ptr) \
({ \
- int __gu_err = -EFAULT; \
+ const __user void *____guptr = (ptr); \
+ int __grc; \
\
__chk_user_ptr(ptr); \
switch (sizeof(*(ptr))) { \
case 1: { \
+ const unsigned char __user *__guptr = ____guptr; \
unsigned char __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_char(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 2: { \
+ const unsigned short __user *__guptr = ____guptr; \
unsigned short __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 4: { \
+ const unsigned int __user *__guptr = ____guptr; \
unsigned int __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_int(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 8: { \
+ const unsigned long __user *__guptr = ____guptr; \
unsigned long __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_long(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
default: \
- __get_user_bad(); \
+ __grc = __get_user_bad(); \
break; \
} \
- __builtin_expect(__gu_err, 0); \
+ __builtin_expect(__grc, 0); \
})
#define get_user(x, ptr) \
@@ -321,12 +364,34 @@ long __must_check strncpy_from_user(char *dst, const char __user *src, long coun
long __must_check strnlen_user(const char __user *src, long count);
-/*
- * Zero Userspace
- */
-unsigned long __must_check __clear_user(void __user *to, unsigned long size);
+static uaccess_kmsan_or_inline __must_check unsigned long
+__clear_user(void __user *to, unsigned long size)
+{
+ unsigned long osize;
+ int cc;
+
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " llilh %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_TO(0b, 0b)
+ EX_TABLE_UA_MVCOS_TO(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+ : [spec] "I" (0x81), [from] "Q" (*(const char *)empty_zero_page)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (__builtin_constant_p(osize) && osize <= 4096)
+ return osize - size;
+ if (CC_TRANSFORM(cc) == 0)
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ }
+}
-static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+static __always_inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
{
might_fault();
return __clear_user(to, n);
@@ -341,282 +406,97 @@ static inline void *s390_kernel_write(void *dst, const void *src, size_t size)
return __s390_kernel_write(dst, src, size);
}
-int __noreturn __put_kernel_bad(void);
+void __noreturn __mvc_kernel_nofault_bad(void);
-#define __put_kernel_asm(val, to, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- "0: " insn " %[_val],%[_to]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
- EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
- : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \
- : [_val] "d" (val) \
- : "cc"); \
- __rc; \
-})
+#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS)
-#define __put_kernel_nofault(dst, src, type, err_label) \
+#define __mvc_kernel_nofault(dst, src, type, err_label) \
do { \
- unsigned long __x = (unsigned long)(*((type *)(src))); \
- int __pk_err; \
- \
switch (sizeof(type)) { \
case 1: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \
- break; \
case 2: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \
- break; \
case 4: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \
- break; \
case 8: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \
+ asm goto( \
+ "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[err_label]) \
+ EX_TABLE(1b, %l[err_label]) \
+ : [_dst] "=Q" (*(type *)dst) \
+ : [_src] "Q" (*(type *)(src)), \
+ [_len] "I" (sizeof(type)) \
+ : \
+ : err_label); \
break; \
default: \
- __pk_err = __put_kernel_bad(); \
+ __mvc_kernel_nofault_bad(); \
break; \
} \
- if (unlikely(__pk_err)) \
- goto err_label; \
} while (0)
-int __noreturn __get_kernel_bad(void);
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#define __get_kernel_asm(val, from, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- "0: " insn " %[_val],%[_from]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \
- EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \
- : [rc] "=d" (__rc), [_val] "=d" (val) \
- : [_from] "Q" (*(from)) \
- : "cc"); \
- __rc; \
-})
-
-#define __get_kernel_nofault(dst, src, type, err_label) \
+#define __mvc_kernel_nofault(dst, src, type, err_label) \
do { \
- int __gk_err; \
+ type *(__dst) = (type *)(dst); \
+ int __rc; \
\
switch (sizeof(type)) { \
- case 1: { \
- unsigned char __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 2: { \
- unsigned short __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 4: { \
- unsigned int __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 8: { \
- unsigned long __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \
- *((type *)(dst)) = (type)__x; \
+ case 1: \
+ case 2: \
+ case 4: \
+ case 8: \
+ asm_inline volatile( \
+ "0: mvc 0(%[_len],%[_dst]),%[_src]\n" \
+ "1: lhi %[_rc],0\n" \
+ "2:\n" \
+ EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \
+ : [_rc] "=d" (__rc), \
+ "=m" (*__dst) \
+ : [_src] "Q" (*(type *)(src)), \
+ [_dst] "a" (__dst), \
+ [_len] "I" (sizeof(type))); \
+ if (__rc) \
+ goto err_label; \
break; \
- }; \
default: \
- __gk_err = __get_kernel_bad(); \
+ __mvc_kernel_nofault_bad(); \
break; \
} \
- if (unlikely(__gk_err)) \
- goto err_label; \
} while (0)
-void __cmpxchg_user_key_called_with_bad_pointer(void);
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+#define __get_kernel_nofault __mvc_kernel_nofault
+#define __put_kernel_nofault __mvc_kernel_nofault
-static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
- __uint128_t old, __uint128_t new,
- unsigned long key, int size)
-{
- int rc = 0;
+void __cmpxchg_user_key_called_with_bad_pointer(void);
+int __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
+ unsigned char old, unsigned char new, unsigned long key);
+int __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
+ unsigned short old, unsigned short new, unsigned long key);
+int __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
+ unsigned int old, unsigned int new, unsigned long key);
+int __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
+ unsigned long old, unsigned long new, unsigned long key);
+int __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
+ __uint128_t old, __uint128_t new, unsigned long key);
+
+static __always_inline int _cmpxchg_user_key(unsigned long address, void *uval,
+ __uint128_t old, __uint128_t new,
+ unsigned long key, int size)
+{
switch (size) {
- case 1: {
- unsigned int prev, shift, mask, _old, _new;
- unsigned long count;
-
- shift = (3 ^ (address & 3)) << 3;
- address ^= address & 3;
- _old = ((unsigned int)old & 0xff) << shift;
- _new = ((unsigned int)new & 0xff) << shift;
- mask = ~(0xff << shift);
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- " llill %[count],%[max_loops]\n"
- "0: l %[prev],%[address]\n"
- "1: nr %[prev],%[mask]\n"
- " xilf %[mask],0xffffffff\n"
- " or %[new],%[prev]\n"
- " or %[prev],%[tmp]\n"
- "2: lr %[tmp],%[prev]\n"
- "3: cs %[prev],%[new],%[address]\n"
- "4: jnl 5f\n"
- " xr %[tmp],%[prev]\n"
- " xr %[new],%[tmp]\n"
- " nr %[tmp],%[mask]\n"
- " jnz 5f\n"
- " brct %[count],2b\n"
- "5: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "=&d" (prev),
- [address] "+Q" (*(int *)address),
- [tmp] "+&d" (_old),
- [new] "+&d" (_new),
- [mask] "+&d" (mask),
- [count] "=a" (count)
- : [key] "%[count]" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY),
- [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
- : "memory", "cc");
- *(unsigned char *)uval = prev >> shift;
- if (!count)
- rc = -EAGAIN;
- return rc;
- }
- case 2: {
- unsigned int prev, shift, mask, _old, _new;
- unsigned long count;
-
- shift = (2 ^ (address & 2)) << 3;
- address ^= address & 2;
- _old = ((unsigned int)old & 0xffff) << shift;
- _new = ((unsigned int)new & 0xffff) << shift;
- mask = ~(0xffff << shift);
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- " llill %[count],%[max_loops]\n"
- "0: l %[prev],%[address]\n"
- "1: nr %[prev],%[mask]\n"
- " xilf %[mask],0xffffffff\n"
- " or %[new],%[prev]\n"
- " or %[prev],%[tmp]\n"
- "2: lr %[tmp],%[prev]\n"
- "3: cs %[prev],%[new],%[address]\n"
- "4: jnl 5f\n"
- " xr %[tmp],%[prev]\n"
- " xr %[new],%[tmp]\n"
- " nr %[tmp],%[mask]\n"
- " jnz 5f\n"
- " brct %[count],2b\n"
- "5: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "=&d" (prev),
- [address] "+Q" (*(int *)address),
- [tmp] "+&d" (_old),
- [new] "+&d" (_new),
- [mask] "+&d" (mask),
- [count] "=a" (count)
- : [key] "%[count]" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY),
- [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
- : "memory", "cc");
- *(unsigned short *)uval = prev >> shift;
- if (!count)
- rc = -EAGAIN;
- return rc;
- }
- case 4: {
- unsigned int prev = old;
-
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- "0: cs %[prev],%[new],%[address]\n"
- "1: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "+&d" (prev),
- [address] "+Q" (*(int *)address)
- : [new] "d" ((unsigned int)new),
- [key] "a" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY)
- : "memory", "cc");
- *(unsigned int *)uval = prev;
- return rc;
- }
- case 8: {
- unsigned long prev = old;
-
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- "0: csg %[prev],%[new],%[address]\n"
- "1: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "+&d" (prev),
- [address] "+QS" (*(long *)address)
- : [new] "d" ((unsigned long)new),
- [key] "a" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY)
- : "memory", "cc");
- *(unsigned long *)uval = prev;
- return rc;
- }
- case 16: {
- __uint128_t prev = old;
-
- asm volatile(
- " spka 0(%[key])\n"
- " sacf 256\n"
- "0: cdsg %[prev],%[new],%[address]\n"
- "1: sacf 768\n"
- " spka %[default_key]\n"
- EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
- EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
- : [rc] "+&d" (rc),
- [prev] "+&d" (prev),
- [address] "+QS" (*(__int128_t *)address)
- : [new] "d" (new),
- [key] "a" (key << 4),
- [default_key] "J" (PAGE_DEFAULT_KEY)
- : "memory", "cc");
- *(__uint128_t *)uval = prev;
- return rc;
- }
+ case 1: return __cmpxchg_user_key1(address, uval, old, new, key);
+ case 2: return __cmpxchg_user_key2(address, uval, old, new, key);
+ case 4: return __cmpxchg_user_key4(address, uval, old, new, key);
+ case 8: return __cmpxchg_user_key8(address, uval, old, new, key);
+ case 16: return __cmpxchg_user_key16(address, uval, old, new, key);
+ default: __cmpxchg_user_key_called_with_bad_pointer();
}
- __cmpxchg_user_key_called_with_bad_pointer();
- return rc;
+ return 0;
}
/**
@@ -648,8 +528,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \
might_fault(); \
__chk_user_ptr(__ptr); \
- __cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
- (old), (new), (key), sizeof(*(__ptr))); \
+ _cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
+ (old), (new), (key), sizeof(*(__ptr))); \
})
#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index dc332609f2c3..8018549a1ad2 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -16,7 +16,6 @@
#include <linux/bug.h>
#include <linux/sched.h>
#include <asm/page.h>
-#include <asm/gmap.h>
#include <asm/asm.h>
#define UVC_CC_OK 0
@@ -616,8 +615,9 @@ static inline int uv_remove_shared(unsigned long addr)
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
-int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN],
- struct uv_secret_list_item_hdr *secret);
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret);
int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size);
extern int prot_virt_host;
@@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void)
}
int uv_pin_shared(unsigned long paddr);
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_folio(struct folio *folio);
int uv_destroy_pte(pte_t pte);
int uv_convert_from_secure_pte(pte_t pte);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb);
+int uv_convert_from_secure(unsigned long paddr);
+int uv_convert_from_secure_folio(struct folio *folio);
void setup_uv(void);
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 92c73e4d97a9..8e2fffa0ca68 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -4,15 +4,13 @@
#include <vdso/datapage.h>
-#ifndef __ASSEMBLY__
-
-extern struct vdso_data *vdso_data;
+#ifndef __ASSEMBLER__
int vdso_getcpu_init(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
-#define __VVAR_PAGES 2
+#define __VDSO_PAGES 4
#define VDSO_VERSION_STRING LINUX_2.6.29
diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h
index 36355af7160b..6741a27199f8 100644
--- a/arch/s390/include/asm/vdso/getrandom.h
+++ b/arch/s390/include/asm/vdso/getrandom.h
@@ -3,7 +3,7 @@
#ifndef __ASM_VDSO_GETRANDOM_H
#define __ASM_VDSO_GETRANDOM_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <vdso/datapage.h>
#include <asm/vdso/vsyscall.h>
@@ -23,18 +23,6 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig
return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags);
}
-static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
-{
- /*
- * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace
- * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_
- * PAGE_OFFSET points to the real VVAR page.
- */
- if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS)
- return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
- return &_vdso_rng_data;
-}
-
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
index 7937765ccfa5..c31ac5f61c83 100644
--- a/arch/s390/include/asm/vdso/gettimeofday.h
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -14,20 +14,9 @@
#include <linux/compiler.h>
-static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd)
{
- return _vdso_data;
-}
-
-static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd)
-{
- u64 adj, now;
-
- now = get_tod_clock();
- adj = vd->arch_data.tod_steering_end - now;
- if (unlikely((s64) adj > 0))
- now += (vd->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
- return now;
+ return get_tod_clock() - vd->arch_data.tod_delta;
}
static __always_inline
@@ -49,12 +38,4 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
return syscall2(__NR_clock_getres, (long)clkid, (long)ts);
}
-#ifdef CONFIG_TIME_NS
-static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
-{
- return _timens_data;
-}
-#endif
-
#endif
diff --git a/arch/s390/include/asm/vdso/time_data.h b/arch/s390/include/asm/vdso/time_data.h
index 8a08752422e6..25c4e0d9f596 100644
--- a/arch/s390/include/asm/vdso/time_data.h
+++ b/arch/s390/include/asm/vdso/time_data.h
@@ -5,8 +5,7 @@
#include <linux/types.h>
struct arch_vdso_time_data {
- __s64 tod_steering_delta;
- __u64 tod_steering_end;
+ __s64 tod_delta;
};
#endif /* __S390_ASM_VDSO_TIME_DATA_H */
diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h
index 3eb576ecd3bd..b00acec8ddbc 100644
--- a/arch/s390/include/asm/vdso/vsyscall.h
+++ b/arch/s390/include/asm/vdso/vsyscall.h
@@ -2,35 +2,15 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
-#define __VDSO_RND_DATA_OFFSET 768
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/hrtimer.h>
#include <vdso/datapage.h>
#include <asm/vdso.h>
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES
-};
-
-static __always_inline struct vdso_data *__s390_get_k_vdso_data(void)
-{
- return vdso_data;
-}
-#define __arch_get_k_vdso_data __s390_get_k_vdso_data
-
-static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void)
-{
- return (void *)vdso_data + __VDSO_RND_DATA_OFFSET;
-}
-#define __arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data
-
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h
index 203acd6e431b..eaa19dee7699 100644
--- a/arch/s390/include/asm/word-at-a-time.h
+++ b/arch/s390/include/asm/word-at-a-time.h
@@ -52,7 +52,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long data;
- asm volatile(
+ asm_inline volatile(
"0: lg %[data],0(%[addr])\n"
"1: nopr %%r7\n"
EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr])
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index bb0826024bb9..ea202072f1ad 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -242,7 +242,8 @@
#define PTRACE_OLDSETOPTIONS 21
#define PTRACE_SYSEMU 31
#define PTRACE_SYSEMU_SINGLESTEP 32
-#ifndef __ASSEMBLY__
+
+#ifndef __ASSEMBLER__
#include <linux/stddef.h>
#include <linux/types.h>
@@ -450,6 +451,6 @@ struct user_regs_struct {
unsigned long ieee_instruction_pointer; /* obsolete, always 0 */
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_S390_PTRACE_H */
diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h
index a3e1cf168553..d804d1a5b1b3 100644
--- a/arch/s390/include/uapi/asm/schid.h
+++ b/arch/s390/include/uapi/asm/schid.h
@@ -4,7 +4,7 @@
#include <linux/types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct subchannel_id {
__u32 cssid : 8;
@@ -15,6 +15,6 @@ struct subchannel_id {
__u32 sch_no : 16;
} __attribute__ ((packed, aligned(4)));
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPIASM_SCHID_H */
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
index 84457dbb26b4..4ab468c5032e 100644
--- a/arch/s390/include/uapi/asm/types.h
+++ b/arch/s390/include/uapi/asm/types.h
@@ -10,7 +10,7 @@
#include <asm-generic/int-ll64.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
typedef unsigned long addr_t;
typedef __signed__ long saddr_t;
@@ -25,6 +25,6 @@ typedef struct {
};
} __attribute__((packed, aligned(4))) __vector128;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_S390_TYPES_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index db5f3a3faefb..eb06ff888314 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -41,12 +41,12 @@ obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
-obj-y += entry.o reipl.o kdebugfs.o alternative.o
+obj-y += entry.o reipl.o kdebugfs.o alternative.o skey.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
obj-y += diag/
-extra-y += vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
index 88f0b91d7a73..6252b7d115dd 100644
--- a/arch/s390/kernel/abs_lowcore.c
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -5,7 +5,6 @@
#include <asm/sections.h>
unsigned long __bootdata_preserved(__abs_lowcore);
-int __bootdata_preserved(relocate_lowcore);
int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
{
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 8d5d0de35de0..90c0e6408992 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,41 +1,90 @@
// SPDX-License-Identifier: GPL-2.0
+#ifndef pr_fmt
+#define pr_fmt(fmt) "alt: " fmt
+#endif
+
#include <linux/uaccess.h>
+#include <linux/printk.h>
#include <asm/nospec-branch.h>
#include <asm/abs_lowcore.h>
#include <asm/alternative.h>
#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/machine.h>
+
+#ifndef a_debug
+#define a_debug pr_debug
+#endif
+
+#ifndef __kernel_va
+#define __kernel_va(x) (void *)(x)
+#endif
+
+unsigned long __bootdata_preserved(machine_features[1]);
+
+struct alt_debug {
+ unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG];
+ unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG];
+ int spec;
+};
+
+static struct alt_debug __bootdata_preserved(alt_debug);
+
+static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data)
+{
+ char oinsn[33], ninsn[33];
+ unsigned long kptr;
+ unsigned int pos;
+
+ for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++)
+ hex_byte_pack(&oinsn[2 * pos], old[pos]);
+ oinsn[2 * pos] = 0;
+ for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++)
+ hex_byte_pack(&ninsn[2 * pos], new[pos]);
+ ninsn[2 * pos] = 0;
+ kptr = (unsigned long)__kernel_va(old);
+ a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn);
+}
void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx)
{
- u8 *instr, *replacement;
+ struct alt_debug *d;
struct alt_instr *a;
- bool replace;
+ bool debug, replace;
+ u8 *old, *new;
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
*/
+ d = &alt_debug;
for (a = start; a < end; a++) {
if (!(a->ctx & ctx))
continue;
switch (a->type) {
case ALT_TYPE_FACILITY:
replace = test_facility(a->data);
+ debug = __test_facility(a->data, d->facilities);
+ break;
+ case ALT_TYPE_FEATURE:
+ replace = test_machine_feature(a->data);
+ debug = __test_machine_feature(a->data, d->mfeatures);
break;
case ALT_TYPE_SPEC:
replace = nobp_enabled();
- break;
- case ALT_TYPE_LOWCORE:
- replace = have_relocated_lowcore();
+ debug = d->spec;
break;
default:
replace = false;
+ debug = false;
}
if (!replace)
continue;
- instr = (u8 *)&a->instr_offset + a->instr_offset;
- replacement = (u8 *)&a->repl_offset + a->repl_offset;
- s390_kernel_write(instr, replacement, a->instrlen);
+ old = (u8 *)&a->instr_offset + a->instr_offset;
+ new = (u8 *)&a->repl_offset + a->repl_offset;
+ if (debug)
+ alternative_dump(old, new, a->instrlen, a->type, a->data);
+ s390_kernel_write(old, new, a->instrlen);
}
}
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 36709112ae7a..95ecad9c7d7d 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -5,15 +5,14 @@
* and format the required data.
*/
-#define ASM_OFFSETS_C
-
#include <linux/kbuild.h>
-#include <linux/kvm_host.h>
#include <linux/sched.h>
#include <linux/purgatory.h>
#include <linux/pgtable.h>
-#include <linux/ftrace.h>
+#include <linux/ftrace_regs.h>
+#include <asm/kvm_host_types.h>
#include <asm/stacktrace.h>
+#include <asm/ptrace.h>
int main(void)
{
@@ -49,8 +48,8 @@ int main(void)
OFFSET(__PT_R14, pt_regs, gprs[14]);
OFFSET(__PT_R15, pt_regs, gprs[15]);
OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+ OFFSET(__PT_INT_CODE, pt_regs, int_code);
OFFSET(__PT_FLAGS, pt_regs, flags);
- OFFSET(__PT_CR1, pt_regs, cr1);
OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
@@ -76,7 +75,8 @@ int main(void)
OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr);
OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code);
OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc);
- OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code);
+ OFFSET(__LC_PGM_CODE, lowcore, pgm_code);
+ OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code);
OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code);
OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num);
OFFSET(__LC_PER_CODE, lowcore, per_code);
@@ -122,7 +122,6 @@ int main(void)
OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
- OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
OFFSET(__LC_CURRENT, lowcore, current_task);
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
index bf983513dd33..c217a5e64094 100644
--- a/arch/s390/kernel/cert_store.c
+++ b/arch/s390/kernel/cert_store.c
@@ -138,7 +138,7 @@ static void cert_store_key_describe(const struct key *key, struct seq_file *m)
* First 64 bytes of the key description is key name in EBCDIC CP 500.
* Convert it to ASCII for displaying in /proc/keys.
*/
- strscpy(ascii, key->description, sizeof(ascii));
+ strscpy(ascii, key->description);
EBCASC_500(ascii, VC_NAME_LEN_BYTES);
seq_puts(m, ascii);
@@ -235,7 +235,7 @@ static int __diag320(unsigned long subcode, void *addr)
{
union register_pair rp = { .even = (unsigned long)addr, };
- asm volatile(
+ asm_inline volatile(
" diag %[rp],%[subcode],0x320\n"
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
index 4b9b34f95d72..3bebc47beeab 100644
--- a/arch/s390/kernel/cpacf.c
+++ b/arch/s390/kernel/cpacf.c
@@ -101,7 +101,7 @@ static const struct bin_attribute *const cpacf_attrs[] = {
static const struct attribute_group cpacf_attr_grp = {
.name = "cpacf",
- .bin_attrs_new = cpacf_attrs,
+ .bin_attrs = cpacf_attrs,
};
static int __init cpacf_init(void)
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
index 1b2ae42a0c15..c9eef9ed876b 100644
--- a/arch/s390/kernel/cpufeature.c
+++ b/arch/s390/kernel/cpufeature.c
@@ -4,12 +4,15 @@
*/
#include <linux/cpufeature.h>
+#include <linux/export.h>
#include <linux/bug.h>
+#include <asm/machine.h>
#include <asm/elf.h>
enum {
TYPE_HWCAP,
TYPE_FACILITY,
+ TYPE_MACHINE,
};
struct s390_cpu_feature {
@@ -21,6 +24,7 @@ static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
[S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
[S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
[S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158},
+ [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288},
};
/*
@@ -38,6 +42,8 @@ int cpu_have_feature(unsigned int num)
return !!(elf_hwcap & BIT(feature->num));
case TYPE_FACILITY:
return test_facility(feature->num);
+ case TYPE_MACHINE:
+ return test_machine_feature(feature->num);
default:
WARN_ON_ONCE(1);
return 0;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 276cb4c1e11b..d4839de8ce9d 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -7,6 +7,7 @@
*/
#include <linux/crash_dump.h>
+#include <linux/export.h>
#include <asm/lowcore.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -246,15 +247,6 @@ bool is_kdump_kernel(void)
}
EXPORT_SYMBOL_GPL(is_kdump_kernel);
-static const char *nt_name(Elf64_Word type)
-{
- const char *name = "LINUX";
-
- if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
- name = KEXEC_CORE_NOTE_NAME;
- return name;
-}
-
/*
* Initialize ELF note
*/
@@ -279,10 +271,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len,
return PTR_ADD(buf, len);
}
-static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
-{
- return nt_init_name(buf, type, desc, d_len, nt_name(type));
-}
+#define nt_init(buf, type, desc) \
+ nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type)
/*
* Calculate the size of ELF note
@@ -298,10 +288,7 @@ static size_t nt_size_name(int d_len, const char *name)
return size;
}
-static inline size_t nt_size(Elf64_Word type, int d_len)
-{
- return nt_size_name(d_len, nt_name(type));
-}
+#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type)
/*
* Fill ELF notes for one CPU with save area registers
@@ -322,18 +309,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc));
memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs));
/* Create ELF notes for the CPU */
- ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus));
- ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset));
- ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer));
- ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp));
- ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg));
- ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs));
- ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix));
+ ptr = nt_init(ptr, PRSTATUS, nt_prstatus);
+ ptr = nt_init(ptr, PRFPREG, nt_fpregset);
+ ptr = nt_init(ptr, S390_TIMER, sa->timer);
+ ptr = nt_init(ptr, S390_TODCMP, sa->todcmp);
+ ptr = nt_init(ptr, S390_TODPREG, sa->todpreg);
+ ptr = nt_init(ptr, S390_CTRS, sa->ctrs);
+ ptr = nt_init(ptr, S390_PREFIX, sa->prefix);
if (cpu_has_vx()) {
- ptr = nt_init(ptr, NT_S390_VXRS_HIGH,
- &sa->vxrs_high, sizeof(sa->vxrs_high));
- ptr = nt_init(ptr, NT_S390_VXRS_LOW,
- &sa->vxrs_low, sizeof(sa->vxrs_low));
+ ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high);
+ ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low);
}
return ptr;
}
@@ -346,16 +331,16 @@ static size_t get_cpu_elf_notes_size(void)
struct save_area *sa = NULL;
size_t size;
- size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus));
- size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t));
- size += nt_size(NT_S390_TIMER, sizeof(sa->timer));
- size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp));
- size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg));
- size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs));
- size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix));
+ size = nt_size(PRSTATUS, struct elf_prstatus);
+ size += nt_size(PRFPREG, elf_fpregset_t);
+ size += nt_size(S390_TIMER, sa->timer);
+ size += nt_size(S390_TODCMP, sa->todcmp);
+ size += nt_size(S390_TODPREG, sa->todpreg);
+ size += nt_size(S390_CTRS, sa->ctrs);
+ size += nt_size(S390_PREFIX, sa->prefix);
if (cpu_has_vx()) {
- size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high));
- size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low));
+ size += nt_size(S390_VXRS_HIGH, sa->vxrs_high);
+ size += nt_size(S390_VXRS_LOW, sa->vxrs_low);
}
return size;
@@ -370,8 +355,8 @@ static void *nt_prpsinfo(void *ptr)
memset(&prpsinfo, 0, sizeof(prpsinfo));
prpsinfo.pr_sname = 'R';
- strcpy(prpsinfo.pr_fname, "vmlinux");
- return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo));
+ strscpy(prpsinfo.pr_fname, "vmlinux");
+ return nt_init(ptr, PRPSINFO, prpsinfo);
}
/*
@@ -610,7 +595,7 @@ static size_t get_elfcorehdr_size(int phdr_count)
/* PT_NOTES */
size += sizeof(Elf64_Phdr);
/* nt_prpsinfo */
- size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo));
+ size += nt_size(PRPSINFO, struct elf_prpsinfo);
/* regsets */
size += get_cpu_cnt() * get_cpu_elf_notes_size();
/* nt_vmcoreinfo */
diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c
index 8cc26cf2c64a..a0501f4c7e7a 100644
--- a/arch/s390/kernel/ctlreg.c
+++ b/arch/s390/kernel/ctlreg.c
@@ -5,6 +5,7 @@
#include <linux/irqflags.h>
#include <linux/spinlock.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/smp.h>
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index ce038e9205f7..c62100dc62c8 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -251,7 +251,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
rc->level = level;
rc->buf_size = buf_size;
rc->entry_size = sizeof(debug_entry_t) + buf_size;
- strscpy(rc->name, name, sizeof(rc->name));
+ strscpy(rc->name, name);
memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
refcount_set(&(rc->ref_count), 0);
@@ -1677,7 +1677,7 @@ EXPORT_SYMBOL(debug_dflt_header_fn);
/*
* prints debug data sprintf-formatted:
- * debug_sprinf_event/exception calls must be used together with this view
+ * debug_sprintf_event/exception calls must be used together with this view
*/
#define DEBUG_SPRINTF_MAX_ARGS 10
diff --git a/arch/s390/kernel/diag/diag.c b/arch/s390/kernel/diag/diag.c
index e15b8dee3228..56b862ba9be8 100644
--- a/arch/s390/kernel/diag/diag.c
+++ b/arch/s390/kernel/diag/diag.c
@@ -195,7 +195,7 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad
{
union register_pair rp = { .even = *subcode, .odd = size };
- asm volatile(
+ asm_inline volatile(
" diag %[addr],%[rp],0x204\n"
"0: nopr %%r7\n"
EX_TABLE(0b,0b)
@@ -286,7 +286,7 @@ int diag224(void *ptr)
int rc = -EOPNOTSUPP;
diag_stat_inc(DIAG_STAT_X224);
- asm volatile("\n"
+ asm_inline volatile("\n"
" diag %[type],%[addr],0x224\n"
"0: lhi %[rc],0\n"
"1:\n"
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 94eb8168ea44..63a1d4226ff8 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -17,7 +17,6 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/reboot.h>
#include <linux/kprobes.h>
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 1ecd0580561f..dd410962ecbe 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -17,6 +17,7 @@
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
+#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/debug.h>
#include <asm/dis.h>
@@ -198,13 +199,8 @@ void __noreturn die(struct pt_regs *regs, const char *str)
console_verbose();
spin_lock_irq(&die_lock);
bust_spinlocks(1);
- printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+ printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff,
regs->int_code >> 17, ++die_counter);
-#ifdef CONFIG_PREEMPT
- pr_cont("PREEMPT ");
-#elif defined(CONFIG_PREEMPT_RT)
- pr_cont("PREEMPT_RT ");
-#endif
pr_cont("SMP ");
if (debug_pagealloc_enabled())
pr_cont("DEBUG_PAGEALLOC");
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 62f8f5a750a3..9adfbdd377dc 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/sched/debug.h>
+#include <linux/cpufeature.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/errno.h>
@@ -21,6 +22,8 @@
#include <asm/asm-extable.h>
#include <linux/memblock.h>
#include <asm/access-regs.h>
+#include <asm/asm-offsets.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/fpu.h>
@@ -36,12 +39,14 @@
#include <asm/boot_data.h>
#include "entry.h"
-#define decompressor_handled_param(param) \
-static int __init ignore_decompressor_param_##param(char *s) \
+#define __decompressor_handled_param(func, param) \
+static int __init ignore_decompressor_param_##func(char *s) \
{ \
return 0; \
} \
-early_param(#param, ignore_decompressor_param_##param)
+early_param(#param, ignore_decompressor_param_##func)
+
+#define decompressor_handled_param(param) __decompressor_handled_param(param, param)
decompressor_handled_param(mem);
decompressor_handled_param(vmalloc);
@@ -50,6 +55,8 @@ decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
decompressor_handled_param(cmma);
decompressor_handled_param(relocate_lowcore);
+decompressor_handled_param(bootdebug);
+__decompressor_handled_param(debug_alternative, debug-alternative);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
@@ -58,25 +65,10 @@ static void __init kasan_early_init(void)
{
#ifdef CONFIG_KASAN
init_task.kasan_depth = 0;
- sclp_early_printk("KernelAddressSanitizer initialized\n");
+ pr_info("KernelAddressSanitizer initialized\n");
#endif
}
-static void __init reset_tod_clock(void)
-{
- union tod_clock clk;
-
- if (store_tod_clock_ext_cc(&clk) == 0)
- return;
- /* TOD clock not running. Set the clock to Unix Epoch. */
- if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk))
- disabled_wait();
-
- memset(&tod_clock_base, 0, sizeof(tod_clock_base));
- tod_clock_base.tod = TOD_UNIX_EPOCH;
- get_lowcore()->last_update_clock = TOD_UNIX_EPOCH;
-}
-
/*
* Initialize storage key for kernel pages
*/
@@ -95,26 +87,6 @@ static noinline __init void init_kernel_storage_key(void)
static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
-static noinline __init void detect_machine_type(void)
-{
- struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
-
- /* Check current-configuration-level */
- if (stsi(NULL, 0, 0, 0) <= 2) {
- get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR;
- return;
- }
- /* Get virtual-machine cpu information. */
- if (stsi(vmms, 3, 2, 2) || !vmms->count)
- return;
-
- /* Detect known hypervisors */
- if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
- get_lowcore()->machine_flags |= MACHINE_FLAG_KVM;
- else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
- get_lowcore()->machine_flags |= MACHINE_FLAG_VM;
-}
-
/* Remove leading, trailing and double whitespace. */
static inline void strim_all(char *str)
{
@@ -133,6 +105,8 @@ static inline void strim_all(char *str)
}
}
+char arch_hw_string[128];
+
static noinline __init void setup_arch_string(void)
{
struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
@@ -155,10 +129,11 @@ static noinline __init void setup_arch_string(void)
strim_all(hvstr);
} else {
sprintf(hvstr, "%s",
- MACHINE_IS_LPAR ? "LPAR" :
- MACHINE_IS_VM ? "z/VM" :
- MACHINE_IS_KVM ? "KVM" : "unknown");
+ machine_is_lpar() ? "LPAR" :
+ machine_is_vm() ? "z/VM" :
+ machine_is_kvm() ? "KVM" : "unknown");
}
+ sprintf(arch_hw_string, "HW: %s (%s)", mstr, hvstr);
dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
}
@@ -166,9 +141,8 @@ static __init void setup_topology(void)
{
int max_mnest;
- if (!test_facility(11))
+ if (!cpu_has_topology())
return;
- get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY;
for (max_mnest = 6; max_mnest > 1; max_mnest--) {
if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
break;
@@ -183,6 +157,7 @@ void __init __do_early_pgm_check(struct pt_regs *regs)
regs->int_code = lc->pgm_int_code;
regs->int_parm_long = lc->trans_exc_code;
+ regs->last_break = lc->pgm_last_break;
ip = __rewind_psw(regs->psw, regs->int_code >> 16);
/* Monitor Event? Might be a warning */
@@ -217,65 +192,10 @@ static noinline __init void setup_lowcore_early(void)
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
}
-static __init void detect_diag9c(void)
-{
- unsigned int cpu_address;
- int rc;
-
- cpu_address = stap();
- diag_stat_inc(DIAG_STAT_X09C);
- asm volatile(
- " diag %2,0,0x9c\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
- if (!rc)
- get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C;
-}
-
-static __init void detect_machine_facilities(void)
-{
- if (test_facility(8)) {
- get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1;
- system_ctl_set_bit(0, CR0_EDAT_BIT);
- }
- if (test_facility(78))
- get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2;
- if (test_facility(3))
- get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE;
- if (test_facility(50) && test_facility(73)) {
- get_lowcore()->machine_flags |= MACHINE_FLAG_TE;
- system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT);
- }
- if (test_facility(51))
- get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC;
- if (test_facility(129))
- system_ctl_set_bit(0, CR0_VECTOR_BIT);
- if (test_facility(130))
- get_lowcore()->machine_flags |= MACHINE_FLAG_NX;
- if (test_facility(133))
- get_lowcore()->machine_flags |= MACHINE_FLAG_GS;
- if (test_facility(139) && (tod_clock_base.tod >> 63)) {
- /* Enabled signed clock comparator comparisons */
- get_lowcore()->machine_flags |= MACHINE_FLAG_SCC;
- clock_comparator_max = -1ULL >> 1;
- system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT);
- }
- if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) {
- get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO;
- /* the control bit is set during PCI initialization */
- }
- if (test_facility(194))
- get_lowcore()->machine_flags |= MACHINE_FLAG_RDP;
- if (test_facility(85))
- get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN;
-}
-
static inline void save_vector_registers(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (test_facility(129))
+ if (cpu_has_vx())
save_vx_regs(boot_cpu_vector_save_area);
#endif
}
@@ -307,17 +227,13 @@ static void __init sort_amode31_extable(void)
void __init startup_init(void)
{
kasan_early_init();
- reset_tod_clock();
time_early_init();
init_kernel_storage_key();
lockdep_off();
sort_amode31_extable();
setup_lowcore_early();
- detect_machine_type();
setup_arch_string();
setup_boot_command_line();
- detect_diag9c();
- detect_machine_facilities();
save_vector_registers();
setup_topology();
sclp_early_detect();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 4cc3408c4dac..75b0fbb236d0 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -29,6 +29,7 @@
#include <asm/nmi.h>
#include <asm/nospec-insn.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
_LPP_OFFSET = __LC_LPP
@@ -44,7 +45,7 @@ _LPP_OFFSET = __LC_LPP
ALTERNATIVE_2 "b \lpswe;nopr", \
".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \
__stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
- ALT_LOWCORE
+ ALT_FEATURE(MFEATURE_LOWCORE)
.endm
.macro MBEAR reg, lowcore
@@ -67,7 +68,7 @@ _LPP_OFFSET = __LC_LPP
clg %r14,__LC_RESTART_STACK(\lowcore)
je \oklabel
la %r14,\savearea(\lowcore)
- j stack_overflow
+ j stack_invalid
.endm
/*
@@ -115,7 +116,7 @@ _LPP_OFFSET = __LC_LPP
.macro SIEEXIT sie_control,lowcore
lg %r9,\sie_control # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce
+ lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce
lg %r9,__LC_CURRENT(\lowcore)
mvi __TI_sie(%r9),0
larl %r9,sie_exit # skip forward to sie_exit
@@ -123,7 +124,7 @@ _LPP_OFFSET = __LC_LPP
#endif
.macro STACKLEAK_ERASE
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#ifdef CONFIG_KSTACK_ERASE
brasl %r14,stackleak_erase_on_task_stack
#endif
.endm
@@ -207,7 +208,7 @@ SYM_FUNC_START(__sie64a)
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
GET_LC %r14
- lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce
+ lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce
lg %r14,__LC_CURRENT(%r14)
mvi __TI_sie(%r14),0
SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
@@ -239,7 +240,6 @@ SYM_CODE_START(system_call)
lghi %r14,0
.Lsysc_per:
STBEAR __LC_LAST_BREAK(%r13)
- lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
lg %r15,__LC_KERNEL_STACK(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
@@ -260,7 +260,6 @@ SYM_CODE_START(system_call)
lgr %r3,%r14
brasl %r14,__do_syscall
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE(%r13)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
@@ -277,7 +276,6 @@ SYM_CODE_START(ret_from_fork)
brasl %r14,__ret_from_fork
STACKLEAK_ERASE
GET_LC %r13
- lctlg %c1,%c1,__LC_USER_ASCE(%r13)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
@@ -298,10 +296,7 @@ SYM_CODE_START(pgm_check_handler)
lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
xgr %r10,%r10
tmhh %r8,0x0001 # coming from user space?
- jno .Lpgm_skip_asce
- lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
- j 3f # -> fault in user space
-.Lpgm_skip_asce:
+ jo 3f # -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
lg %r11,__LC_CURRENT(%r13)
tm __TI_sie(%r11),0xff
@@ -315,7 +310,7 @@ SYM_CODE_START(pgm_check_handler)
tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- # CHECK_VMAP_STACK branches to stack_overflow or 4f
+ # CHECK_VMAP_STACK branches to stack_invalid or 4f
CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
3: lg %r15,__LC_KERNEL_STACK(%r13)
4: la %r11,STACK_FRAME_OVERHEAD(%r15)
@@ -339,7 +334,6 @@ SYM_CODE_START(pgm_check_handler)
tmhh %r8,0x0001 # returning to user space?
jno .Lpgm_exit_kernel
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
stpt __LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
@@ -383,8 +377,7 @@ SYM_CODE_START(\name)
#endif
0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
-1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
- lg %r15,__LC_KERNEL_STACK(%r13)
+1: lg %r15,__LC_KERNEL_STACK(%r13)
2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -407,7 +400,6 @@ SYM_CODE_START(\name)
tmhh %r8,0x0001 # returning to user ?
jno 2f
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
stpt __LC_EXIT_TIMER(%r13)
2: LBEAR __PT_LAST_BREAK(%r11)
@@ -467,7 +459,7 @@ SYM_CODE_START(mcck_int_handler)
clgrjl %r9,%r14, 4f
larl %r14,.Lsie_leave
clgrjhe %r9,%r14, 4f
- lg %r10,__LC_PCPU
+ lg %r10,__LC_PCPU(%r13)
oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
SIEEXIT __SF_SIE_CONTROL(%r15),%r13
@@ -475,8 +467,6 @@ SYM_CODE_START(mcck_int_handler)
.Lmcck_user:
lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
- stctg %c1,%c1,__PT_CR1(%r11)
- lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
mvc __PT_R0(128,%r11),0(%r14)
@@ -494,7 +484,6 @@ SYM_CODE_START(mcck_int_handler)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,s390_do_machine_check
- lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
@@ -590,22 +579,23 @@ SYM_CODE_END(early_pgm_check_handler)
.section .kprobes.text, "ax"
/*
- * The synchronous or the asynchronous stack overflowed. We are dead.
+ * The synchronous or the asynchronous stack pointer is invalid. We are dead.
* No need to properly save the registers, we are going to panic anyway.
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
-SYM_CODE_START(stack_overflow)
+SYM_CODE_START(stack_invalid)
GET_LC %r15
lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_R8(64,%r11),0(%r14)
- stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+ GET_LC %r2
+ mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
- jg kernel_stack_overflow
-SYM_CODE_END(stack_overflow)
+ jg kernel_stack_invalid
+SYM_CODE_END(stack_invalid)
.section .data, "aw"
.balign 4
@@ -621,7 +611,7 @@ SYM_DATA_END(daton_psw)
.balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
SYM_DATA_START(sys_call_table)
-#include "asm/syscall_table.h"
+#include <asm/syscall_table.h>
SYM_DATA_END(sys_call_table)
#undef SYSCALL
@@ -629,7 +619,7 @@ SYM_DATA_END(sys_call_table)
#define SYSCALL(esame,emu) .quad __s390_ ## emu
SYM_DATA_START(sys_call_table_emu)
-#include "asm/syscall_table.h"
+#include <asm/syscall_table.h>
SYM_DATA_END(sys_call_table_emu)
#undef SYSCALL
#endif
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index a1f28879c87e..dd55cc6bbc28 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -31,7 +31,7 @@ void do_secure_storage_access(struct pt_regs *regs);
void do_non_secure_storage_access(struct pt_regs *regs);
void do_secure_storage_violation(struct pt_regs *regs);
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
-void kernel_stack_overflow(struct pt_regs * regs);
+void kernel_stack_invalid(struct pt_regs *regs);
void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
struct pt_regs *regs);
diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c
index f02127219a27..d028b0be5c1d 100644
--- a/arch/s390/kernel/facility.c
+++ b/arch/s390/kernel/facility.c
@@ -3,6 +3,7 @@
* Copyright IBM Corp. 2023
*/
+#include <linux/export.h>
#include <asm/facility.h>
unsigned int stfle_size(void)
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 6f2e87920288..03a8973aec3c 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -5,6 +5,8 @@
* Copyright IBM Corp. 2015
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
+
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/sched.h>
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c0b2c97efefb..e94bb98f5231 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/kmsan-checks.h>
+#include <linux/cpufeature.h>
#include <linux/kprobes.h>
#include <linux/execmem.h>
#include <trace/syscall.h>
@@ -69,7 +70,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
bool ftrace_need_init_nop(void)
{
- return !MACHINE_HAS_SEQ_INSN;
+ return !cpu_has_seq_insn();
}
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
@@ -189,7 +190,7 @@ static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec,
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
- if (MACHINE_HAS_SEQ_INSN)
+ if (cpu_has_seq_insn())
return ftrace_patch_branch_insn(rec->ip, old_addr, addr);
else
return ftrace_modify_trampoline_call(rec, old_addr, addr);
@@ -213,8 +214,8 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- /* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */
- if (MACHINE_HAS_SEQ_INSN)
+ /* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */
+ if (cpu_has_seq_insn())
return ftrace_patch_branch_insn(rec->ip, addr, 0);
else
return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
@@ -234,7 +235,7 @@ static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long add
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- if (MACHINE_HAS_SEQ_INSN)
+ if (cpu_has_seq_insn())
return ftrace_patch_branch_insn(rec->ip, 0, addr);
else
return ftrace_make_trampoline_call(rec, addr);
@@ -266,18 +267,14 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
- int bit;
+ unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15];
if (unlikely(ftrace_graph_is_dead()))
return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
- bit = ftrace_test_recursion_trylock(ip, *parent);
- if (bit < 0)
- return;
- if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs))
+ if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs))
*parent = (unsigned long)&return_to_handler;
- ftrace_test_recursion_unlock(bit);
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
index 0b68168d9566..cf26d7a37425 100644
--- a/arch/s390/kernel/guarded_storage.c
+++ b/arch/s390/kernel/guarded_storage.c
@@ -4,6 +4,7 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
+#include <linux/cpufeature.h>
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
@@ -109,7 +110,7 @@ static int gs_broadcast(void)
SYSCALL_DEFINE2(s390_guarded_storage, int, command,
struct gs_cb __user *, gs_cb)
{
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -EOPNOTSUPP;
switch (command) {
case GS_ENABLE:
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 396034b2fe67..7edb9ded199c 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -18,12 +18,10 @@
__HEAD
SYM_CODE_START(startup_continue)
- larl %r1,tod_clock_base
- GET_LC %r2
- mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2)
#
# Setup stack
#
+ GET_LC %r2
larl %r14,init_task
stg %r14,__LC_CURRENT(%r2)
larl %r15,init_thread_union+STACK_INIT_OFFSET
diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c
index 7857a7e8e56c..e7b66d046e8d 100644
--- a/arch/s390/kernel/hiperdispatch.c
+++ b/arch/s390/kernel/hiperdispatch.c
@@ -45,6 +45,7 @@
* therefore delaying the throughput loss caused by using SMP threads.
*/
+#include <linux/cpufeature.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/device.h>
@@ -87,7 +88,7 @@ static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn);
static int hd_set_hiperdispatch_mode(int enable)
{
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
enable = 0;
if (hd_enabled == enable)
return 0;
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 69be2309cde0..961a3d60a4dd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -22,6 +22,7 @@
#include <linux/debug_locks.h>
#include <linux/vmalloc.h>
#include <asm/asm-extable.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ipl.h>
#include <asm/smp.h>
@@ -185,7 +186,7 @@ static inline int __diag308(unsigned long subcode, unsigned long addr)
r1.even = addr;
r1.odd = 0;
- asm volatile(
+ asm_inline volatile(
" diag %[r1],%[subcode],0x308\n"
"0: nopr %%r7\n"
EX_TABLE(0b,0b)
@@ -269,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
{ \
if (len >= sizeof(_value)) \
return -E2BIG; \
- len = strscpy(_value, buf, sizeof(_value)); \
+ len = strscpy(_value, buf); \
if ((ssize_t)len < 0) \
return len; \
strim(_value); \
@@ -595,7 +596,7 @@ static struct attribute *ipl_fcp_attrs[] = {
static const struct attribute_group ipl_fcp_attr_group = {
.attrs = ipl_fcp_attrs,
- .bin_attrs_new = ipl_fcp_bin_attrs,
+ .bin_attrs = ipl_fcp_bin_attrs,
};
static struct attribute *ipl_nvme_attrs[] = {
@@ -609,7 +610,7 @@ static struct attribute *ipl_nvme_attrs[] = {
static const struct attribute_group ipl_nvme_attr_group = {
.attrs = ipl_nvme_attrs,
- .bin_attrs_new = ipl_nvme_bin_attrs,
+ .bin_attrs = ipl_nvme_bin_attrs,
};
static struct attribute *ipl_eckd_attrs[] = {
@@ -622,7 +623,7 @@ static struct attribute *ipl_eckd_attrs[] = {
static const struct attribute_group ipl_eckd_attr_group = {
.attrs = ipl_eckd_attrs,
- .bin_attrs_new = ipl_eckd_bin_attrs,
+ .bin_attrs = ipl_eckd_bin_attrs,
};
/* CCW ipl device attributes */
@@ -685,7 +686,7 @@ static int __init ipl_init(void)
goto out;
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
rc = sysfs_create_group(&ipl_kset->kobj,
&ipl_ccw_attr_group_vm);
else
@@ -919,7 +920,7 @@ static struct attribute *reipl_fcp_attrs[] = {
static const struct attribute_group reipl_fcp_attr_group = {
.attrs = reipl_fcp_attrs,
- .bin_attrs_new = reipl_fcp_bin_attrs,
+ .bin_attrs = reipl_fcp_bin_attrs,
};
static struct kobj_attribute sys_reipl_fcp_clear_attr =
@@ -957,7 +958,7 @@ static struct attribute *reipl_nvme_attrs[] = {
static const struct attribute_group reipl_nvme_attr_group = {
.attrs = reipl_nvme_attrs,
- .bin_attrs_new = reipl_nvme_bin_attrs
+ .bin_attrs = reipl_nvme_bin_attrs
};
static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
@@ -1050,7 +1051,7 @@ static struct attribute *reipl_eckd_attrs[] = {
static const struct attribute_group reipl_eckd_attr_group = {
.attrs = reipl_eckd_attrs,
- .bin_attrs_new = reipl_eckd_bin_attrs
+ .bin_attrs = reipl_eckd_bin_attrs
};
static ssize_t reipl_eckd_clear_show(struct kobject *kobj,
@@ -1272,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM;
/* VM PARM */
- if (MACHINE_IS_VM && ipl_block_valid &&
+ if (machine_is_vm() && ipl_block_valid &&
(ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) {
ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
@@ -1286,7 +1287,7 @@ static int __init reipl_nss_init(void)
{
int rc;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 0;
reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
@@ -1311,8 +1312,8 @@ static int __init reipl_ccw_init(void)
return -ENOMEM;
rc = sysfs_create_group(&reipl_kset->kobj,
- MACHINE_IS_VM ? &reipl_ccw_attr_group_vm
- : &reipl_ccw_attr_group_lpar);
+ machine_is_vm() ? &reipl_ccw_attr_group_vm
+ : &reipl_ccw_attr_group_lpar);
if (rc)
return rc;
@@ -1595,7 +1596,7 @@ static const struct bin_attribute *const dump_fcp_bin_attrs[] = {
static const struct attribute_group dump_fcp_attr_group = {
.name = IPL_FCP_STR,
.attrs = dump_fcp_attrs,
- .bin_attrs_new = dump_fcp_bin_attrs,
+ .bin_attrs = dump_fcp_bin_attrs,
};
/* NVME dump device attributes */
@@ -1629,7 +1630,7 @@ static const struct bin_attribute *const dump_nvme_bin_attrs[] = {
static const struct attribute_group dump_nvme_attr_group = {
.name = IPL_NVME_STR,
.attrs = dump_nvme_attrs,
- .bin_attrs_new = dump_nvme_bin_attrs,
+ .bin_attrs = dump_nvme_bin_attrs,
};
/* ECKD dump device attributes */
@@ -1663,7 +1664,7 @@ static const struct bin_attribute *const dump_eckd_bin_attrs[] = {
static const struct attribute_group dump_eckd_attr_group = {
.name = IPL_ECKD_STR,
.attrs = dump_eckd_attrs,
- .bin_attrs_new = dump_eckd_bin_attrs,
+ .bin_attrs = dump_eckd_bin_attrs,
};
/* CCW dump device attributes */
@@ -1987,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger)
static int vmcmd_init(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -EOPNOTSUPP;
vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
if (!vmcmd_kset)
@@ -2248,26 +2249,28 @@ static int __init s390_ipl_init(void)
__initcall(s390_ipl_init);
-static void __init strncpy_skip_quote(char *dst, char *src, int n)
+static void __init strscpy_skip_quote(char *dst, char *src, int n)
{
int sx, dx;
- dx = 0;
- for (sx = 0; src[sx] != 0; sx++) {
+ if (!n)
+ return;
+ for (sx = 0, dx = 0; src[sx]; sx++) {
if (src[sx] == '"')
continue;
- dst[dx++] = src[sx];
- if (dx >= n)
+ dst[dx] = src[sx];
+ if (dx + 1 == n)
break;
+ dx++;
}
+ dst[dx] = '\0';
}
static int __init vmcmd_on_reboot_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE);
- vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot));
on_reboot_trigger.action = &vmcmd_action;
return 1;
}
@@ -2275,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup);
static int __init vmcmd_on_panic_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE);
- vmcmd_on_panic[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic));
on_panic_trigger.action = &vmcmd_action;
return 1;
}
@@ -2286,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup);
static int __init vmcmd_on_halt_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE);
- vmcmd_on_halt[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt));
on_halt_trigger.action = &vmcmd_action;
return 1;
}
@@ -2297,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup);
static int __init vmcmd_on_poff_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_poff, str, VMCMD_MAX_SIZE);
- vmcmd_on_poff[VMCMD_MAX_SIZE] = 0;
+ strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff));
on_poff_trigger.action = &vmcmd_action;
return 1;
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index ef7be599e1f7..bdf9c7cb5685 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -9,6 +9,7 @@
*/
#include <linux/kernel_stat.h>
+#include <linux/cpufeature.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
@@ -25,6 +26,7 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/stacktrace.h>
@@ -84,7 +86,6 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
{.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
{.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
- {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
{.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
{.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
{.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
@@ -149,7 +150,7 @@ void noinstr do_io_irq(struct pt_regs *regs)
if (user_mode(regs)) {
update_timer_sys();
- if (static_branch_likely(&cpu_has_bear))
+ if (cpu_has_bear())
current->thread.last_break = regs->last_break;
}
@@ -164,7 +165,7 @@ void noinstr do_io_irq(struct pt_regs *regs)
do_irq_async(regs, THIN_INTERRUPT);
else
do_irq_async(regs, IO_INTERRUPT);
- } while (MACHINE_IS_LPAR && irq_pending(regs));
+ } while (machine_is_lpar() && irq_pending(regs));
irq_exit_rcu();
@@ -185,7 +186,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
if (user_mode(regs)) {
update_timer_sys();
- if (static_branch_likely(&cpu_has_bear))
+ if (cpu_has_bear())
current->thread.last_break = regs->last_break;
}
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 8b80ea57125f..c450120b4474 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -13,6 +13,7 @@
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/stop_machine.h>
+#include <linux/cpufeature.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
#include <linux/extable.h>
@@ -153,7 +154,7 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- if (MACHINE_HAS_SEQ_INSN) {
+ if (cpu_has_seq_insn()) {
swap_instruction(&args);
text_poke_sync();
} else {
@@ -166,7 +167,7 @@ void arch_disarm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- if (MACHINE_HAS_SEQ_INSN) {
+ if (cpu_has_seq_insn()) {
swap_instruction(&args);
text_poke_sync();
} else {
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 8f681ccfb83a..baeb3dcfc1c8 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -13,7 +13,9 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/debug_locks.h>
+#include <linux/cpufeature.h>
#include <asm/guarded_storage.h>
+#include <asm/machine.h>
#include <asm/pfault.h>
#include <asm/cio.h>
#include <asm/fpu.h>
@@ -94,7 +96,7 @@ static noinline void __machine_kdump(void *image)
mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK);
if (cpu_has_vx())
save_vx_regs((__vector128 *) mcesa->vector_save_area);
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
local_ctl_store(2, &cr2_old.reg);
cr2_new = cr2_old;
cr2_new.gse = 1;
@@ -178,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void)
static int machine_kexec_prepare_kdump(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
diag10_range(PFN_DOWN(crashk_res.start),
PFN_DOWN(crashk_res.end - crashk_res.start + 1));
return 0;
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index fbd218b6fc8e..11f33243a23f 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -9,6 +9,8 @@
*/
#include <linux/kernel_stat.h>
+#include <linux/utsname.h>
+#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/entry-common.h>
@@ -20,7 +22,6 @@
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/kvm_host.h>
-#include <linux/export.h>
#include <asm/lowcore.h>
#include <asm/ctlreg.h>
#include <asm/fpu.h>
@@ -45,7 +46,7 @@ static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
static inline int nmi_needs_mcesa(void)
{
- return cpu_has_vx() || MACHINE_HAS_GS;
+ return cpu_has_vx() || cpu_has_gs();
}
/*
@@ -61,7 +62,7 @@ void __init nmi_alloc_mcesa_early(u64 *mcesad)
if (!nmi_needs_mcesa())
return;
*mcesad = __pa(&boot_mcesa);
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
*mcesad |= ilog2(MCESA_MAX_SIZE);
}
@@ -73,14 +74,14 @@ int nmi_alloc_mcesa(u64 *mcesad)
*mcesad = 0;
if (!nmi_needs_mcesa())
return 0;
- size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE;
+ size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE;
origin = kmalloc(size, GFP_KERNEL);
if (!origin)
return -ENOMEM;
/* The pointer is stored with mcesa_bits ORed in */
kmemleak_not_leak(origin);
*mcesad = __pa(origin);
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
*mcesad |= ilog2(MCESA_MAX_SIZE);
return 0;
}
@@ -115,18 +116,82 @@ static __always_inline char *u64_to_hex(char *dest, u64 val)
return dest;
}
+static notrace void nmi_print_info(void)
+{
+ struct lowcore *lc = get_lowcore();
+ char message[100];
+ char *ptr;
+ int i;
+
+ ptr = nmi_puts(message, "Unrecoverable machine check, code: ");
+ ptr = u64_to_hex(ptr, lc->mcck_interruption_code);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, init_utsname()->release);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, arch_hw_string);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, "PSW: ");
+ ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask);
+ ptr = nmi_puts(ptr, " ");
+ ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr);
+ ptr = nmi_puts(ptr, " PFX: ");
+ ptr = u64_to_hex(ptr, (u64)get_lowcore());
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, "LBA: ");
+ ptr = u64_to_hex(ptr, lc->last_break_save_area);
+ ptr = nmi_puts(ptr, " EDC: ");
+ ptr = u64_to_hex(ptr, lc->external_damage_code);
+ ptr = nmi_puts(ptr, " FSA: ");
+ ptr = u64_to_hex(ptr, lc->failing_storage_address);
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+
+ ptr = nmi_puts(message, "CRS:\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ for (i = 0; i < 16; i++) {
+ ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val);
+ ptr = nmi_puts(ptr, " ");
+ if ((i + 1) % 4 == 0) {
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ }
+ }
+
+ ptr = nmi_puts(message, "GPRS:\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ for (i = 0; i < 16; i++) {
+ ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]);
+ ptr = nmi_puts(ptr, " ");
+ if ((i + 1) % 4 == 0) {
+ ptr = nmi_puts(ptr, "\n");
+ sclp_emergency_printk(message);
+ ptr = message;
+ }
+ }
+
+ ptr = nmi_puts(message, "System stopped\n");
+ sclp_emergency_printk(message);
+}
+
static notrace void s390_handle_damage(void)
{
struct lowcore *lc = get_lowcore();
union ctlreg0 cr0, cr0_new;
- char message[100];
psw_t psw_save;
- char *ptr;
smp_emergency_stop();
diag_amode31_ops.diag308_reset();
- ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
- u64_to_hex(ptr, lc->mcck_interruption_code);
/*
* Disable low address protection and make machine check new PSW a
@@ -140,7 +205,7 @@ static notrace void s390_handle_damage(void)
psw_bits(lc->mcck_new_psw).io = 0;
psw_bits(lc->mcck_new_psw).ext = 0;
psw_bits(lc->mcck_new_psw).wait = 1;
- sclp_emergency_printk(message);
+ nmi_print_info();
/*
* Restore machine check new PSW and control register 0 to original
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 33205dd410e4..4d09954ebf49 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -14,7 +14,6 @@
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
-#include <linux/export.h>
#include <linux/miscdevice.h>
#include <linux/perf_event.h>
@@ -442,7 +441,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
ctrset_size = 48;
else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
ctrset_size = 128;
- else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
+ else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8)
ctrset_size = 160;
break;
case CPUMF_CTR_SET_MT_DIAG:
@@ -858,18 +857,13 @@ static int cpumf_pmu_event_type(struct perf_event *event)
static int cpumf_pmu_event_init(struct perf_event *event)
{
unsigned int type = event->attr.type;
- int err;
+ int err = -ENOENT;
if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
err = __hw_perf_event_init(event, type);
else if (event->pmu->type == type)
/* Registered as unknown PMU */
err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
- else
- return -ENOENT;
-
- if (unlikely(err) && event->destroy)
- event->destroy(event);
return err;
}
@@ -985,8 +979,6 @@ static int cfdiag_push_sample(struct perf_event *event,
}
overflow = perf_event_overflow(event, &data, &regs);
- if (overflow)
- event->pmu->stop(event, 0);
perf_event_update_userpage(event);
return overflow;
@@ -1819,8 +1811,6 @@ static int cfdiag_event_init(struct perf_event *event)
event->destroy = hw_perf_event_destroy;
err = cfdiag_event_init2(event);
- if (unlikely(err))
- event->destroy(event);
out:
return err;
}
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index e4a6bfc91080..7ace1f9e4ccf 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4);
CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
-
CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
@@ -291,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
@@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d);
CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e);
CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb);
+CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc);
+CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd);
+CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce);
+CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
@@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
NULL,
};
-static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = {
+static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
@@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = {
NULL,
};
+static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD),
+ CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD),
+ CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD),
+ CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD),
+ CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION),
+ CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z17, SORTL),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO),
+ CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
static struct attribute_group cpumcf_pmu_events_group = {
@@ -859,7 +1016,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
if (ci.csvn >= 1 && ci.csvn <= 5)
csvn = cpumcf_svn_12345_pmu_event_attr;
else if (ci.csvn >= 6)
- csvn = cpumcf_svn_67_pmu_event_attr;
+ csvn = cpumcf_svn_678_pmu_event_attr;
/* Determine model-specific counter set(s) */
get_cpu_id(&cpu_id);
@@ -892,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
case 0x3932:
model = cpumcf_z16_pmu_event_attr;
break;
+ case 0x9175:
+ case 0x9176:
+ model = cpumcf_z17_pmu_event_attr;
+ break;
default:
model = none;
break;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 5f60248cb468..f432869f8921 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -14,7 +14,6 @@
#include <linux/percpu.h>
#include <linux/pid.h>
#include <linux/notifier.h>
-#include <linux/export.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
@@ -885,9 +884,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
event->attr.exclude_idle = 0;
err = __hw_perf_event_init(event);
- if (unlikely(err))
- if (event->destroy)
- event->destroy(event);
return err;
}
@@ -1075,10 +1071,7 @@ static int perf_push_sample(struct perf_event *event,
overflow = 0;
if (perf_event_exclude(event, &regs, sde_regs))
goto out;
- if (perf_event_overflow(event, &data, &regs)) {
- overflow = 1;
- event->pmu->stop(event, 0);
- }
+ overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
out:
return overflow;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 2b9611c4718e..91b8716c883a 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -12,7 +12,6 @@
#include <linux/perf_event.h>
#include <linux/kvm_host.h>
#include <linux/percpu.h>
-#include <linux/export.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 10725f5a6f0f..f373a1009c45 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -13,7 +13,6 @@
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
-#include <linux/export.h>
#include <linux/io.h>
#include <linux/perf_event.h>
#include <asm/ctlreg.h>
@@ -518,7 +517,8 @@ static void paicrypt_have_samples(void)
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event CRYPTO_ALL is allowed.
*/
-static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
/* We started with a clean page on event installation. So read out
* results on schedule_out and if page was dirty, save old values.
@@ -695,7 +695,7 @@ static const char * const paicrypt_ctrnames[] = {
[111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
[112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
[113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
- [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A",
+ [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256",
[115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
[116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
[117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index a8f0bad99cf0..d827473e7f87 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -14,7 +14,6 @@
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
-#include <linux/export.h>
#include <linux/io.h>
#include <linux/perf_event.h>
#include <asm/ctlreg.h>
@@ -542,7 +541,8 @@ static void paiext_have_samples(void)
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event NNPA_ALL is allowed.
*/
-static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
/* We started with a clean page on event installation. So read out
* results on schedule_out and if page was dirty, save old values.
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 9637aee43c40..f55f09cda6f8 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -27,7 +27,6 @@
#include <linux/compat.h>
#include <linux/kprobes.h>
#include <linux/random.h>
-#include <linux/export.h>
#include <linux/init_task.h>
#include <linux/entry-common.h>
#include <linux/io.h>
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 5ce9a795a0fe..11f70c1e2797 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/stop_machine.h>
+#include <linux/cpufeature.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/random.h>
@@ -19,6 +20,7 @@
#include <linux/cpu.h>
#include <linux/smp.h>
#include <asm/text-patching.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/facility.h>
#include <asm/elf.h>
@@ -72,7 +74,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
this_cpu = smp_processor_id();
if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
__this_cpu_write(cpu_relax_retry, 0);
- cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
+ cpu = cpumask_next_wrap(this_cpu, cpumask);
if (cpu >= nr_cpu_ids)
return;
if (arch_vcpu_is_preempted(cpu))
@@ -209,14 +211,14 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_DFP;
/* huge page support */
- if (MACHINE_HAS_EDAT1)
+ if (cpu_has_edat1())
elf_hwcap |= HWCAP_HPAGE;
/* 64-bit register support for 31-bit processes */
elf_hwcap |= HWCAP_HIGH_GPRS;
/* transactional execution */
- if (MACHINE_HAS_TE)
+ if (machine_has_tx())
elf_hwcap |= HWCAP_TE;
/* vector */
@@ -244,10 +246,10 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_NNPA;
/* guarded storage */
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
elf_hwcap |= HWCAP_GS;
- if (MACHINE_HAS_PCI_MIO)
+ if (test_machine_feature(MFEATURE_PCI_MIO))
elf_hwcap |= HWCAP_PCI_MIO;
/* virtualization support */
@@ -266,31 +268,35 @@ static int __init setup_elf_platform(void)
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
default: /* Use "z10" as default. */
- strcpy(elf_platform, "z10");
+ strscpy(elf_platform, "z10");
break;
case 0x2817:
case 0x2818:
- strcpy(elf_platform, "z196");
+ strscpy(elf_platform, "z196");
break;
case 0x2827:
case 0x2828:
- strcpy(elf_platform, "zEC12");
+ strscpy(elf_platform, "zEC12");
break;
case 0x2964:
case 0x2965:
- strcpy(elf_platform, "z13");
+ strscpy(elf_platform, "z13");
break;
case 0x3906:
case 0x3907:
- strcpy(elf_platform, "z14");
+ strscpy(elf_platform, "z14");
break;
case 0x8561:
case 0x8562:
- strcpy(elf_platform, "z15");
+ strscpy(elf_platform, "z15");
break;
case 0x3931:
case 0x3932:
- strcpy(elf_platform, "z16");
+ strscpy(elf_platform, "z16");
+ break;
+ case 0x9175:
+ case 0x9176:
+ strscpy(elf_platform, "z17");
break;
}
return 0;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 1cfed8b710b8..494216c4b4f3 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -7,10 +7,10 @@
* Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
-#include "asm/ptrace.h"
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <linux/cpufeature.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
@@ -31,6 +31,9 @@
#include <asm/unistd.h>
#include <asm/runtime_instr.h>
#include <asm/facility.h>
+#include <asm/machine.h>
+#include <asm/ptrace.h>
+#include <asm/rwonce.h>
#include <asm/fpu.h>
#include "entry.h"
@@ -60,7 +63,7 @@ void update_cr_regs(struct task_struct *task)
cr0_new = cr0_old;
cr2_new = cr2_old;
/* Take care of the enable/disable of transactional execution. */
- if (MACHINE_HAS_TE) {
+ if (machine_has_tx()) {
/* Set or clear transaction execution TXC bit 8. */
cr0_new.tcx = 1;
if (task->thread.per_flags & PER_FLAG_NO_TE)
@@ -75,7 +78,7 @@ void update_cr_regs(struct task_struct *task)
}
}
/* Take care of enable/disable of guarded storage. */
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
cr2_new.gse = 0;
if (task->thread.gs_cb)
cr2_new.gse = 1;
@@ -470,18 +473,18 @@ long arch_ptrace(struct task_struct *child, long request,
case PTRACE_GET_LAST_BREAK:
return put_user(child->thread.last_break, (unsigned long __user *)data);
case PTRACE_ENABLE_TE:
- if (!MACHINE_HAS_TE)
+ if (!machine_has_tx())
return -EIO;
child->thread.per_flags &= ~PER_FLAG_NO_TE;
return 0;
case PTRACE_DISABLE_TE:
- if (!MACHINE_HAS_TE)
+ if (!machine_has_tx())
return -EIO;
child->thread.per_flags |= PER_FLAG_NO_TE;
child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
return 0;
case PTRACE_TE_ABORT_RAND:
- if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+ if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE))
return -EIO;
switch (data) {
case 0UL:
@@ -1033,7 +1036,7 @@ static int s390_gs_cb_get(struct task_struct *target,
{
struct gs_cb *data = target->thread.gs_cb;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!data)
return -ENODATA;
@@ -1050,7 +1053,7 @@ static int s390_gs_cb_set(struct task_struct *target,
struct gs_cb gs_cb = { }, *data = NULL;
int rc;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!target->thread.gs_cb) {
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1087,7 +1090,7 @@ static int s390_gs_bc_get(struct task_struct *target,
{
struct gs_cb *data = target->thread.gs_bc_cb;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!data)
return -ENODATA;
@@ -1101,7 +1104,7 @@ static int s390_gs_bc_set(struct task_struct *target,
{
struct gs_cb *data = target->thread.gs_bc_cb;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!data) {
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1206,7 +1209,7 @@ static int s390_runtime_instr_set(struct task_struct *target,
static const struct user_regset s390_regsets[] = {
{
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = sizeof(s390_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
@@ -1214,7 +1217,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_regs_set,
},
{
- .core_note_type = NT_PRFPREG,
+ USER_REGSET_NOTE_TYPE(PRFPREG),
.n = sizeof(s390_fp_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
@@ -1222,7 +1225,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_fpregs_set,
},
{
- .core_note_type = NT_S390_SYSTEM_CALL,
+ USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL),
.n = 1,
.size = sizeof(unsigned int),
.align = sizeof(unsigned int),
@@ -1230,7 +1233,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_system_call_set,
},
{
- .core_note_type = NT_S390_LAST_BREAK,
+ USER_REGSET_NOTE_TYPE(S390_LAST_BREAK),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1238,7 +1241,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_last_break_set,
},
{
- .core_note_type = NT_S390_TDB,
+ USER_REGSET_NOTE_TYPE(S390_TDB),
.n = 1,
.size = 256,
.align = 1,
@@ -1246,7 +1249,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_tdb_set,
},
{
- .core_note_type = NT_S390_VXRS_LOW,
+ USER_REGSET_NOTE_TYPE(S390_VXRS_LOW),
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1254,7 +1257,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_vxrs_low_set,
},
{
- .core_note_type = NT_S390_VXRS_HIGH,
+ USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH),
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
@@ -1262,7 +1265,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_vxrs_high_set,
},
{
- .core_note_type = NT_S390_GS_CB,
+ USER_REGSET_NOTE_TYPE(S390_GS_CB),
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1270,7 +1273,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_gs_cb_set,
},
{
- .core_note_type = NT_S390_GS_BC,
+ USER_REGSET_NOTE_TYPE(S390_GS_BC),
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1278,7 +1281,7 @@ static const struct user_regset s390_regsets[] = {
.set = s390_gs_bc_set,
},
{
- .core_note_type = NT_S390_RI_CB,
+ USER_REGSET_NOTE_TYPE(S390_RI_CB),
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1410,7 +1413,7 @@ static int s390_compat_last_break_set(struct task_struct *target,
static const struct user_regset s390_compat_regsets[] = {
{
- .core_note_type = NT_PRSTATUS,
+ USER_REGSET_NOTE_TYPE(PRSTATUS),
.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
@@ -1418,7 +1421,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_compat_regs_set,
},
{
- .core_note_type = NT_PRFPREG,
+ USER_REGSET_NOTE_TYPE(PRFPREG),
.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
@@ -1426,7 +1429,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_fpregs_set,
},
{
- .core_note_type = NT_S390_SYSTEM_CALL,
+ USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL),
.n = 1,
.size = sizeof(compat_uint_t),
.align = sizeof(compat_uint_t),
@@ -1434,7 +1437,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_system_call_set,
},
{
- .core_note_type = NT_S390_LAST_BREAK,
+ USER_REGSET_NOTE_TYPE(S390_LAST_BREAK),
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
@@ -1442,7 +1445,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_compat_last_break_set,
},
{
- .core_note_type = NT_S390_TDB,
+ USER_REGSET_NOTE_TYPE(S390_TDB),
.n = 1,
.size = 256,
.align = 1,
@@ -1450,7 +1453,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_tdb_set,
},
{
- .core_note_type = NT_S390_VXRS_LOW,
+ USER_REGSET_NOTE_TYPE(S390_VXRS_LOW),
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1458,7 +1461,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_vxrs_low_set,
},
{
- .core_note_type = NT_S390_VXRS_HIGH,
+ USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH),
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
@@ -1466,7 +1469,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_vxrs_high_set,
},
{
- .core_note_type = NT_S390_HIGH_GPRS,
+ USER_REGSET_NOTE_TYPE(S390_HIGH_GPRS),
.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
@@ -1474,7 +1477,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_compat_regs_high_set,
},
{
- .core_note_type = NT_S390_GS_CB,
+ USER_REGSET_NOTE_TYPE(S390_GS_CB),
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1482,7 +1485,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_gs_cb_set,
},
{
- .core_note_type = NT_S390_GS_BC,
+ USER_REGSET_NOTE_TYPE(S390_GS_BC),
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1490,7 +1493,7 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_gs_bc_set,
},
{
- .core_note_type = NT_S390_RI_CB,
+ USER_REGSET_NOTE_TYPE(S390_RI_CB),
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
@@ -1521,13 +1524,6 @@ static const char *gpr_names[NUM_GPRS] = {
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
};
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
-{
- if (offset >= NUM_GPRS)
- return 0;
- return regs->gprs[offset];
-}
-
int regs_query_register_offset(const char *name)
{
unsigned long offset;
@@ -1547,29 +1543,3 @@ const char *regs_query_register_name(unsigned int offset)
return NULL;
return gpr_names[offset];
}
-
-static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
-{
- unsigned long ksp = kernel_stack_pointer(regs);
-
- return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
-}
-
-/**
- * regs_get_kernel_stack_nth() - get Nth entry of the stack
- * @regs:pt_regs which contains kernel stack pointer.
- * @n:stack entry number.
- *
- * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
- * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
- * this returns 0.
- */
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
-{
- unsigned long addr;
-
- addr = kernel_stack_pointer(regs) + n * sizeof(long);
- if (!regs_within_kernel_stack(regs, addr))
- return 0;
- return *(unsigned long *)addr;
-}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 1298f0860733..7b529868789f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -54,6 +54,7 @@
#include <asm/archrandom.h>
#include <asm/boot_data.h>
+#include <asm/machine.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
@@ -157,6 +158,12 @@ u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
struct oldmem_data __bootdata_preserved(oldmem_data);
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);
@@ -174,8 +181,6 @@ unsigned long __bootdata_preserved(MODULES_END);
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
-DEFINE_STATIC_KEY_FALSE(cpu_has_bear);
-
/*
* The Write Back bit position in the physaddr is given by the SLPC PCI.
* Leaving the mask zero always uses write through which is safe
@@ -245,7 +250,7 @@ static void __init conmode_default(void)
char query_buffer[1024];
char *ptr;
- if (MACHINE_IS_VM) {
+ if (machine_is_vm()) {
cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
ptr = strstr(query_buffer, "SUBCHANNEL =");
@@ -283,7 +288,7 @@ static void __init conmode_default(void)
SET_CONSOLE_SCLP;
#endif
}
- } else if (MACHINE_IS_KVM) {
+ } else if (machine_is_kvm()) {
if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
SET_CONSOLE_VT220;
else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
@@ -409,7 +414,6 @@ static void __init setup_lowcore(void)
lc->clock_comparator = clock_comparator_max;
lc->current_task = (unsigned long)&init_task;
lc->lpp = LPP_MAGIC;
- lc->machine_flags = get_lowcore()->machine_flags;
lc->preempt_count = get_lowcore()->preempt_count;
nmi_alloc_mcesa_early(&lc->mcesad);
lc->sys_enter_timer = get_lowcore()->sys_enter_timer;
@@ -601,7 +605,7 @@ static void __init reserve_crashkernel(void)
int rc;
rc = parse_crashkernel(boot_command_line, ident_map_size,
- &crash_size, &crash_base, NULL, NULL);
+ &crash_size, &crash_base, NULL, NULL, NULL);
crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
@@ -646,7 +650,7 @@ static void __init reserve_crashkernel(void)
return;
}
- if (!oldmem_data.start && MACHINE_IS_VM)
+ if (!oldmem_data.start && machine_is_vm())
diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
@@ -686,7 +690,7 @@ static void __init reserve_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_reserve(addr, size);
}
@@ -694,7 +698,7 @@ static void __init free_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_phys_free(addr, size);
}
@@ -715,6 +719,11 @@ static void __init memblock_add_physmem_info(void)
memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}
+static void __init setup_high_memory(void)
+{
+ high_memory = __va(ident_map_size);
+}
+
/*
* Reserve memory used for lowcore.
*/
@@ -724,7 +733,7 @@ static void __init reserve_lowcore(void)
void *lowcore_end = lowcore_start + sizeof(struct lowcore);
void *start, *end;
- if ((void *)__identity_base < lowcore_end) {
+ if (absolute_pointer(__identity_base) < lowcore_end) {
start = max(lowcore_start, (void *)__identity_base);
end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
memblock_reserve(__pa(start), __pa(end));
@@ -866,6 +875,23 @@ static void __init log_component_list(void)
}
/*
+ * Print avoiding interpretation of % in buf and taking bootdebug option
+ * into consideration.
+ */
+static void __init print_rb_entry(const char *buf)
+{
+ char fmt[] = KERN_SOH "0boot: %s";
+ int level = printk_get_level(buf);
+
+ buf = skip_timestamp(printk_skip_level(buf));
+ if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
+ return;
+
+ fmt[1] = level;
+ printk(fmt, buf);
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -875,17 +901,20 @@ void __init setup_arch(char **cmdline_p)
/*
* print what head.S has found out about the machine
*/
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
pr_info("Linux is running as a z/VM "
"guest operating system in 64-bit mode\n");
- else if (MACHINE_IS_KVM)
+ else if (machine_is_kvm())
pr_info("Linux is running under KVM in 64-bit mode\n");
- else if (MACHINE_IS_LPAR)
+ else if (machine_is_lpar())
pr_info("Linux is running natively in 64-bit mode\n");
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ /* Print decompressor messages if not already printed */
+ if (!boot_earlyprintk)
+ boot_rb_foreach(print_rb_entry);
- if (have_relocated_lowcore())
+ if (machine_has_relocated_lowcore())
pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
log_component_list();
@@ -927,6 +956,7 @@ void __init setup_arch(char **cmdline_p)
free_physmem_info();
setup_memory_end();
+ setup_high_memory();
memblock_dump_all();
setup_memory();
@@ -935,7 +965,7 @@ void __init setup_arch(char **cmdline_p)
setup_uv();
dma_contiguous_reserve(ident_map_size);
vmcp_cma_reserve();
- if (MACHINE_HAS_EDAT2)
+ if (cpu_has_edat2())
hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
reserve_crashkernel();
@@ -955,10 +985,6 @@ void __init setup_arch(char **cmdline_p)
numa_setup();
smp_detect_cpus();
topology_init_early();
-
- if (test_facility(193))
- static_branch_enable(&cpu_has_bear);
-
setup_protection_map();
/*
* Create kernel page tables.
@@ -987,3 +1013,8 @@ void __init setup_arch(char **cmdline_p)
/* Add system specific data to the random pool */
setup_randomness();
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ sclp_init();
+}
diff --git a/arch/s390/kernel/skey.c b/arch/s390/kernel/skey.c
new file mode 100644
index 000000000000..ba049fd103c2
--- /dev/null
+++ b/arch/s390/kernel/skey.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/rwonce.h>
+#include <asm/page.h>
+#include <asm/skey.h>
+
+int skey_regions_initialized;
+
+static inline unsigned long load_real_address(unsigned long address)
+{
+ unsigned long real;
+
+ asm volatile(
+ " lra %[real],0(%[address])\n"
+ : [real] "=d" (real)
+ : [address] "a" (address)
+ : "cc");
+ return real;
+}
+
+/*
+ * Initialize storage keys of registered memory regions with the
+ * default key. This is useful for code which is executed with a
+ * non-default access key.
+ */
+void __skey_regions_initialize(void)
+{
+ unsigned long address, real;
+ struct skey_region *r, *end;
+
+ r = __skey_region_start;
+ end = __skey_region_end;
+ while (r < end) {
+ address = r->start & PAGE_MASK;
+ do {
+ real = load_real_address(address);
+ page_set_storage_key(real, PAGE_DEFAULT_KEY, 1);
+ address += PAGE_SIZE;
+ } while (address < r->end);
+ r++;
+ }
+ /*
+ * Make sure storage keys are initialized before
+ * skey_regions_initialized is changed.
+ */
+ barrier();
+ WRITE_ONCE(skey_regions_initialized, 1);
+}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 7b08399b0846..e88ebe5339fc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -18,6 +18,7 @@
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/cpufeature.h>
#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/export.h>
@@ -38,6 +39,7 @@
#include <linux/kprobes.h>
#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
+#include <asm/machine.h>
#include <asm/ctlreg.h>
#include <asm/pfault.h>
#include <asm/diag.h>
@@ -97,13 +99,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
static unsigned int smp_max_threads __initdata = -1U;
cpumask_t cpu_setup_mask;
-static int __init early_nosmt(char *s)
-{
- smp_max_threads = 1;
- return 0;
-}
-early_param("nosmt", early_nosmt);
-
static int __init early_smt(char *s)
{
get_option(&s, &smp_max_threads);
@@ -180,13 +175,10 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
{
- int order;
-
if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
return;
- order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
pcpu->ec_clk = get_tod_clock_fast();
- pcpu_sigp_retry(pcpu, order, 0);
+ pcpu_sigp_retry(pcpu, SIGP_EXTERNAL_CALL, 0);
}
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
@@ -263,13 +255,12 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
lc->percpu_offset = __per_cpu_offset[cpu];
lc->kernel_asce = get_lowcore()->kernel_asce;
lc->user_asce = s390_invalid_asce;
- lc->machine_flags = get_lowcore()->machine_flags;
lc->user_timer = lc->system_timer =
lc->steal_timer = lc->avg_steal_timer = 0;
abs_lc = get_abs_lowcore();
memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
put_abs_lowcore(abs_lc);
- lc->cregs_save_area[1] = lc->kernel_asce;
+ lc->cregs_save_area[1] = lc->user_asce;
lc->cregs_save_area[7] = lc->user_asce;
save_access_regs((unsigned int *) lc->access_regs_save_area);
arch_spin_lock_setup(cpu);
@@ -416,7 +407,7 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted);
void notrace smp_yield_cpu(int cpu)
{
- if (!MACHINE_HAS_DIAG9C)
+ if (!machine_has_diag9c())
return;
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
@@ -439,16 +430,16 @@ void notrace smp_emergency_stop(void)
cpumask_copy(&cpumask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &cpumask);
- end = get_tod_clock() + (1000000UL << 12);
+ end = get_tod_clock_monotonic() + (1000000UL << 12);
for_each_cpu(cpu, &cpumask) {
struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
set_bit(ec_stop_cpu, &pcpu->ec_mask);
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
0, NULL) == SIGP_CC_BUSY &&
- get_tod_clock() < end)
+ get_tod_clock_monotonic() < end)
cpu_relax();
}
- while (get_tod_clock() < end) {
+ while (get_tod_clock_monotonic() < end) {
for_each_cpu(cpu, &cpumask)
if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
cpumask_clear_cpu(cpu, &cpumask);
@@ -561,10 +552,10 @@ int smp_store_status(int cpu)
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
- if (!cpu_has_vx() && !MACHINE_HAS_GS)
+ if (!cpu_has_vx() && !cpu_has_gs())
return 0;
pa = lc->mcesad & MCESA_ORIGIN_MASK;
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
pa |= lc->mcesad & MCESA_LC_MASK;
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
@@ -807,6 +798,7 @@ void __init smp_detect_cpus(void)
mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
pcpu_set_smt(mtid);
+ cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1);
/* Print number of CPUs */
c_cpus = s_cpus = 0;
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 40edfde25f5b..b153a395f46d 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -9,6 +9,7 @@
#include <linux/stacktrace.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
+#include <asm/asm-offsets.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#include <asm/kprobes.h>
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index d40f0b983e74..f4ccdbed4b89 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -5,6 +5,8 @@
* Copyright IBM Corp. 2016
* Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
*/
+
+#include <linux/export.h>
#include <linux/errno.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index 5ec28028315b..4fee74553ca2 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -12,6 +12,7 @@
* platform.
*/
+#include <linux/cpufeature.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -81,25 +82,35 @@ SYSCALL_DEFINE0(ni_syscall)
return -ENOSYS;
}
-static void do_syscall(struct pt_regs *regs)
+void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
{
unsigned long nr;
+ add_random_kstack_offset();
+ enter_from_user_mode(regs);
+ regs->psw = get_lowcore()->svc_old_psw;
+ regs->int_code = get_lowcore()->svc_int_code;
+ update_timer_sys();
+ if (cpu_has_bear())
+ current->thread.last_break = regs->last_break;
+ local_irq_enable();
+ regs->orig_gpr2 = regs->gprs[2];
+ if (unlikely(per_trap))
+ set_thread_flag(TIF_PER_TRAP);
+ regs->flags = 0;
+ set_pt_regs_flag(regs, PIF_SYSCALL);
nr = regs->int_code & 0xffff;
- if (!nr) {
+ if (likely(!nr)) {
nr = regs->gprs[1] & 0xffff;
regs->int_code &= ~0xffffUL;
regs->int_code |= nr;
}
-
regs->gprs[2] = nr;
-
if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) {
regs->psw.addr = current->restart_block.arch_data;
current->restart_block.arch_data = 1;
}
nr = syscall_enter_from_user_mode_work(regs, nr);
-
/*
* In the s390 ptrace ABI, both the syscall number and the return value
* use gpr2. However, userspace puts the syscall number either in the
@@ -107,37 +118,11 @@ static void do_syscall(struct pt_regs *regs)
* work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here
* and if set, the syscall will be skipped.
*/
-
if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)))
goto out;
regs->gprs[2] = -ENOSYS;
- if (likely(nr >= NR_syscalls))
- goto out;
- do {
+ if (likely(nr < NR_syscalls))
regs->gprs[2] = current->thread.sys_call_table[nr](regs);
- } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART));
out:
- syscall_exit_to_user_mode_work(regs);
-}
-
-void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
-{
- add_random_kstack_offset();
- enter_from_user_mode(regs);
- regs->psw = get_lowcore()->svc_old_psw;
- regs->int_code = get_lowcore()->svc_int_code;
- update_timer_sys();
- if (static_branch_likely(&cpu_has_bear))
- current->thread.last_break = regs->last_break;
-
- local_irq_enable();
- regs->orig_gpr2 = regs->gprs[2];
-
- if (per_trap)
- set_thread_flag(TIF_PER_TRAP);
-
- regs->flags = 0;
- set_pt_regs_flag(regs, PIF_SYSCALL);
- do_syscall(regs);
- exit_to_user_mode();
+ syscall_exit_to_user_mode(regs);
}
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index e9115b4d8b63..8a6744d658db 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -469,3 +469,6 @@
464 common getxattrat sys_getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr sys_file_setattr
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 88055f58fbda..1ea84e942bd4 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -5,6 +5,7 @@
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
*/
+#include <linux/cpufeature.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -15,54 +16,17 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <asm/asm-extable.h>
+#include <asm/machine.h>
#include <asm/ebcdic.h>
#include <asm/debug.h>
#include <asm/sysinfo.h>
#include <asm/cpcmd.h>
#include <asm/topology.h>
#include <asm/fpu.h>
+#include <asm/asm.h>
int topology_max_mnest;
-static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl)
-{
- int r0 = (fc << 28) | sel1;
- int rc = 0;
-
- asm volatile(
- " lr 0,%[r0]\n"
- " lr 1,%[r1]\n"
- " stsi 0(%[sysinfo])\n"
- "0: jz 2f\n"
- "1: lhi %[rc],%[retval]\n"
- "2: lr %[r0],0\n"
- EX_TABLE(0b, 1b)
- : [r0] "+d" (r0), [rc] "+d" (rc)
- : [r1] "d" (sel2),
- [sysinfo] "a" (sysinfo),
- [retval] "K" (-EOPNOTSUPP)
- : "cc", "0", "1", "memory");
- *lvl = ((unsigned int) r0) >> 28;
- return rc;
-}
-
-/*
- * stsi - store system information
- *
- * Returns the current configuration level if function code 0 was specified.
- * Otherwise returns 0 on success or a negative value on error.
- */
-int stsi(void *sysinfo, int fc, int sel1, int sel2)
-{
- int lvl, rc;
-
- rc = __stsi(sysinfo, fc, sel1, sel2, &lvl);
- if (rc)
- return rc;
- return fc ? 0 : lvl;
-}
-EXPORT_SYMBOL(stsi);
-
#ifdef CONFIG_PROC_FS
static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
@@ -154,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
int i;
seq_putc(m, '\n');
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return;
if (stsi(info, 15, 1, topology_max_mnest))
return;
@@ -415,7 +379,7 @@ static struct service_level service_level_vm = {
static __init int create_proc_service_level(void)
{
proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops);
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
register_service_level(&service_level_vm);
return 0;
}
@@ -559,7 +523,7 @@ static __init int stsi_init_debugfs(void)
sf = &stsi_file[i];
debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
}
- if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) {
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) {
char link_to[10];
sprintf(link_to, "15_1_%d", topology_mnest_limit());
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index e9f47c3a6197..63517b85f4c9 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -54,10 +54,10 @@
#include <asm/cio.h>
#include "entry.h"
-union tod_clock tod_clock_base __section(".data");
+union tod_clock __bootdata_preserved(tod_clock_base);
EXPORT_SYMBOL_GPL(tod_clock_base);
-u64 clock_comparator_max = -1ULL;
+u64 __bootdata_preserved(clock_comparator_max);
EXPORT_SYMBOL_GPL(clock_comparator_max);
static DEFINE_PER_CPU(struct clock_event_device, comparators);
@@ -69,8 +69,6 @@ unsigned char ptff_function_mask[16];
static unsigned long lpar_offset;
static unsigned long initial_leap_seconds;
-static unsigned long tod_steering_end;
-static long tod_steering_delta;
/*
* Get time offsets with PTFF
@@ -79,12 +77,8 @@ void __init time_early_init(void)
{
struct ptff_qto qto;
struct ptff_qui qui;
- int cs;
- /* Initialize TOD steering parameters */
- tod_steering_end = tod_clock_base.tod;
- for (cs = 0; cs < CS_BASES; cs++)
- vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
+ vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod;
if (!test_facility(28))
return;
@@ -228,21 +222,7 @@ void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
static u64 read_tod_clock(struct clocksource *cs)
{
- unsigned long now, adj;
-
- preempt_disable(); /* protect from changes to steering parameters */
- now = get_tod_clock();
- adj = tod_steering_end - now;
- if (unlikely((s64) adj > 0))
- /*
- * manually steer by 1 cycle every 2^16 cycles. This
- * corresponds to shifting the tod delta by 15. 1s is
- * therefore steered in ~9h. The adjust will decrease
- * over time, until it finally reaches 0.
- */
- now += (tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
- preempt_enable();
- return now;
+ return get_tod_clock_monotonic();
}
static struct clocksource clocksource_tod = {
@@ -371,29 +351,11 @@ static inline int check_sync_clock(void)
*/
static void clock_sync_global(long delta)
{
- unsigned long now, adj;
struct ptff_qto qto;
- int cs;
/* Fixup the monotonic sched clock. */
tod_clock_base.eitod += delta;
- /* Adjust TOD steering parameters. */
- now = get_tod_clock();
- adj = tod_steering_end - now;
- if (unlikely((s64) adj >= 0))
- /* Calculate how much of the old adjustment is left. */
- tod_steering_delta = (tod_steering_delta < 0) ?
- -(adj >> 15) : (adj >> 15);
- tod_steering_delta += delta;
- if ((abs(tod_steering_delta) >> 48) != 0)
- panic("TOD clock sync offset %li is too large to drift\n",
- tod_steering_delta);
- tod_steering_end = now + (abs(tod_steering_delta) << 15);
- for (cs = 0; cs < CS_BASES; cs++) {
- vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
- vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta;
- }
-
+ vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod;
/* Update LPAR offset. */
if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
lpar_offset = qto.tod_epoch_difference;
@@ -435,7 +397,7 @@ struct clock_sync_data {
/*
* Server Time Protocol (STP) code.
*/
-static bool stp_online;
+static bool stp_online = true;
static struct stp_sstpi stp_info;
static void *stp_page;
@@ -461,7 +423,6 @@ static void __init stp_reset(void)
if (rc == 0)
set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
else if (stp_online) {
- pr_warn("The real or virtual hardware system does not provide an STP interface\n");
free_page((unsigned long) stp_page);
stp_page = NULL;
stp_online = false;
@@ -585,7 +546,7 @@ static int stp_sync_clock(void *data)
atomic_dec(&sync->cpus);
/* Wait for in_sync to be set. */
while (READ_ONCE(sync->in_sync) == 0)
- __udelay(1);
+ ;
}
if (sync->in_sync != 1)
/* Didn't work. Clear per-cpu in sync bit again. */
@@ -596,81 +557,6 @@ static int stp_sync_clock(void *data)
return 0;
}
-static int stp_clear_leap(void)
-{
- struct __kernel_timex txc;
- int ret;
-
- memset(&txc, 0, sizeof(txc));
-
- ret = do_adjtimex(&txc);
- if (ret < 0)
- return ret;
-
- txc.modes = ADJ_STATUS;
- txc.status &= ~(STA_INS|STA_DEL);
- return do_adjtimex(&txc);
-}
-
-static void stp_check_leap(void)
-{
- struct stp_stzi stzi;
- struct stp_lsoib *lsoib = &stzi.lsoib;
- struct __kernel_timex txc;
- int64_t timediff;
- int leapdiff, ret;
-
- if (!stp_info.lu || !check_sync_clock()) {
- /*
- * Either a scheduled leap second was removed by the operator,
- * or STP is out of sync. In both cases, clear the leap second
- * kernel flags.
- */
- if (stp_clear_leap() < 0)
- pr_err("failed to clear leap second flags\n");
- return;
- }
-
- if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) {
- pr_err("stzi failed\n");
- return;
- }
-
- timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC;
- leapdiff = lsoib->nlso - lsoib->also;
-
- if (leapdiff != 1 && leapdiff != -1) {
- pr_err("Cannot schedule %d leap seconds\n", leapdiff);
- return;
- }
-
- if (timediff < 0) {
- if (stp_clear_leap() < 0)
- pr_err("failed to clear leap second flags\n");
- } else if (timediff < 7200) {
- memset(&txc, 0, sizeof(txc));
- ret = do_adjtimex(&txc);
- if (ret < 0)
- return;
-
- txc.modes = ADJ_STATUS;
- if (leapdiff > 0)
- txc.status |= STA_INS;
- else
- txc.status |= STA_DEL;
- ret = do_adjtimex(&txc);
- if (ret < 0)
- pr_err("failed to set leap second flags\n");
- /* arm Timer to clear leap second flags */
- mod_timer(&stp_timer, jiffies + secs_to_jiffies(14400));
- } else {
- /* The day the leap second is scheduled for hasn't been reached. Retry
- * in one hour.
- */
- mod_timer(&stp_timer, jiffies + secs_to_jiffies(3600));
- }
-}
-
/*
* STP work. Check for the STP state and take over the clock
* synchronization if the STP clock source is usable.
@@ -685,7 +571,7 @@ static void stp_work_fn(struct work_struct *work)
if (!stp_online) {
chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
- del_timer_sync(&stp_timer);
+ timer_delete_sync(&stp_timer);
goto out_unlock;
}
@@ -712,8 +598,6 @@ static void stp_work_fn(struct work_struct *work)
* Retry after a second.
*/
mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
- else if (stp_info.lu)
- stp_check_leap();
out_unlock:
mutex_unlock(&stp_mutex);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 211cc8382e4a..46569b8e47dd 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -6,6 +6,7 @@
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/cpufeature.h>
#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
@@ -240,7 +241,7 @@ int topology_set_cpu_management(int fc)
{
int cpu, rc;
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return -EOPNOTSUPP;
if (fc)
rc = ptf(PTF_VERTICAL);
@@ -315,13 +316,13 @@ static int __arch_update_cpu_topology(void)
hd_status = 0;
rc = 0;
mutex_lock(&smp_cpu_state_mutex);
- if (MACHINE_HAS_TOPOLOGY) {
+ if (cpu_has_topology()) {
rc = 1;
store_topology(info);
tl_to_masks(info);
}
update_cpu_masks();
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
topology_update_polarization_simple();
if (cpu_management == 1)
hd_status = hd_enable_hiperdispatch();
@@ -376,7 +377,7 @@ static void set_topology_timer(void)
void topology_expect_change(void)
{
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return;
/* This is racy, but it doesn't matter since it is just a heuristic.
* Worst case is that we poll in a higher frequency for a bit longer.
@@ -500,7 +501,7 @@ int topology_cpu_init(struct cpu *cpu)
int rc;
rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
- if (rc || !MACHINE_HAS_TOPOLOGY)
+ if (rc || !cpu_has_topology())
return rc;
rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
if (rc)
@@ -530,11 +531,11 @@ static const struct cpumask *cpu_drawer_mask(int cpu)
}
static struct sched_domain_topology_level s390_topology[] = {
- { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
- { cpu_book_mask, SD_INIT_NAME(BOOK) },
- { cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
- { cpu_cpu_mask, SD_INIT_NAME(PKG) },
+ SDTL_INIT(cpu_thread_mask, cpu_smt_flags, SMT),
+ SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
+ SDTL_INIT(cpu_book_mask, NULL, BOOK),
+ SDTL_INIT(cpu_drawer_mask, NULL, DRAWER),
+ SDTL_INIT(cpu_cpu_mask, NULL, PKG),
{ NULL, },
};
@@ -569,12 +570,12 @@ void __init topology_init_early(void)
set_sched_topology(s390_topology);
if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
- if (MACHINE_HAS_TOPOLOGY)
+ if (cpu_has_topology())
topology_mode = TOPOLOGY_MODE_HW;
else
topology_mode = TOPOLOGY_MODE_SINGLE;
}
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
goto out;
tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
info = tl_info;
@@ -596,7 +597,7 @@ static inline int topology_get_mode(int enabled)
{
if (!enabled)
return TOPOLOGY_MODE_SINGLE;
- return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+ return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}
static inline int topology_is_enabled(void)
@@ -686,7 +687,7 @@ static int __init topology_init(void)
int rc = 0;
timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
- if (MACHINE_HAS_TOPOLOGY)
+ if (cpu_has_topology())
set_topology_timer();
else
topology_update_polarization_simple();
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 24fee11b030d..19687dab32f7 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -3,18 +3,13 @@
* S390 version
* Copyright IBM Corp. 1999, 2000
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
*
* Derived from "arch/i386/kernel/traps.c"
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'asm.s'.
- */
-#include "asm/irqflags.h"
-#include "asm/ptrace.h"
+#include <linux/cpufeature.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/randomize_kstack.h>
@@ -29,6 +24,8 @@
#include <linux/entry-common.h>
#include <linux/kmsan.h>
#include <asm/asm-extable.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
#include <asm/vtime.h>
#include <asm/fpu.h>
#include <asm/fault.h>
@@ -42,7 +39,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
address = current->thread.trap_tdb.data[3];
else
address = regs->psw.addr;
- return (void __user *) (address - (regs->int_code >> 16));
+ return (void __user *)(address - (regs->int_code >> 16));
}
#ifdef CONFIG_GENERIC_BUG
@@ -57,16 +54,15 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
if (user_mode(regs)) {
force_sig_fault(si_signo, si_code, get_trap_ip(regs));
report_user_fault(regs, si_signo, 0);
- } else {
+ } else {
if (!fixup_exception(regs))
die(regs, str);
- }
+ }
}
static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
- if (notify_die(DIE_TRAP, str, regs, 0,
- regs->int_code, si_signo) == NOTIFY_STOP)
+ if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP)
return;
do_report_trap(regs, si_signo, si_code, str);
}
@@ -78,8 +74,7 @@ void do_per_trap(struct pt_regs *regs)
return;
if (!current->ptrace)
return;
- force_sig_fault(SIGTRAP, TRAP_HWBKPT,
- (void __force __user *) current->thread.per_event.address);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address);
}
NOKPROBE_SYMBOL(do_per_trap);
@@ -98,36 +93,25 @@ static void name(struct pt_regs *regs) \
do_trap(regs, signr, sicode, str); \
}
-DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
- "addressing exception")
-DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
- "execute exception")
-DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
- "fixpoint divide exception")
-DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
- "fixpoint overflow exception")
-DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
- "HFP overflow exception")
-DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
- "HFP underflow exception")
-DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
- "HFP significance exception")
-DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
- "HFP divide exception")
-DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
- "HFP square root exception")
-DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
- "operand exception")
-DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
- "privileged operation")
-DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
- "special operation exception")
-DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
- "transaction constraint exception")
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception")
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception");
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception")
static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
{
int si_code = 0;
+
/* FPC[2] is Data Exception Code */
if ((fpc & 0x00000300) == 0) {
/* bits 6 and 7 of DXC are 0 iff IEEE exception */
@@ -153,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs)
static void illegal_op(struct pt_regs *regs)
{
- __u8 opcode[6];
- __u16 __user *location;
int is_uprobe_insn = 0;
+ u16 __user *location;
int signal = 0;
+ u16 opcode;
location = get_trap_ip(regs);
-
if (user_mode(regs)) {
- if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
+ if (get_user(opcode, location))
return;
- if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
+ if (opcode == S390_BREAKPOINT_U16) {
if (current->ptrace)
force_sig_fault(SIGTRAP, TRAP_BRKPT, location);
else
signal = SIGILL;
#ifdef CONFIG_UPROBES
- } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
+ } else if (opcode == UPROBE_SWBP_INSN) {
is_uprobe_insn = 1;
#endif
- } else
+ } else {
signal = SIGILL;
+ }
}
/*
- * We got either an illegal op in kernel mode, or user space trapped
+ * This is either an illegal op in kernel mode, or user space trapped
* on a uprobes illegal instruction. See if kprobes or uprobes picks
* it up. If not, SIGILL.
*/
if (is_uprobe_insn || !user_mode(regs)) {
- if (notify_die(DIE_BPT, "bpt", regs, 0,
- 3, SIGTRAP) != NOTIFY_STOP)
+ if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP)
signal = SIGILL;
}
if (signal)
@@ -190,18 +173,10 @@ static void illegal_op(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(illegal_op);
-DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
- "specification exception");
-
static void vector_exception(struct pt_regs *regs)
{
int si_code, vic;
- if (!cpu_has_vx()) {
- do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
- return;
- }
-
/* get vector interrupt code from fpc */
save_user_fpu_regs();
vic = (current->thread.ufpu.fpc & 0xf00) >> 8;
@@ -249,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs)
{
if (user_mode(regs))
return;
-
switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
case BUG_TRAP_TYPE_NONE:
fixup_exception(regs);
@@ -262,7 +236,7 @@ static void monitor_event_exception(struct pt_regs *regs)
}
}
-void kernel_stack_overflow(struct pt_regs *regs)
+void kernel_stack_invalid(struct pt_regs *regs)
{
/*
* Normally regs are unpoisoned by the generic entry code, but
@@ -270,12 +244,12 @@ void kernel_stack_overflow(struct pt_regs *regs)
*/
kmsan_unpoison_entry_regs(regs);
bust_spinlocks(1);
- printk("Kernel stack overflow.\n");
+ pr_emerg("Kernel stack pointer invalid\n");
show_regs(regs);
bust_spinlocks(0);
- panic("Corrupt kernel stack, can't continue.");
+ panic("Invalid kernel stack pointer, cannot continue");
}
-NOKPROBE_SYMBOL(kernel_stack_overflow);
+NOKPROBE_SYMBOL(kernel_stack_invalid);
static void __init test_monitor_call(void)
{
@@ -283,12 +257,12 @@ static void __init test_monitor_call(void)
if (!IS_ENABLED(CONFIG_BUG))
return;
- asm volatile(
+ asm_inline volatile(
" mc 0,0\n"
- "0: xgr %0,%0\n"
+ "0: lhi %[val],0\n"
"1:\n"
- EX_TABLE(0b,1b)
- : "+d" (val));
+ EX_TABLE(0b, 1b)
+ : [val] "+d" (val));
if (!val)
panic("Monitor call doesn't work!\n");
}
@@ -323,7 +297,6 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
teid.val = lc->trans_exc_code;
regs->int_code = lc->pgm_int_code;
regs->int_parm_long = teid.val;
-
/*
* In case of a guest fault, short-circuit the fault handler and return.
* This way the sie64a() function will return 0; fault address and
@@ -336,23 +309,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
current->thread.gmap_int_code = regs->int_code & 0xffff;
return;
}
-
state = irqentry_enter(regs);
-
if (user_mode(regs)) {
update_timer_sys();
- if (!static_branch_likely(&cpu_has_bear)) {
+ if (!cpu_has_bear()) {
if (regs->last_break < 4096)
regs->last_break = 1;
}
current->thread.last_break = regs->last_break;
}
-
if (lc->pgm_code & 0x0200) {
/* transaction abort */
current->thread.trap_tdb = lc->pgm_tdb;
}
-
if (lc->pgm_code & PGM_INT_CODE_PER) {
if (user_mode(regs)) {
struct per_event *ev = &current->thread.per_event;
@@ -368,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
goto out;
}
}
-
if (!irqs_disabled_flags(regs->psw.mask))
trace_hardirqs_on();
__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
-
trapnr = regs->int_code & PGM_INT_CODE_MASK;
if (trapnr)
pgm_check_table[trapnr](regs);
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index cd44be2b6ce8..0f88caca4eaf 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -1,4 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 6f9654a191ad..47f574cd1728 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -7,6 +7,7 @@
#define KMSG_COMPONENT "prot_virt"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/sizes.h>
@@ -15,23 +16,11 @@
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/pagewalk.h>
+#include <linux/backing-dev.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>
-#if !IS_ENABLED(CONFIG_KVM)
-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
- return 0;
-}
-
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
- unsigned int fault_flags)
-{
- return 0;
-}
-#endif
-
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
@@ -148,7 +137,7 @@ int uv_destroy_folio(struct folio *folio)
{
int rc;
- /* See gmap_make_secure(): large folios cannot be secure */
+ /* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
@@ -159,6 +148,7 @@ int uv_destroy_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL(uv_destroy_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -175,7 +165,7 @@ int uv_destroy_pte(pte_t pte)
*
* @paddr: Absolute host address of page to be exported
*/
-static int uv_convert_from_secure(unsigned long paddr)
+int uv_convert_from_secure(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
@@ -187,15 +177,16 @@ static int uv_convert_from_secure(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
/*
* The caller must already hold a reference to the folio.
*/
-static int uv_convert_from_secure_folio(struct folio *folio)
+int uv_convert_from_secure_folio(struct folio *folio)
{
int rc;
- /* See gmap_make_secure(): large folios cannot be secure */
+ /* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
@@ -206,6 +197,7 @@ static int uv_convert_from_secure_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -216,6 +208,39 @@ int uv_convert_from_secure_pte(pte_t pte)
return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
}
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
/*
* Calculate the expected ref_count for a folio that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
@@ -237,13 +262,31 @@ static int expected_folio_refs(struct folio *folio)
return res;
}
-static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
+/**
+ * __make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible, @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ * -EBUSY if the folio is in writeback or has too many references;
+ * -EAGAIN if the UVC needs to be attempted again;
+ * -ENXIO if the address is not mapped;
+ * -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ * (it's the same logic as split_folio()), and the folio must be
+ * locked.
+ */
+static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;
if (folio_test_writeback(folio))
- return -EAGAIN;
- expected = expected_folio_refs(folio);
+ return -EBUSY;
+ expected = expected_folio_refs(folio) + 1;
if (!folio_ref_freeze(folio, expected))
return -EBUSY;
set_bit(PG_arch_1, &folio->flags);
@@ -268,262 +311,167 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
-/**
- * should_export_before_import - Determine whether an export is needed
- * before an import-like operation
- * @uvcb: the Ultravisor control block of the UVC to be performed
- * @mm: the mm of the process
- *
- * Returns whether an export is needed before every import-like operation.
- * This is needed for shared pages, which don't trigger a secure storage
- * exception when accessed from a different guest.
- *
- * Although considered as one, the Unpin Page UVC is not an actual import,
- * so it is not affected.
- *
- * No export is needed also when there is only one protected VM, because the
- * page cannot belong to the wrong VM in that case (there is no "other VM"
- * it can belong to).
- *
- * Return: true if an export is needed before every import, otherwise false.
- */
-static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
{
- /*
- * The misc feature indicates, among other things, that importing a
- * shared page from a different protected VM will automatically also
- * transfer its ownership.
- */
- if (uv_has_feature(BIT_UV_FEAT_MISC))
- return false;
- if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
- return false;
- return atomic_read(&mm->context.protected_count) > 1;
-}
+ int rc;
-/*
- * Drain LRU caches: the local one on first invocation and the ones of all
- * CPUs on successive invocations. Returns "true" on the first invocation.
- */
-static bool drain_lru(bool *drain_lru_called)
-{
- /*
- * If we have tried a local drain and the folio refcount
- * still does not match our expected safe value, try with a
- * system wide drain. This is needed if the pagevecs holding
- * the page are on a different CPU.
- */
- if (*drain_lru_called) {
- lru_add_drain_all();
- /* We give up here, don't retry immediately. */
- return false;
- }
- /*
- * We are here if the folio refcount does not match the
- * expected safe value. The main culprits are usually
- * pagevecs. With lru_add_drain() we drain the pagevecs
- * on the local CPU so that hopefully the refcount will
- * reach the expected safe value.
- */
- lru_add_drain();
- *drain_lru_called = true;
- /* The caller should try again immediately */
- return true;
+ if (!folio_trylock(folio))
+ return -EAGAIN;
+ if (should_export_before_import(uvcb, mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = __make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+
+ return rc;
}
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
+/**
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and
+ * split the folio if it is large.
+ * @mm: the mm containing the folio to work on
+ * @folio: the folio
+ *
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
+ * Return: 0 if the operation was successful;
+ * -EAGAIN if splitting the large folio was not successful,
+ * but another attempt can be made;
+ * -EINVAL in case of other folio splitting errors. See split_folio().
*/
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
{
- struct vm_area_struct *vma;
- bool drain_lru_called = false;
- spinlock_t *ptelock;
- unsigned long uaddr;
- struct folio *folio;
- pte_t *ptep;
- int rc;
+ int rc, tried_splits;
-again:
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
+ lockdep_assert_not_held(&mm->mmap_lock);
+ folio_wait_writeback(folio);
+ lru_add_drain_all();
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode. If
- * userspace is playing dirty tricky with mapping huge pages later
- * on this will result in a segmentation fault.
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = -ENXIO;
- ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
- if (!ptep)
- goto out;
- if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- folio = page_folio(pte_page(*ptep));
- rc = -EAGAIN;
- if (folio_test_large(folio)) {
- rc = -E2BIG;
- } else if (folio_trylock(folio)) {
- if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
- rc = make_folio_secure(folio, uvcb);
+ if (!folio_test_large(folio))
+ return 0;
+
+ for (tried_splits = 0; tried_splits < 2; tried_splits++) {
+ struct address_space *mapping;
+ loff_t lstart, lend;
+ struct inode *inode;
+
+ folio_lock(folio);
+ rc = split_folio(folio);
+ if (rc != -EBUSY) {
folio_unlock(folio);
+ return rc;
}
/*
- * Once we drop the PTL, the folio may get unmapped and
- * freed immediately. We need a temporary reference.
+ * Splitting with -EBUSY can fail for various reasons, but we
+ * have to handle one case explicitly for now: some mappings
+ * don't allow for splitting dirty folios; writeback will
+ * mark them clean again, including marking all page table
+ * entries mapping the folio read-only, to catch future write
+ * attempts.
+ *
+ * While the system should be writing back dirty folios in the
+ * background, we obtained this folio by looking up a writable
+ * page table entry. On these problematic mappings, writable
+ * page table entries imply dirty folios, preventing the
+ * split in the first place.
+ *
+ * To prevent a livelock when trigger writeback manually and
+ * letting the caller look up the folio again in the page
+ * table (turning it dirty), immediately try to split again.
+ *
+ * This is only a problem for some mappings (e.g., XFS);
+ * mappings that do not support writeback (e.g., shmem) do not
+ * apply.
*/
- if (rc == -EAGAIN || rc == -E2BIG)
- folio_get(folio);
- }
- pte_unmap_unlock(ptep, ptelock);
-out:
- mmap_read_unlock(gmap->mm);
-
- switch (rc) {
- case -E2BIG:
- folio_lock(folio);
- rc = split_folio(folio);
- folio_unlock(folio);
- folio_put(folio);
-
- switch (rc) {
- case 0:
- /* Splitting succeeded, try again immediately. */
- goto again;
- case -EAGAIN:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -EBUSY:
- /* Unexpected race. */
- return -EAGAIN;
+ if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
+ !folio->mapping || !mapping_can_writeback(folio->mapping)) {
+ folio_unlock(folio);
+ break;
}
- WARN_ON_ONCE(1);
- return -ENXIO;
- case -EAGAIN:
+
/*
- * If we are here because the UVC returned busy or partial
- * completion, this is just a useless check, but it is safe.
+ * Ideally, we'd only trigger writeback on this exact folio. But
+ * there is no easy way to do that, so we'll stabilize the
+ * mapping while we still hold the folio lock, so we can drop
+ * the folio lock to trigger writeback on the range currently
+ * covered by the folio instead.
*/
- folio_wait_writeback(folio);
- folio_put(folio);
- return -EAGAIN;
- case -EBUSY:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -ENXIO:
- if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
- return -EFAULT;
- return -EAGAIN;
- }
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_make_secure);
+ mapping = folio->mapping;
+ lstart = folio_pos(folio);
+ lend = lstart + folio_size(folio) - 1;
+ inode = igrab(mapping->host);
+ folio_unlock(folio);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
+ if (unlikely(!inode))
+ break;
- return gmap_make_secure(gmap, gaddr, &uvcb);
+ filemap_write_and_wait_range(mapping, lstart, lend);
+ iput(mapping->host);
+ }
+ return -EAGAIN;
}
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
-/**
- * gmap_destroy_page - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
{
struct vm_area_struct *vma;
struct folio_walk fw;
- unsigned long uaddr;
struct folio *folio;
int rc;
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Huge pages should not be able to become secure
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, hva);
+ if (!vma) {
+ mmap_read_unlock(mm);
+ return -EFAULT;
+ }
+ folio = folio_walk_start(&fw, vma, hva, 0);
+ if (!folio) {
+ mmap_read_unlock(mm);
+ return -ENXIO;
+ }
- rc = 0;
- folio = folio_walk_start(&fw, vma, uaddr, 0);
- if (!folio)
- goto out;
- /*
- * See gmap_make_secure(): large folios cannot be secure. Small
- * folio implies FW_LEVEL_PTE.
- */
- if (folio_test_large(folio) || !pte_write(fw.pte))
- goto out_walk_end;
- rc = uv_destroy_folio(folio);
+ folio_get(folio);
/*
- * Fault handlers can race; it is possible that two CPUs will fault
- * on the same secure page. One CPU can destroy the page, reboot,
- * re-enter secure mode and import it, while the second CPU was
- * stuck at the beginning of the handler. At some point the second
- * CPU will be able to progress, and it will not be able to destroy
- * the page. In that case we do not want to terminate the process,
- * we instead try to export the page.
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
*/
- if (rc)
- rc = uv_convert_from_secure_folio(folio);
-out_walk_end:
+ if (folio_test_hugetlb(folio))
+ rc = -EFAULT;
+ else if (folio_test_large(folio))
+ rc = -E2BIG;
+ else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
+ rc = -ENXIO;
+ else
+ rc = make_folio_secure(mm, folio, uvcb);
folio_walk_end(&fw, vma);
-out:
- mmap_read_unlock(gmap->mm);
+ mmap_read_unlock(mm);
+
+ if (rc == -E2BIG || rc == -EBUSY) {
+ rc = s390_wiggle_split_folio(mm, folio);
+ if (!rc)
+ rc = -EAGAIN;
+ }
+ folio_put(folio);
+
return rc;
}
-EXPORT_SYMBOL_GPL(gmap_destroy_page);
+EXPORT_SYMBOL_GPL(make_hva_secure);
/*
* To be called with the folio locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the folio concurrently. Having 2
- * parallel arch_make_folio_accessible is fine, as the UV calls will become a
- * no-op if the folio is already exported.
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will
+ * become a no-op if the folio is already exported.
*/
int arch_make_folio_accessible(struct folio *folio)
{
int rc = 0;
- /* See gmap_make_secure(): large folios cannot be secure */
+ /* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
@@ -894,7 +842,12 @@ out_kobj:
device_initcall(uv_sysfs_init);
/*
- * Find the secret with the secret_id in the provided list.
+ * Locate a secret in the list by its id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Search for a secret with the given secret_id in the Ultravisor secret store.
*
* Context: might sleep.
*/
@@ -915,12 +868,15 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN],
/*
* Do the actual search for `uv_get_secret_metadata`.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
*
* Context: might sleep.
*/
-static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
- struct uv_secret_list *list,
- struct uv_secret_list_item_hdr *secret)
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
{
u16 start_idx = 0;
u16 list_rc;
@@ -942,36 +898,7 @@ static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
return -ENOENT;
}
-
-/**
- * uv_get_secret_metadata() - get secret metadata for a given secret id.
- * @secret_id: search pattern.
- * @secret: output data, containing the secret's metadata.
- *
- * Search for a secret with the given secret_id in the Ultravisor secret store.
- *
- * Context: might sleep.
- *
- * Return:
- * * %0: - Found entry; secret->idx and secret->type are valid.
- * * %ENOENT - No entry found.
- * * %ENODEV: - Not supported: UV not available or command not available.
- * * %EIO: - Other unexpected UV error.
- */
-int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN],
- struct uv_secret_list_item_hdr *secret)
-{
- struct uv_secret_list *buf;
- int rc;
-
- buf = kzalloc(sizeof(*buf), GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- rc = find_secret(secret_id, buf, secret);
- kfree(buf);
- return rc;
-}
-EXPORT_SYMBOL_GPL(uv_get_secret_metadata);
+EXPORT_SYMBOL_GPL(uv_find_secret);
/**
* uv_retrieve_secret() - get the secret value for the secret index.
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 598b512cde01..430feb1a5013 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -16,8 +16,8 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/smp.h>
-#include <linux/time_namespace.h>
#include <linux/random.h>
+#include <linux/vdso_datastore.h>
#include <vdso/datapage.h>
#include <asm/vdso/vsyscall.h>
#include <asm/alternative.h>
@@ -26,85 +26,6 @@
extern char vdso64_start[], vdso64_end[];
extern char vdso32_start[], vdso32_end[];
-static struct vm_special_mapping vvar_mapping;
-
-static union vdso_data_store vdso_data_store __page_aligned_data;
-
-struct vdso_data *vdso_data = vdso_data_store.data;
-
-#ifdef CONFIG_TIME_NS
-struct vdso_data *arch_get_vdso_data(void *vvar_page)
-{
- return (struct vdso_data *)(vvar_page);
-}
-
-/*
- * The VVAR page layout depends on whether a task belongs to the root or
- * non-root time namespace. Whenever a task changes its namespace, the VVAR
- * page tables are cleared and then they will be re-faulted with a
- * corresponding layout.
- * See also the comment near timens_setup_vdso_data() for details.
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
- struct mm_struct *mm = task->mm;
- VMA_ITERATOR(vmi, mm, 0);
- struct vm_area_struct *vma;
-
- mmap_read_lock(mm);
- for_each_vma(vmi, vma) {
- if (!vma_is_special_mapping(vma, &vvar_mapping))
- continue;
- zap_vma_pages(vma);
- break;
- }
- mmap_read_unlock(mm);
- return 0;
-}
-#endif
-
-static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
- struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *timens_page = find_timens_vvar_page(vma);
- unsigned long addr, pfn;
- vm_fault_t err;
-
- switch (vmf->pgoff) {
- case VVAR_DATA_PAGE_OFFSET:
- pfn = virt_to_pfn(vdso_data);
- if (timens_page) {
- /*
- * Fault in VVAR page too, since it will be accessed
- * to get clock data anyway.
- */
- addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
- err = vmf_insert_pfn(vma, addr, pfn);
- if (unlikely(err & VM_FAULT_ERROR))
- return err;
- pfn = page_to_pfn(timens_page);
- }
- break;
-#ifdef CONFIG_TIME_NS
- case VVAR_TIMENS_PAGE_OFFSET:
- /*
- * If a task belongs to a time namespace then a namespace
- * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
- * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
- * offset.
- * See also the comment near timens_setup_vdso_data().
- */
- if (!timens_page)
- return VM_FAULT_SIGBUS;
- pfn = virt_to_pfn(vdso_data);
- break;
-#endif /* CONFIG_TIME_NS */
- default:
- return VM_FAULT_SIGBUS;
- }
- return vmf_insert_pfn(vma, vmf->address, pfn);
-}
-
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *vma)
{
@@ -112,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
return 0;
}
-static struct vm_special_mapping vvar_mapping = {
- .name = "[vvar]",
- .fault = vvar_fault,
-};
-
static struct vm_special_mapping vdso64_mapping = {
.name = "[vdso]",
.mremap = vdso_mremap,
@@ -142,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
struct vm_area_struct *vma;
int rc;
- BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+ BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES);
if (mmap_write_lock_killable(mm))
return -EINTR;
@@ -157,17 +73,14 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
rc = vvar_start;
if (IS_ERR_VALUE(vvar_start))
goto out;
- vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
- VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
- VM_PFNMAP,
- &vvar_mapping);
+ vma = vdso_install_vvar_mapping(mm, vvar_start);
rc = PTR_ERR(vma);
if (IS_ERR(vma))
goto out;
- vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
+ vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE;
/* VM_MAYWRITE for COW so gdb can set breakpoints */
vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
- VM_READ|VM_EXEC|
+ VM_READ|VM_EXEC|VM_SEALED_SYSMAP|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso_mapping);
if (IS_ERR(vma)) {
@@ -220,7 +133,7 @@ unsigned long vdso_text_size(void)
unsigned long vdso_size(void)
{
- return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE;
+ return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 2c5afb88d298..1e4ddd1a683f 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -2,7 +2,7 @@
# List of files in the vdso
# Include the generic Makefile to check the built vdso.
-include $(srctree)/lib/vdso/Makefile
+include $(srctree)/lib/vdso/Makefile.include
obj-vdso32 = vdso_user_wrapper-32.o note-32.o
# Build rules
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index c916c4f73f76..9630d58c2080 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -6,16 +6,15 @@
#include <asm/page.h>
#include <asm/vdso.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
OUTPUT_ARCH(s390:31-bit)
SECTIONS
{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
+ VDSO_VVAR_SYMS
+
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index ad206f2068d8..d8f0df742809 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -2,7 +2,7 @@
# List of files in the vdso
# Include the generic Makefile to check the built vdso.
-include $(srctree)/lib/vdso/Makefile
+include $(srctree)/lib/vdso/Makefile.include
obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index ec42b7d9cb53..e4f6551ae898 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -7,17 +7,15 @@
#include <asm/vdso/vsyscall.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
SECTIONS
{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
- PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
+ VDSO_VVAR_SYMS
+
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 377b9aaf8c92..1c606dfa595d 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -52,7 +52,6 @@ SECTIONS
SOFTIRQENTRY_TEXT
FTRACE_HOTPATCH_TRAMPOLINES_TEXT
*(.text.*_indirect_*)
- *(.fixup)
*(.gnu.warning)
. = ALIGN(PAGE_SIZE);
_etext = .; /* End of text section */
@@ -72,6 +71,13 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__end_ro_after_init = .;
+ . = ALIGN(8);
+ .skey_region_table : {
+ __skey_region_start = .;
+ KEEP(*(.skey_region))
+ __skey_region_end = .;
+ }
+
.data.rel.ro : {
*(.data.rel.ro .data.rel.ro.*)
}
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 02217fb4ae10..9a723c48b05a 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 74f73141f9b9..53233dec8cad 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -11,12 +11,30 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
#include "gaccess.h"
+static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
+{
+ struct kvm_memslot_iter iter;
+ struct kvm_memory_slot *slot;
+ struct kvm_memslots *slots;
+ unsigned long start, end;
+
+ slots = kvm_vcpu_memslots(vcpu);
+
+ kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
+ slot = iter.slot;
+ start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn));
+ end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages));
+ gmap_helper_discard(vcpu->kvm->mm, start, end);
+ }
+}
+
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
unsigned long start, end;
@@ -32,12 +50,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+ mmap_read_lock(vcpu->kvm->mm);
/*
* We checked for start >= end above, so lets check for the
* fast path (no prefix swap page involved)
*/
if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
- gmap_discard(vcpu->arch.gmap, start, end);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end));
} else {
/*
* This is slow path. gmap_discard will check for start
@@ -45,13 +64,14 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
* prefix and let gmap_discard make some of these calls
* NOPs.
*/
- gmap_discard(vcpu->arch.gmap, start, prefix);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix));
if (start <= prefix)
- gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+ do_discard_gfn_range(vcpu, 0, 1);
if (end > prefix + PAGE_SIZE)
- gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
- gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+ do_discard_gfn_range(vcpu, 1, 2);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end));
}
+ mmap_read_unlock(vcpu->kvm->mm);
return 0;
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9816b0060fbe..21c2e61fece4 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -18,6 +18,8 @@
#include "kvm-s390.h"
#include "gaccess.h"
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
/*
* vaddress union in order to easily decode a virtual address into its
* region first index, region second index etc. parts.
@@ -317,7 +319,7 @@ enum prot_type {
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
- PROT_NONE,
+ PROT_TYPE_DUMMY,
};
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
@@ -333,7 +335,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
- case PROT_NONE:
+ case PROT_TYPE_DUMMY:
/* We should never get here, acts like termination */
WARN_ON_ONCE(1);
break;
@@ -803,7 +805,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
gpa = kvm_s390_real_to_abs(vcpu, ga);
if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
}
}
if (rc)
@@ -961,7 +963,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc == PGM_PROTECTION)
prot = PROT_TYPE_KEYC;
else
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
out_unlock:
@@ -1393,6 +1395,44 @@ shadow_pgt:
}
/**
+ * shadow_pgt_lookup() - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow aguest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
+ int *dat_protection, int *fake)
+{
+ unsigned long pt_index;
+ unsigned long *table;
+ struct page *page;
+ int rc;
+
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+ if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+ /* Shadow page tables are full pages (pte+pgste) */
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
+ *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
+ *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+ *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
+ rc = 0;
+ } else {
+ rc = -EAGAIN;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+}
+
+/**
* kvm_s390_shadow_fault - handle fault on a shadow page table
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
@@ -1415,6 +1455,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
int dat_protection, fake;
int rc;
+ if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
+ return -EFAULT;
+
mmap_read_lock(sg->mm);
/*
* We don't want any guest-2 tables to change - so the parent
@@ -1423,7 +1466,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
*/
ipte_lock(vcpu->kvm);
- rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+ rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
if (rc)
rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
&fake);
diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c
new file mode 100644
index 000000000000..56ef153eb8fe
--- /dev/null
+++ b/arch/s390/kvm/gmap-vsie.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390 nested VMs.
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "kvm-s390.h"
+
+/**
+ * gmap_find_shadow - find a specific asce in the list of shadow tables
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns the pointer to a gmap if a shadow table with the given asce is
+ * already available, ERR_PTR(-EAGAIN) if another one is just being created,
+ * otherwise NULL
+ *
+ * Context: Called with parent->shadow_lock held
+ */
+static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg;
+
+ lockdep_assert_held(&parent->shadow_lock);
+ list_for_each_entry(sg, &parent->children, list) {
+ if (!gmap_shadow_valid(sg, asce, edat_level))
+ continue;
+ if (!sg->initialized)
+ return ERR_PTR(-EAGAIN);
+ refcount_inc(&sg->ref_count);
+ return sg;
+ }
+ return NULL;
+}
+
+/**
+ * gmap_shadow - create/find a shadow guest address space
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * The pages of the top level page table referred by the asce parameter
+ * will be set to read-only and marked in the PGSTEs of the kvm process.
+ * The shadow table will be removed automatically on any change to the
+ * PTE mapping for the source table.
+ *
+ * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
+ * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
+ * parent gmap table could not be protected.
+ */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg, *new;
+ unsigned long limit;
+ int rc;
+
+ if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) ||
+ KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private))
+ return ERR_PTR(-EFAULT);
+ spin_lock(&parent->shadow_lock);
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ spin_unlock(&parent->shadow_lock);
+ if (sg)
+ return sg;
+ /* Create a new shadow gmap */
+ limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
+ if (asce & _ASCE_REAL_SPACE)
+ limit = -1UL;
+ new = gmap_alloc(limit);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ new->mm = parent->mm;
+ new->parent = gmap_get(parent);
+ new->private = parent->private;
+ new->orig_asce = asce;
+ new->edat_level = edat_level;
+ new->initialized = false;
+ spin_lock(&parent->shadow_lock);
+ /* Recheck if another CPU created the same shadow */
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ if (sg) {
+ spin_unlock(&parent->shadow_lock);
+ gmap_free(new);
+ return sg;
+ }
+ if (asce & _ASCE_REAL_SPACE) {
+ /* only allow one real-space gmap shadow */
+ list_for_each_entry(sg, &parent->children, list) {
+ if (sg->orig_asce & _ASCE_REAL_SPACE) {
+ spin_lock(&sg->guest_table_lock);
+ gmap_unshadow(sg);
+ spin_unlock(&sg->guest_table_lock);
+ list_del(&sg->list);
+ gmap_put(sg);
+ break;
+ }
+ }
+ }
+ refcount_set(&new->ref_count, 2);
+ list_add(&new->list, &parent->children);
+ if (asce & _ASCE_REAL_SPACE) {
+ /* nothing to protect, return right away */
+ new->initialized = true;
+ spin_unlock(&parent->shadow_lock);
+ return new;
+ }
+ spin_unlock(&parent->shadow_lock);
+ /* protect after insertion, so it will get properly invalidated */
+ mmap_read_lock(parent->mm);
+ rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN,
+ ((asce & _ASCE_TABLE_LENGTH) + 1),
+ PROT_READ, GMAP_NOTIFY_SHADOW);
+ mmap_read_unlock(parent->mm);
+ spin_lock(&parent->shadow_lock);
+ new->initialized = true;
+ if (rc) {
+ list_del(&new->list);
+ gmap_free(new);
+ new = ERR_PTR(rc);
+ }
+ spin_unlock(&parent->shadow_lock);
+ return new;
+}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 5bbaadf75dc6..c7908950c1f4 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -94,7 +94,7 @@ static int handle_validity(struct kvm_vcpu *vcpu)
vcpu->stat.exit_validity++;
trace_kvm_s390_intercept_validity(vcpu, viwhy);
- KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+ KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy,
current->pid, vcpu->kvm);
/* do not warn on invalid runtime instrumentation mode */
@@ -367,7 +367,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg2, &srcaddr, GACC_FETCH, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0);
+ rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0);
if (rc != 0)
return rc;
@@ -376,7 +376,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg1, &dstaddr, GACC_STORE, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE);
+ rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE);
if (rc != 0)
return rc;
@@ -544,12 +544,12 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
guest_uvcb->header.cmd);
return 0;
}
- rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+ rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb);
/*
* If the unpin did not succeed, the guest will exit again for the UVC
* and we will retry the unpin.
*/
- if (rc == -EINVAL)
+ if (rc == -EINVAL || rc == -ENXIO)
return 0;
/*
* If we got -EAGAIN here, we simply return it. It will eventually
@@ -652,10 +652,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
break;
case ICPT_PV_PREF:
rc = 0;
- gmap_convert_to_secure(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu));
- gmap_convert_to_secure(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+ kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu));
+ kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
break;
default:
return -EOPNOTSUPP;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index d4f031e086fc..2a92a8b9e4c2 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -10,9 +10,11 @@
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/cpufeature.h>
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <linux/hrtimer.h>
+#include <linux/export.h>
#include <linux/mmu_context.h>
#include <linux/nospec.h>
#include <linux/signal.h>
@@ -577,7 +579,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
/* take care of lazy register loading */
kvm_s390_fpu_store(vcpu->run);
save_access_regs(vcpu->run->s.regs.acrs);
- if (MACHINE_HAS_GS && vcpu->arch.gs_enabled)
+ if (cpu_has_gs() && vcpu->arch.gs_enabled)
save_gs_cb(current->thread.gs_cb);
/* Extended save area */
@@ -948,8 +950,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
(u64 *) __LC_PGM_LAST_BREAK);
- rc |= put_guest_lc(vcpu, pgm_info.code,
- (u16 *)__LC_PGM_INT_CODE);
+ rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_CODE);
rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
@@ -2893,7 +2894,8 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- u64 uaddr;
+ u64 uaddr_s, uaddr_i;
+ int idx;
switch (ue->type) {
/* we store the userspace addresses instead of the guest addresses */
@@ -2901,14 +2903,16 @@ int kvm_set_routing_entry(struct kvm *kvm,
if (kvm_is_ucontrol(kvm))
return -EINVAL;
e->set = set_adapter_int;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
- if (uaddr == -EFAULT)
- return -EFAULT;
- e->adapter.summary_addr = uaddr;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
- if (uaddr == -EFAULT)
+
+ idx = srcu_read_lock(&kvm->srcu);
+ uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr);
+ uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i))
return -EFAULT;
- e->adapter.ind_addr = uaddr;
+ e->adapter.summary_addr = uaddr_s;
+ e->adapter.ind_addr = uaddr_i;
e->adapter.summary_offset = ue->u.adapter.summary_offset;
e->adapter.ind_offset = ue->u.adapter.ind_offset;
e->adapter.adapter_id = ue->u.adapter.adapter_id;
@@ -3158,7 +3162,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm)
if (!gi->origin)
return;
gisa_clear_ipm(gi->origin);
- VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin);
+ VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin);
}
void kvm_s390_gisa_init(struct kvm *kvm)
@@ -3171,11 +3175,10 @@ void kvm_s390_gisa_init(struct kvm *kvm)
gi->alert.mask = 0;
spin_lock_init(&gi->alert.ref_lock);
gi->expires = 50 * 1000; /* 50 usec */
- hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- gi->timer.function = gisa_vcpu_kicker;
+ hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
- VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
+ VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin);
}
void kvm_s390_gisa_enable(struct kvm *kvm)
@@ -3216,7 +3219,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
process_gib_alert_list();
hrtimer_cancel(&gi->timer);
gi->origin = NULL;
- VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
+ VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa);
}
void kvm_s390_gisa_disable(struct kvm *kvm)
@@ -3465,7 +3468,7 @@ int __init kvm_s390_gib_init(u8 nisc)
}
}
- KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
+ KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc);
goto out;
out_unreg_gal:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d8080c27d45b..bf6fa8b9ca73 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -14,6 +14,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/compiler.h>
+#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
@@ -23,6 +24,7 @@
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/cpufeature.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
@@ -36,8 +38,10 @@
#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
#include <asm/stp.h>
#include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
#include <asm/nmi.h>
#include <asm/isc.h>
#include <asm/sclp.h>
@@ -442,13 +446,13 @@ static void __init kvm_s390_cpu_feat_init(void)
if (test_facility(201)) /* PFCR */
pfcr_query(&kvm_s390_available_subfunc.pfcr);
- if (MACHINE_HAS_ESOP)
+ if (machine_has_esop())
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
*/
- if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+ if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao ||
!test_facility(3) || !nested)
return;
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
@@ -637,7 +641,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = min_t(unsigned int, num_online_cpus(), r);
break;
case KVM_CAP_S390_COW:
- r = MACHINE_HAS_ESOP;
+ r = machine_has_esop();
break;
case KVM_CAP_S390_VECTOR_REGISTERS:
r = test_facility(129);
@@ -1019,7 +1023,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
- VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+ VM_EVENT(kvm, 3, "New guest asce: 0x%p",
(void *) kvm->arch.gmap->asce);
break;
}
@@ -2671,7 +2675,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (r)
break;
- r = s390_disable_cow_sharing();
+ mmap_write_lock(kvm->mm);
+ r = gmap_helper_disable_cow_sharing();
+ mmap_write_unlock(kvm->mm);
if (r)
break;
@@ -3395,7 +3401,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
/* we emulate STHYI in kvm */
set_kvm_facility(kvm->arch.model.fac_mask, 74);
set_kvm_facility(kvm->arch.model.fac_list, 74);
- if (MACHINE_HAS_TLB_GUEST) {
+ if (machine_has_tlb_guest()) {
set_kvm_facility(kvm->arch.model.fac_mask, 147);
set_kvm_facility(kvm->arch.model.fac_list, 147);
}
@@ -3428,8 +3434,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (type & KVM_VM_S390_UCONTROL) {
+ struct kvm_userspace_memory_region2 fake_memslot = {
+ .slot = KVM_S390_UCONTROL_MEMSLOT,
+ .guest_phys_addr = 0,
+ .userspace_addr = 0,
+ .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
+ .flags = 0,
+ };
+
kvm->arch.gmap = NULL;
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+ /* one flat fake memslot covering the whole address-space */
+ mutex_lock(&kvm->slots_lock);
+ KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
+ mutex_unlock(&kvm->slots_lock);
} else {
if (sclp.hamax == U64_MAX)
kvm->arch.mem_limit = TASK_SIZE_MAX;
@@ -3451,7 +3469,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_gisa_init(kvm);
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
kvm->arch.pv.set_aside = NULL;
- KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
+ KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
return 0;
out_err:
@@ -3514,7 +3532,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
- KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
+ KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}
/* Section: vcpu related */
@@ -3635,7 +3653,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
free_page((unsigned long)old_sca);
- VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+ VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
old_sca, kvm->arch.sca);
return 0;
}
@@ -3879,8 +3897,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_setup_model(vcpu);
- /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
- if (MACHINE_HAS_ESOP)
+ /* pgste_set_pte has special handling for !machine_has_esop() */
+ if (machine_has_esop())
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
if (test_kvm_facility(vcpu->kvm, 9))
vcpu->arch.sie_block->ecb |= ECB_SRSI;
@@ -3930,8 +3948,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
if (rc)
return rc;
}
- hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
+ hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
vcpu->arch.sie_block->hpid = HPID_KVM;
@@ -4012,7 +4030,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto out_free_sie_block;
}
- VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+ VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
@@ -4498,6 +4516,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
+static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm *kvm = gmap->private;
+ gfn_t gfn = gpa_to_gfn(gaddr);
+ bool unlocked;
+ hva_t vmaddr;
+ gpa_t tmp;
+ int rc;
+
+ if (kvm_is_ucontrol(kvm)) {
+ tmp = __gmap_translate(gmap, gaddr);
+ gfn = gpa_to_gfn(tmp);
+ }
+
+ vmaddr = gfn_to_hva(kvm, gfn);
+ rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!rc)
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+ return rc;
+}
+
+/**
+ * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
+ * @gmap: the gmap of the guest
+ * @gpa: the starting guest address
+ * @npages: how many pages to protect
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
+ *
+ * Context: kvm->srcu and gmap->mm need to be held in read mode
+ */
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits)
+{
+ unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+ gpa_t end = gpa + npages * PAGE_SIZE;
+ int rc;
+
+ for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ if (rc == -EAGAIN) {
+ __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ }
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
+{
+ gpa_t gaddr = kvm_s390_get_prefix(vcpu);
+ int idx, rc;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ mmap_read_lock(vcpu->arch.gmap->mm);
+
+ rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
+
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return rc;
+}
+
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
@@ -4513,9 +4600,8 @@ retry:
*/
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
int rc;
- rc = gmap_mprotect_notify(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu),
- PAGE_SIZE * 2, PROT_WRITE);
+
+ rc = kvm_s390_mprotect_notify_prefix(vcpu);
if (rc) {
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
return rc;
@@ -4766,11 +4852,112 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
+static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
+{
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+}
+
+/*
+ * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
+ * @vcpu: the vCPU whose gmap is to be fixed up
+ * @gfn: the guest frame number used for memslots (including fake memslots)
+ * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
+ * @flags: FOLL_* flags
+ *
+ * Return: 0 on success, < 0 in case of error.
+ * Context: The mm lock must not be held before calling. May sleep.
+ */
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm_memory_slot *slot;
+ unsigned int fault_flags;
+ bool writable, unlocked;
+ unsigned long vmaddr;
+ struct page *page;
+ kvm_pfn_t pfn;
+ int rc;
+
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+ return vcpu_post_run_addressing_exception(vcpu);
+
+ fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
+ if (vcpu->arch.gmap->pfault_enabled)
+ flags |= FOLL_NOWAIT;
+ vmaddr = __gfn_to_hva_memslot(slot, gfn);
+
+try_again:
+ pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
+
+ /* Access outside memory, inject addressing exception */
+ if (is_noslot_pfn(pfn))
+ return vcpu_post_run_addressing_exception(vcpu);
+ /* Signal pending: try again */
+ if (pfn == KVM_PFN_ERR_SIGPENDING)
+ return -EAGAIN;
+
+ /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */
+ if (pfn == KVM_PFN_ERR_NEEDS_IO) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ vcpu->stat.pfault_sync++;
+ /* Could not setup async pfault, try again synchronously */
+ flags &= ~FOLL_NOWAIT;
+ goto try_again;
+ }
+ /* Any other error */
+ if (is_error_pfn(pfn))
+ return -EFAULT;
+
+ /* Success */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
+ rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
+ if (!rc)
+ rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
+ scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
+ kvm_release_faultin_page(vcpu->kvm, page, false, writable);
+ }
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ return rc;
+}
+
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
+{
+ unsigned long gaddr_tmp;
+ gfn_t gfn;
+
+ gfn = gpa_to_gfn(gaddr);
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ /*
+ * This translates the per-vCPU guest address into a
+ * fake guest address, which can then be used with the
+ * fake memslots that are identity mapping userspace.
+ * This allows ucontrol VMs to use the normal fault
+ * resolution path, like normal VMs.
+ */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ if (gaddr_tmp == -EFAULT) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
+ vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
+ return -EREMOTE;
+ }
+ gfn = gpa_to_gfn(gaddr_tmp);
+ }
+ return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
+}
+
static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
{
unsigned int flags = 0;
unsigned long gaddr;
- int rc = 0;
+ int rc;
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
if (kvm_s390_cur_gmap_fault_is_write())
@@ -4780,30 +4967,16 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
case 0:
vcpu->stat.exit_null++;
break;
- case PGM_NON_SECURE_STORAGE_ACCESS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
- /*
- * This is normal operation; a page belonging to a protected
- * guest has not been imported yet. Try to import the page into
- * the protected guest.
- */
- if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL)
- send_sig(SIGSEGV, current, 0);
- break;
case PGM_SECURE_STORAGE_ACCESS:
case PGM_SECURE_STORAGE_VIOLATION:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ kvm_s390_assert_primary_as(vcpu);
/*
* This can happen after a reboot with asynchronous teardown;
* the new guest (normal or protected) will run on top of the
* previous protected guest. The old pages need to be destroyed
* so the new guest can use them.
*/
- if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) {
+ if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
/*
* Either KVM messed up the secure guest mapping or the
* same page is mapped into multiple secure guests.
@@ -4818,6 +4991,20 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
send_sig(SIGSEGV, current, 0);
}
break;
+ case PGM_NON_SECURE_STORAGE_ACCESS:
+ kvm_s390_assert_primary_as(vcpu);
+ /*
+ * This is normal operation; a page belonging to a protected
+ * guest has not been imported yet. Try to import the page into
+ * the protected guest.
+ */
+ rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
+ if (rc == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+ if (rc != -ENXIO)
+ break;
+ flags = FAULT_FLAG_WRITE;
+ fallthrough;
case PGM_PROTECTION:
case PGM_SEGMENT_TRANSLATION:
case PGM_PAGE_TRANSLATION:
@@ -4825,40 +5012,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
case PGM_REGION_FIRST_TRANS:
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
- if (vcpu->arch.gmap->pfault_enabled) {
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
- if (rc == -EFAULT)
- return vcpu_post_run_addressing_exception(vcpu);
- if (rc == -EAGAIN) {
- trace_kvm_s390_major_guest_pfault(vcpu);
- if (kvm_arch_setup_async_pf(vcpu))
- return 0;
- vcpu->stat.pfault_sync++;
- } else {
- return rc;
- }
- }
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
- if (rc == -EFAULT) {
- if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
- vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
- vcpu->run->s390_ucontrol.pgm_code = 0x10;
- return -EREMOTE;
- }
- return vcpu_post_run_addressing_exception(vcpu);
- }
- break;
+ kvm_s390_assert_primary_as(vcpu);
+ return vcpu_dat_fault_handler(vcpu, gaddr, flags);
default:
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
current->thread.gmap_int_code, current->thread.gmap_teid.val);
send_sig(SIGSEGV, current, 0);
break;
}
- return rc;
+ return 0;
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@@ -4901,6 +5063,30 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
return vcpu_post_run_handle_fault(vcpu);
}
+int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
+ u64 *gprs, unsigned long gasce)
+{
+ int ret;
+
+ guest_state_enter_irqoff();
+
+ /*
+ * The guest_state_{enter,exit}_irqoff() functions inform lockdep and
+ * tracing that entry to the guest will enable host IRQs, and exit from
+ * the guest will disable host IRQs.
+ *
+ * We must not use lockdep/tracing/RCU in this critical section, so we
+ * use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
+ */
+ arch_local_irq_enable();
+ ret = sie64a(scb, gprs, gasce);
+ arch_local_irq_disable();
+
+ guest_state_exit_irqoff();
+
+ return ret;
+}
+
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
@@ -4921,20 +5107,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
kvm_vcpu_srcu_read_unlock(vcpu);
/*
* As PF_VCPU will be used in fault handler, between
- * guest_enter and guest_exit should be no uaccess.
+ * guest_timing_enter_irqoff and guest_timing_exit_irqoff
+ * should be no uaccess.
*/
- local_irq_disable();
- guest_enter_irqoff();
- __disable_cpu_timer_accounting(vcpu);
- local_irq_enable();
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(sie_page->pv_grregs,
vcpu->run->s.regs.gprs,
sizeof(sie_page->pv_grregs));
}
- exit_reason = sie64a(vcpu->arch.sie_block,
- vcpu->run->s.regs.gprs,
- vcpu->arch.gmap->asce);
+
+ local_irq_disable();
+ guest_timing_enter_irqoff();
+ __disable_cpu_timer_accounting(vcpu);
+
+ exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
+ vcpu->run->s.regs.gprs,
+ vcpu->arch.gmap->asce);
+
+ __enable_cpu_timer_accounting(vcpu);
+ guest_timing_exit_irqoff();
+ local_irq_enable();
+
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(vcpu->run->s.regs.gprs,
sie_page->pv_grregs,
@@ -4950,10 +5143,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
}
}
- local_irq_disable();
- __enable_cpu_timer_accounting(vcpu);
- guest_exit_irqoff();
- local_irq_enable();
kvm_vcpu_srcu_read_lock(vcpu);
rc = vcpu_post_run(vcpu, exit_reason);
@@ -5019,7 +5208,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
}
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
preempt_disable();
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
if (current->thread.gs_cb) {
@@ -5085,7 +5274,7 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu)
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
preempt_disable();
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
if (vcpu->arch.gs_enabled)
@@ -5737,7 +5926,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- r = gmap_fault(vcpu->arch.gmap, arg, 0);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = vcpu_dat_fault_handler(vcpu, arg, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_ENABLE_CAP:
@@ -5853,7 +6044,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
gpa_t size;
- if (kvm_is_ucontrol(kvm))
+ if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
return -EINVAL;
/* When we are protected, we should not change the memory slots */
@@ -5905,6 +6096,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int rc = 0;
+ if (kvm_is_ucontrol(kvm))
+ return;
+
switch (change) {
case KVM_MR_DELETE:
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 597d7a71deeb..c44fe0c3a097 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -20,6 +20,8 @@
#include <asm/processor.h>
#include <asm/sclp.h>
+#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
+
static inline void kvm_s390_fpu_store(struct kvm_run *run)
{
fpu_stfpc(&run->s.regs.fpc);
@@ -279,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
return gd;
}
+static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa)
+{
+ hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+ if (!kvm_is_error_hva(hva))
+ hva |= offset_in_page(gpa);
+ return hva;
+}
+
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
@@ -297,6 +308,9 @@ int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
u16 *rc, u16 *rrc);
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb);
static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
{
@@ -308,6 +322,41 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
return vcpu->arch.pv.handle;
}
+/**
+ * __kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mm lock for gmap->mm
+ */
+static inline int __kvm_s390_pv_destroy_page(struct page *page)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. */
+ if (folio_test_large(folio))
+ return -EFAULT;
+
+ rc = uv_destroy_folio(folio);
+ /*
+ * Fault handlers can race; it is possible that two CPUs will fault
+ * on the same secure page. One CPU can destroy the page, reboot,
+ * re-enter secure mode and import it, while the second CPU was
+ * stuck at the beginning of the handler. At some point the second
+ * CPU will be able to progress, and it will not be able to destroy
+ * the page. In that case we do not want to terminate the process,
+ * we instead try to export the page.
+ */
+ if (rc)
+ rc = uv_convert_from_secure_folio(folio);
+
+ return rc;
+}
+
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -387,6 +436,10 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
void kvm_s390_vsie_init(struct kvm *kvm);
void kvm_s390_vsie_destroy(struct kvm *kvm);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+
+/* implemented in gmap-vsie.c */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
/* implemented in sigp.c */
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
@@ -408,6 +461,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags);
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits);
+
+static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags)
+{
+ return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
+}
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 9b9e7fdd5380..8c40154ff50f 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -433,7 +433,6 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
{
struct zpci_dev *zdev = opaque;
- u8 status;
int rc;
if (!zdev)
@@ -480,13 +479,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
*/
zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
- rc = zpci_enable_device(zdev);
- if (rc)
- goto clear_gisa;
-
- /* Re-register the IOMMU that was already created */
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
+ rc = zpci_reenable_device(zdev);
if (rc)
goto clear_gisa;
@@ -516,7 +509,6 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
{
struct zpci_dev *zdev = opaque;
struct kvm *kvm;
- u8 status;
if (!zdev)
return;
@@ -550,12 +542,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
goto out;
}
- if (zpci_enable_device(zdev))
- goto out;
-
- /* Re-register the IOMMU that was already created */
- zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
+ zpci_reenable_device(zdev);
out:
spin_lock(&kvm->arch.kzdev_list_lock);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1a49b89706f8..9253c70897a8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1248,6 +1248,8 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
static int handle_essa(struct kvm_vcpu *vcpu)
{
+ lockdep_assert_held(&vcpu->kvm->srcu);
+
/* entries expected to be 1FF */
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
unsigned long *cbrlo;
@@ -1297,12 +1299,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* Retry the ESSA instruction */
kvm_s390_retry_instr(vcpu);
} else {
- int srcu_idx;
-
mmap_read_lock(vcpu->kvm->mm);
- srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
i = __do_essa(vcpu, orc);
- srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
mmap_read_unlock(vcpu->kvm->mm);
if (i < 0)
return i;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 75e81ba26d04..25ede8354514 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -5,6 +5,8 @@
* Copyright IBM Corp. 2019, 2020
* Author(s): Janosch Frank <frankja@linux.ibm.com>
*/
+
+#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
@@ -33,6 +35,64 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
/**
+ * kvm_s390_pv_make_secure() - make one guest page secure
+ * @kvm: the guest
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error.
+ */
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
+{
+ unsigned long vmaddr;
+
+ lockdep_assert_held(&kvm->srcu);
+
+ vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
+ if (kvm_is_error_hva(vmaddr))
+ return -EFAULT;
+ return make_hva_secure(kvm->mm, vmaddr, uvcb);
+}
+
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ .gaddr = gaddr,
+ };
+
+ return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
+}
+
+/**
+ * kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @kvm: the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
+{
+ struct page *page;
+ int rc = 0;
+
+ mmap_read_lock(kvm->mm);
+ page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+ if (page)
+ rc = __kvm_s390_pv_destroy_page(page);
+ kvm_release_page_clean(page);
+ mmap_read_unlock(kvm->mm);
+ return rc;
+}
+
+/**
* struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
* be destroyed
*
@@ -637,11 +697,29 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
.tweak[0] = tweak,
.tweak[1] = offset,
};
- int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+ int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
+ unsigned long vmaddr;
+ bool unlocked;
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
+ if (ret == -ENXIO) {
+ mmap_read_lock(kvm->mm);
+ vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(vmaddr)) {
+ ret = -EFAULT;
+ } else {
+ ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!ret)
+ ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
+ }
+ mmap_read_unlock(kvm->mm);
+ if (!ret)
+ return -EAGAIN;
+ return ret;
+ }
+
if (ret && ret != -EAGAIN)
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
uvcb.gaddr, *rc, *rrc);
@@ -660,6 +738,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
addr, size);
+ guard(srcu)(&kvm->srcu);
+
while (offset < size) {
ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
if (ret == -EAGAIN) {
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 9ac92dbf680d..9e28f165c114 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu,
__entry->sie_block = sie_block;
),
- TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+ TP_printk("create cpu %d at 0x%p, sie block at 0x%p",
__entry->id, __entry->vcpu, __entry->sie_block)
);
@@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css,
__entry->kvm = kvm;
),
- TP_printk("enabling channel I/O support (kvm @ %pK)\n",
+ TP_printk("enabling channel I/O support (kvm @ %p)\n",
__entry->kvm)
);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a687695d8f68..347268f89f2f 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -13,6 +13,7 @@
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/io.h>
+#include <linux/mman.h>
#include <asm/gmap.h>
#include <asm/mmu_context.h>
@@ -23,6 +24,10 @@
#include "kvm-s390.h"
#include "gaccess.h"
+enum vsie_page_flags {
+ VSIE_PAGE_IN_USE = 0,
+};
+
struct vsie_page {
struct kvm_s390_sie_block scb_s; /* 0x0000 */
/*
@@ -46,11 +51,40 @@ struct vsie_page {
gpa_t gvrd_gpa; /* 0x0240 */
gpa_t riccbd_gpa; /* 0x0248 */
gpa_t sdnx_gpa; /* 0x0250 */
- __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */
+ /*
+ * guest address of the original SCB. Remains set for free vsie
+ * pages, so we can properly look them up in our addr_to_page
+ * radix tree.
+ */
+ gpa_t scb_gpa; /* 0x0258 */
+ /*
+ * Flags: must be set/cleared atomically after the vsie page can be
+ * looked up by other CPUs.
+ */
+ unsigned long flags; /* 0x0260 */
+ __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
+/**
+ * gmap_shadow_valid() - check if a shadow guest address space matches the
+ * given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise; the
+ * caller has to request a new shadow gmap in this case.
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+ if (sg->removed)
+ return 0;
+ return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
__u16 reason_code)
@@ -584,7 +618,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
struct kvm *kvm = gmap->private;
struct vsie_page *cur;
unsigned long prefix;
- struct page *page;
int i;
if (!gmap_is_shadow(gmap))
@@ -594,10 +627,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
* therefore we can safely reference them all the time.
*/
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = READ_ONCE(kvm->arch.vsie.pages[i]);
- if (!page)
+ cur = READ_ONCE(kvm->arch.vsie.pages[i]);
+ if (!cur)
continue;
- cur = page_to_virt(page);
if (READ_ONCE(cur->gmap) != gmap)
continue;
prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
@@ -1138,10 +1170,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->arch.sie_block->fpf & FPF_BPBC)
set_thread_flag(TIF_ISOLATE_BP_GUEST);
- local_irq_disable();
- guest_enter_irqoff();
- local_irq_enable();
-
/*
* Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
* and VCPU requests also hinder the vSIE from running and lead
@@ -1151,15 +1179,16 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
current->thread.gmap_int_code = 0;
barrier();
- if (!kvm_s390_vcpu_sie_inhibited(vcpu))
- rc = sie64a(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
+ if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
+ local_irq_disable();
+ guest_timing_enter_irqoff();
+ rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
+ guest_timing_exit_irqoff();
+ local_irq_enable();
+ }
barrier();
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
- local_irq_disable();
- guest_exit_irqoff();
- local_irq_enable();
-
/* restore guest state for bp isolation override */
if (!guest_bp_isolation)
clear_thread_flag(TIF_ISOLATE_BP_GUEST);
@@ -1345,6 +1374,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return rc;
}
+/* Try getting a given vsie page, returning "true" on success. */
+static inline bool try_get_vsie_page(struct vsie_page *vsie_page)
+{
+ if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags))
+ return false;
+ return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
+/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */
+static void put_vsie_page(struct vsie_page *vsie_page)
+{
+ clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
/*
* Get or create a vsie page for a scb address.
*
@@ -1355,16 +1398,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
struct vsie_page *vsie_page;
- struct page *page;
int nr_vcpus;
rcu_read_lock();
- page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+ vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
rcu_read_unlock();
- if (page) {
- if (page_ref_inc_return(page) == 2)
- return page_to_virt(page);
- page_ref_dec(page);
+ if (vsie_page) {
+ if (try_get_vsie_page(vsie_page)) {
+ if (vsie_page->scb_gpa == addr)
+ return vsie_page;
+ /*
+ * We raced with someone reusing + putting this vsie
+ * page before we grabbed it.
+ */
+ put_vsie_page(vsie_page);
+ }
}
/*
@@ -1375,36 +1423,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
mutex_lock(&kvm->arch.vsie.mutex);
if (kvm->arch.vsie.page_count < nr_vcpus) {
- page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
- if (!page) {
+ vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
+ if (!vsie_page) {
mutex_unlock(&kvm->arch.vsie.mutex);
return ERR_PTR(-ENOMEM);
}
- page_ref_inc(page);
- kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+ __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+ kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page;
kvm->arch.vsie.page_count++;
} else {
/* reuse an existing entry that belongs to nobody */
while (true) {
- page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
- if (page_ref_inc_return(page) == 2)
+ vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+ if (try_get_vsie_page(vsie_page))
break;
- page_ref_dec(page);
kvm->arch.vsie.next++;
kvm->arch.vsie.next %= nr_vcpus;
}
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
}
- page->index = addr;
- /* double use of the same address */
- if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
- page_ref_dec(page);
+ /* Mark it as invalid until it resides in the tree. */
+ vsie_page->scb_gpa = ULONG_MAX;
+
+ /* Double use of the same address or allocation failure. */
+ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
+ vsie_page)) {
+ put_vsie_page(vsie_page);
mutex_unlock(&kvm->arch.vsie.mutex);
return NULL;
}
+ vsie_page->scb_gpa = addr;
mutex_unlock(&kvm->arch.vsie.mutex);
- vsie_page = page_to_virt(page);
memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
release_gmap_shadow(vsie_page);
vsie_page->fault_addr = 0;
@@ -1412,14 +1464,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
return vsie_page;
}
-/* put a vsie page acquired via get_vsie_page */
-static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
-{
- struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
-
- page_ref_dec(page);
-}
-
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
struct vsie_page *vsie_page;
@@ -1470,7 +1514,7 @@ out_unshadow:
out_unpin_scb:
unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
- put_vsie_page(vcpu->kvm, vsie_page);
+ put_vsie_page(vsie_page);
return rc < 0 ? rc : 0;
}
@@ -1486,18 +1530,18 @@ void kvm_s390_vsie_init(struct kvm *kvm)
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
struct vsie_page *vsie_page;
- struct page *page;
int i;
mutex_lock(&kvm->arch.vsie.mutex);
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = kvm->arch.vsie.pages[i];
+ vsie_page = kvm->arch.vsie.pages[i];
kvm->arch.vsie.pages[i] = NULL;
- vsie_page = page_to_virt(page);
release_gmap_shadow(vsie_page);
/* free the radix tree entry */
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
- __free_page(page);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
+ free_page((unsigned long)vsie_page);
}
kvm->arch.vsie.page_count = 0;
mutex_unlock(&kvm->arch.vsie.mutex);
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 14bbfe50033c..f43f897d3fc0 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -24,6 +24,3 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
-
-obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o
-crc32-s390-y := crc32-glue.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c
deleted file mode 100644
index 137080e61f90..000000000000
--- a/arch/s390/lib/crc32-glue.c
+++ /dev/null
@@ -1,92 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CRC-32 implemented with the z/Architecture Vector Extension Facility.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-#define KMSG_COMPONENT "crc32-vx"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-#define VX_MIN_LEN 64
-#define VX_ALIGNMENT 16L
-#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
-
-static DEFINE_STATIC_KEY_FALSE(have_vxrs);
-
-/*
- * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
- *
- * Creates a function to perform a particular CRC-32 computation. Depending
- * on the message buffer, the hardware-accelerated or software implementation
- * is used. Note that the message buffer is aligned to improve fetch
- * operations of VECTOR LOAD MULTIPLE instructions.
- */
-#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
- u32 ___fname(u32 crc, const u8 *data, size_t datalen) \
- { \
- unsigned long prealign, aligned, remaining; \
- DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
- \
- if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \
- !static_branch_likely(&have_vxrs)) \
- return ___crc32_sw(crc, data, datalen); \
- \
- if ((unsigned long)data & VX_ALIGN_MASK) { \
- prealign = VX_ALIGNMENT - \
- ((unsigned long)data & VX_ALIGN_MASK); \
- datalen -= prealign; \
- crc = ___crc32_sw(crc, data, prealign); \
- data = (void *)((unsigned long)data + prealign); \
- } \
- \
- aligned = datalen & ~VX_ALIGN_MASK; \
- remaining = datalen & VX_ALIGN_MASK; \
- \
- kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \
- crc = ___crc32_vx(crc, data, aligned); \
- kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \
- \
- if (remaining) \
- crc = ___crc32_sw(crc, data + aligned, remaining); \
- \
- return crc; \
- } \
- EXPORT_SYMBOL(___fname);
-
-DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
-DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
-DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base)
-
-static int __init crc32_s390_init(void)
-{
- if (cpu_have_feature(S390_CPU_FEATURE_VXRS))
- static_branch_enable(&have_vxrs);
- return 0;
-}
-arch_initcall(crc32_s390_init);
-
-static void __exit crc32_s390_exit(void)
-{
-}
-module_exit(crc32_s390_exit);
-
-u32 crc32_optimizations(void)
-{
- if (static_key_enabled(&have_vxrs))
- return CRC32_LE_OPTIMIZATION |
- CRC32_BE_OPTIMIZATION |
- CRC32C_OPTIMIZATION;
- return 0;
-}
-EXPORT_SYMBOL(crc32_optimizations);
-
-MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
-MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility");
-MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/crc32-vx.h b/arch/s390/lib/crc32-vx.h
deleted file mode 100644
index 652c96e1a822..000000000000
--- a/arch/s390/lib/crc32-vx.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _CRC32_VX_S390_H
-#define _CRC32_VX_S390_H
-
-#include <linux/types.h>
-
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-
-#endif /* _CRC32_VX_S390_H */
diff --git a/arch/s390/lib/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c
deleted file mode 100644
index fed7c9c70d05..000000000000
--- a/arch/s390/lib/crc32be-vx.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Hardware-accelerated CRC-32 variants for Linux on z Systems
- *
- * Use the z/Architecture Vector Extension Facility to accelerate the
- * computing of CRC-32 checksums.
- *
- * This CRC-32 implementation algorithm processes the most-significant
- * bit first (BE).
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#include <linux/types.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-/* Vector register range containing CRC-32 constants */
-#define CONST_R1R2 9
-#define CONST_R3R4 10
-#define CONST_R5 11
-#define CONST_R6 12
-#define CONST_RU_POLY 13
-#define CONST_CRC_POLY 14
-
-/*
- * The CRC-32 constant block contains reduction constants to fold and
- * process particular chunks of the input data stream in parallel.
- *
- * For the CRC-32 variants, the constants are precomputed according to
- * these definitions:
- *
- * R1 = x4*128+64 mod P(x)
- * R2 = x4*128 mod P(x)
- * R3 = x128+64 mod P(x)
- * R4 = x128 mod P(x)
- * R5 = x96 mod P(x)
- * R6 = x64 mod P(x)
- *
- * Barret reduction constant, u, is defined as floor(x**64 / P(x)).
- *
- * where P(x) is the polynomial in the normal domain and the P'(x) is the
- * polynomial in the reversed (bitreflected) domain.
- *
- * Note that the constant definitions below are extended in order to compute
- * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
- * The rightmost doubleword can be 0 to prevent contribution to the result or
- * can be multiplied by 1 to perform an XOR without the need for a separate
- * VECTOR EXCLUSIVE OR instruction.
- *
- * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
- *
- * P(x) = 0x04C11DB7
- * P'(x) = 0xEDB88320
- */
-
-static unsigned long constants_CRC_32_BE[] = {
- 0x08833794c, 0x0e6228b11, /* R1, R2 */
- 0x0c5b9cd4c, 0x0e8a45605, /* R3, R4 */
- 0x0f200aa66, 1UL << 32, /* R5, x32 */
- 0x0490d678d, 1, /* R6, 1 */
- 0x104d101df, 0, /* u */
- 0x104C11DB7, 0, /* P(x) */
-};
-
-/**
- * crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers
- * @crc: Initial CRC value, typically ~0.
- * @buf: Input buffer pointer, performance might be improved if the
- * buffer is on a doubleword boundary.
- * @size: Size of the buffer, must be 64 bytes or greater.
- *
- * Register usage:
- * V0: Initial CRC value and intermediate constants and results.
- * V1..V4: Data for CRC computation.
- * V5..V8: Next data chunks that are fetched from the input buffer.
- * V9..V14: CRC-32 constants.
- */
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
- /* Load CRC-32 constants */
- fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE);
- fpu_vzero(0);
-
- /* Load the initial CRC value into the leftmost word of V0. */
- fpu_vlvgf(0, crc, 0);
-
- /* Load a 64-byte data chunk and XOR with CRC */
- fpu_vlm(1, 4, buf);
- fpu_vx(1, 0, 1);
- buf += 64;
- size -= 64;
-
- while (size >= 64) {
- /* Load the next 64-byte data chunk into V5 to V8 */
- fpu_vlm(5, 8, buf);
-
- /*
- * Perform a GF(2) multiplication of the doublewords in V1 with
- * the reduction constants in V0. The intermediate result is
- * then folded (accumulated) with the next data chunk in V5 and
- * stored in V1. Repeat this step for the register contents
- * in V2, V3, and V4 respectively.
- */
- fpu_vgfmag(1, CONST_R1R2, 1, 5);
- fpu_vgfmag(2, CONST_R1R2, 2, 6);
- fpu_vgfmag(3, CONST_R1R2, 3, 7);
- fpu_vgfmag(4, CONST_R1R2, 4, 8);
- buf += 64;
- size -= 64;
- }
-
- /* Fold V1 to V4 into a single 128-bit value in V1 */
- fpu_vgfmag(1, CONST_R3R4, 1, 2);
- fpu_vgfmag(1, CONST_R3R4, 1, 3);
- fpu_vgfmag(1, CONST_R3R4, 1, 4);
-
- while (size >= 16) {
- fpu_vl(2, buf);
- fpu_vgfmag(1, CONST_R3R4, 1, 2);
- buf += 16;
- size -= 16;
- }
-
- /*
- * The R5 constant is used to fold a 128-bit value into an 96-bit value
- * that is XORed with the next 96-bit input data chunk. To use a single
- * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to
- * form an intermediate 96-bit value (with appended zeros) which is then
- * XORed with the intermediate reduction result.
- */
- fpu_vgfmg(1, CONST_R5, 1);
-
- /*
- * Further reduce the remaining 96-bit value to a 64-bit value using a
- * single VGFMG, the rightmost doubleword is multiplied with 0x1. The
- * intermediate result is then XORed with the product of the leftmost
- * doubleword with R6. The result is a 64-bit value and is subject to
- * the Barret reduction.
- */
- fpu_vgfmg(1, CONST_R6, 1);
-
- /*
- * The input values to the Barret reduction are the degree-63 polynomial
- * in V1 (R(x)), degree-32 generator polynomial, and the reduction
- * constant u. The Barret reduction result is the CRC value of R(x) mod
- * P(x).
- *
- * The Barret reduction algorithm is defined as:
- *
- * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
- * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
- * 3. C(x) = R(x) XOR T2(x) mod x^32
- *
- * Note: To compensate the division by x^32, use the vector unpack
- * instruction to move the leftmost word into the leftmost doubleword
- * of the vector register. The rightmost doubleword is multiplied
- * with zero to not contribute to the intermediate results.
- */
-
- /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
- fpu_vupllf(2, 1);
- fpu_vgfmg(2, CONST_RU_POLY, 2);
-
- /*
- * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in
- * V2 and XOR the intermediate result, T2(x), with the value in V1.
- * The final result is in the rightmost word of V2.
- */
- fpu_vupllf(2, 2);
- fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
- return fpu_vlgvf(2, 3);
-}
diff --git a/arch/s390/lib/crc32le-vx.c b/arch/s390/lib/crc32le-vx.c
deleted file mode 100644
index 2f629f394df7..000000000000
--- a/arch/s390/lib/crc32le-vx.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Hardware-accelerated CRC-32 variants for Linux on z Systems
- *
- * Use the z/Architecture Vector Extension Facility to accelerate the
- * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet
- * and Castagnoli.
- *
- * This CRC-32 implementation algorithm is bitreflected and processes
- * the least-significant bit first (Little-Endian).
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#include <linux/types.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-/* Vector register range containing CRC-32 constants */
-#define CONST_PERM_LE2BE 9
-#define CONST_R2R1 10
-#define CONST_R4R3 11
-#define CONST_R5 12
-#define CONST_RU_POLY 13
-#define CONST_CRC_POLY 14
-
-/*
- * The CRC-32 constant block contains reduction constants to fold and
- * process particular chunks of the input data stream in parallel.
- *
- * For the CRC-32 variants, the constants are precomputed according to
- * these definitions:
- *
- * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1
- * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1
- * R3 = [(x128+32 mod P'(x) << 32)]' << 1
- * R4 = [(x128-32 mod P'(x) << 32)]' << 1
- * R5 = [(x64 mod P'(x) << 32)]' << 1
- * R6 = [(x32 mod P'(x) << 32)]' << 1
- *
- * The bitreflected Barret reduction constant, u', is defined as
- * the bit reversal of floor(x**64 / P(x)).
- *
- * where P(x) is the polynomial in the normal domain and the P'(x) is the
- * polynomial in the reversed (bitreflected) domain.
- *
- * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
- *
- * P(x) = 0x04C11DB7
- * P'(x) = 0xEDB88320
- *
- * CRC-32C (Castagnoli) polynomials:
- *
- * P(x) = 0x1EDC6F41
- * P'(x) = 0x82F63B78
- */
-
-static unsigned long constants_CRC_32_LE[] = {
- 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
- 0x1c6e41596, 0x154442bd4, /* R2, R1 */
- 0x0ccaa009e, 0x1751997d0, /* R4, R3 */
- 0x0, 0x163cd6124, /* R5 */
- 0x0, 0x1f7011641, /* u' */
- 0x0, 0x1db710641 /* P'(x) << 1 */
-};
-
-static unsigned long constants_CRC_32C_LE[] = {
- 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */
- 0x09e4addf8, 0x740eef02, /* R2, R1 */
- 0x14cd00bd6, 0xf20c0dfe, /* R4, R3 */
- 0x0, 0x0dd45aab8, /* R5 */
- 0x0, 0x0dea713f1, /* u' */
- 0x0, 0x105ec76f0 /* P'(x) << 1 */
-};
-
-/**
- * crc32_le_vgfm_generic - Compute CRC-32 (LE variant) with vector registers
- * @crc: Initial CRC value, typically ~0.
- * @buf: Input buffer pointer, performance might be improved if the
- * buffer is on a doubleword boundary.
- * @size: Size of the buffer, must be 64 bytes or greater.
- * @constants: CRC-32 constant pool base pointer.
- *
- * Register usage:
- * V0: Initial CRC value and intermediate constants and results.
- * V1..V4: Data for CRC computation.
- * V5..V8: Next data chunks that are fetched from the input buffer.
- * V9: Constant for BE->LE conversion and shift operations
- * V10..V14: CRC-32 constants.
- */
-static u32 crc32_le_vgfm_generic(u32 crc, unsigned char const *buf, size_t size, unsigned long *constants)
-{
- /* Load CRC-32 constants */
- fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_POLY, constants);
-
- /*
- * Load the initial CRC value.
- *
- * The CRC value is loaded into the rightmost word of the
- * vector register and is later XORed with the LSB portion
- * of the loaded input data.
- */
- fpu_vzero(0); /* Clear V0 */
- fpu_vlvgf(0, crc, 3); /* Load CRC into rightmost word */
-
- /* Load a 64-byte data chunk and XOR with CRC */
- fpu_vlm(1, 4, buf);
- fpu_vperm(1, 1, 1, CONST_PERM_LE2BE);
- fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
- fpu_vperm(3, 3, 3, CONST_PERM_LE2BE);
- fpu_vperm(4, 4, 4, CONST_PERM_LE2BE);
-
- fpu_vx(1, 0, 1); /* V1 ^= CRC */
- buf += 64;
- size -= 64;
-
- while (size >= 64) {
- fpu_vlm(5, 8, buf);
- fpu_vperm(5, 5, 5, CONST_PERM_LE2BE);
- fpu_vperm(6, 6, 6, CONST_PERM_LE2BE);
- fpu_vperm(7, 7, 7, CONST_PERM_LE2BE);
- fpu_vperm(8, 8, 8, CONST_PERM_LE2BE);
- /*
- * Perform a GF(2) multiplication of the doublewords in V1 with
- * the R1 and R2 reduction constants in V0. The intermediate
- * result is then folded (accumulated) with the next data chunk
- * in V5 and stored in V1. Repeat this step for the register
- * contents in V2, V3, and V4 respectively.
- */
- fpu_vgfmag(1, CONST_R2R1, 1, 5);
- fpu_vgfmag(2, CONST_R2R1, 2, 6);
- fpu_vgfmag(3, CONST_R2R1, 3, 7);
- fpu_vgfmag(4, CONST_R2R1, 4, 8);
- buf += 64;
- size -= 64;
- }
-
- /*
- * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3
- * and R4 and accumulating the next 128-bit chunk until a single 128-bit
- * value remains.
- */
- fpu_vgfmag(1, CONST_R4R3, 1, 2);
- fpu_vgfmag(1, CONST_R4R3, 1, 3);
- fpu_vgfmag(1, CONST_R4R3, 1, 4);
-
- while (size >= 16) {
- fpu_vl(2, buf);
- fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
- fpu_vgfmag(1, CONST_R4R3, 1, 2);
- buf += 16;
- size -= 16;
- }
-
- /*
- * Set up a vector register for byte shifts. The shift value must
- * be loaded in bits 1-4 in byte element 7 of a vector register.
- * Shift by 8 bytes: 0x40
- * Shift by 4 bytes: 0x20
- */
- fpu_vleib(9, 0x40, 7);
-
- /*
- * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
- * to move R4 into the rightmost doubleword and set the leftmost
- * doubleword to 0x1.
- */
- fpu_vsrlb(0, CONST_R4R3, 9);
- fpu_vleig(0, 1, 0);
-
- /*
- * Compute GF(2) product of V1 and V0. The rightmost doubleword
- * of V1 is multiplied with R4. The leftmost doubleword of V1 is
- * multiplied by 0x1 and is then XORed with rightmost product.
- * Implicitly, the intermediate leftmost product becomes padded
- */
- fpu_vgfmg(1, 0, 1);
-
- /*
- * Now do the final 32-bit fold by multiplying the rightmost word
- * in V1 with R5 and XOR the result with the remaining bits in V1.
- *
- * To achieve this by a single VGFMAG, right shift V1 by a word
- * and store the result in V2 which is then accumulated. Use the
- * vector unpack instruction to load the rightmost half of the
- * doubleword into the rightmost doubleword element of V1; the other
- * half is loaded in the leftmost doubleword.
- * The vector register with CONST_R5 contains the R5 constant in the
- * rightmost doubleword and the leftmost doubleword is zero to ignore
- * the leftmost product of V1.
- */
- fpu_vleib(9, 0x20, 7); /* Shift by words */
- fpu_vsrlb(2, 1, 9); /* Store remaining bits in V2 */
- fpu_vupllf(1, 1); /* Split rightmost doubleword */
- fpu_vgfmag(1, CONST_R5, 1, 2); /* V1 = (V1 * R5) XOR V2 */
-
- /*
- * Apply a Barret reduction to compute the final 32-bit CRC value.
- *
- * The input values to the Barret reduction are the degree-63 polynomial
- * in V1 (R(x)), degree-32 generator polynomial, and the reduction
- * constant u. The Barret reduction result is the CRC value of R(x) mod
- * P(x).
- *
- * The Barret reduction algorithm is defined as:
- *
- * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
- * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
- * 3. C(x) = R(x) XOR T2(x) mod x^32
- *
- * Note: The leftmost doubleword of vector register containing
- * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
- * is zero and does not contribute to the final result.
- */
-
- /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
- fpu_vupllf(2, 1);
- fpu_vgfmg(2, CONST_RU_POLY, 2);
-
- /*
- * Compute the GF(2) product of the CRC polynomial with T1(x) in
- * V2 and XOR the intermediate result, T2(x), with the value in V1.
- * The final result is stored in word element 2 of V2.
- */
- fpu_vupllf(2, 2);
- fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
-
- return fpu_vlgvf(2, 2);
-}
-
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
- return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32_LE[0]);
-}
-
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
-{
- return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32C_LE[0]);
-}
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index be14c58cb989..c1ea14e3c927 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -7,6 +7,7 @@
*/
#include <linux/processor.h>
+#include <linux/export.h>
#include <linux/delay.h>
#include <asm/div64.h>
#include <asm/timex.h>
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index a81a01c44927..ad9da4038511 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -10,11 +10,13 @@
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
+#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/io.h>
#include <asm/alternative.h>
+#include <asm/machine.h>
#include <asm/asm.h>
int spin_retry = -1;
@@ -37,6 +39,23 @@ static int __init spin_retry_setup(char *str)
}
__setup("spin_retry=", spin_retry_setup);
+static const struct ctl_table s390_spin_sysctl_table[] = {
+ {
+ .procname = "spin_retry",
+ .data = &spin_retry,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+};
+
+static int __init init_s390_spin_sysctls(void)
+{
+ register_sysctl_init("kernel", s390_spin_sysctl_table);
+ return 0;
+}
+arch_initcall(init_s390_spin_sysctls);
+
struct spin_wait {
struct spin_wait *next, *prev;
int node_id;
@@ -141,7 +160,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
ix = get_lowcore()->spinlock_index++;
barrier();
- lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
+ lockval = spinlock_lockval(); /* cpu + 1 */
node = this_cpu_ptr(&spin_wait[ix]);
node->prev = node->next = NULL;
node_id = node->node_id;
@@ -212,7 +231,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
if (count-- >= 0)
continue;
count = spin_retry;
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+ if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
}
@@ -232,7 +251,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
{
int lockval, old, new, owner, count;
- lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
+ lockval = spinlock_lockval(); /* cpu + 1 */
/* Pass the virtual CPU to the lock holder if it is not running */
owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL);
@@ -255,7 +274,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
if (count-- >= 0)
continue;
count = spin_retry;
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+ if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
}
}
@@ -271,7 +290,7 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
int arch_spin_trylock_retry(arch_spinlock_t *lp)
{
- int cpu = SPINLOCK_LOCKVAL;
+ int cpu = spinlock_lockval();
int owner, count;
for (count = spin_retry; count > 0; count--) {
@@ -337,7 +356,7 @@ void arch_spin_relax(arch_spinlock_t *lp)
cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK;
if (!cpu)
return;
- if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
+ if (machine_is_lpar() && !arch_vcpu_is_preempted(cpu - 1))
return;
smp_yield_cpu(cpu - 1);
}
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 373fa1f01937..099de76e8b1a 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -78,50 +78,6 @@ EXPORT_SYMBOL(strnlen);
#endif
/**
- * strcpy - Copy a %NUL terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- *
- * returns a pointer to @dest
- */
-#ifdef __HAVE_ARCH_STRCPY
-char *strcpy(char *dest, const char *src)
-{
- char *ret = dest;
-
- asm volatile(
- " lghi 0,0\n"
- "0: mvst %[dest],%[src]\n"
- " jo 0b\n"
- : [dest] "+&a" (dest), [src] "+&a" (src)
- :
- : "cc", "memory", "0");
- return ret;
-}
-EXPORT_SYMBOL(strcpy);
-#endif
-
-/**
- * strncpy - Copy a length-limited, %NUL-terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @n: The maximum number of bytes to copy
- *
- * The result is not %NUL-terminated if the source exceeds
- * @n bytes.
- */
-#ifdef __HAVE_ARCH_STRNCPY
-char *strncpy(char *dest, const char *src, size_t n)
-{
- size_t len = __strnend(src, n) - src;
- memset(dest + len, 0, n - len);
- memcpy(dest, src, len);
- return dest;
-}
-EXPORT_SYMBOL(strncpy);
-#endif
-
-/**
* strcat - Append one %NUL-terminated string to another
* @dest: The string to be appended to
* @src: The string to append to it
@@ -181,9 +137,6 @@ EXPORT_SYMBOL(strlcat);
* @n: The maximum numbers of bytes to copy
*
* returns a pointer to @dest
- *
- * Note that in contrast to strncpy, strncat ensures the result is
- * terminated.
*/
#ifdef __HAVE_ARCH_STRNCAT
char *strncat(char *dest, const char *src, size_t n)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index c7c269d5c491..1a6ba105e071 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -8,73 +8,85 @@
* Gerald Schaefer (gerald.schaefer@de.ibm.com)
*/
+#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <asm/asm-extable.h>
#include <asm/ctlreg.h>
+#include <asm/skey.h>
#ifdef CONFIG_DEBUG_ENTRY
void debug_user_asce(int exit)
{
+ struct lowcore *lc = get_lowcore();
struct ctlreg cr1, cr7;
local_ctl_store(1, &cr1);
local_ctl_store(7, &cr7);
- if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val)
+ if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val)
return;
panic("incorrect ASCE on kernel %s\n"
"cr1: %016lx cr7: %016lx\n"
"kernel: %016lx user: %016lx\n",
exit ? "exit" : "entry", cr1.val, cr7.val,
- get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val);
+ lc->kernel_asce.val, lc->user_asce.val);
}
#endif /*CONFIG_DEBUG_ENTRY */
-static unsigned long raw_copy_from_user_key(void *to, const void __user *from,
- unsigned long size, unsigned long key)
+union oac {
+ unsigned int val;
+ struct {
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac1;
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac2;
+ };
+};
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key)
{
- unsigned long rem;
+ unsigned long osize;
union oac spec = {
.oac2.key = key,
.oac2.as = PSW_BITS_AS_SECONDARY,
.oac2.k = 1,
.oac2.a = 1,
};
+ int cc;
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[from],%[val]\n"
- " slgr %[to],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */
- " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */
- " slgr %[rem],%[from]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
-}
-
-unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- return raw_copy_from_user_key(to, from, n, 0);
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " lr %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_FROM(0b, 0b)
+ EX_TABLE_UA_MVCOS_FROM(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to)
+ : [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (CC_TRANSFORM(cc) == 0)
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ from += 4096;
+ }
}
-EXPORT_SYMBOL(raw_copy_from_user);
unsigned long _copy_from_user_key(void *to, const void __user *from,
unsigned long n, unsigned long key)
@@ -93,50 +105,37 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
}
EXPORT_SYMBOL(_copy_from_user_key);
-static unsigned long raw_copy_to_user_key(void __user *to, const void *from,
- unsigned long size, unsigned long key)
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key)
{
- unsigned long rem;
+ unsigned long osize;
union oac spec = {
.oac1.key = key,
.oac1.as = PSW_BITS_AS_SECONDARY,
.oac1.k = 1,
.oac1.a = 1,
};
+ int cc;
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[to],%[val]\n"
- " slgr %[from],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */
- " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */
- " slgr %[rem],%[to]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
-}
-
-unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- return raw_copy_to_user_key(to, from, n, 0);
+ while (1) {
+ osize = size;
+ asm_inline volatile(
+ " lr %%r0,%[spec]\n"
+ "0: mvcos %[to],%[from],%[size]\n"
+ "1: nopr %%r7\n"
+ CC_IPM(cc)
+ EX_TABLE_UA_MVCOS_TO(0b, 0b)
+ EX_TABLE_UA_MVCOS_TO(1b, 0b)
+ : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+ : [spec] "d" (spec.val), [from] "Q" (*(const char *)from)
+ : CC_CLOBBER_LIST("memory", "0"));
+ if (CC_TRANSFORM(cc) == 0)
+ return osize - size;
+ size -= 4096;
+ to += 4096;
+ from += 4096;
+ }
}
-EXPORT_SYMBOL(raw_copy_to_user);
unsigned long _copy_to_user_key(void __user *to, const void *from,
unsigned long n, unsigned long key)
@@ -149,38 +148,188 @@ unsigned long _copy_to_user_key(void __user *to, const void *from,
}
EXPORT_SYMBOL(_copy_to_user_key);
-unsigned long __clear_user(void __user *to, unsigned long size)
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsigned int *uval,
+ unsigned int old, unsigned int new,
+ unsigned int mask, unsigned long key)
{
- unsigned long rem;
- union oac spec = {
- .oac1.as = PSW_BITS_AS_SECONDARY,
- .oac1.a = 1,
- };
+ unsigned long count;
+ unsigned int prev;
+ bool sacf_flag;
+ int rc = 0;
+
+ skey_regions_initialize();
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ "20: spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ "21:\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ SKEY_REGION(20b, 21b)
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
+ *uval = prev;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+}
+
+int __kprobes __cmpxchg_user_key1(unsigned long address, unsigned char *uval,
+ unsigned char old, unsigned char new, unsigned long key)
+{
+ unsigned int prev, shift, mask, _old, _new;
+ int rc;
+
+ shift = (3 ^ (address & 3)) << 3;
+ address ^= address & 3;
+ _old = (unsigned int)old << shift;
+ _new = (unsigned int)new << shift;
+ mask = ~(0xff << shift);
+ rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
+ *uval = prev >> shift;
+ return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key1);
+
+int __kprobes __cmpxchg_user_key2(unsigned long address, unsigned short *uval,
+ unsigned short old, unsigned short new, unsigned long key)
+{
+ unsigned int prev, shift, mask, _old, _new;
+ int rc;
+
+ shift = (2 ^ (address & 2)) << 3;
+ address ^= address & 2;
+ _old = (unsigned int)old << shift;
+ _new = (unsigned int)new << shift;
+ mask = ~(0xffff << shift);
+ rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key);
+ *uval = prev >> shift;
+ return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key2);
+
+int __kprobes __cmpxchg_user_key4(unsigned long address, unsigned int *uval,
+ unsigned int old, unsigned int new, unsigned long key)
+{
+ unsigned int prev = old;
+ bool sacf_flag;
+ int rc = 0;
+
+ skey_regions_initialize();
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ "20: spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cs %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ "21:\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ SKEY_REGION(20b, 21b)
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
+ *uval = prev;
+ return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key4);
+
+int __kprobes __cmpxchg_user_key8(unsigned long address, unsigned long *uval,
+ unsigned long old, unsigned long new, unsigned long key)
+{
+ unsigned long prev = old;
+ bool sacf_flag;
+ int rc = 0;
+
+ skey_regions_initialize();
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ "20: spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: csg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ "21:\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ SKEY_REGION(20b, 21b)
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
+ *uval = prev;
+ return rc;
+}
+EXPORT_SYMBOL(__cmpxchg_user_key8);
+
+int __kprobes __cmpxchg_user_key16(unsigned long address, __uint128_t *uval,
+ __uint128_t old, __uint128_t new, unsigned long key)
+{
+ __uint128_t prev = old;
+ bool sacf_flag;
+ int rc = 0;
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[to],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */
- " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */
- " slgr %[rem],%[to]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[zeropg]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [to] "+&a" (to), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
+ skey_regions_initialize();
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile(
+ "20: spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cdsg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ "21:\n"
+ EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+ SKEY_REGION(20b, 21b)
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(__int128_t *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ disable_sacf_uaccess(sacf_flag);
+ *uval = prev;
+ return rc;
}
-EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__cmpxchg_user_key16);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index f6c2db7a8669..bd0401cc7ca5 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -9,6 +9,8 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o
+obj-$(CONFIG_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_PGSTE) += gmap.o
obj-$(CONFIG_PFAULT) += pfault.o
+
+obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 39f44b6256e0..e2a6eb92420f 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -201,7 +201,7 @@ static void cmm_set_timer(void)
{
if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) {
if (timer_pending(&cmm_timer))
- del_timer(&cmm_timer);
+ timer_delete(&cmm_timer);
return;
}
mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds));
@@ -424,7 +424,7 @@ out_smsg:
#endif
unregister_sysctl_table(cmm_sysctl_header);
out_sysctl:
- del_timer_sync(&cmm_timer);
+ timer_delete_sync(&cmm_timer);
return rc;
}
module_init(cmm_init);
@@ -437,7 +437,7 @@ static void __exit cmm_exit(void)
#endif
unregister_oom_notifier(&cmm_oom_nb);
kthread_stop(cmm_thread_ptr);
- del_timer_sync(&cmm_timer);
+ timer_delete_sync(&cmm_timer);
cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
}
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index fa54f3bc0c8d..9af2aae0a515 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cpufeature.h>
#include <linux/set_memory.h>
#include <linux/ptdump.h>
#include <linux/seq_file.h>
@@ -82,7 +84,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
* in which case we have two lpswe instructions in lowcore that need
* to be executable.
*/
- if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)))
+ if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !cpu_has_bear()))
return;
WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX),
"s390/mm: Found insecure W+X mapping at address %pS\n",
@@ -145,11 +147,48 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
}
}
+static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
+{
+ note_page(pt_st, addr, 4, pte_val(pte));
+}
+
+static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
+{
+ note_page(pt_st, addr, 3, pmd_val(pmd));
+}
+
+static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
+{
+ note_page(pt_st, addr, 2, pud_val(pud));
+}
+
+static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
+{
+ note_page(pt_st, addr, 1, p4d_val(p4d));
+}
+
+static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
+{
+ note_page(pt_st, addr, 0, pgd_val(pgd));
+}
+
+static void note_page_flush(struct ptdump_state *pt_st)
+{
+ pte_t pte_zero = {0};
+
+ note_page(pt_st, 0, -1, pte_val(pte_zero));
+}
+
bool ptdump_check_wx(void)
{
struct pg_state st = {
.ptdump = {
- .note_page = note_page,
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
.range = (struct ptdump_range[]) {
{.start = 0, .end = max_addr},
{.start = 0, .end = 0},
@@ -167,7 +206,7 @@ bool ptdump_check_wx(void)
},
};
- if (!MACHINE_HAS_NX)
+ if (!cpu_has_nx())
return true;
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
if (st.wx_pages) {
@@ -176,7 +215,7 @@ bool ptdump_check_wx(void)
return false;
} else {
pr_info("Checked W+X mappings: passed, no %sW+X pages found\n",
- (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ?
+ (nospec_uses_trampoline() || !cpu_has_bear()) ?
"unexpected " : "");
return true;
@@ -188,7 +227,12 @@ static int ptdump_show(struct seq_file *m, void *v)
{
struct pg_state st = {
.ptdump = {
- .note_page = note_page,
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
.range = (struct ptdump_range[]) {
{.start = 0, .end = max_addr},
{.start = 0, .end = 0},
@@ -203,11 +247,9 @@ static int ptdump_show(struct seq_file *m, void *v)
.marker = markers,
};
- get_online_mems();
mutex_lock(&cpa_mutex);
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
mutex_unlock(&cpa_mutex);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 0a0738a473af..7498e858c401 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -7,6 +7,7 @@
#include <linux/panic.h>
#include <asm/asm-extable.h>
#include <asm/extable.h>
+#include <asm/fpu.h>
const struct exception_table_entry *s390_search_extables(unsigned long addr)
{
@@ -26,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r
return true;
}
-static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs)
{
unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
@@ -35,18 +36,6 @@ static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct p
return true;
}
-static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs)
-{
- unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
- unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
- size_t len = FIELD_GET(EX_DATA_LEN, ex->data);
-
- regs->gprs[reg_err] = -EFAULT;
- memset((void *)regs->gprs[reg_addr], 0, len);
- regs->psw.addr = extable_fixup(ex);
- return true;
-}
-
static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
bool pair, struct pt_regs *regs)
{
@@ -77,6 +66,56 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt
return true;
}
+static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ fpu_sfpc(0);
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+}
+
+struct insn_ssf {
+ u64 opc1 : 8;
+ u64 r3 : 4;
+ u64 opc2 : 4;
+ u64 b1 : 4;
+ u64 d1 : 12;
+ u64 b2 : 4;
+ u64 d2 : 12;
+} __packed;
+
+static bool ex_handler_ua_mvcos(const struct exception_table_entry *ex,
+ bool from, struct pt_regs *regs)
+{
+ unsigned long uaddr, remainder;
+ struct insn_ssf *insn;
+
+ /*
+ * If the faulting user space access crossed a page boundary retry by
+ * limiting the access to the first page (adjust length accordingly).
+ * Then the mvcos instruction will either complete with condition code
+ * zero, or generate another fault where the user space access did not
+ * cross a page boundary.
+ * If the faulting user space access did not cross a page boundary set
+ * length to zero and retry. In this case no user space access will
+ * happen, and the mvcos instruction will complete with condition code
+ * zero.
+ * In both cases the instruction will complete with condition code
+ * zero (copying finished), and the register which contains the
+ * length, indicates the number of bytes copied.
+ */
+ regs->psw.addr = extable_fixup(ex);
+ insn = (struct insn_ssf *)regs->psw.addr;
+ if (from)
+ uaddr = regs->gprs[insn->b2] + insn->d2;
+ else
+ uaddr = regs->gprs[insn->b1] + insn->d1;
+ remainder = PAGE_SIZE - (uaddr & (PAGE_SIZE - 1));
+ if (regs->gprs[insn->r3] <= remainder)
+ remainder = 0;
+ regs->gprs[insn->r3] = remainder;
+ return true;
+}
+
bool fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *ex;
@@ -89,16 +128,20 @@ bool fixup_exception(struct pt_regs *regs)
return ex_handler_fixup(ex, regs);
case EX_TYPE_BPF:
return ex_handler_bpf(ex, regs);
- case EX_TYPE_UA_STORE:
- return ex_handler_ua_store(ex, regs);
- case EX_TYPE_UA_LOAD_MEM:
- return ex_handler_ua_load_mem(ex, regs);
+ case EX_TYPE_UA_FAULT:
+ return ex_handler_ua_fault(ex, regs);
case EX_TYPE_UA_LOAD_REG:
return ex_handler_ua_load_reg(ex, false, regs);
case EX_TYPE_UA_LOAD_REGPAIR:
return ex_handler_ua_load_reg(ex, true, regs);
case EX_TYPE_ZEROPAD:
return ex_handler_zeropad(ex, regs);
+ case EX_TYPE_FPC:
+ return ex_handler_fpc(ex, regs);
+ case EX_TYPE_UA_MVCOS_TO:
+ return ex_handler_ua_mvcos(ex, false, regs);
+ case EX_TYPE_UA_MVCOS_FROM:
+ return ex_handler_ua_mvcos(ex, true, regs);
}
panic("invalid exception table entry");
}
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 4692136c0af1..f7da53e212f5 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -21,6 +21,7 @@
#include <linux/ioport.h>
#include <linux/refcount.h>
#include <linux/pgtable.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/page.h>
#include <asm/ebcdic.h>
@@ -255,7 +256,7 @@ segment_type (char* name)
int rc;
struct dcss_segment seg;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -ENOSYS;
dcss_mkname(name, seg.dcss_name);
@@ -418,7 +419,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr,
struct dcss_segment *seg;
int rc;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -ENOSYS;
mutex_lock(&dcss_lock);
@@ -529,6 +530,14 @@ segment_modify_shared (char *name, int do_nonshared)
return rc;
}
+static void __dcss_diag_purge_on_cpu_0(void *data)
+{
+ struct dcss_segment *seg = (struct dcss_segment *)data;
+ unsigned long dummy;
+
+ dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+}
+
/*
* Decrease the use count of a DCSS segment and remove
* it from the address space if nobody is using it
@@ -537,10 +546,9 @@ segment_modify_shared (char *name, int do_nonshared)
void
segment_unload(char *name)
{
- unsigned long dummy;
struct dcss_segment *seg;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return;
mutex_lock(&dcss_lock);
@@ -555,7 +563,14 @@ segment_unload(char *name)
kfree(seg->res);
vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
list_del(&seg->list);
- dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+ /*
+ * Workaround for z/VM issue, where calling the DCSS unload diag on
+ * a non-IPL CPU would cause bogus sclp maximum memory detection on
+ * next IPL.
+ * IPL CPU 0 cannot be set offline, so the dcss_diag() call can
+ * directly be scheduled to that CPU.
+ */
+ smp_call_function_single(0, __dcss_diag_purge_on_cpu_0, seg, 1);
kfree(seg);
out_unlock:
mutex_unlock(&dcss_lock);
@@ -572,7 +587,7 @@ segment_save(char *name)
char cmd2[80];
int i, response;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return;
mutex_lock(&dcss_lock);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9b681f74dccc..e1ad05bfd28a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -11,11 +11,11 @@
#include <linux/kernel_stat.h>
#include <linux/mmu_context.h>
+#include <linux/cpufeature.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
-#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
@@ -40,22 +40,11 @@
#include <asm/ptrace.h>
#include <asm/fault.h>
#include <asm/diag.h>
-#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/facility.h>
#include <asm/uv.h>
#include "../kernel/entry.h"
-static DEFINE_STATIC_KEY_FALSE(have_store_indication);
-
-static int __init fault_init(void)
-{
- if (test_facility(75))
- static_branch_enable(&have_store_indication);
- return 0;
-}
-early_initcall(fault_init);
-
/*
* Find out which address space caused the exception.
*/
@@ -81,7 +70,7 @@ static __always_inline bool fault_is_write(struct pt_regs *regs)
{
union teid teid = { .val = regs->int_parm_long };
- if (static_branch_likely(&have_store_indication))
+ if (test_facility(75))
return teid.fsi == TEID_FSI_STORE;
return false;
}
@@ -175,6 +164,23 @@ static void dump_fault_info(struct pt_regs *regs)
int show_unhandled_signals = 1;
+static const struct ctl_table s390_fault_sysctl_table[] = {
+ {
+ .procname = "userprocess_debug",
+ .data = &show_unhandled_signals,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+};
+
+static int __init init_s390_fault_sysctls(void)
+{
+ register_sysctl_init("kernel", s390_fault_sysctl_table);
+ return 0;
+}
+arch_initcall(init_s390_fault_sysctls);
+
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
@@ -369,6 +375,7 @@ void do_protection_exception(struct pt_regs *regs)
if (unlikely(!teid.b61)) {
if (user_mode(regs)) {
/* Low-address protection in user mode: cannot happen */
+ dump_fault_info(regs);
die(regs, "Low-address protection");
}
/*
@@ -377,7 +384,7 @@ void do_protection_exception(struct pt_regs *regs)
*/
return handle_fault_error_nolock(regs, 0);
}
- if (unlikely(MACHINE_HAS_NX && teid.b56)) {
+ if (unlikely(cpu_has_nx() && teid.b56)) {
regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK);
return handle_fault_error_nolock(regs, SEGV_ACCERR);
}
@@ -434,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs)
if (rc)
BUG();
} else {
+ if (faulthandler_disabled())
+ return handle_fault_error_nolock(regs, 0);
mm = current->mm;
mmap_read_lock(mm);
vma = find_vma(mm, addr);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 16b8a36c56de..c7defe4ed1f6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -8,6 +8,8 @@
* Janosch Frank <frankja@linux.vnet.ibm.com>
*/
+#include <linux/cpufeature.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
@@ -20,9 +22,20 @@
#include <linux/pgtable.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
+#include <asm/machine.h>
+#include <asm/gmap_helpers.h>
#include <asm/gmap.h>
#include <asm/page.h>
-#include <asm/tlb.h>
+
+/*
+ * The address is saved in a radix tree directly; NULL would be ambiguous,
+ * since 0 is a valid address, and NULL is returned when nothing was found.
+ * The lower bits are ignored by all users of the macro, so it can be used
+ * to distinguish a valid address 0 from a NULL.
+ */
+#define VALID_GADDR_FLAG 1
+#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)
+#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)
#define GMAP_SHADOW_FAKE_TABLE 1ULL
@@ -43,7 +56,7 @@ static struct page *gmap_alloc_crst(void)
*
* Returns a guest address space structure.
*/
-static struct gmap *gmap_alloc(unsigned long limit)
+struct gmap *gmap_alloc(unsigned long limit)
{
struct gmap *gmap;
struct page *page;
@@ -70,9 +83,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
if (!gmap)
goto out;
- INIT_LIST_HEAD(&gmap->crst_list);
INIT_LIST_HEAD(&gmap->children);
- INIT_LIST_HEAD(&gmap->pt_list);
INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
@@ -82,8 +93,6 @@ static struct gmap *gmap_alloc(unsigned long limit)
page = gmap_alloc_crst();
if (!page)
goto out_free;
- page->index = 0;
- list_add(&page->lru, &gmap->crst_list);
table = page_to_virt(page);
crst_table_init(table, etype);
gmap->table = table;
@@ -97,6 +106,7 @@ out_free:
out:
return NULL;
}
+EXPORT_SYMBOL_GPL(gmap_alloc);
/**
* gmap_create - create a guest address space
@@ -128,7 +138,7 @@ EXPORT_SYMBOL_GPL(gmap_create);
static void gmap_flush_tlb(struct gmap *gmap)
{
- if (MACHINE_HAS_IDTE)
+ if (cpu_has_idte())
__tlb_flush_idte(gmap->asce);
else
__tlb_flush_global();
@@ -185,32 +195,46 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
} while (nr > 0);
}
+static void gmap_free_crst(unsigned long *table, bool free_ptes)
+{
+ bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
+ int i;
+
+ if (is_segment) {
+ if (!free_ptes)
+ goto out;
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _SEGMENT_ENTRY_INVALID))
+ page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
+ } else {
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _REGION_ENTRY_INVALID))
+ gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
+ }
+
+out:
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+}
+
/**
* gmap_free - free a guest address space
* @gmap: pointer to the guest address space structure
*
* No locks required. There are no references to this gmap anymore.
*/
-static void gmap_free(struct gmap *gmap)
+void gmap_free(struct gmap *gmap)
{
- struct page *page, *next;
-
/* Flush tlb of all gmaps (if not already done for shadows) */
if (!(gmap_is_shadow(gmap) && gmap->removed))
gmap_flush_tlb(gmap);
/* Free all segment & region tables. */
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, CRST_ALLOC_ORDER);
+ gmap_free_crst(gmap->table, gmap_is_shadow(gmap));
+
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
/* Free additional data for a shadow gmap */
if (gmap_is_shadow(gmap)) {
- struct ptdesc *ptdesc, *n;
-
- /* Free all page tables. */
- list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
- page_table_free_pgste(ptdesc);
gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
/* Release reference to the parent */
gmap_put(gmap->parent);
@@ -218,6 +242,7 @@ static void gmap_free(struct gmap *gmap)
kfree(gmap);
}
+EXPORT_SYMBOL_GPL(gmap_free);
/**
* gmap_get - increase reference counter for guest address space
@@ -298,10 +323,8 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
crst_table_init(new, init);
spin_lock(&gmap->guest_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
- list_add(&page->lru, &gmap->crst_list);
*table = __pa(new) | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
- page->index = gaddr;
page = NULL;
}
spin_unlock(&gmap->guest_table_lock);
@@ -310,21 +333,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
return 0;
}
-/**
- * __gmap_segment_gaddr - find virtual address from segment pointer
- * @entry: pointer to a segment table entry in the guest address space
- *
- * Returns the virtual address in the guest address space for the segment
- */
-static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
{
- struct page *page;
- unsigned long offset;
+ return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
+
+static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
+{
+ return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
- offset = (unsigned long) entry / sizeof(unsigned long);
- offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
- page = pmd_pgtable_page((pmd_t *) entry);
- return page->index + offset;
+static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
+ unsigned long *gaddr)
+{
+ *gaddr = host_to_guest_delete(gmap, vmaddr);
+ if (IS_GADDR_VALID(*gaddr))
+ return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);
+ return NULL;
}
/**
@@ -336,16 +361,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry)
*/
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
- unsigned long *entry;
+ unsigned long gaddr;
int flush = 0;
+ pmd_t *pmdp;
BUG_ON(gmap_is_shadow(gmap));
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
- if (entry) {
- flush = (*entry != _SEGMENT_ENTRY_EMPTY);
- *entry = _SEGMENT_ENTRY_EMPTY;
+
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
+ flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
+
spin_unlock(&gmap->guest_table_lock);
return flush;
}
@@ -464,26 +492,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
EXPORT_SYMBOL_GPL(__gmap_translate);
/**
- * gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- */
-unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
- unsigned long rc;
-
- mmap_read_lock(gmap->mm);
- rc = __gmap_translate(gmap, gaddr);
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_translate);
-
-/**
* gmap_unlink - disconnect a page table from the gmap shadow tables
* @mm: pointer to the parent mm_struct
* @table: pointer to the host page table
@@ -582,7 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
spin_lock(&gmap->guest_table_lock);
if (*table == _SEGMENT_ENTRY_EMPTY) {
rc = radix_tree_insert(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT, table);
+ vmaddr >> PMD_SHIFT,
+ (void *)MAKE_VALID_GADDR(gaddr));
if (!rc) {
if (pmd_leaf(*pmd)) {
*table = (pmd_val(*pmd) &
@@ -605,193 +614,27 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
radix_tree_preload_end();
return rc;
}
-
-/**
- * fixup_user_fault_nowait - manually resolve a user page fault without waiting
- * @mm: mm_struct of target mm
- * @address: user address
- * @fault_flags:flags to pass down to handle_mm_fault()
- * @unlocked: did we unlock the mmap_lock while retrying
- *
- * This function behaves similarly to fixup_user_fault(), but it guarantees
- * that the fault will be resolved without waiting. The function might drop
- * and re-acquire the mm lock, in which case @unlocked will be set to true.
- *
- * The guarantee is that the fault is handled without waiting, but the
- * function itself might sleep, due to the lock.
- *
- * Context: Needs to be called with mm->mmap_lock held in read mode, and will
- * return with the lock held in read mode; @unlocked will indicate whether
- * the lock has been dropped and re-acquired. This is the same behaviour as
- * fixup_user_fault().
- *
- * Return: 0 on success, -EAGAIN if the fault cannot be resolved without
- * waiting, -EFAULT if the fault cannot be resolved, -ENOMEM if out of
- * memory.
- */
-static int fixup_user_fault_nowait(struct mm_struct *mm, unsigned long address,
- unsigned int fault_flags, bool *unlocked)
-{
- struct vm_area_struct *vma;
- unsigned int test_flags;
- vm_fault_t fault;
- int rc;
-
- fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
- test_flags = fault_flags & FAULT_FLAG_WRITE ? VM_WRITE : VM_READ;
-
- vma = find_vma(mm, address);
- if (unlikely(!vma || address < vma->vm_start))
- return -EFAULT;
- if (unlikely(!(vma->vm_flags & test_flags)))
- return -EFAULT;
-
- fault = handle_mm_fault(vma, address, fault_flags, NULL);
- /* the mm lock has been dropped, take it again */
- if (fault & VM_FAULT_COMPLETED) {
- *unlocked = true;
- mmap_read_lock(mm);
- return 0;
- }
- /* the mm lock has not been dropped */
- if (fault & VM_FAULT_ERROR) {
- rc = vm_fault_to_errno(fault, 0);
- BUG_ON(!rc);
- return rc;
- }
- /* the mm lock has not been dropped because of FAULT_FLAG_RETRY_NOWAIT */
- if (fault & VM_FAULT_RETRY)
- return -EAGAIN;
- /* nothing needed to be done and the mm lock has not been dropped */
- return 0;
-}
-
-/**
- * __gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Context: Needs to be called with mm->mmap_lock held in read mode. Might
- * drop and re-acquire the lock. Will always return with the lock held.
- */
-static int __gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags)
-{
- unsigned long vmaddr;
- bool unlocked;
- int rc = 0;
-
-retry:
- unlocked = false;
-
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr))
- return vmaddr;
-
- if (fault_flags & FAULT_FLAG_RETRY_NOWAIT)
- rc = fixup_user_fault_nowait(gmap->mm, vmaddr, fault_flags, &unlocked);
- else
- rc = fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked);
- if (rc)
- return rc;
- /*
- * In the case that fixup_user_fault unlocked the mmap_lock during
- * fault-in, redo __gmap_translate() to avoid racing with a
- * map/unmap_segment.
- * In particular, __gmap_translate(), fixup_user_fault{,_nowait}(),
- * and __gmap_link() must all be called atomically in one go; if the
- * lock had been dropped in between, a retry is needed.
- */
- if (unlocked)
- goto retry;
-
- return __gmap_link(gmap, gaddr, vmaddr);
-}
-
-/**
- * gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the
- * vm address is already mapped to a different guest segment, and -EAGAIN if
- * FAULT_FLAG_RETRY_NOWAIT was specified and the fault could not be processed
- * immediately.
- */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags)
-{
- int rc;
-
- mmap_read_lock(gmap->mm);
- rc = __gmap_fault(gmap, gaddr, fault_flags);
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_fault);
+EXPORT_SYMBOL(__gmap_link);
/*
* this function is assumed to be called with mmap_lock held
*/
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
- struct vm_area_struct *vma;
unsigned long vmaddr;
- spinlock_t *ptl;
- pte_t *ptep;
+
+ mmap_assert_locked(gmap->mm);
/* Find the vm address for the guest address */
vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
gaddr >> PMD_SHIFT);
if (vmaddr) {
vmaddr |= gaddr & ~PMD_MASK;
-
- vma = vma_lookup(gmap->mm, vmaddr);
- if (!vma || is_vm_hugetlb_page(vma))
- return;
-
- /* Get pointer to the page table entry */
- ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
- if (likely(ptep)) {
- ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
- pte_unmap_unlock(ptep, ptl);
- }
+ gmap_helper_zap_one_page(gmap->mm, vmaddr);
}
}
EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
-{
- unsigned long gaddr, vmaddr, size;
- struct vm_area_struct *vma;
-
- mmap_read_lock(gmap->mm);
- for (gaddr = from; gaddr < to;
- gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- continue;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Find vma in the parent mm */
- vma = find_vma(gmap->mm, vmaddr);
- if (!vma)
- continue;
- /*
- * We do not discard pages that are backed by
- * hugetlbfs, so we don't have to refault them.
- */
- if (is_vm_hugetlb_page(vma))
- continue;
- size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range_single(vma, vmaddr, size, NULL);
- }
- mmap_read_unlock(gmap->mm);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);
@@ -853,8 +696,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
*
* Note: Can also be called for shadow gmaps.
*/
-static inline unsigned long *gmap_table_walk(struct gmap *gmap,
- unsigned long gaddr, int level)
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)
{
const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
unsigned long *table = gmap->table;
@@ -905,6 +747,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
}
return table;
}
+EXPORT_SYMBOL(gmap_table_walk);
/**
* gmap_pte_op_walk - walk the gmap page table, get the page table lock
@@ -1101,86 +944,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
* @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
* @bits: pgste notification bits to set
*
- * Returns 0 if successfully protected, -ENOMEM if out of memory and
- * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
+ * Returns:
+ * PAGE_SIZE if a small page was successfully protected;
+ * HPAGE_SIZE if a large page was successfully protected;
+ * -ENOMEM if out of memory;
+ * -EFAULT if gaddr is invalid (or mapping for shadows is missing);
+ * -EAGAIN if the guest mapping is missing and should be fixed by the caller.
*
- * Called with sg->mm->mmap_lock in read.
+ * Context: Called with sg->mm->mmap_lock in read.
*/
-static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
- unsigned long len, int prot, unsigned long bits)
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)
{
- unsigned long vmaddr, dist;
pmd_t *pmdp;
- int rc;
+ int rc = 0;
BUG_ON(gmap_is_shadow(gmap));
- while (len) {
- rc = -EAGAIN;
- pmdp = gmap_pmd_op_walk(gmap, gaddr);
- if (pmdp) {
- if (!pmd_leaf(*pmdp)) {
- rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
- bits);
- if (!rc) {
- len -= PAGE_SIZE;
- gaddr += PAGE_SIZE;
- }
- } else {
- rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
- bits);
- if (!rc) {
- dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
- len = len < dist ? 0 : len - dist;
- gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
- }
- }
- gmap_pmd_op_end(gmap, pmdp);
- }
- if (rc) {
- if (rc == -EINVAL)
- return rc;
- /* -EAGAIN, fixup of userspace mm and gmap */
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr))
- return vmaddr;
- rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
- if (rc)
- return rc;
- }
- }
- return 0;
-}
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (!pmdp)
+ return -EAGAIN;
-/**
- * gmap_mprotect_notify - change access rights for a range of ptes and
- * call the notifier if any pte changes again
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: virtual address in the guest address space
- * @len: size of area
- * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
- *
- * Returns 0 if for each page in the given range a gmap mapping exists,
- * the new access rights could be set and the notifier could be armed.
- * If the gmap mapping is missing for one or more pages -EFAULT is
- * returned. If no memory could be allocated -ENOMEM is returned.
- * This function establishes missing page table entries.
- */
-int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
- unsigned long len, int prot)
-{
- int rc;
+ if (!pmd_leaf(*pmdp)) {
+ rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);
+ if (!rc)
+ rc = PAGE_SIZE;
+ } else {
+ rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);
+ if (!rc)
+ rc = HPAGE_SIZE;
+ }
+ gmap_pmd_op_end(gmap, pmdp);
- if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
- return -EINVAL;
- if (!MACHINE_HAS_ESOP && prot == PROT_READ)
- return -EINVAL;
- mmap_read_lock(gmap->mm);
- rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
- mmap_read_unlock(gmap->mm);
return rc;
}
-EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
+EXPORT_SYMBOL_GPL(gmap_protect_one);
/**
* gmap_read_table - get an unsigned long value from a guest page table using
@@ -1414,7 +1211,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
@@ -1442,7 +1238,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
}
@@ -1472,7 +1267,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1500,7 +1294,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1530,7 +1323,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1558,7 +1350,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1588,7 +1379,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r2t(sg, raddr, __va(r2t));
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1620,7 +1410,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
r1t[i] = _REGION1_ENTRY_EMPTY;
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1631,7 +1420,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
*
* Called with sg->guest_table_lock
*/
-static void gmap_unshadow(struct gmap *sg)
+void gmap_unshadow(struct gmap *sg)
{
unsigned long *table;
@@ -1657,143 +1446,7 @@ static void gmap_unshadow(struct gmap *sg)
break;
}
}
-
-/**
- * gmap_find_shadow - find a specific asce in the list of shadow tables
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns the pointer to a gmap if a shadow table with the given asce is
- * already available, ERR_PTR(-EAGAIN) if another one is just being created,
- * otherwise NULL
- */
-static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
- int edat_level)
-{
- struct gmap *sg;
-
- list_for_each_entry(sg, &parent->children, list) {
- if (sg->orig_asce != asce || sg->edat_level != edat_level ||
- sg->removed)
- continue;
- if (!sg->initialized)
- return ERR_PTR(-EAGAIN);
- refcount_inc(&sg->ref_count);
- return sg;
- }
- return NULL;
-}
-
-/**
- * gmap_shadow_valid - check if a shadow guest address space matches the
- * given properties and is still valid
- * @sg: pointer to the shadow guest address space structure
- * @asce: ASCE for which the shadow table is requested
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns 1 if the gmap shadow is still valid and matches the given
- * properties, the caller can continue using it. Returns 0 otherwise, the
- * caller has to request a new shadow gmap in this case.
- *
- */
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
-{
- if (sg->removed)
- return 0;
- return sg->orig_asce == asce && sg->edat_level == edat_level;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow_valid);
-
-/**
- * gmap_shadow - create/find a shadow guest address space
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * The pages of the top level page table referred by the asce parameter
- * will be set to read-only and marked in the PGSTEs of the kvm process.
- * The shadow table will be removed automatically on any change to the
- * PTE mapping for the source table.
- *
- * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
- * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
- * parent gmap table could not be protected.
- */
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
- int edat_level)
-{
- struct gmap *sg, *new;
- unsigned long limit;
- int rc;
-
- BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
- BUG_ON(gmap_is_shadow(parent));
- spin_lock(&parent->shadow_lock);
- sg = gmap_find_shadow(parent, asce, edat_level);
- spin_unlock(&parent->shadow_lock);
- if (sg)
- return sg;
- /* Create a new shadow gmap */
- limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
- if (asce & _ASCE_REAL_SPACE)
- limit = -1UL;
- new = gmap_alloc(limit);
- if (!new)
- return ERR_PTR(-ENOMEM);
- new->mm = parent->mm;
- new->parent = gmap_get(parent);
- new->private = parent->private;
- new->orig_asce = asce;
- new->edat_level = edat_level;
- new->initialized = false;
- spin_lock(&parent->shadow_lock);
- /* Recheck if another CPU created the same shadow */
- sg = gmap_find_shadow(parent, asce, edat_level);
- if (sg) {
- spin_unlock(&parent->shadow_lock);
- gmap_free(new);
- return sg;
- }
- if (asce & _ASCE_REAL_SPACE) {
- /* only allow one real-space gmap shadow */
- list_for_each_entry(sg, &parent->children, list) {
- if (sg->orig_asce & _ASCE_REAL_SPACE) {
- spin_lock(&sg->guest_table_lock);
- gmap_unshadow(sg);
- spin_unlock(&sg->guest_table_lock);
- list_del(&sg->list);
- gmap_put(sg);
- break;
- }
- }
- }
- refcount_set(&new->ref_count, 2);
- list_add(&new->list, &parent->children);
- if (asce & _ASCE_REAL_SPACE) {
- /* nothing to protect, return right away */
- new->initialized = true;
- spin_unlock(&parent->shadow_lock);
- return new;
- }
- spin_unlock(&parent->shadow_lock);
- /* protect after insertion, so it will get properly invalidated */
- mmap_read_lock(parent->mm);
- rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
- ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
- PROT_READ, GMAP_NOTIFY_SHADOW);
- mmap_read_unlock(parent->mm);
- spin_lock(&parent->shadow_lock);
- new->initialized = true;
- if (rc) {
- list_del(&new->list);
- gmap_free(new);
- new = ERR_PTR(rc);
- }
- spin_unlock(&parent->shadow_lock);
- return new;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow);
+EXPORT_SYMBOL(gmap_unshadow);
/**
* gmap_shadow_r2t - create an empty shadow region 2 table
@@ -1827,9 +1480,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = r2t & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_r2t = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -1851,7 +1501,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r2t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1911,9 +1560,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = r3t & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_r3t = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -1935,7 +1581,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r3t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1995,9 +1640,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = sgt & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_sgt = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -2019,7 +1661,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= sgt & _REGION_ENTRY_PROTECT;
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -2052,45 +1693,22 @@ out_free:
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
-/**
- * gmap_shadow_pgt_lookup - find a shadow page table
- * @sg: pointer to the shadow guest address space structure
- * @saddr: the address in the shadow aguest address space
- * @pgt: parent gmap address of the page table to get shadowed
- * @dat_protection: if the pgtable is marked as protected by dat
- * @fake: pgt references contiguous guest memory block, not a pgtable
- *
- * Returns 0 if the shadow page table was found and -EAGAIN if the page
- * table was not found.
- *
- * Called with sg->mm->mmap_lock in read.
- */
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
- unsigned long *pgt, int *dat_protection,
- int *fake)
+static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
{
- unsigned long *table;
- struct page *page;
- int rc;
+ unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));
- BUG_ON(!gmap_is_shadow(sg));
- spin_lock(&sg->guest_table_lock);
- table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
- if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
- /* Shadow page tables are full pages (pte+pgste) */
- page = pfn_to_page(*table >> PAGE_SHIFT);
- *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
- *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
- *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
- rc = 0;
- } else {
- rc = -EAGAIN;
- }
- spin_unlock(&sg->guest_table_lock);
- return rc;
+ pgstes += _PAGE_ENTRIES;
+
+ pgstes[0] &= ~PGSTE_ST2_MASK;
+ pgstes[1] &= ~PGSTE_ST2_MASK;
+ pgstes[2] &= ~PGSTE_ST2_MASK;
+ pgstes[3] &= ~PGSTE_ST2_MASK;
+ pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
+ pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
+ pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
+ pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
}
-EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
/**
* gmap_shadow_pgt - instantiate a shadow page table
@@ -2119,9 +1737,10 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
ptdesc = page_table_alloc_pgste(sg->mm);
if (!ptdesc)
return -ENOMEM;
- ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN;
+ origin = pgt & _SEGMENT_ENTRY_ORIGIN;
if (fake)
- ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE;
+ origin |= GMAP_SHADOW_FAKE_TABLE;
+ gmap_pgste_set_pgt_addr(ptdesc, origin);
s_pgt = page_to_phys(ptdesc_page(ptdesc));
/* Install shadow page table */
spin_lock(&sg->guest_table_lock);
@@ -2140,7 +1759,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
/* mark as invalid as long as the parent table is not protected */
*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
(pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
- list_add(&ptdesc->pt_list, &sg->pt_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2318,7 +1936,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
pte_t *pte, unsigned long bits)
{
unsigned long offset, gaddr = 0;
- unsigned long *table;
struct gmap *gmap, *sg, *next;
offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
@@ -2326,12 +1943,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- table = radix_tree_lookup(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (table)
- gaddr = __gmap_segment_gaddr(table) + offset;
+ gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
spin_unlock(&gmap->guest_table_lock);
- if (!table)
+ if (!IS_GADDR_VALID(gaddr))
continue;
if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
@@ -2371,10 +1985,10 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
gaddr &= HPAGE_MASK;
pmdp_notify_gmap(gmap, pmdp, gaddr);
new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
IDTE_GLOBAL);
- else if (MACHINE_HAS_IDTE)
+ else if (cpu_has_idte())
__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
else
__pmdp_csp(pmdp);
@@ -2391,10 +2005,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
if (pmdp) {
- gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
pmdp_notify_gmap(gmap, pmdp, gaddr);
WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
_SEGMENT_ENTRY_GMAP_UC |
@@ -2438,28 +2050,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
*/
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *entry, gaddr;
+ unsigned long gaddr;
struct gmap *gmap;
pmd_t *pmdp;
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (entry) {
- pmdp = (pmd_t *)entry;
- gaddr = __gmap_segment_gaddr(entry);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
pmdp_notify_gmap(gmap, pmdp, gaddr);
- WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
- _SEGMENT_ENTRY_GMAP_UC |
- _SEGMENT_ENTRY));
- if (MACHINE_HAS_TLB_GUEST)
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC |
+ _SEGMENT_ENTRY));
+ if (machine_has_tlb_guest())
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
gmap->asce, IDTE_LOCAL);
- else if (MACHINE_HAS_IDTE)
+ else if (cpu_has_idte())
__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
- *entry = _SEGMENT_ENTRY_EMPTY;
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
spin_unlock(&gmap->guest_table_lock);
}
@@ -2474,30 +2083,27 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
*/
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *entry, gaddr;
+ unsigned long gaddr;
struct gmap *gmap;
pmd_t *pmdp;
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (entry) {
- pmdp = (pmd_t *)entry;
- gaddr = __gmap_segment_gaddr(entry);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
pmdp_notify_gmap(gmap, pmdp, gaddr);
- WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
- _SEGMENT_ENTRY_GMAP_UC |
- _SEGMENT_ENTRY));
- if (MACHINE_HAS_TLB_GUEST)
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC |
+ _SEGMENT_ENTRY));
+ if (machine_has_tlb_guest())
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
gmap->asce, IDTE_GLOBAL);
- else if (MACHINE_HAS_IDTE)
+ else if (cpu_has_idte())
__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
else
__pmdp_csp(pmdp);
- *entry = _SEGMENT_ENTRY_EMPTY;
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
spin_unlock(&gmap->guest_table_lock);
}
@@ -2612,9 +2218,6 @@ int s390_enable_sie(void)
/* Do we have pgstes? if yes, we are done */
if (mm_has_pgste(mm))
return 0;
- /* Fail if the page tables are 2K */
- if (!mm_alloc_pgste(mm))
- return -EINVAL;
mmap_write_lock(mm);
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
@@ -2624,138 +2227,6 @@ int s390_enable_sie(void)
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
-static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
- unsigned long end, struct mm_walk *walk)
-{
- unsigned long *found_addr = walk->private;
-
- /* Return 1 of the page is a zeropage. */
- if (is_zero_pfn(pte_pfn(*pte))) {
- /*
- * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
- * right thing and likely don't care: FAULT_FLAG_UNSHARE
- * currently only works in COW mappings, which is also where
- * mm_forbids_zeropage() is checked.
- */
- if (!is_cow_mapping(walk->vma->vm_flags))
- return -EFAULT;
-
- *found_addr = addr;
- return 1;
- }
- return 0;
-}
-
-static const struct mm_walk_ops find_zeropage_ops = {
- .pte_entry = find_zeropage_pte_entry,
- .walk_lock = PGWALK_WRLOCK,
-};
-
-/*
- * Unshare all shared zeropages, replacing them by anonymous pages. Note that
- * we cannot simply zap all shared zeropages, because this could later
- * trigger unexpected userfaultfd missing events.
- *
- * This must be called after mm->context.allow_cow_sharing was
- * set to 0, to avoid future mappings of shared zeropages.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * and racing with walk_page_range_vma() calling pte_offset_map_lock()
- * would fail, it will never insert a page table containing empty zero
- * pages once mm_forbids_zeropage(mm) i.e.
- * mm->context.allow_cow_sharing is set to 0.
- */
-static int __s390_unshare_zeropages(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
- unsigned long addr;
- vm_fault_t fault;
- int rc;
-
- for_each_vma(vmi, vma) {
- /*
- * We could only look at COW mappings, but it's more future
- * proof to catch unexpected zeropages in other mappings and
- * fail.
- */
- if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
- continue;
- addr = vma->vm_start;
-
-retry:
- rc = walk_page_range_vma(vma, addr, vma->vm_end,
- &find_zeropage_ops, &addr);
- if (rc < 0)
- return rc;
- else if (!rc)
- continue;
-
- /* addr was updated by find_zeropage_pte_entry() */
- fault = handle_mm_fault(vma, addr,
- FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
- NULL);
- if (fault & VM_FAULT_OOM)
- return -ENOMEM;
- /*
- * See break_ksm(): even after handle_mm_fault() returned 0, we
- * must start the lookup from the current address, because
- * handle_mm_fault() may back out if there's any difficulty.
- *
- * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
- * maybe they could trigger in the future on concurrent
- * truncation. In that case, the shared zeropage would be gone
- * and we can simply retry and make progress.
- */
- cond_resched();
- goto retry;
- }
-
- return 0;
-}
-
-static int __s390_disable_cow_sharing(struct mm_struct *mm)
-{
- int rc;
-
- if (!mm->context.allow_cow_sharing)
- return 0;
-
- mm->context.allow_cow_sharing = 0;
-
- /* Replace all shared zeropages by anonymous pages. */
- rc = __s390_unshare_zeropages(mm);
- /*
- * Make sure to disable KSM (if enabled for the whole process or
- * individual VMAs). Note that nothing currently hinders user space
- * from re-enabling it.
- */
- if (!rc)
- rc = ksm_disable(mm);
- if (rc)
- mm->context.allow_cow_sharing = 1;
- return rc;
-}
-
-/*
- * Disable most COW-sharing of memory pages for the whole process:
- * (1) Disable KSM and unmerge/unshare any KSM pages.
- * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
- *
- * Not that we currently don't bother with COW-shared pages that are shared
- * with parent/child processes due to fork().
- */
-int s390_disable_cow_sharing(void)
-{
- int rc;
-
- mmap_write_lock(current->mm);
- rc = __s390_disable_cow_sharing(current->mm);
- mmap_write_unlock(current->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
-
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
@@ -2823,7 +2294,7 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- rc = __s390_disable_cow_sharing(mm);
+ rc = gmap_helper_disable_cow_sharing();
if (rc) {
mm->context.uses_skeys = 0;
goto out_up;
@@ -2943,49 +2414,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
/**
- * s390_unlist_old_asce - Remove the topmost level of page tables from the
- * list of page tables of the gmap.
- * @gmap: the gmap whose table is to be removed
- *
- * On s390x, KVM keeps a list of all pages containing the page tables of the
- * gmap (the CRST list). This list is used at tear down time to free all
- * pages that are now not needed anymore.
- *
- * This function removes the topmost page of the tree (the one pointed to by
- * the ASCE) from the CRST list.
- *
- * This means that it will not be freed when the VM is torn down, and needs
- * to be handled separately by the caller, unless a leak is actually
- * intended. Notice that this function will only remove the page from the
- * list, the page will still be used as a top level page table (and ASCE).
- */
-void s390_unlist_old_asce(struct gmap *gmap)
-{
- struct page *old;
-
- old = virt_to_page(gmap->table);
- spin_lock(&gmap->guest_table_lock);
- list_del(&old->lru);
- /*
- * Sometimes the topmost page might need to be "removed" multiple
- * times, for example if the VM is rebooted into secure mode several
- * times concurrently, or if s390_replace_asce fails after calling
- * s390_remove_old_asce and is attempted again later. In that case
- * the old asce has been removed from the list, and therefore it
- * will not be freed when the VM terminates, but the ASCE is still
- * in use and still pointed to.
- * A subsequent call to replace_asce will follow the pointer and try
- * to remove the same page from the list again.
- * Therefore it's necessary that the page of the ASCE has valid
- * pointers, so list_del can work (and do nothing) without
- * dereferencing stale or invalid pointers.
- */
- INIT_LIST_HEAD(&old->lru);
- spin_unlock(&gmap->guest_table_lock);
-}
-EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
-
-/**
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
* @gmap: the gmap whose ASCE needs to be replaced
*
@@ -3004,8 +2432,6 @@ int s390_replace_asce(struct gmap *gmap)
struct page *page;
void *table;
- s390_unlist_old_asce(gmap);
-
/* Replacing segment type ASCEs would cause serious issues */
if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
return -EINVAL;
@@ -3013,19 +2439,9 @@ int s390_replace_asce(struct gmap *gmap)
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = 0;
table = page_to_virt(page);
memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
- /*
- * The caller has to deal with the old ASCE, but here we make sure
- * the new one is properly added to the CRST list, so that
- * it will be freed when the VM is torn down.
- */
- spin_lock(&gmap->guest_table_lock);
- list_add(&page->lru, &gmap->crst_list);
- spin_unlock(&gmap->guest_table_lock);
-
/* Set new table origin while preserving existing ASCE control bits */
asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
WRITE_ONCE(gmap->asce, asce);
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
new file mode 100644
index 000000000000..b63f427e7289
--- /dev/null
+++ b/arch/s390/mm/gmap_helpers.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helper functions for KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2025
+ */
+
+#include <linux/export.h>
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagewalk.h>
+#include <linux/ksm.h>
+#include <asm/gmap_helpers.h>
+
+/**
+ * ptep_zap_swap_entry() - discard a swap entry.
+ * @mm: the mm
+ * @entry: the swap entry that needs to be zapped
+ *
+ * Discards the given swap entry. If the swap entry was an actual swap
+ * entry (and not a migration entry, for example), the actual swapped
+ * page is also discarded from swap.
+ */
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+ if (!non_swap_entry(entry))
+ dec_mm_counter(mm, MM_SWAPENTS);
+ else if (is_migration_entry(entry))
+ dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
+ free_swap_and_cache(entry);
+}
+
+/**
+ * gmap_helper_zap_one_page() - discard a page if it was swapped.
+ * @mm: the mm
+ * @vmaddr: the userspace virtual address that needs to be discarded
+ *
+ * If the given address maps to a swap entry, discard it.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
+{
+ struct vm_area_struct *vma;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ mmap_assert_locked(mm);
+
+ /* Find the vm address for the guest address */
+ vma = vma_lookup(mm, vmaddr);
+ if (!vma || is_vm_hugetlb_page(vma))
+ return;
+
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(mm, vmaddr, &ptl);
+ if (unlikely(!ptep))
+ return;
+ if (pte_swap(*ptep))
+ ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+ pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
+
+/**
+ * gmap_helper_discard() - discard user pages in the given range
+ * @mm: the mm
+ * @vmaddr: starting userspace address
+ * @end: end address (first address outside the range)
+ *
+ * All userpace pages in the range [@vamddr, @end) are discarded and unmapped.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+ struct vm_area_struct *vma;
+
+ mmap_assert_locked(mm);
+
+ while (vmaddr < end) {
+ vma = find_vma_intersection(mm, vmaddr, end);
+ if (!vma)
+ return;
+ if (!is_vm_hugetlb_page(vma))
+ zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
+ vmaddr = vma->vm_end;
+ }
+}
+EXPORT_SYMBOL_GPL(gmap_helper_discard);
+
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long *found_addr = walk->private;
+
+ /* Return 1 of the page is a zeropage. */
+ if (is_zero_pfn(pte_pfn(*pte))) {
+ /*
+ * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+ * right thing and likely don't care: FAULT_FLAG_UNSHARE
+ * currently only works in COW mappings, which is also where
+ * mm_forbids_zeropage() is checked.
+ */
+ if (!is_cow_mapping(walk->vma->vm_flags))
+ return -EFAULT;
+
+ *found_addr = addr;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+ .pte_entry = find_zeropage_pte_entry,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
+/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages
+ * @mm: the mm whose zero pages are to be unshared
+ *
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
+ unsigned long addr;
+ vm_fault_t fault;
+ int rc;
+
+ for_each_vma(vmi, vma) {
+ /*
+ * We could only look at COW mappings, but it's more future
+ * proof to catch unexpected zeropages in other mappings and
+ * fail.
+ */
+ if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+ continue;
+ addr = vma->vm_start;
+
+retry:
+ rc = walk_page_range_vma(vma, addr, vma->vm_end,
+ &find_zeropage_ops, &addr);
+ if (rc < 0)
+ return rc;
+ else if (!rc)
+ continue;
+
+ /* addr was updated by find_zeropage_pte_entry() */
+ fault = handle_mm_fault(vma, addr,
+ FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+ NULL);
+ if (fault & VM_FAULT_OOM)
+ return -ENOMEM;
+ /*
+ * See break_ksm(): even after handle_mm_fault() returned 0, we
+ * must start the lookup from the current address, because
+ * handle_mm_fault() may back out if there's any difficulty.
+ *
+ * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+ * maybe they could trigger in the future on concurrent
+ * truncation. In that case, the shared zeropage would be gone
+ * and we can simply retry and make progress.
+ */
+ cond_resched();
+ goto retry;
+ }
+
+ return 0;
+}
+
+/**
+ * gmap_helper_disable_cow_sharing() - disable all COW sharing
+ *
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int gmap_helper_disable_cow_sharing(void)
+{
+ struct mm_struct *mm = current->mm;
+ int rc;
+
+ mmap_assert_write_locked(mm);
+
+ if (!mm->context.allow_cow_sharing)
+ return 0;
+
+ mm->context.allow_cow_sharing = 0;
+
+ /* Replace all shared zeropages by anonymous pages. */
+ rc = __gmap_helper_unshare_zeropages(mm);
+ /*
+ * Make sure to disable KSM (if enabled for the whole process or
+ * individual VMAs). Note that nothing currently hinders user space
+ * from re-enabling it.
+ */
+ if (!rc)
+ rc = ksm_disable(mm);
+ if (rc)
+ mm->context.allow_cow_sharing = 1;
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index d9ce199953de..e88c02c9e642 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -9,12 +9,13 @@
#define KMSG_COMPONENT "hugetlb"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-#include <asm/pgalloc.h>
+#include <linux/cpufeature.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/security.h>
+#include <asm/pgalloc.h>
/*
* If the bit selected by single-bit bitmask "a" is set within "x", move
@@ -188,8 +189,8 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
return __rste_to_pte(pte_val(*ptep));
}
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+pte_t __huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
pte_t pte = huge_ptep_get(mm, addr, ptep);
pmd_t *pmdp = (pmd_t *) ptep;
@@ -248,9 +249,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
bool __init arch_hugetlb_valid_size(unsigned long size)
{
- if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
+ if (cpu_has_edat1() && size == PMD_SIZE)
return true;
- else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE)
+ else if (cpu_has_edat2() && size == PUD_SIZE)
return true;
else
return false;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f2298f7a3f21..e4953453d254 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -8,6 +8,7 @@
* Copyright (C) 1995 Linus Torvalds
*/
+#include <linux/cpufeature.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -39,7 +40,6 @@
#include <asm/kfence.h>
#include <asm/dma.h>
#include <asm/abs_lowcore.h>
-#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/sclp.h>
@@ -73,8 +73,6 @@ static void __init setup_zero_pages(void)
{
unsigned long total_pages = memblock_estimated_nr_free_pages();
unsigned int order;
- struct page *page;
- int i;
/* Latest machines require a mapping granularity of 512KB */
order = 7;
@@ -83,16 +81,7 @@ static void __init setup_zero_pages(void)
while (order > 2 && (total_pages >> 10) < (1UL << order))
order--;
- empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
- if (!empty_zero_page)
- panic("Out of memory in setup_zero_pages");
-
- page = virt_to_page((void *) empty_zero_page);
- split_page(page, order);
- for (i = 1 << order; i > 0; i--) {
- mark_page_reserved(page);
- page++;
- }
+ empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE);
zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
}
@@ -117,7 +106,7 @@ void mark_rodata_ro(void)
{
unsigned long size = __end_ro_after_init - __start_ro_after_init;
- if (MACHINE_HAS_NX)
+ if (cpu_has_nx())
system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT);
__set_memory_ro(__start_ro_after_init, __end_ro_after_init);
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
@@ -153,7 +142,7 @@ bool force_dma_unencrypted(struct device *dev)
}
/* protected virtualization */
-static void pv_init(void)
+static void __init pv_init(void)
{
if (!is_prot_virt_guest())
return;
@@ -165,19 +154,13 @@ static void pv_init(void)
swiotlb_update_mem_attributes();
}
-void __init mem_init(void)
+void __init arch_mm_preinit(void)
{
cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(0, mm_cpumask(&init_mm));
- set_max_mapnr(max_low_pfn);
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-
pv_init();
- kfence_split_mapping();
- /* this will put all low memory onto the freelists */
- memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
}
@@ -239,16 +222,13 @@ struct s390_cma_mem_data {
static int s390_cma_check_range(struct cma *cma, void *data)
{
struct s390_cma_mem_data *mem_data;
- unsigned long start, end;
mem_data = data;
- start = cma_get_base(cma);
- end = start + cma_get_size(cma);
- if (end < mem_data->start)
- return 0;
- if (start >= mem_data->end)
- return 0;
- return -EBUSY;
+
+ if (cma_intersects(cma, mem_data->start, mem_data->end))
+ return -EBUSY;
+
+ return 0;
}
static int s390_cma_mem_notifier(struct notifier_block *nb,
@@ -285,7 +265,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
unsigned long size_pages = PFN_DOWN(size);
int rc;
- if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
+ if (WARN_ON_ONCE(pgprot_val(params->pgprot) != pgprot_val(PAGE_KERNEL)))
return -EINVAL;
VM_BUG_ON(!mhp_range_allowed(start, size, true));
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 76f376876e0d..40a526d28184 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -51,7 +51,6 @@ static inline unsigned long mmap_base(unsigned long rnd,
{
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
- unsigned long gap_min, gap_max;
/* Values close to RLIM_INFINITY can overflow. */
if (gap + pad > gap)
@@ -61,13 +60,7 @@ static inline unsigned long mmap_base(unsigned long rnd,
* Top of mmap area (just below the process stack).
* Leave at least a ~128 MB hole.
*/
- gap_min = SZ_128M;
- gap_max = (STACK_TOP / 6) * 5;
-
- if (gap < gap_min)
- gap = gap_min;
- else if (gap > gap_max)
- gap = gap_max;
+ gap = clamp(gap, SZ_128M, (STACK_TOP / 6) * 5);
return PAGE_ALIGN(STACK_TOP - gap - rnd);
}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index eae97fb61712..348e759840e7 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -3,6 +3,7 @@
* Copyright IBM Corp. 2011
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
+#include <linux/cpufeature.h>
#include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
@@ -27,7 +28,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
unsigned long boundary, size;
while (start < end) {
- if (MACHINE_HAS_EDAT1) {
+ if (cpu_has_edat1()) {
/* set storage keys for a 1MB frame */
size = 1UL << 20;
boundary = (start + size) & ~(size - 1);
@@ -63,7 +64,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
unsigned long *table, mask;
mask = 0;
- if (MACHINE_HAS_EDAT2) {
+ if (cpu_has_edat2()) {
switch (dtt) {
case CRDTE_DTT_REGION3:
mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
@@ -77,7 +78,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
}
table = (unsigned long *)((unsigned long)old & mask);
crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val);
- } else if (MACHINE_HAS_IDTE) {
+ } else if (cpu_has_idte()) {
cspg(old, *old, new);
} else {
csp((unsigned int *)old + 1, *old, new);
@@ -373,7 +374,7 @@ int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags
unsigned long end;
int rc;
- if (!MACHINE_HAS_NX)
+ if (!cpu_has_nx())
flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
if (!flags)
return 0;
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 1aac13bb8f53..e6175d75e4b0 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/irq.h>
#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
#include <asm/pfault.h>
#include <asm/diag.h>
@@ -56,7 +57,7 @@ int __pfault_init(void)
if (pfault_disable)
return rc;
diag_stat_inc(DIAG_STAT_X258);
- asm volatile(
+ asm_inline volatile(
" diag %[refbk],%[rc],0x258\n"
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
@@ -78,7 +79,7 @@ void __pfault_fini(void)
if (pfault_disable)
return;
diag_stat_inc(DIAG_STAT_X258);
- asm volatile(
+ asm_inline volatile(
" diag %[refbk],0,0x258\n"
"0: nopr %%r7\n"
EX_TABLE(0b, 0b)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index cd2fef79ad2c..d2f6f1f6d2fc 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -12,35 +12,8 @@
#include <asm/mmu_context.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
-#include <asm/gmap.h>
-#include <asm/tlb.h>
#include <asm/tlbflush.h>
-#ifdef CONFIG_PGSTE
-
-int page_table_allocate_pgste = 0;
-EXPORT_SYMBOL(page_table_allocate_pgste);
-
-static const struct ctl_table page_table_sysctl[] = {
- {
- .procname = "allocate_pgste",
- .data = &page_table_allocate_pgste,
- .maxlen = sizeof(int),
- .mode = S_IRUGO | S_IWUSR,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-};
-
-static int __init page_table_register_sysctl(void)
-{
- return register_sysctl("vm", page_table_sysctl) ? 0 : -ENOMEM;
-}
-__initcall(page_table_register_sysctl);
-
-#endif /* CONFIG_PGSTE */
-
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
@@ -63,11 +36,15 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
static void __crst_table_upgrade(void *arg)
{
struct mm_struct *mm = arg;
+ struct ctlreg asce;
/* change all active ASCEs to avoid the creation of new TLBs */
if (current->active_mm == mm) {
- get_lowcore()->user_asce.val = mm->context.asce;
- local_ctl_load(7, &get_lowcore()->user_asce);
+ asce.val = mm->context.asce;
+ get_lowcore()->user_asce = asce;
+ local_ctl_load(7, &asce);
+ if (!test_thread_flag(TIF_ASCE_PRIMARY))
+ local_ctl_load(1, &asce);
}
__tlb_flush_local();
}
@@ -77,6 +54,8 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
unsigned long asce_limit = mm->context.asce_limit;
+ mmap_assert_write_locked(mm);
+
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
VM_BUG_ON(asce_limit < _REGION2_SIZE);
@@ -100,13 +79,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
spin_lock_bh(&mm->page_table_lock);
- /*
- * This routine gets called with mmap_lock lock held and there is
- * no reason to optimize for the case of otherwise. However, if
- * that would ever change, the below check will let us know.
- */
- VM_BUG_ON(asce_limit != mm->context.asce_limit);
-
if (p4d) {
__pgd = (unsigned long *) mm->pgd;
p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
@@ -170,14 +142,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (!ptdesc)
return NULL;
- if (!pagetable_pte_ctor(ptdesc)) {
+ if (!pagetable_pte_ctor(mm, ptdesc)) {
pagetable_free(ptdesc);
return NULL;
}
table = ptdesc_to_virt(ptdesc);
__arch_set_page_dat(table, 1);
- /* pt_list is used by gmap only */
- INIT_LIST_HEAD(&ptdesc->pt_list);
memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
return table;
@@ -203,11 +173,6 @@ void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
- /*
- * THPs are not allowed for KVM guests. Warn if pgste ever reaches here.
- * Turn to the generic pte_free_defer() version once gmap is removed.
- */
- WARN_ON_ONCE(mm_has_pgste(mm));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index f05e62e037c2..60688be4e876 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -4,6 +4,8 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
+#include <linux/cpufeature.h>
+#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -19,10 +21,10 @@
#include <linux/ksm.h>
#include <linux/mman.h>
-#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
+#include <asm/machine.h>
pgprot_t pgprot_writecombine(pgprot_t prot)
{
@@ -34,22 +36,12 @@ pgprot_t pgprot_writecombine(pgprot_t prot)
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
-pgprot_t pgprot_writethrough(pgprot_t prot)
-{
- /*
- * mio_wb_bit_mask may be set on a different CPU, but it is only set
- * once at init and only read afterwards.
- */
- return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
-}
-EXPORT_SYMBOL_GPL(pgprot_writethrough);
-
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, int nodat)
{
unsigned long opt, asce;
- if (MACHINE_HAS_TLB_GUEST) {
+ if (machine_has_tlb_guest()) {
opt = 0;
asce = READ_ONCE(mm->context.gmap_asce);
if (asce == 0UL || nodat)
@@ -69,7 +61,7 @@ static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
{
unsigned long opt, asce;
- if (MACHINE_HAS_TLB_GUEST) {
+ if (machine_has_tlb_guest()) {
opt = 0;
asce = READ_ONCE(mm->context.gmap_asce);
if (asce == 0UL || nodat)
@@ -94,7 +86,7 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
if (unlikely(pte_val(old) & _PAGE_INVALID))
return old;
atomic_inc(&mm->context.flush_count);
- if (MACHINE_HAS_TLB_LC &&
+ if (cpu_has_tlb_lc() &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
ptep_ipte_local(mm, addr, ptep, nodat);
else
@@ -173,10 +165,10 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
/* Transfer page changed & referenced bit to guest bits in pgste */
- pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
+ pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */
/* Copy page access key and fetch protection bit to pgste */
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
#endif
return pgste;
@@ -210,7 +202,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
if ((pte_val(entry) & _PAGE_PRESENT) &&
(pte_val(entry) & _PAGE_WRITE) &&
!(pte_val(entry) & _PAGE_INVALID)) {
- if (!MACHINE_HAS_ESOP) {
+ if (!machine_has_esop()) {
/*
* Without enhanced suppression-on-protection force
* the dirty bit on for all writable ptes.
@@ -220,7 +212,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
}
if (!(pte_val(entry) & _PAGE_PROTECT))
/* This pte allows write access, set user-dirty */
- pgste_val(pgste) |= PGSTE_UC_BIT;
+ pgste = set_pgste_bit(pgste, PGSTE_UC_BIT);
}
#endif
set_pte(ptep, entry);
@@ -236,7 +228,7 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
if (bits) {
- pgste_val(pgste) ^= bits;
+ pgste = __pgste(pgste_val(pgste) ^ bits);
ptep_notify(mm, addr, ptep, bits);
}
#endif
@@ -374,7 +366,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
static inline void pmdp_idte_local(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_LOCAL);
else
@@ -386,12 +378,12 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
static inline void pmdp_idte_global(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- if (MACHINE_HAS_TLB_GUEST) {
+ if (machine_has_tlb_guest()) {
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_GLOBAL);
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
gmap_pmdp_idte_global(mm, addr);
- } else if (MACHINE_HAS_IDTE) {
+ } else if (cpu_has_idte()) {
__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
gmap_pmdp_idte_global(mm, addr);
@@ -411,7 +403,7 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
return old;
atomic_inc(&mm->context.flush_count);
- if (MACHINE_HAS_TLB_LC &&
+ if (cpu_has_tlb_lc() &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
pmdp_idte_local(mm, addr, pmdp);
else
@@ -505,7 +497,7 @@ EXPORT_SYMBOL(pmdp_xchg_lazy);
static inline void pudp_idte_local(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_LOCAL);
else
@@ -515,10 +507,10 @@ static inline void pudp_idte_local(struct mm_struct *mm,
static inline void pudp_idte_global(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
- if (MACHINE_HAS_TLB_GUEST)
+ if (machine_has_tlb_guest())
__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_GLOBAL);
- else if (MACHINE_HAS_IDTE)
+ else if (cpu_has_idte())
__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
else
/*
@@ -537,7 +529,7 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm,
if (pud_val(old) & _REGION_ENTRY_INVALID)
return old;
atomic_inc(&mm->context.flush_count);
- if (MACHINE_HAS_TLB_LC &&
+ if (cpu_has_tlb_lc() &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
pudp_idte_local(mm, addr, pudp);
else
@@ -609,7 +601,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
/* the mm_has_pgste() check is done in set_pte_at() */
preempt_disable();
pgste = pgste_get_lock(ptep);
- pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+ pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO);
pgste_set_key(ptep, pgste, entry, mm);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
@@ -622,7 +614,7 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
preempt_disable();
pgste = pgste_get_lock(ptep);
- pgste_val(pgste) |= PGSTE_IN_BIT;
+ pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
@@ -667,7 +659,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
}
- pgste_val(pgste) |= bit;
+ pgste = set_pgste_bit(pgste, bit);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
return 0;
@@ -687,7 +679,7 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
if (!(pte_val(spte) & _PAGE_INVALID) &&
!((pte_val(spte) & _PAGE_PROTECT) &&
!(pte_val(pte) & _PAGE_PROTECT))) {
- pgste_val(spgste) |= PGSTE_VSIE_BIT;
+ spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT);
tpgste = pgste_get_lock(tptep);
tpte = __pte((pte_val(spte) & PAGE_MASK) |
(pte_val(pte) & _PAGE_PROTECT));
@@ -745,7 +737,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
pte_clear(mm, addr, ptep);
}
if (reset)
- pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
+ pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
@@ -758,8 +750,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
/* Clear storage key ACC and F, but set R/C */
preempt_disable();
pgste = pgste_get_lock(ptep);
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
+ pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT);
ptev = pte_val(*ptep);
if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
@@ -780,13 +772,13 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
pgste = pgste_get_lock(ptep);
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
- pgste_val(pgste) &= ~PGSTE_UC_BIT;
+ pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT);
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
ptep_ipte_global(mm, addr, ptep, nodat);
- if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+ if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE))
pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
else
pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
@@ -842,11 +834,11 @@ again:
if (!ptep)
goto again;
new = old = pgste_get_lock(ptep);
- pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
- PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT |
+ PGSTE_ACC_BITS | PGSTE_FP_BIT);
keyul = (unsigned long) key;
- pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
- pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48);
+ new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
unsigned long bits, skey;
@@ -857,12 +849,12 @@ again:
/* Set storage key ACC and FP */
page_set_storage_key(paddr, skey, !nq);
/* Merge host changed & referenced into pgste */
- pgste_val(new) |= bits << 52;
+ new = set_pgste_bit(new, bits << 52);
}
/* changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) &
(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
- pgste_val(new) |= PGSTE_UC_BIT;
+ new = set_pgste_bit(new, PGSTE_UC_BIT);
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);
@@ -950,19 +942,19 @@ again:
goto again;
new = old = pgste_get_lock(ptep);
/* Reset guest reference bit only */
- pgste_val(new) &= ~PGSTE_GR_BIT;
+ new = clear_pgste_bit(new, PGSTE_GR_BIT);
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
paddr = pte_val(*ptep) & PAGE_MASK;
cc = page_reset_referenced(paddr);
/* Merge real referenced bit into host-set */
- pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
+ new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT);
}
/* Reflect guest's logical view, not physical */
cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
/* Changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
- pgste_val(new) |= PGSTE_UC_BIT;
+ new = set_pgste_bit(new, PGSTE_UC_BIT);
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);
@@ -1126,7 +1118,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
if (res)
pgstev |= _PGSTE_GPS_ZERO;
- pgste_val(pgste) = pgstev;
+ pgste = __pgste(pgstev);
pgste_set_unlock(ptep, pgste);
pte_unmap_unlock(ptep, ptl);
return res;
@@ -1159,8 +1151,8 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
return -EFAULT;
new = pgste_get_lock(ptep);
- pgste_val(new) &= ~bits;
- pgste_val(new) |= value & bits;
+ new = clear_pgste_bit(new, bits);
+ new = set_pgste_bit(new, value & bits);
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 7c684c54e721..f48ef361bc83 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -4,6 +4,7 @@
*/
#include <linux/memory_hotplug.h>
+#include <linux/cpufeature.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
@@ -63,13 +64,12 @@ void *vmem_crst_alloc(unsigned long val)
pte_t __ref *vmem_pte_alloc(void)
{
- unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
pte_t *pte;
if (slab_is_available())
- pte = (pte_t *) page_table_alloc(&init_mm);
+ pte = (pte_t *)page_table_alloc(&init_mm);
else
- pte = (pte_t *) memblock_alloc(size, size);
+ pte = (pte_t *)memblock_alloc(PAGE_SIZE, PAGE_SIZE);
if (!pte)
return NULL;
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
@@ -249,12 +249,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
} else if (pmd_none(*pmd)) {
if (IS_ALIGNED(addr, PMD_SIZE) &&
IS_ALIGNED(next, PMD_SIZE) &&
- MACHINE_HAS_EDAT1 && direct &&
+ cpu_has_edat1() && direct &&
!debug_pagealloc_enabled()) {
set_pmd(pmd, __pmd(__pa(addr) | prot));
pages++;
continue;
- } else if (!direct && MACHINE_HAS_EDAT1) {
+ } else if (!direct && cpu_has_edat1()) {
void *new_page;
/*
@@ -335,7 +335,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
} else if (pud_none(*pud)) {
if (IS_ALIGNED(addr, PUD_SIZE) &&
IS_ALIGNED(next, PUD_SIZE) &&
- MACHINE_HAS_EDAT2 && direct &&
+ cpu_has_edat2() && direct &&
!debug_pagealloc_enabled()) {
set_pud(pud, __pud(__pa(addr) | prot));
pages++;
@@ -659,10 +659,10 @@ void __init vmem_map_init(void)
* prefix page is used to return to the previous context with
* an LPSWE instruction and therefore must be executable.
*/
- if (!static_key_enabled(&cpu_has_bear))
+ if (!cpu_has_bear())
set_memory_x(0, 1);
if (debug_pagealloc_enabled())
- __set_memory_4k(__va(0), __va(0) + ident_map_size);
+ __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
deleted file mode 100644
index 7822ea92e54a..000000000000
--- a/arch/s390/net/bpf_jit.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * BPF Jit compiler defines
- *
- * Copyright IBM Corp. 2012,2015
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- * Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#ifndef __ARCH_S390_NET_BPF_JIT_H
-#define __ARCH_S390_NET_BPF_JIT_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/filter.h>
-#include <linux/types.h>
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * Stackframe layout (packed stack):
- *
- * ^ high
- * +---------------+ |
- * | old backchain | |
- * +---------------+ |
- * | r15 - r6 | |
- * +---------------+ |
- * | 4 byte align | |
- * | tail_call_cnt | |
- * BFP -> +===============+ |
- * | | |
- * | BPF stack | |
- * | | |
- * R15+160 -> +---------------+ |
- * | new backchain | |
- * R15+152 -> +---------------+ |
- * | + 152 byte SA | |
- * R15 -> +---------------+ + low
- *
- * We get 160 bytes stack space from calling function, but only use
- * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
- *
- * The stack size used by the BPF program ("BPF stack" above) is passed
- * via "aux->stack_depth".
- */
-#define STK_SPACE_ADD (160)
-#define STK_160_UNUSED (160 - 12 * 8)
-#define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED)
-
-#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
-#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
-
-#endif /* __ARCH_S390_NET_BPF_JIT_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 9d440a0b729e..bb17efe29d65 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -32,7 +32,6 @@
#include <asm/set_memory.h>
#include <asm/text-patching.h>
#include <asm/unwind.h>
-#include "bpf_jit.h"
struct bpf_jit {
u32 seen; /* Flags to remember seen eBPF instructions */
@@ -48,14 +47,13 @@ struct bpf_jit {
int lit64; /* Current position in 64-bit literal pool */
int base_ip; /* Base address for literal pool */
int exit_ip; /* Address of exit */
- int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
- int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
int prologue_plt; /* Start of prologue hotpatch PLT */
int kern_arena; /* Pool offset of kernel arena address */
u64 user_arena; /* User arena address */
+ u32 frame_off; /* Offset of struct bpf_prog from %r15 */
};
#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
@@ -127,6 +125,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
jit->seen_regs |= (1 << r1);
}
+static s32 off_to_pcrel(struct bpf_jit *jit, u32 off)
+{
+ return off - jit->prg;
+}
+
+static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr)
+{
+ if (jit->prg_buf)
+ return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg);
+ return 0;
+}
+
#define REG_SET_SEEN(b1) \
({ \
reg_set_seen(jit, b1); \
@@ -201,7 +211,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT4_PCREL_RIC(op, mask, target) \
({ \
- int __rel = ((target) - jit->prg) / 2; \
+ int __rel = off_to_pcrel(jit, target) / 2; \
_EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \
})
@@ -239,7 +249,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
(op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
@@ -248,7 +258,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
(rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
@@ -257,29 +267,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
- int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \
+ int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
+static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel)
+{
+ u32 pc32dbl = (s32)(pcrel / 2);
+
+ _EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff);
+}
+
+static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel);
+ REG_SET_SEEN(b);
+}
+
#define EMIT6_PCREL_RILB(op, b, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
- REG_SET_SEEN(b); \
-})
+ emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target))
-#define EMIT6_PCREL_RIL(op, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | rel >> 16, rel & 0xffff); \
-})
+#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr) \
+ emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr))
+
+static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | mask << 20, pcrel);
+}
#define EMIT6_PCREL_RILC(op, mask, target) \
-({ \
- EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \
-})
+ emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target))
+
+#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr) \
+ emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr))
#define _EMIT6_IMM(op, imm) \
({ \
@@ -404,11 +426,25 @@ static void jit_fill_hole(void *area, unsigned int size)
}
/*
+ * Caller-allocated part of the frame.
+ * Thanks to packed stack, its otherwise unused initial part can be used for
+ * the BPF stack and for the next frame.
+ */
+struct prog_frame {
+ u64 unused[8];
+ /* BPF stack starts here and grows towards 0 */
+ u32 tail_call_cnt;
+ u32 pad;
+ u64 r6[10]; /* r6 - r15 */
+ u64 backchain;
+} __packed;
+
+/*
* Save registers from "rs" (register start) to "re" (register end) on stack
*/
static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
- u32 off = STK_OFF_R6 + (rs - 6) * 8;
+ u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8;
if (rs == re)
/* stg %rs,off(%r15) */
@@ -421,12 +457,9 @@ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
/*
* Restore registers from "rs" (register start) to "re" (register end) on stack
*/
-static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
+static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
- u32 off = STK_OFF_R6 + (rs - 6) * 8;
-
- if (jit->seen & SEEN_STACK)
- off += STK_OFF + stack_depth;
+ u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8;
if (rs == re)
/* lg %rs,off(%r15) */
@@ -470,8 +503,7 @@ static int get_end(u16 seen_regs, int start)
* Save and restore clobbered registers (6-15) on stack.
* We save/restore registers in chunks with gap >= 2 registers.
*/
-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
- u16 extra_regs)
+static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs)
{
u16 seen_regs = jit->seen_regs | extra_regs;
const int last = 15, save_restore_size = 6;
@@ -494,7 +526,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
if (op == REGS_SAVE)
save_regs(jit, rs, re);
else
- restore_regs(jit, rs, re, stack_depth);
+ restore_regs(jit, rs, re);
re++;
} while (re <= last);
}
@@ -503,7 +535,7 @@ static void bpf_skip(struct bpf_jit *jit, int size)
{
if (size >= 6 && !is_valid_rel(size)) {
/* brcl 0xf,size */
- EMIT6_PCREL_RIL(0xc0f4000000, size);
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, size);
size -= 6;
} else if (size >= 4 && is_valid_rel(size)) {
/* brc 0xf,size */
@@ -544,18 +576,27 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
memcpy(plt, &bpf_plt, sizeof(*plt));
plt->ret = ret;
- plt->target = target;
+ /*
+ * (target == NULL) implies that the branch to this PLT entry was
+ * patched and became a no-op. However, some CPU could have jumped
+ * to this PLT entry before patching and may be still executing it.
+ *
+ * Since the intention in this case is to make the PLT entry a no-op,
+ * make the target point to the return label instead of NULL.
+ */
+ plt->target = target ?: ret;
}
/*
* Emit function prologue
*
* Save registers and create stack frame if necessary.
- * See stack frame layout description in "bpf_jit.h"!
+ * Stack frame layout is described by struct prog_frame.
*/
-static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
- u32 stack_depth)
+static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
{
+ BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD);
+
/* No-op for hotpatching */
/* brcl 0,prologue_plt */
EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
@@ -563,8 +604,9 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
if (!bpf_is_subprog(fp)) {
/* Initialize the tail call counter in the main program. */
- /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
- _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
+ /* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */
+ _EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt),
+ 0xf000 | offsetof(struct prog_frame, tail_call_cnt));
} else {
/*
* Skip the tail call counter initialization in subprograms.
@@ -587,7 +629,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
jit->seen_regs |= NVREGS;
} else {
/* Save registers */
- save_restore_regs(jit, REGS_SAVE, stack_depth,
+ save_restore_regs(jit, REGS_SAVE,
fp->aux->exception_boundary ? NVREGS : 0);
}
/* Setup literal pool */
@@ -605,43 +647,32 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
}
/* Setup stack and backchain */
if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* lgr %w1,%r15 (backchain) */
- EMIT4(0xb9040000, REG_W1, REG_15);
- /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
- EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
- /* aghi %r15,-STK_OFF */
- EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* stg %w1,152(%r15) (backchain) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
- REG_15, 152);
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
+ /* la %bfp,unused_end(%r15) (BPF frame pointer) */
+ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15,
+ offsetofend(struct prog_frame, unused));
+ /* aghi %r15,-frame_off */
+ EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
+ /* stg %w1,backchain(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+ REG_15,
+ offsetof(struct prog_frame, backchain));
}
}
/*
- * Emit an expoline for a jump that follows
- */
-static void emit_expoline(struct bpf_jit *jit)
-{
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
-}
-
-/*
- * Emit __s390_indirect_jump_r1 thunk if necessary
+ * Jump using a register either directly or via an expoline thunk
*/
-static void emit_r1_thunk(struct bpf_jit *jit)
-{
- if (nospec_uses_trampoline()) {
- jit->r1_thunk_ip = jit->prg;
- emit_expoline(jit);
- /* br %r1 */
- _EMIT2(0x07f1);
- }
-}
+#define EMIT_JUMP_REG(reg) do { \
+ if (nospec_uses_trampoline()) \
+ /* brcl 0xf,__s390_indirect_jump_rN */ \
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f, \
+ __s390_indirect_jump_r ## reg); \
+ else \
+ /* br %rN */ \
+ _EMIT2(0x07f0 | reg); \
+} while (0)
/*
* Call r1 either directly or via __s390_indirect_jump_r1 thunk
@@ -650,7 +681,8 @@ static void call_r1(struct bpf_jit *jit)
{
if (nospec_uses_trampoline())
/* brasl %r14,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+ __s390_indirect_jump_r1);
else
/* basr %r14,%r1 */
EMIT2(0x0d00, REG_14, REG_1);
@@ -659,23 +691,14 @@ static void call_r1(struct bpf_jit *jit)
/*
* Function epilogue
*/
-static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
+static void bpf_jit_epilogue(struct bpf_jit *jit)
{
jit->exit_ip = jit->prg;
/* Load exit code: lgr %r2,%b0 */
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
- save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
- if (nospec_uses_trampoline()) {
- jit->r14_thunk_ip = jit->prg;
- /* Generate __s390_indirect_jump_r14 thunk */
- emit_expoline(jit);
- }
- /* br %r14 */
- _EMIT2(0x07fe);
-
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- emit_r1_thunk(jit);
+ save_restore_regs(jit, REGS_RESTORE, 0);
+ EMIT_JUMP_REG(14);
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
@@ -856,7 +879,7 @@ static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
* stack space for the large switch statement.
*/
static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
- int i, bool extra_pass, u32 stack_depth)
+ int i, bool extra_pass)
{
struct bpf_insn *insn = &fp->insnsi[i];
s32 branch_oc_off = insn->off;
@@ -1777,9 +1800,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* Note 2: We assume that the verifier does not let us call the
* main program, which clears the tail call counter on entry.
*/
- /* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
- _EMIT6(0xd203f000 | STK_OFF_TCCNT,
- 0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
+ /* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
+ _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
+ 0xf000 | (jit->frame_off +
+ offsetof(struct prog_frame, tail_call_cnt)));
/* Sign-extend the kfunc arguments. */
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
@@ -1830,10 +1854,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* goto out;
*/
- if (jit->seen & SEEN_STACK)
- off = STK_OFF_TCCNT + STK_OFF + stack_depth;
- else
- off = STK_OFF_TCCNT;
+ off = jit->frame_off +
+ offsetof(struct prog_frame, tail_call_cnt);
/* lhi %w0,1 */
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
@@ -1863,7 +1885,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/*
* Restore registers before calling function
*/
- save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
+ save_restore_regs(jit, REGS_RESTORE, 0);
/*
* goto *(prog->bpf_func + tail_call_start);
@@ -1877,7 +1899,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* aghi %r1,tail_call_start */
EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
/* brcl 0xf,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+ __s390_indirect_jump_r1);
} else {
/* bc 0xf,tail_call_start(%r1) */
_EMIT4(0x47f01000 + jit->tail_call_start);
@@ -2155,7 +2178,7 @@ static int bpf_set_addr(struct bpf_jit *jit, int i)
* Compile eBPF program into s390x code
*/
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
- bool extra_pass, u32 stack_depth)
+ bool extra_pass)
{
int i, insn_count, lit32_size, lit64_size;
u64 kern_arena;
@@ -2164,24 +2187,30 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
jit->lit64 = jit->lit64_start;
jit->prg = 0;
jit->excnt = 0;
+ if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
+ jit->frame_off = sizeof(struct prog_frame) -
+ offsetofend(struct prog_frame, unused) +
+ round_up(fp->aux->stack_depth, 8);
+ else
+ jit->frame_off = 0;
kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
if (kern_arena)
jit->kern_arena = _EMIT_CONST_U64(kern_arena);
jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
- bpf_jit_prologue(jit, fp, stack_depth);
+ bpf_jit_prologue(jit, fp);
if (bpf_set_addr(jit, 0) < 0)
return -1;
for (i = 0; i < fp->len; i += insn_count) {
- insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
+ insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
if (insn_count < 0)
return -1;
/* Next instruction address */
if (bpf_set_addr(jit, i + insn_count) < 0)
return -1;
}
- bpf_jit_epilogue(jit, stack_depth);
+ bpf_jit_epilogue(jit);
lit32_size = jit->lit32 - jit->lit32_start;
lit64_size = jit->lit64 - jit->lit64_start;
@@ -2257,7 +2286,6 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
*/
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
- u32 stack_depth = round_up(fp->aux->stack_depth, 8);
struct bpf_prog *tmp, *orig_fp = fp;
struct bpf_binary_header *header;
struct s390_jit_data *jit_data;
@@ -2310,7 +2338,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
* - 3: Calculate program size and addrs array
*/
for (pass = 1; pass <= 3; pass++) {
- if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
+ if (bpf_jit_prog(&jit, fp, extra_pass)) {
fp = orig_fp;
goto free_addrs;
}
@@ -2324,7 +2352,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
goto free_addrs;
}
skip_init_ctx:
- if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
+ if (bpf_jit_prog(&jit, fp, extra_pass)) {
bpf_jit_binary_free(header);
fp = orig_fp;
goto free_addrs;
@@ -2585,9 +2613,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (nr_stack_args > MAX_NR_STACK_ARGS)
return -ENOTSUPP;
- /* Return to %r14, since func_addr and %r0 are not available. */
- if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) ||
- (flags & BPF_TRAMP_F_INDIRECT))
+ /* Return to %r14 in the struct_ops case. */
+ if (flags & BPF_TRAMP_F_INDIRECT)
flags |= BPF_TRAMP_F_SKIP_FRAME;
/*
@@ -2645,9 +2672,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
/* stg %r1,backchain_off(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
tjit->backchain_off);
- /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+ /* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */
_EMIT6(0xd203f000 | tjit->tccnt_off,
- 0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
+ 0xf000 | (tjit->stack_size +
+ offsetof(struct prog_frame, tail_call_cnt)));
/* stmg %r2,%rN,fwd_reg_args_off(%r15) */
if (nr_reg_args)
EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
@@ -2784,8 +2812,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
(nr_stack_args * sizeof(u64) - 1) << 16 |
tjit->stack_args_off,
0xf000 | tjit->orig_stack_args_off);
- /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
- _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
+ /* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
+ _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
+ 0xf000 | tjit->tccnt_off);
/* lgr %r1,%r8 */
EMIT4(0xb9040000, REG_1, REG_8);
/* %r1() */
@@ -2842,22 +2871,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
tjit->retval_off);
- /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
- _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+ /* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */
+ _EMIT6(0xd203f000 | (tjit->stack_size +
+ offsetof(struct prog_frame, tail_call_cnt)),
0xf000 | tjit->tccnt_off);
/* aghi %r15,stack_size */
EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
- /* Emit an expoline for the following indirect jump. */
- if (nospec_uses_trampoline())
- emit_expoline(jit);
if (flags & BPF_TRAMP_F_SKIP_FRAME)
- /* br %r14 */
- _EMIT2(0x07fe);
+ EMIT_JUMP_REG(14);
else
- /* br %r1 */
- _EMIT2(0x07f1);
-
- emit_r1_thunk(jit);
+ EMIT_JUMP_REG(1);
return 0;
}
@@ -2919,10 +2942,16 @@ bool bpf_jit_supports_arena(void)
bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
- /*
- * Currently the verifier uses this function only to check which
- * atomic stores to arena are supported, and they all are.
- */
+ if (!in_arena)
+ return true;
+ switch (insn->code) {
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
+ if (bpf_atomic_is_load_store(insn))
+ return false;
+ }
return true;
}
diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c
index 79211bec0fc8..03089ef479b2 100644
--- a/arch/s390/net/pnet.c
+++ b/arch/s390/net/pnet.c
@@ -6,6 +6,7 @@
*/
#include <linux/device.h>
+#include <linux/export.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/types.h>
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index df73c5182990..1810e0944a4e 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -5,6 +5,6 @@
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
- pci_bus.o pci_kvm_hook.o pci_report.o
+ pci_bus.o pci_kvm_hook.o pci_report.o pci_fixup.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
obj-$(CONFIG_SYSFS) += pci_sysfs.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 88f72745fa59..cd6676c2d602 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -31,6 +31,7 @@
#include <linux/lockdep.h>
#include <linux/list_sort.h>
+#include <asm/machine.h>
#include <asm/isc.h>
#include <asm/airq.h>
#include <asm/facility.h>
@@ -44,6 +45,7 @@
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
+static DEFINE_MUTEX(zpci_add_remove_lock);
static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);
@@ -69,6 +71,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb);
struct airq_iv *zpci_aif_sbv;
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+void zpci_zdev_put(struct zpci_dev *zdev)
+{
+ if (!zdev)
+ return;
+ mutex_lock(&zpci_add_remove_lock);
+ kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock);
+ mutex_unlock(&zpci_add_remove_lock);
+}
+
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -124,14 +135,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
struct zpci_fib fib = {0};
u8 cc;
- WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
/* Work around off by one in ISM virt device */
if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
fib.pal = limit + (1 << 12);
else
fib.pal = limit;
- fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+ fib.iota = iota;
fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, status);
if (cc)
@@ -255,7 +265,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
}
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
- unsigned long prot)
+ pgprot_t prot)
{
/*
* When PCI MIO instructions are unavailable the "physical" address
@@ -265,7 +275,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
if (!static_branch_unlikely(&have_mio))
return (void __iomem *)phys_addr;
- return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
+ return generic_ioremap_prot(phys_addr, size, prot);
}
EXPORT_SYMBOL(ioremap_prot);
@@ -690,6 +700,23 @@ int zpci_enable_device(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(zpci_enable_device);
+int zpci_reenable_device(struct zpci_dev *zdev)
+{
+ u8 status;
+ int rc;
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ return rc;
+
+ rc = zpci_iommu_register_ioat(zdev, &status);
+ if (rc)
+ zpci_disable_device(zdev);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_reenable_device);
+
int zpci_disable_device(struct zpci_dev *zdev)
{
u32 fh = zdev->fh;
@@ -739,7 +766,6 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
*/
int zpci_hot_reset_device(struct zpci_dev *zdev)
{
- u8 status;
int rc;
lockdep_assert_held(&zdev->state_lock);
@@ -758,19 +784,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
return rc;
}
- rc = zpci_enable_device(zdev);
- if (rc)
- return rc;
+ rc = zpci_reenable_device(zdev);
- if (zdev->dma_table)
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- if (rc) {
- zpci_disable_device(zdev);
- return rc;
- }
-
- return 0;
+ return rc;
}
/**
@@ -831,6 +847,7 @@ int zpci_add_device(struct zpci_dev *zdev)
{
int rc;
+ mutex_lock(&zpci_add_remove_lock);
zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state);
rc = zpci_init_iommu(zdev);
if (rc)
@@ -844,12 +861,14 @@ int zpci_add_device(struct zpci_dev *zdev)
spin_lock(&zpci_list_lock);
list_add_tail(&zdev->entry, &zpci_list);
spin_unlock(&zpci_list_lock);
+ mutex_unlock(&zpci_add_remove_lock);
return 0;
error_destroy_iommu:
zpci_destroy_iommu(zdev);
error:
zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc);
+ mutex_unlock(&zpci_add_remove_lock);
return rc;
}
@@ -919,21 +938,20 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
* @zdev: the zpci_dev that was reserved
*
* Handle the case that a given zPCI function was reserved by another system.
- * After a call to this function the zpci_dev can not be found via
- * get_zdev_by_fid() anymore but may still be accessible via existing
- * references though it will not be functional anymore.
*/
void zpci_device_reserved(struct zpci_dev *zdev)
{
- /*
- * Remove device from zpci_list as it is going away. This also
- * makes sure we ignore subsequent zPCI events for this device.
- */
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
+ lockdep_assert_held(&zdev->state_lock);
+ /* We may declare the device reserved multiple times */
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ return;
zdev->state = ZPCI_FN_STATE_RESERVED;
zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+ /*
+ * The underlying device is gone. Allow the zdev to be freed
+ * as soon as all other references are gone by accounting for
+ * the removal as a dropped reference.
+ */
zpci_zdev_put(zdev);
}
@@ -941,13 +959,14 @@ void zpci_release_device(struct kref *kref)
{
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
+ lockdep_assert_held(&zpci_add_remove_lock);
WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED);
-
- if (zdev->zbus->bus)
- zpci_bus_remove_device(zdev, false);
-
- if (zdev_enabled(zdev))
- zpci_disable_device(zdev);
+ /*
+ * We already hold zpci_list_lock thanks to kref_put_lock().
+ * This makes sure no new reference can be taken from the list.
+ */
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
if (zdev->has_hp_slot)
zpci_exit_slot(zdev);
@@ -1073,7 +1092,7 @@ char * __init pcibios_setup(char *str)
return NULL;
}
if (!strcmp(str, "nomio")) {
- get_lowcore()->machine_flags &= ~MACHINE_FLAG_PCI_MIO;
+ clear_machine_feature(MFEATURE_PCI_MIO);
return NULL;
}
if (!strcmp(str, "force_floating")) {
@@ -1148,7 +1167,7 @@ static int __init pci_base_init(void)
return 0;
}
- if (MACHINE_HAS_PCI_MIO) {
+ if (test_machine_feature(MFEATURE_PCI_MIO)) {
static_branch_enable(&have_mio);
system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT);
}
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index d5ace00d10f0..45a1c36c5a54 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -13,12 +13,12 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/err.h>
-#include <linux/export.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
+#include <linux/dma-direct.h>
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
@@ -171,7 +171,6 @@ void zpci_bus_scan_busses(void)
static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
{
return !s390_pci_no_rid && zdev->rid_available &&
- zpci_is_device_configured(zdev) &&
!zdev->vfn;
}
@@ -284,10 +283,32 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
return zbus;
}
+static void pci_dma_range_setup(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ u64 aligned_end, size;
+ dma_addr_t dma_start;
+ int ret;
+
+ dma_start = PAGE_ALIGN(zdev->start_dma);
+ aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1);
+ if (aligned_end >= dma_start)
+ size = aligned_end - dma_start;
+ else
+ size = 0;
+ WARN_ON_ONCE(size == 0);
+
+ ret = dma_direct_set_offset(&pdev->dev, 0, dma_start, size);
+ if (ret)
+ pr_err("Failed to allocate DMA range map for %s\n", pci_name(pdev));
+}
+
void pcibios_bus_add_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
+ pci_dma_range_setup(pdev);
+
/*
* With pdev->no_vf_scan the common PCI probing code does not
* perform PF/VF linking.
@@ -332,6 +353,20 @@ error:
return rc;
}
+static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ struct pci_dev *pdev;
+
+ if (!zdev->vfn)
+ return false;
+
+ pdev = zpci_iov_find_parent_pf(zbus, zdev);
+ if (!pdev)
+ return true;
+ pci_dev_put(pdev);
+ return false;
+}
+
int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
{
bool topo_is_tid = zdev->tid_avail;
@@ -346,6 +381,15 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
topo = topo_is_tid ? zdev->tid : zdev->pchid;
zbus = zpci_bus_get(topo, topo_is_tid);
+ /*
+ * An isolated VF gets its own domain/bus even if there exists
+ * a matching domain/bus already
+ */
+ if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) {
+ zpci_bus_put(zbus);
+ zbus = NULL;
+ }
+
if (!zbus) {
zbus = zpci_bus_alloc(topo, topo_is_tid);
if (!zbus)
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index e86a9419d233..ae3d7a9159bd 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -21,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev);
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
void zpci_release_device(struct kref *kref);
-static inline void zpci_zdev_put(struct zpci_dev *zdev)
-{
- if (zdev)
- kref_put(&zdev->kref, zpci_release_device);
-}
+
+void zpci_zdev_put(struct zpci_dev *zdev);
static inline void zpci_zdev_get(struct zpci_dev *zdev)
{
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 14bf7e8d06b7..241f7251c873 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -56,7 +56,7 @@ static inline int clp_get_ilp(unsigned long *ilp)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -79,7 +79,7 @@ static __always_inline int clp_req(void *data, unsigned int lps)
u64 ignored;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -112,6 +112,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->version = response->version;
zdev->maxstbl = response->maxstbl;
zdev->dtsm = response->dtsm;
+ zdev->rtr_avail = response->rtr;
switch (response->version) {
case 1:
@@ -427,6 +428,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
return;
}
zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state);
+ if (IS_ERR(zdev))
+ return;
list_add_tail(&zdev->entry, scan_list);
}
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 7bd7721c1239..d930416d4c90 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
case PCI_ERS_RESULT_NEED_RESET:
+ case PCI_ERS_RESULT_NONE:
return false;
default:
return true;
@@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver)
return false;
if (!driver->err_handler->error_detected)
return false;
- if (!driver->err_handler->slot_reset)
- return false;
- if (!driver->err_handler->resume)
- return false;
return true;
}
@@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
struct zpci_dev *zdev = to_zpci(pdev);
int rc;
+ /* The underlying device may have been disabled by the event */
+ if (!zdev_enabled(zdev))
+ return PCI_ERS_RESULT_NEED_RESET;
+
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
rc = zpci_reset_load_store_blocked(zdev);
if (rc) {
@@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
return PCI_ERS_RESULT_NEED_RESET;
}
- if (driver->err_handler->mmio_enabled) {
+ if (driver->err_handler->mmio_enabled)
ers_res = driver->err_handler->mmio_enabled(pdev);
- if (ers_result_indicates_abort(ers_res)) {
- pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
- pci_name(pdev));
- return ers_res;
- } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
- pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
- return ers_res;
- }
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+ pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
}
pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
@@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
return ers_res;
}
pdev->error_state = pci_channel_io_normal;
- ers_res = driver->err_handler->slot_reset(pdev);
+
+ if (driver->err_handler->slot_reset)
+ ers_res = driver->err_handler->slot_reset(pdev);
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
return ers_res;
@@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
goto out_unlock;
}
- if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
if (ers_result_indicates_abort(ers_res)) {
status_str = "failed (abort on MMIO enable)";
@@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
ers_res = zpci_event_do_reset(pdev, driver);
+ /*
+ * ers_res can be PCI_ERS_RESULT_NONE either because the driver
+ * decided to return it, indicating that it abstains from voting
+ * on how to recover, or because it didn't implement the callback.
+ * Both cases assume, that if there is nothing else causing a
+ * disconnect, we recovered successfully.
+ */
+ if (ers_res == PCI_ERS_RESULT_NONE)
+ ers_res = PCI_ERS_RESULT_RECOVERED;
+
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
@@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
+ u32 fh = 0;
+ int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
if (zdev) {
mutex_lock(&zdev->state_lock);
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (rc)
+ goto no_pdev;
+ if (!fh || ccdf->fh != fh) {
+ /* Ignore events with stale handles */
+ zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+ ccdf->fid, fh, ccdf->fh);
+ goto no_pdev;
+ }
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
@@ -335,6 +364,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
zdev->state = ZPCI_FN_STATE_STANDBY;
}
+static void zpci_event_reappear(struct zpci_dev *zdev)
+{
+ lockdep_assert_held(&zdev->state_lock);
+ /*
+ * The zdev is in the reserved state. This means that it was presumed to
+ * go away but there are still undropped references. Now, the platform
+ * announced its availability again. Bring back the lingering zdev
+ * to standby. This is safe because we hold a temporary reference
+ * now so that it won't go away. Account for the re-appearance of the
+ * underlying device by incrementing the reference count.
+ */
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+ zpci_zdev_get(zdev);
+ zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+}
+
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
@@ -358,8 +403,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
}
} else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
/* the configuration request may be stale */
- if (zdev->state != ZPCI_FN_STATE_STANDBY)
+ else if (zdev->state != ZPCI_FN_STATE_STANDBY)
break;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
}
@@ -375,6 +422,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
}
} else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
zpci_update_fh(zdev, ccdf->fh);
}
break;
diff --git a/arch/s390/pci/pci_fixup.c b/arch/s390/pci/pci_fixup.c
new file mode 100644
index 000000000000..35688b645098
--- /dev/null
+++ b/arch/s390/pci/pci_fixup.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Exceptions for specific devices,
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ */
+#include <linux/pci.h>
+
+static void zpci_ism_bar_no_mmap(struct pci_dev *pdev)
+{
+ /*
+ * ISM's BAR is special. Drivers written for ISM know
+ * how to handle this but others need to be aware of their
+ * special nature e.g. to prevent attempts to mmap() it.
+ */
+ pdev->non_mappable_bars = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM,
+ PCI_DEVICE_ID_IBM_ISM,
+ zpci_ism_bar_no_mmap);
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index f5a75ea7629a..eb978c8012be 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -160,7 +160,7 @@ static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
u64 __data;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rre,0xb9d20000,%[data],%[req_off]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -229,7 +229,7 @@ static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status)
u64 __data;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rre,0xb9d60000,%[data],%[ioaddr_len]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -267,7 +267,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rre,0xb9d00000,%[data],%[req_off]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -321,7 +321,7 @@ static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rre,0xb9d40000,%[data],%[ioaddr_len]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -356,7 +356,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
"0: lhi %[exc],0\n"
"1:\n"
@@ -410,7 +410,7 @@ static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status)
int cc, exception;
exception = 1;
- asm volatile (
+ asm_inline volatile (
" .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n"
"0: lhi %[exc],0\n"
"1:\n"
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
index ead062bf2b41..191e56a623f6 100644
--- a/arch/s390/pci/pci_iov.c
+++ b/arch/s390/pci/pci_iov.c
@@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in
return 0;
}
-int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+/**
+ * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function
+ * @zbus: The bus that the PCI function is on, or would be added on
+ * @zdev: The PCI function
+ *
+ * Finds the parent PF, if it exists and is configured, of the given PCI function
+ * and increments its refcount. Th PF is searched for on the provided bus so the
+ * caller has to ensure that this is the correct bus to search. This function may
+ * be used before adding the PCI function to a zbus.
+ *
+ * Return: Pointer to the struct pci_dev of the parent PF or NULL if it not
+ * found. If the function is not a VF or has no RequesterID information,
+ * NULL is returned as well.
+ */
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
- int i, cand_devfn;
- struct zpci_dev *zdev;
+ int i, vfid, devfn, cand_devfn;
struct pci_dev *pdev;
- int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
- int rc = 0;
if (!zbus->multifunction)
- return 0;
-
- /* If the parent PF for the given VF is also configured in the
+ return NULL;
+ /* Non-VFs and VFs without RID available don't have a parent */
+ if (!zdev->vfn || !zdev->rid_available)
+ return NULL;
+ /* Linux vfid starts at 0 vfn at 1 */
+ vfid = zdev->vfn - 1;
+ devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
+ /*
+ * If the parent PF for the given VF is also configured in the
* instance, it must be on the same zbus.
* We can then identify the parent PF by checking what
* devfn the VF would have if it belonged to that PF using the PF's
@@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn
if (!pdev)
continue;
cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
- if (cand_devfn == virtfn->devfn) {
- rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
- /* balance pci_get_slot() */
- pci_dev_put(pdev);
- break;
- }
+ if (cand_devfn == devfn)
+ return pdev;
/* balance pci_get_slot() */
pci_dev_put(pdev);
}
}
+ return NULL;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ struct zpci_dev *zdev = to_zpci(virtfn);
+ struct pci_dev *pdev_pf;
+ int rc = 0;
+
+ pdev_pf = zpci_iov_find_parent_pf(zbus, zdev);
+ if (pdev_pf) {
+ /* Linux' vfids start at 0 while zdev->vfn starts at 1 */
+ rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1);
+ pci_dev_put(pdev_pf);
+ }
return rc;
}
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
index e3fa4e77fc86..d2c2793eb0f3 100644
--- a/arch/s390/pci/pci_iov.h
+++ b/arch/s390/pci/pci_iov.h
@@ -19,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev);
int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev);
+
#else /* CONFIG_PCI_IOV */
static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
@@ -28,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v
{
return 0;
}
+
+static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ return NULL;
+}
#endif /* CONFIG_PCI_IOV */
#endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c
index ff34baf50a3e..df5b25dbe9ca 100644
--- a/arch/s390/pci/pci_kvm_hook.c
+++ b/arch/s390/pci/pci_kvm_hook.c
@@ -5,7 +5,9 @@
* Copyright (C) IBM Corp. 2022. All rights reserved.
* Author(s): Pierre Morel <pmorel@linux.ibm.com>
*/
+
#include <linux/kvm_host.h>
+#include <linux/export.h>
struct zpci_kvm_hook zpci_kvm_hook;
EXPORT_SYMBOL_GPL(zpci_kvm_hook);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 46f99dc164ad..51e7a28af899 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -32,9 +32,11 @@ static inline int __pcistb_mio_inuser(
u64 len, u8 *status)
{
int cc, exception;
+ bool sacf_flag;
exception = 1;
- asm volatile (
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
" sacf 256\n"
"0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
"1: lhi %[exc],0\n"
@@ -44,6 +46,7 @@ static inline int __pcistb_mio_inuser(
: CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
: [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
: CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = len >> 24 & 0xff;
return exception ? -ENXIO : CC_TRANSFORM(cc);
}
@@ -54,6 +57,7 @@ static inline int __pcistg_mio_inuser(
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
int cc, exception;
+ bool sacf_flag;
u64 val = 0;
u64 cnt = ulen;
u8 tmp;
@@ -64,7 +68,8 @@ static inline int __pcistg_mio_inuser(
* address space. pcistg then uses the user mappings.
*/
exception = 1;
- asm volatile (
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
" sacf 256\n"
"0: llgc %[tmp],0(%[src])\n"
"4: sllg %[val],%[val],8\n"
@@ -81,6 +86,7 @@ static inline int __pcistg_mio_inuser(
CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
:
: CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = ioaddr_len.odd >> 24 & 0xff;
cc = exception ? -ENXIO : CC_TRANSFORM(cc);
@@ -175,8 +181,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
args.address = mmio_addr;
args.vma = vma;
ret = follow_pfnmap_start(&args);
- if (ret)
- goto out_unlock_mmap;
+ if (ret) {
+ fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
+ ret = follow_pfnmap_start(&args);
+ if (ret)
+ goto out_unlock_mmap;
+ }
io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
@@ -200,6 +210,7 @@ static inline int __pcilg_mio_inuser(
u64 ulen, u8 *status)
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+ bool sacf_flag;
u64 cnt = ulen;
int shift = ulen * 8;
int cc, exception;
@@ -211,7 +222,8 @@ static inline int __pcilg_mio_inuser(
* user address @dst
*/
exception = 1;
- asm volatile (
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
" sacf 256\n"
"0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
"1: lhi %[exc],0\n"
@@ -232,10 +244,10 @@ static inline int __pcilg_mio_inuser(
: [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception),
CC_OUT(cc, cc), [val] "=d" (val),
[dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
- [shift] "+d" (shift)
+ [shift] "+a" (shift)
:
: CC_CLOBBER_LIST("memory"));
-
+ disable_sacf_uaccess(sacf_flag);
cc = exception ? -ENXIO : CC_TRANSFORM(cc);
/* did we write everything to the user space buffer? */
if (!cc && cnt != 0)
@@ -315,14 +327,18 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
goto out_unlock_mmap;
ret = -EACCES;
- if (!(vma->vm_flags & VM_WRITE))
+ if (!(vma->vm_flags & VM_READ))
goto out_unlock_mmap;
args.vma = vma;
args.address = mmio_addr;
ret = follow_pfnmap_start(&args);
- if (ret)
- goto out_unlock_mmap;
+ if (ret) {
+ fixup_user_fault(current->mm, mmio_addr, 0, NULL);
+ ret = follow_pfnmap_start(&args);
+ if (ret)
+ goto out_unlock_mmap;
+ }
io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 2de1ea6c3a8c..0ee0924cfab7 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -52,7 +52,6 @@ static DEVICE_ATTR_RO(mio_enabled);
static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
{
- u8 status;
int ret;
pci_stop_and_remove_bus_device(pdev);
@@ -70,16 +69,8 @@ static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
return ret;
}
- ret = zpci_enable_device(zdev);
- if (ret)
- return ret;
+ ret = zpci_reenable_device(zdev);
- if (zdev->dma_table) {
- ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- if (ret)
- zpci_disable_device(zdev);
- }
return ret;
}
@@ -227,7 +218,7 @@ static struct attribute *zpci_dev_attrs[] = {
const struct attribute_group zpci_attr_group = {
.attrs = zpci_dev_attrs,
- .bin_attrs_new = zpci_bin_attrs,
+ .bin_attrs = zpci_bin_attrs,
};
static struct attribute *pfip_attrs[] = {
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 24eccaa29337..bd39b36e7bd6 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -8,20 +8,22 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
-CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
+CFLAGS_sha256.o := -D__NO_FORTIFY
$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
+KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common
KBUILD_CFLAGS += -fno-stack-protector
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += -D__DISABLE_EXPORTS
KBUILD_CFLAGS += $(CLANG_FLAGS)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS += -D__DISABLE_EXPORTS
# Since we link purgatory with -r unresolved symbols are not checked, so we
# also link a purgatory.chk binary without -r to check for unresolved symbols.
diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
index 030efda05dbe..ecb38102187c 100644
--- a/arch/s390/purgatory/purgatory.c
+++ b/arch/s390/purgatory/purgatory.c
@@ -16,7 +16,7 @@ int verify_sha256_digest(void)
{
struct kexec_sha_region *ptr, *end;
u8 digest[SHA256_DIGEST_SIZE];
- struct sha256_state sctx;
+ struct sha256_ctx sctx;
sha256_init(&sctx);
end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 855f818deb98..d5c68ade71ab 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -54,6 +54,9 @@ static struct facility_def facility_defs[] = {
#ifdef CONFIG_HAVE_MARCH_Z15_FEATURES
61, /* miscellaneous-instruction-extension 3 */
#endif
+#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES
+ 84, /* miscellaneous-instruction-extension 4 */
+#endif
-1 /* END */
}
},
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index a1bc02b29c81..7d76c417f83f 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -201,6 +201,17 @@ static int cmp_long_insn(const void *a, const void *b)
return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name);
}
+static void print_insn_name(const char *name)
+{
+ size_t i, len;
+
+ len = strlen(name);
+ printf("{");
+ for (i = 0; i < len; i++)
+ printf(" \'%c\',", name[i]);
+ printf(" }");
+}
+
static void print_long_insn(struct gen_opcode *desc)
{
struct insn *insn;
@@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc)
insn = &desc->insn[i];
if (insn->name_len < 6)
continue;
- printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name);
+ printf("\t[LONG_INSN_%s] = ", insn->upper);
+ print_insn_name(insn->name);
+ printf(", \\\n");
}
printf("}\n\n");
}
@@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr)
if (insn->type->byte != 0)
opcode += 2;
printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format);
- if (insn->name_len < 6)
- printf(".name = \"%s\" ", insn->name);
- else
- printf(".offset = LONG_INSN_%s ", insn->upper);
- printf("}, \\\n");
+ if (insn->name_len < 6) {
+ printf(".name = ");
+ print_insn_name(insn->name);
+ } else {
+ printf(".offset = LONG_INSN_%s", insn->upper);
+ }
+ printf(" }, \\\n");
}
static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset)