diff options
Diffstat (limited to 'arch/s390')
278 files changed, 10095 insertions, 8310 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0077969170e8..bf680c26a33c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -41,9 +41,6 @@ config AUDIT_ARCH config NO_IOPORT_MAP def_bool y -config PCI_QUIRKS - def_bool n - config ARCH_SUPPORTS_UPROBES def_bool y @@ -52,13 +49,19 @@ config KASAN_SHADOW_OFFSET depends on KASAN default 0x1C000000000000 -config GCC_ASM_FLAG_OUTPUT_BROKEN +config CC_ASM_FLAG_OUTPUT_BROKEN def_bool CC_IS_GCC && GCC_VERSION < 140200 help GCC versions before 14.2.0 may die with an internal compiler error in some configurations if flag output operands are used within inline assemblies. +config CC_HAS_ASM_AOR_FORMAT_FLAGS + def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100) + help + Clang versions before 19.1.0 do not support A, + O, and R inline assembly format flags. + config S390 def_bool y # @@ -67,11 +70,12 @@ config S390 imply IMA_SECURE_AND_OR_TRUSTED_BOOT select ALTERNATE_USER_ADDRESS_SPACE select ARCH_32BIT_USTAT_F_TINODE - select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE @@ -88,6 +92,7 @@ config S390 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PREEMPT_LAZY + select ARCH_HAS_PTDUMP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_DIRECT_MAP @@ -98,6 +103,7 @@ config S390 select ARCH_HAS_UBSAN select ARCH_HAS_VDSO_TIME_DATA select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAVE_TRACE_MMIO_ACCESS select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH select ARCH_INLINE_READ_LOCK_IRQ @@ -127,11 +133,13 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE + select ARCH_MODULE_NEEDS_WEAK_PER_CPU select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP @@ -140,9 +148,11 @@ config S390 select ARCH_WANTS_NO_INSTR select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_KERNEL_PMD_MKWRITE select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP + select ARCH_WANTS_THP_SWAP select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DCACHE_WORD_ACCESS if !KMSAN @@ -155,9 +165,9 @@ config S390 select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY - select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select GENERIC_IOREMAP if PCI select HAVE_ALIGNED_STRUCT_PAGE @@ -169,30 +179,32 @@ config S390 select HAVE_ARCH_KCSAN select HAVE_ARCH_KMSAN select HAVE_ARCH_KFENCE + select HAVE_ARCH_KSTACK_ERASE select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY - select HAVE_ARCH_STACKLEAK select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK select HAVE_ASM_MODVERSIONS + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGS + select HAVE_FTRACE_REGS_HAVING_PT_REGS select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_GUP_FAST select HAVE_FENTRY - select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FTRACE_GRAPH_FUNC select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION - select HAVE_FUNCTION_GRAPH_RETVAL + select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS @@ -231,8 +243,10 @@ config S390 select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_CPU_ACCOUNTING_IDLE + select HOTPLUG_SMT select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MMU_GATHER_MERGE_VMAS select MMU_GATHER_NO_GATHER @@ -240,12 +254,14 @@ config S390 select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI select NEED_PER_CPU_EMBED_FIRST_CHUNK + select NEED_PROC_VMCORE_DEVICE_RAM if PROC_VMCORE select NEED_SG_DMA_LENGTH if PCI select OLD_SIGACTION select OLD_SIGSUSPEND3 select PCI_DOMAINS if PCI select PCI_MSI if PCI select PCI_MSI_ARCH_FALLBACKS if PCI_MSI + select PCI_QUIRKS if PCI select SPARSE_IRQ select SWIOTLB select SYSCTL_EXCEPTION_TRACE @@ -255,6 +271,7 @@ config S390 select USER_STACKTRACE_SUPPORT select VDSO_GETRANDOM select VIRT_CPU_ACCOUNTING + select VMAP_STACK select ZONE_DMA # Note: keep the above list sorted alphabetically @@ -318,6 +335,10 @@ config HAVE_MARCH_Z16_FEATURES def_bool n select HAVE_MARCH_Z15_FEATURES +config HAVE_MARCH_Z17_FEATURES + def_bool n + select HAVE_MARCH_Z16_FEATURES + choice prompt "Processor type" default MARCH_Z196 @@ -383,6 +404,14 @@ config MARCH_Z16 Select this to enable optimizations for IBM z16 (3931 and 3932 series). +config MARCH_Z17 + bool "IBM z17" + select HAVE_MARCH_Z17_FEATURES + depends on $(cc-option,-march=z17) + help + Select this to enable optimizations for IBM z17 (9175 and + 9176 series). + endchoice config MARCH_Z10_TUNE @@ -406,6 +435,9 @@ config MARCH_Z15_TUNE config MARCH_Z16_TUNE def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT +config MARCH_Z17_TUNE + def_bool TUNE_Z17 || MARCH_Z17 && TUNE_DEFAULT + choice prompt "Tune code generation" default TUNE_DEFAULT @@ -450,6 +482,10 @@ config TUNE_Z16 bool "IBM z16" depends on $(cc-option,-mtune=z16) +config TUNE_Z17 + bool "IBM z17" + depends on $(cc-option,-mtune=z17) + endchoice config 64BIT @@ -617,6 +653,7 @@ endchoice config RELOCATABLE def_bool y + select ARCH_VMLINUX_NEEDS_RELOCS help This builds a kernel image that retains relocation information so it can be loaded at an arbitrary address. @@ -688,32 +725,6 @@ config MAX_PHYSMEM_BITS Increasing the number of bits also increases the kernel image size. By default 46 bits (64TB) are supported. -config CHECK_STACK - def_bool y - depends on !VMAP_STACK - prompt "Detect kernel stack overflow" - help - This option enables the compiler option -mstack-guard and - -mstack-size if they are available. If the compiler supports them - it will emit additional code to each function prolog to trigger - an illegal operation if the kernel stack is about to overflow. - - Say N if you are unsure. - -config STACK_GUARD - int "Size of the guard area (128-1024)" - range 128 1024 - depends on CHECK_STACK - default "256" - help - This allows you to specify the size of the guard area at the lower - end of the kernel stack. If the kernel stack points into the guard - area on function entry an illegal operation is triggered. The size - needs to be a power of 2. Please keep in mind that the size of an - interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit. - The minimum size for the stack guard should be 256 for 31 bit and - 512 for 64 bit. - endmenu menu "I/O subsystem" diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index c4300ea4abf8..7955d7eee7d8 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -13,6 +13,16 @@ config DEBUG_ENTRY If unsure, say N. +config STRICT_MM_TYPECHECKS + bool "Strict Memory Management Type Checks" + depends on DEBUG_KERNEL + help + Enable strict type checking for memory management types like pte_t + and pmd_t. This generates slightly worse code and should be used + for debug builds. + + If unsure, say N. + config CIO_INJECT bool "CIO Inject interfaces" depends on DEBUG_KERNEL && DEBUG_FS diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 7fd57398221e..7679bc16b692 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -15,14 +15,14 @@ KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 KBUILD_CFLAGS += -fPIC -LDFLAGS_vmlinux := -no-pie --emit-relocs --discard-none +LDFLAGS_vmlinux := $(call ld-option,-no-pie) extra_tools := relocs aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) endif -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain @@ -48,6 +48,7 @@ mflags-$(CONFIG_MARCH_Z13) := -march=z13 mflags-$(CONFIG_MARCH_Z14) := -march=z14 mflags-$(CONFIG_MARCH_Z15) := -march=z15 mflags-$(CONFIG_MARCH_Z16) := -march=z16 +mflags-$(CONFIG_MARCH_Z17) := -march=z17 export CC_FLAGS_MARCH := $(mflags-y) @@ -61,6 +62,7 @@ cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14 cflags-$(CONFIG_MARCH_Z15_TUNE) += -mtune=z15 cflags-$(CONFIG_MARCH_Z16_TUNE) += -mtune=z16 +cflags-$(CONFIG_MARCH_Z17_TUNE) += -mtune=z17 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include @@ -72,15 +74,6 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) -ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) - CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE) - ifeq ($(call cc-option,-mstack-size=8192),) - CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD) - endif - export CC_FLAGS_CHECK_STACK - cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK) -endif - ifdef CONFIG_EXPOLINE ifdef CONFIG_EXPOLINE_EXTERN CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink index df82f5410769..c2b737500a91 100644 --- a/arch/s390/Makefile.postlink +++ b/arch/s390/Makefile.postlink @@ -19,14 +19,8 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S mkdir -p $(OUT_RELOCS); \ $(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S -quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = \ - $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ - --remove-section='.rela.*' --remove-section='.rela__*' $@ - -vmlinux: FORCE +vmlinux.unstripped: FORCE $(call cmd,relocs) - $(call cmd,strip_relocs) clean: @rm -f $(OUT_RELOCS)/relocs.S diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 91a30e017d65..ad2b0baa527c 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -12,6 +12,7 @@ #define KMSG_COMPONENT "appldata" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/export.h> #include <linux/module.h> #include <linux/sched/stat.h> #include <linux/init.h> @@ -52,7 +53,7 @@ static int appldata_interval_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table_header *appldata_sysctl_header; -static struct ctl_table appldata_table[] = { +static const struct ctl_table appldata_table[] = { { .procname = "timer", .mode = S_IRUGO | S_IWUSR, diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore index f5ef099e2fd3..af2a6a7bc028 100644 --- a/arch/s390/boot/.gitignore +++ b/arch/s390/boot/.gitignore @@ -5,4 +5,5 @@ relocs.S section_cmp.* vmlinux vmlinux.lds +vmlinux.map vmlinux.syms diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 8bc1308ac892..02f2cf082748 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -19,15 +19,15 @@ CC_FLAGS_MARCH_MINIMUM := -march=z10 KBUILD_AFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_AFLAGS_DECOMPRESSOR)) KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_CFLAGS_DECOMPRESSOR)) -KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM) -KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM) +KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM) -D__DISABLE_EXPORTS +KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM) -D__DISABLE_EXPORTS CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o -obj-y += uv.o printk.o +obj-y += version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o +obj-y += uv.o printk.o trampoline.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index 11e0c3d5dbc8..25a20986b96e 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -46,7 +46,7 @@ void print_missing_facilities(void) * z/VM adds a four character prefix. */ if (strlen(als_str) > 70) { - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); *als_str = '\0'; } u16_to_decimal(val_str, i * BITS_PER_LONG + j); @@ -54,7 +54,7 @@ void print_missing_facilities(void) first = 0; } } - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); } static void facility_mismatch(void) @@ -62,10 +62,10 @@ static void facility_mismatch(void) struct cpuid id; get_cpu_id(&id); - boot_printk("The Linux kernel requires more recent processor hardware\n"); - boot_printk("Detected machine-type number: %4x\n", id.machine); + boot_emerg("The Linux kernel requires more recent processor hardware\n"); + boot_emerg("Detected machine-type number: %4x\n", id.machine); print_missing_facilities(); - boot_printk("See Principles of Operations for facility bits\n"); + boot_emerg("See z/Architecture Principles of Operation - Facility Indications\n"); disabled_wait(); } diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c index abc08d2c873d..19ea7934b918 100644 --- a/arch/s390/boot/alternative.c +++ b/arch/s390/boot/alternative.c @@ -1,3 +1,138 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "alt: " fmt +#include "boot.h" + +#define a_debug boot_debug #include "../kernel/alternative.c" + +static void alt_debug_all(int type) +{ + int i; + + switch (type) { + case ALT_TYPE_FACILITY: + for (i = 0; i < ARRAY_SIZE(alt_debug.facilities); i++) + alt_debug.facilities[i] = -1UL; + break; + case ALT_TYPE_FEATURE: + for (i = 0; i < ARRAY_SIZE(alt_debug.mfeatures); i++) + alt_debug.mfeatures[i] = -1UL; + break; + case ALT_TYPE_SPEC: + alt_debug.spec = 1; + break; + } +} + +static void alt_debug_modify(int type, unsigned int nr, bool clear) +{ + switch (type) { + case ALT_TYPE_FACILITY: + if (clear) + __clear_facility(nr, alt_debug.facilities); + else + __set_facility(nr, alt_debug.facilities); + break; + case ALT_TYPE_FEATURE: + if (clear) + __clear_machine_feature(nr, alt_debug.mfeatures); + else + __set_machine_feature(nr, alt_debug.mfeatures); + break; + } +} + +static char *alt_debug_parse(int type, char *str) +{ + unsigned long val, endval; + char *endp; + bool clear; + int i; + + if (*str == ':') { + str++; + } else { + alt_debug_all(type); + return str; + } + clear = false; + if (*str == '!') { + alt_debug_all(type); + clear = true; + str++; + } + while (*str) { + val = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + if (*str == '-') { + str++; + endval = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + while (val <= endval) { + alt_debug_modify(type, val, clear); + val++; + } + } else { + alt_debug_modify(type, val, clear); + } + if (*str != ',') + break; + str++; + } + return str; +} + +/* + * Use debug-alternative command line parameter for debugging: + * "debug-alternative" + * -> print debug message for every single alternative + * + * "debug-alternative=0;2" + * -> print debug message for all alternatives with type 0 and 2 + * + * "debug-alternative=0:0-7" + * -> print debug message for all alternatives with type 0 and with + * facility numbers within the range of 0-7 + * (if type 0 is ALT_TYPE_FACILITY) + * + * "debug-alternative=0:!8;1" + * -> print debug message for all alternatives with type 0, for all + * facility number, except facility 8, and in addition print all + * alternatives with type 1 + */ +void alt_debug_setup(char *str) +{ + unsigned long type; + char *endp; + int i; + + if (!str) { + alt_debug_all(ALT_TYPE_FACILITY); + alt_debug_all(ALT_TYPE_FEATURE); + alt_debug_all(ALT_TYPE_SPEC); + return; + } + while (*str) { + type = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + switch (type) { + case ALT_TYPE_FACILITY: + case ALT_TYPE_FEATURE: + str = alt_debug_parse(type, str); + break; + case ALT_TYPE_SPEC: + alt_debug_all(ALT_TYPE_SPEC); + break; + } + if (*str != ';') + break; + str++; + } +} diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 7521a9d75fa2..c0152db285f0 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -6,16 +6,11 @@ #define IPL_START 0x200 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ +#include <linux/printk.h> #include <asm/physmem_info.h> -struct machine_info { - unsigned char has_edat1 : 1; - unsigned char has_edat2 : 1; - unsigned char has_nx : 1; -}; - struct vmlinux_info { unsigned long entry; unsigned long image_size; /* does not include .bss */ @@ -48,13 +43,16 @@ void physmem_set_usable_limit(unsigned long limit); void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); void physmem_free(enum reserved_range_type type); /* for continuous/multiple allocations per type */ -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align); +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align); +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom); /* for single allocations, 1 per type */ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, unsigned long align, unsigned long min, unsigned long max, bool die_on_oom); unsigned long get_physmem_alloc_pos(void); +void dump_physmem_reserved(void); bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, unsigned long *intersection_start); bool is_ipl_block_dump(void); @@ -66,16 +64,34 @@ void parse_boot_command_line(void); void verify_facilities(void); void print_missing_facilities(void); void sclp_early_setup_buffer(void); -void print_pgm_check_info(void); +void alt_debug_setup(char *str); +void do_pgm_check(struct pt_regs *regs); unsigned long randomize_within_range(unsigned long size, unsigned long align, unsigned long min, unsigned long max); void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit); -void __printf(1, 2) boot_printk(const char *fmt, ...); +int __printf(1, 2) boot_printk(const char *fmt, ...); void print_stacktrace(unsigned long sp); void error(char *m); int get_random(unsigned long limit, unsigned long *value); +void boot_rb_dump(void); +void __noreturn jump_to_kernel(psw_t *psw); + +#ifndef boot_fmt +#define boot_fmt(fmt) fmt +#endif + +#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__) +#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__) +#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__) +#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__) +#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__) +#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__) extern struct machine_info machine; +extern int boot_console_loglevel; +extern bool boot_ignore_loglevel; /* Symbols defined by linker scripts */ extern const char kernel_version[]; @@ -106,5 +122,5 @@ static inline bool intersects(unsigned long addr0, unsigned long size0, { return addr0 + size0 > addr1 && addr1 + size1 > addr0; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index f478e8e9cbda..03500b9d9fb9 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/string.h> +#include <asm/boot_data.h> #include <asm/page.h> #include "decompressor.h" #include "boot.h" @@ -63,6 +64,15 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #include "../../../../lib/decompress_unzstd.c" #endif +static void decompress_error(char *m) +{ + if (bootdebug) + boot_rb_dump(); + boot_emerg("Decompression error: %s\n", m); + boot_emerg(" -- System halted\n"); + disabled_wait(); +} + unsigned long mem_safe_offset(void) { return ALIGN(free_mem_end_ptr, PAGE_SIZE); @@ -71,5 +81,5 @@ unsigned long mem_safe_offset(void) void deploy_kernel(void *output) { __decompress(_compressed_start, _compressed_end - _compressed_start, - NULL, NULL, output, vmlinux.image_size, NULL, error); + NULL, NULL, output, vmlinux.image_size, NULL, decompress_error); } diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 0a47b16f6412..0b511d5c030b 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -254,8 +254,9 @@ SYM_CODE_START_LOCAL(startup_normal) xc 0xf00(256),0xf00 larl %r13,.Lctl lctlg %c0,%c15,0(%r13) # load control registers - stcke __LC_BOOT_CLOCK - mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 + larl %r13,tod_clock_base + stcke 0(%r13) + mvc __LC_LAST_UPDATE_CLOCK(8),1(%r13) larl %r13,6f spt 0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),0(%r13) @@ -292,12 +293,6 @@ SYM_CODE_END(startup_normal) #include "head_kdump.S" -# -# This program check is active immediately after kernel start -# and until early_pgm_check_handler is set in kernel/early.c -# It simply saves general/control registers and psw in -# the save area and does disabled wait with a faulty address. -# SYM_CODE_START_LOCAL(startup_pgm_check_handler) stmg %r8,%r15,__LC_SAVE_AREA la %r8,4095 @@ -311,8 +306,18 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler) oi __LC_RETURN_PSW+1,0x2 # set wait state bit larl %r9,.Lold_psw_disabled_wait stg %r9,__LC_PGM_NEW_PSW+8 - larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD - brasl %r14,print_pgm_check_info + larl %r15,_dump_info_stack_end-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r2,STACK_FRAME_OVERHEAD(%r15) + mvc __PT_PSW(16,%r2),__LC_PSW_SAVE_AREA-4095(%r8) + mvc __PT_R0(128,%r2),__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __PT_LAST_BREAK(8,%r2),__LC_PGM_LAST_BREAK + mvc __PT_INT_CODE(4,%r2),__LC_PGM_INT_CODE + brasl %r14,do_pgm_check + larl %r9,startup_pgm_check_handler + stg %r9,__LC_PGM_NEW_PSW+8 + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + lpswe __LC_RETURN_PSW .Lold_psw_disabled_wait: la %r8,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c index 0846e2b249c6..c4130a80b058 100644 --- a/arch/s390/boot/ipl_data.c +++ b/arch/s390/boot/ipl_data.c @@ -16,7 +16,9 @@ struct ipl_lowcore { struct ccw0 ccwpgm[2]; /* 0x0008 */ u8 fill[56]; /* 0x0018 */ struct ccw0 ccwpgmcc[20]; /* 0x0050 */ - u8 pad_0xf0[0x01a0-0x00f0]; /* 0x00f0 */ + u8 pad_0xf0[0x0140-0x00f0]; /* 0x00f0 */ + psw_t svc_old_psw; /* 0x0140 */ + u8 pad_0x150[0x01a0-0x0150]; /* 0x0150 */ psw_t restart_psw; /* 0x01a0 */ psw_t external_new_psw; /* 0x01b0 */ psw_t svc_new_psw; /* 0x01c0 */ @@ -75,6 +77,11 @@ static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = { [18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC), [19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI), }, + /* + * Let the GDB's lx-symbols command find the jump_to_kernel symbol + * without having to load decompressor symbols. + */ + .svc_old_psw = { .mask = 0, .addr = (unsigned long)jump_to_kernel }, .restart_psw = { .mask = 0, .addr = IPL_START, }, .external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, }, .svc_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, }, diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 557462e62cd7..f584d7da29cb 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -5,6 +5,7 @@ #include <linux/pgtable.h> #include <asm/abs_lowcore.h> #include <asm/page-states.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> @@ -34,29 +35,14 @@ int vmalloc_size_set; static inline int __diag308(unsigned long subcode, void *addr) { - unsigned long reg1, reg2; - union register_pair r1; - psw_t old; - - r1.even = (unsigned long) addr; - r1.odd = 0; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + union register_pair r1 = { .even = (unsigned long)addr, .odd = 0 }; + + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [r1] "+&d" (r1.pair), - [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - "+Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [subcode] "d" (subcode), - [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw) + "0:\n" + EX_TABLE(0b, 0b) + : [r1] "+d" (r1.pair) + : [subcode] "d" (subcode) : "cc", "memory"); return r1.odd; } @@ -193,7 +179,7 @@ void setup_boot_command_line(void) if (has_ebcdic_char(parmarea.command_line)) EBCASC(parmarea.command_line, COMMAND_LINE_SIZE); /* copy arch command line */ - strcpy(early_command_line, strim(parmarea.command_line)); + strscpy(early_command_line, strim(parmarea.command_line)); /* append IPL PARM data to the boot command line */ if (!is_prot_virt_guest() && ipl_block_valid) @@ -215,7 +201,7 @@ static void check_cleared_facilities(void) for (i = 0; i < ARRAY_SIZE(als); i++) { if ((stfle_fac_list[i] & als[i]) != als[i]) { - boot_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); + boot_emerg("The Linux kernel requires facilities cleared via command line option\n"); print_missing_facilities(); break; } @@ -267,7 +253,8 @@ void parse_boot_command_line(void) int rc; __kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); - args = strcpy(command_line_buf, early_command_line); + strscpy(command_line_buf, early_command_line); + args = command_line_buf; while (*args) { args = next_arg(args, ¶m, &val); @@ -295,6 +282,9 @@ void parse_boot_command_line(void) if (!strcmp(param, "facilities") && val) modify_fac_list(val); + if (!strcmp(param, "debug-alternative")) + alt_debug_setup(val); + if (!strcmp(param, "nokaslr")) __kaslr_enabled = 0; @@ -312,6 +302,24 @@ void parse_boot_command_line(void) } #endif if (!strcmp(param, "relocate_lowcore") && test_facility(193)) - relocate_lowcore = 1; + set_machine_feature(MFEATURE_LOWCORE); + if (!strcmp(param, "earlyprintk")) + boot_earlyprintk = true; + if (!strcmp(param, "debug")) + boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; + if (!strcmp(param, "bootdebug")) { + bootdebug = true; + if (val) + strscpy(bootdebug_filter, val); + } + if (!strcmp(param, "quiet")) + boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; + if (!strcmp(param, "ignore_loglevel")) + boot_ignore_loglevel = true; + if (!strcmp(param, "loglevel")) { + boot_console_loglevel = simple_strtoull(val, NULL, 10); + if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN) + boot_console_loglevel = CONSOLE_LOGLEVEL_MIN; + } } } diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index d00898852a88..f73cd757a5f7 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; - size_t size; /* * Find the length for the IPL report boot data @@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void) return; size = get_cert_comp_list_size(); - early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int)); ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; copy_components_bootdata(); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index f864d2bff775..941f4c9e27cc 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -32,7 +32,7 @@ struct prng_parm { static int check_prng(void) { if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) { - boot_printk("KASLR disabled: CPU has no PRNG\n"); + boot_warn("KASLR disabled: CPU has no PRNG\n"); return 0; } if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) @@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a * cannot have chains. * * On the other hand, "dynamic" or "repetitive" allocations are done via - * physmem_alloc_top_down(). These allocations are tightly packed together + * physmem_alloc_or_die(). These allocations are tightly packed together * top down from the end of online memory. physmem_alloc_pos represents * current position where those allocations start. * diff --git a/arch/s390/boot/pgm_check.c b/arch/s390/boot/pgm_check.c new file mode 100644 index 000000000000..fa621fa5bc02 --- /dev/null +++ b/arch/s390/boot/pgm_check.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/stdarg.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/lowcore.h> +#include <asm/setup.h> +#include <asm/sclp.h> +#include <asm/uv.h> +#include "boot.h" + +void print_stacktrace(unsigned long sp) +{ + struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, + (unsigned long)_stack_end }; + bool first = true; + + boot_emerg("Call Trace:\n"); + while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { + struct stack_frame *sf = (struct stack_frame *)sp; + + if (first) + boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + else + boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + if (sf->back_chain <= sp) + break; + sp = sf->back_chain; + first = false; + } +} + +extern struct exception_table_entry __start___ex_table[]; +extern struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long extable_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static bool ex_handler(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; + + for (ex = __start___ex_table; ex < __stop___ex_table; ex++) { + if (extable_insn(ex) != regs->psw.addr) + continue; + if (ex->type != EX_TYPE_FIXUP) + return false; + regs->psw.addr = extable_fixup(ex); + return true; + } + return false; +} + +void do_pgm_check(struct pt_regs *regs) +{ + struct psw_bits *psw = &psw_bits(regs->psw); + unsigned long *gpregs = regs->gprs; + + if (ex_handler(regs)) + return; + if (bootdebug) + boot_rb_dump(); + boot_emerg("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Kernel fault: interruption code %04x ilc:%d\n", + regs->int_code & 0xffff, regs->int_code >> 17); + if (kaslr_enabled()) { + boot_emerg("Kernel random base: %lx\n", __kaslr_offset); + boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys); + } + boot_emerg("PSW : %016lx %016lx (%pS)\n", + regs->psw.mask, regs->psw.addr, (void *)regs->psw.addr); + boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba); + boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + print_stacktrace(gpregs[15]); + boot_emerg("Last Breaking-Event-Address:\n"); + boot_emerg(" [<%016lx>] %pS\n", regs->last_break, (void *)regs->last_break); + /* Convert to disabled wait PSW */ + psw->io = 0; + psw->ext = 0; + psw->wait = 1; +} diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c deleted file mode 100644 index 5abe59fb3bc0..000000000000 --- a/arch/s390/boot/pgm_check_info.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/stdarg.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <asm/stacktrace.h> -#include <asm/boot_data.h> -#include <asm/lowcore.h> -#include <asm/setup.h> -#include <asm/sclp.h> -#include <asm/uv.h> -#include "boot.h" - -void print_stacktrace(unsigned long sp) -{ - struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, - (unsigned long)_stack_end }; - bool first = true; - - boot_printk("Call Trace:\n"); - while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { - struct stack_frame *sf = (struct stack_frame *)sp; - - boot_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" : - " sp:%016lx [<%016lx>] %pS\n", - sp, sf->gprs[8], (void *)sf->gprs[8]); - if (sf->back_chain <= sp) - break; - sp = sf->back_chain; - first = false; - } -} - -void print_pgm_check_info(void) -{ - unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area; - struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area); - - boot_printk("Linux version %s\n", kernel_version); - if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Kernel fault: interruption code %04x ilc:%x\n", - get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); - if (kaslr_enabled()) { - boot_printk("Kernel random base: %lx\n", __kaslr_offset); - boot_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys); - } - boot_printk("PSW : %016lx %016lx (%pS)\n", - get_lowcore()->psw_save_area.mask, - get_lowcore()->psw_save_area.addr, - (void *)get_lowcore()->psw_save_area.addr); - boot_printk( - " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", - psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, - psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, - psw->eaba); - boot_printk("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); - print_stacktrace(get_lowcore()->gpregs_save_area[15]); - boot_printk("Last Breaking-Event-Address:\n"); - boot_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, - (void *)get_lowcore()->pgm_last_break); -} diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 7617aa2d2f7e..45e3d057cfaa 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "physmem: " fmt #include <linux/processor.h> #include <linux/errno.h> #include <linux/init.h> @@ -28,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n) return &physmem_info.online[n]; if (unlikely(!physmem_info.online_extended)) { physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( - RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0, physmem_alloc_pos, true); } return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; @@ -58,36 +59,22 @@ void add_physmem_online_range(u64 start, u64 end) static int __diag260(unsigned long rx1, unsigned long rx2) { - unsigned long reg1, reg2, ry; union register_pair rx; int cc, exception; - psw_t old; + unsigned long ry; rx.even = rx1; rx.odd = rx2; ry = 0x10; /* storage configuration */ exception = 1; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " diag %[rx],%[ry],0x260\n" - " lhi %[exc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + "0: lhi %[exc],0\n" + "1:\n" CC_IPM(cc) - : CC_OUT(cc, cc), - [exc] "+d" (exception), - [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [ry] "+&d" (ry), - "+Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [rx] "d" (rx.pair), - [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw) + EX_TABLE(0b, 1b) + : CC_OUT(cc, cc), [exc] "+d" (exception), [ry] "+d" (ry) + : [rx] "d" (rx.pair) : CC_CLOBBER_LIST("memory")); cc = exception ? -1 : CC_TRANSFORM(cc); return cc == 0 ? ry : -1; @@ -117,29 +104,15 @@ static int diag260(void) static int diag500_storage_limit(unsigned long *max_physmem_end) { unsigned long storage_limit; - unsigned long reg1, reg2; - psw_t old; - - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" - " lghi 1,%[subcode]\n" - " lghi 2,0\n" - " diag 2,4,0x500\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - " lgr %[slimit],2\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [slimit] "=d" (storage_limit), - "=Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw), - [subcode] "i" (DIAG500_SC_STOR_LIMIT) + + asm_inline volatile( + " lghi %%r1,%[subcode]\n" + " lghi %%r2,0\n" + " diag %%r2,%%r4,0x500\n" + "0: lgr %[slimit],%%r2\n" + EX_TABLE(0b, 0b) + : [slimit] "=d" (storage_limit) + : [subcode] "i" (DIAG500_SC_STOR_LIMIT) : "memory", "1", "2"); if (!storage_limit) return -EINVAL; @@ -150,31 +123,17 @@ static int diag500_storage_limit(unsigned long *max_physmem_end) static int tprot(unsigned long addr) { - unsigned long reg1, reg2; int cc, exception; - psw_t old; exception = 1; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " tprot 0(%[addr]),0\n" - " lhi %[exc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + "0: lhi %[exc],0\n" + "1:\n" CC_IPM(cc) - : CC_OUT(cc, cc), - [exc] "+d" (exception), - [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - "=Q" (get_lowcore()->program_new_psw.addr), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw), - [addr] "a" (addr) + EX_TABLE(0b, 1b) + : CC_OUT(cc, cc), [exc] "+d" (exception) + : [addr] "a" (addr) : CC_CLOBBER_LIST("memory")); cc = exception ? -EFAULT : CC_TRANSFORM(cc); return cc; @@ -207,11 +166,16 @@ unsigned long detect_max_physmem_end(void) max_physmem_end = search_mem_end(); physmem_info.info_source = MEM_DETECT_BIN_SEARCH; } + boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end, + get_physmem_info_source()); return max_physmem_end; } void detect_physmem_online_ranges(unsigned long max_physmem_end) { + unsigned long start, end; + int i; + if (!sclp_early_read_storage_info()) { physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; } else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) { @@ -226,12 +190,16 @@ void detect_physmem_online_ranges(unsigned long max_physmem_end) } else if (max_physmem_end) { add_physmem_online_range(0, max_physmem_end); } + boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source()); + for_each_physmem_online_range(i, &start, &end) + boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end); } void physmem_set_usable_limit(unsigned long limit) { physmem_info.usable = limit; physmem_alloc_pos = limit; + boot_debug("Usable memory limit: 0x%016lx\n", limit); } static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) @@ -241,38 +209,47 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min, enum reserved_range_type t; int i; - boot_printk("Linux version %s\n", kernel_version); + boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", - size, align, min, max); - boot_printk("Reserved memory ranges:\n"); + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n", + size, align, min, max); + boot_emerg("Reserved memory ranges:\n"); for_each_physmem_reserved_range(t, range, &start, &end) { - boot_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); total_reserved_mem += end - start; } - boot_printk("Usable online memory ranges (info source: %s [%x]):\n", - get_physmem_info_source(), physmem_info.info_source); + boot_emerg("Usable online memory ranges (info source: %s [%d]):\n", + get_physmem_info_source(), physmem_info.info_source); for_each_physmem_usable_range(i, &start, &end) { - boot_printk("%016lx %016lx\n", start, end); + boot_emerg("%016lx %016lx\n", start, end); total_mem += end - start; } - boot_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", - total_mem, total_reserved_mem, - total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); + boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); print_stacktrace(current_frame_address()); - boot_printk("\n\n -- System halted\n"); + boot_emerg(" -- System halted\n"); disabled_wait(); } -void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) { physmem_info.reserved[type].start = addr; physmem_info.reserved[type].end = addr + size; } +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size, + get_rr_type_name(type)); +} + void physmem_free(enum reserved_range_type type) { + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start, + physmem_info.reserved[type].end, get_rr_type_name(type)); physmem_info.reserved[type].start = 0; physmem_info.reserved[type].end = 0; } @@ -339,41 +316,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s max = min(max, physmem_alloc_pos); addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); if (addr) - physmem_reserve(type, addr, size); + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size, + get_rr_type_name(type)); return addr; } -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align) +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom) { struct reserved_range *range = &physmem_info.reserved[type]; - struct reserved_range *new_range; + struct reserved_range *new_range = NULL; unsigned int ranges_left; unsigned long addr; addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, - &ranges_left, true); + &ranges_left, die_on_oom); + if (!addr) + return 0; /* if not a consecutive allocation of the same type or first allocation */ if (range->start != addr + size) { if (range->end) { - physmem_alloc_pos = __physmem_alloc_range( - sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, - physmem_alloc_ranges, &ranges_left, true); - new_range = (struct reserved_range *)physmem_alloc_pos; + addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0, + physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + new_range = (struct reserved_range *)addr; + addr = __physmem_alloc_range(size, align, 0, addr, ranges_left, + &ranges_left, die_on_oom); + if (!addr) + return 0; *new_range = *range; range->chain = new_range; - addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, - ranges_left, &ranges_left, true); } range->end = addr + size; } + if (type != RR_VMEM) { + boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:", + addr, addr + size, get_rr_type_name(type), align, !!new_range); + } range->start = addr; physmem_alloc_pos = addr; physmem_alloc_ranges = ranges_left; return addr; } +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + return physmem_alloc(type, size, align, true); +} + unsigned long get_physmem_alloc_pos(void) { return physmem_alloc_pos; } + +void dump_physmem_reserved(void) +{ + struct reserved_range *range; + enum reserved_range_type t; + unsigned long start, end; + + boot_debug("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + if (end) { + boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n", + get_rr_type_name(t), start, end, (unsigned long)range, + (unsigned long)range->chain); + } + } +} diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 35f18f2b936e..4bb6bc95704e 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -5,21 +5,113 @@ #include <linux/ctype.h> #include <asm/stacktrace.h> #include <asm/boot_data.h> +#include <asm/sections.h> #include <asm/lowcore.h> #include <asm/setup.h> +#include <asm/timex.h> #include <asm/sclp.h> #include <asm/uv.h> #include "boot.h" +int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT; +bool boot_ignore_loglevel; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +static void boot_rb_add(const char *str, size_t len) +{ + /* leave double '\0' in the end */ + size_t avail = sizeof(boot_rb) - boot_rb_off - 1; + + /* store strings separated by '\0' */ + if (len + 1 > avail) + boot_rb_off = 0; + avail = sizeof(boot_rb) - boot_rb_off - 1; + strscpy(boot_rb + boot_rb_off, str, avail); + boot_rb_off += len + 1; +} + +static void print_rb_entry(const char *str) +{ + sclp_early_printk(printk_skip_level(str)); +} + +static bool debug_messages_printed(void) +{ + return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG); +} + +void boot_rb_dump(void) +{ + if (debug_messages_printed()) + return; + sclp_early_printk("Boot messages ring buffer:\n"); + boot_rb_foreach(print_rb_entry); +} + const char hex_asc[] = "0123456789abcdef"; static char *as_hex(char *dst, unsigned long val, int pad) { - char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); - for (*p-- = 0; p >= dst; val >>= 4) + for (*p-- = '\0'; p >= dst; val >>= 4) *p-- = hex_asc[val & 0x0f]; - return end; + return dst; +} + +#define MAX_NUMLEN 21 +static char *as_dec(char *buf, unsigned long val, bool is_signed) +{ + bool negative = false; + char *p = buf + MAX_NUMLEN; + + if (is_signed && (long)val < 0) { + val = (val == LONG_MIN ? LONG_MIN : -(long)val); + negative = true; + } + + *--p = '\0'; + do { + *--p = '0' + (val % 10); + val /= 10; + } while (val); + + if (negative) + *--p = '-'; + return p; +} + +static ssize_t strpad(char *dst, size_t dst_size, const char *src, + int _pad, bool zero_pad, bool decimal) +{ + ssize_t len = strlen(src), pad = _pad; + char *p = dst; + + if (max(len, abs(pad)) >= dst_size) + return -E2BIG; + + if (pad > len) { + if (decimal && zero_pad && *src == '-') { + *p++ = '-'; + src++; + len--; + pad--; + } + memset(p, zero_pad ? '0' : ' ', pad - len); + p += pad - len; + } + memcpy(p, src, len); + p += len; + if (pad < 0 && -pad > len) { + memset(p, ' ', -pad - len); + p += -pad - len; + } + *p = '\0'; + return p - dst; } static char *symstart(char *p) @@ -58,35 +150,93 @@ static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned sh return NULL; } -static noinline char *strsym(void *ip) +#define MAX_SYMLEN 64 +static noinline char *strsym(char *buf, void *ip) { - static char buf[64]; unsigned short off; unsigned short len; char *p; p = findsym((unsigned long)ip, &off, &len); if (p) { - strncpy(buf, p, sizeof(buf)); + strscpy(buf, p, MAX_SYMLEN); /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ - p = buf + strnlen(buf, sizeof(buf) - 15); - strcpy(p, "+0x"); - p = as_hex(p + 3, off, 0); - strcpy(p, "/0x"); - as_hex(p + 3, len, 0); + p = buf + strnlen(buf, MAX_SYMLEN - 15); + strscpy(p, "+0x", MAX_SYMLEN - (p - buf)); + as_hex(p + 3, off, 0); + strcat(p, "/0x"); + as_hex(p + strlen(p), len, 0); } else { as_hex(buf, (unsigned long)ip, 16); } return buf; } -void boot_printk(const char *fmt, ...) +static inline int printk_loglevel(const char *buf) +{ + if (buf[0] == KERN_SOH_ASCII && buf[1]) { + switch (buf[1]) { + case '0' ... '7': + return buf[1] - '0'; + } + } + return MESSAGE_LOGLEVEL_DEFAULT; +} + +static void boot_console_earlyprintk(const char *buf) +{ + int level = printk_loglevel(buf); + + /* always print emergency messages */ + if (level > LOGLEVEL_EMERG && !boot_earlyprintk) + return; + buf = printk_skip_level(buf); + /* print debug messages only when bootdebug is enabled */ + if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf)))) + return; + if (boot_ignore_loglevel || level < boot_console_loglevel) + sclp_early_printk(buf); +} + +static char *add_timestamp(char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + unsigned long ns = tod_to_ns(__get_tod_clock_monotonic()); + char ts[MAX_NUMLEN]; + + *buf++ = '['; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0); + *buf++ = '.'; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0); + *buf++ = ']'; + *buf++ = ' '; +#endif + return buf; +} + +#define va_arg_len_type(args, lenmod, typemod) \ + ((lenmod == 'l') ? va_arg(args, typemod long) : \ + (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \ + (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \ + (lenmod == 'z') ? va_arg(args, typemod long) : \ + va_arg(args, typemod int)) + +int boot_printk(const char *fmt, ...) { char buf[1024] = { 0 }; char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ - unsigned long pad; - char *p = buf; + bool zero_pad, decimal; + char *strval, *p = buf; + char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)]; va_list args; + char lenmod; + ssize_t len; + int pad; + + *p++ = KERN_SOH_ASCII; + *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT; + p = add_timestamp(p); + fmt = printk_skip_level(fmt); va_start(args, fmt); for (; p < end && *fmt; fmt++) { @@ -94,31 +244,56 @@ void boot_printk(const char *fmt, ...) *p++ = *fmt; continue; } - pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + if (*++fmt == '%') { + *p++ = '%'; + continue; + } + zero_pad = (*fmt == '0'); + pad = simple_strtol(fmt, (char **)&fmt, 10); + lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0; + if (lenmod == 'h' && *fmt == 'h') { + lenmod = 'H'; + fmt++; + } + decimal = false; switch (*fmt) { case 's': - p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); + if (lenmod) + goto out; + strval = va_arg(args, char *); + zero_pad = false; break; case 'p': - if (*++fmt != 'S') + if (*++fmt != 'S' || lenmod) goto out; - p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); + strval = strsym(valbuf, va_arg(args, void *)); + zero_pad = false; break; - case 'l': - if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned long), pad); + case 'd': + case 'i': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1); + decimal = true; + break; + case 'u': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; case 'x': - if (end - p <= max(sizeof(int) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned int), pad); + strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; default: goto out; } + len = strpad(p, end - p, strval, pad, zero_pad, decimal); + if (len == -E2BIG) + break; + p += len; } out: va_end(args); - sclp_early_printk(buf); + len = strlen(buf); + if (len) { + boot_rb_add(buf, len); + boot_console_earlyprintk(buf); + } + return len; } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 6087d38c7235..93684a775716 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,13 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "startup: " fmt #include <linux/string.h> #include <linux/elf.h> #include <asm/page-states.h> #include <asm/boot_data.h> #include <asm/extmem.h> #include <asm/sections.h> +#include <asm/diag288.h> #include <asm/maccess.h> +#include <asm/machine.h> +#include <asm/sysinfo.h> #include <asm/cpu_mf.h> #include <asm/setup.h> +#include <asm/timex.h> #include <asm/kasan.h> #include <asm/kexec.h> #include <asm/sclp.h> @@ -30,57 +35,131 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); -int __bootdata_preserved(relocate_lowcore); +unsigned long __bootdata_preserved(page_noexec_mask); +unsigned long __bootdata_preserved(segment_noexec_mask); +unsigned long __bootdata_preserved(region_noexec_mask); +union tod_clock __bootdata_preserved(tod_clock_base); +u64 __bootdata_preserved(clock_comparator_max) = -1UL; u64 __bootdata_preserved(stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); -struct machine_info machine; - void error(char *x) { - boot_printk("\n\n%s\n\n -- System halted", x); + boot_emerg("%s\n", x); + boot_emerg(" -- System halted\n"); disabled_wait(); } +static char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); + +static void detect_machine_type(void) +{ + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + + /* Check current-configuration-level */ + if (stsi(NULL, 0, 0, 0) <= 2) { + set_machine_feature(MFEATURE_LPAR); + return; + } + /* Get virtual-machine cpu information. */ + if (stsi(vmms, 3, 2, 2) || !vmms->count) + return; + /* Detect known hypervisors */ + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) + set_machine_feature(MFEATURE_KVM); + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) + set_machine_feature(MFEATURE_VM); +} + +static void detect_diag288(void) +{ + /* "BEGIN" in EBCDIC character set */ + static const char cmd[] = "\xc2\xc5\xc7\xc9\xd5"; + unsigned long action, len; + + action = machine_is_vm() ? (unsigned long)cmd : LPARWDT_RESTART; + len = machine_is_vm() ? sizeof(cmd) : 0; + if (__diag288(WDT_FUNC_INIT, MIN_INTERVAL, action, len)) + return; + __diag288(WDT_FUNC_CANCEL, 0, 0, 0); + set_machine_feature(MFEATURE_DIAG288); +} + +static void detect_diag9c(void) +{ + unsigned int cpu; + int rc = 1; + + cpu = stap(); + asm_inline volatile( + " diag %[cpu],%%r0,0x9c\n" + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc) + : [cpu] "d" (cpu) + : "cc", "memory"); + if (!rc) + set_machine_feature(MFEATURE_DIAG9C); +} + +static void reset_tod_clock(void) +{ + union tod_clock clk; + + if (store_tod_clock_ext_cc(&clk) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) + disabled_wait(); + memset(&tod_clock_base, 0, sizeof(tod_clock_base)); + tod_clock_base.tod = TOD_UNIX_EPOCH; + get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; +} + static void detect_facilities(void) { - if (test_facility(8)) { - machine.has_edat1 = 1; + if (cpu_has_edat1()) local_ctl_set_bit(0, CR0_EDAT_BIT); + page_noexec_mask = -1UL; + segment_noexec_mask = -1UL; + region_noexec_mask = -1UL; + if (!cpu_has_nx()) { + page_noexec_mask &= ~_PAGE_NOEXEC; + segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC; + region_noexec_mask &= ~_REGION_ENTRY_NOEXEC; + } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) + set_machine_feature(MFEATURE_PCI_MIO); + reset_tod_clock(); + if (test_facility(139) && (tod_clock_base.tod >> 63)) { + /* Enable signed clock comparator comparisons */ + set_machine_feature(MFEATURE_SCC); + clock_comparator_max = -1UL >> 1; + local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); } - if (test_facility(78)) - machine.has_edat2 = 1; - if (test_facility(130)) - machine.has_nx = 1; + if (test_facility(50) && test_facility(73)) { + set_machine_feature(MFEATURE_TX); + local_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); + } + if (cpu_has_vx()) + local_ctl_set_bit(0, CR0_VECTOR_BIT); } static int cmma_test_essa(void) { - unsigned long reg1, reg2, tmp = 0; + unsigned long tmp = 0; int rc = 1; - psw_t old; /* Test ESSA_GET_STATE */ - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" - " la %[rc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - [tmp] "=&d" (tmp), - "+Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw), - [cmd] "i" (ESSA_GET_STATE) + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc), [tmp] "+d" (tmp) + : [cmd] "i" (ESSA_GET_STATE) : "cc", "memory"); return rc; } @@ -134,7 +213,7 @@ static void rescue_initrd(unsigned long min, unsigned long max) return; old_addr = addr; physmem_free(RR_INITRD); - addr = physmem_alloc_top_down(RR_INITRD, size, 0); + addr = physmem_alloc_or_die(RR_INITRD, size, 0); memmove((void *)addr, (void *)old_addr, size); } @@ -213,12 +292,16 @@ static void setup_ident_map_size(unsigned long max_physmem_end) if (oldmem_data.start) { __kaslr_enabled = 0; ident_map_size = min(ident_map_size, oldmem_data.size); + boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size); } else if (ipl_block_valid && is_ipl_block_dump()) { __kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) { ident_map_size = min(ident_map_size, hsa_size); + boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size); + } } #endif + boot_debug("Identity map size: 0x%016lx\n", ident_map_size); } #define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)) @@ -258,6 +341,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); + boot_debug("vmem size estimated: 0x%016lx\n", vsize); if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE || (vsize > _REGION2_SIZE && kaslr_enabled())) { asce_limit = _REGION1_SIZE; @@ -281,8 +365,10 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) * otherwise asce_limit and rte_size would have been adjusted. */ vmax = adjust_to_uv_max(asce_limit); + boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax); #ifdef CONFIG_KASAN BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START); + boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END); /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif @@ -296,19 +382,27 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) pos = 0; kernel_end = vmax - pos * THREAD_SIZE; kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE); + boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax); + boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start, + kernel_start + kernel_size); } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) { kernel_start = round_down(vmax - kernel_size, THREAD_SIZE); - boot_printk("The kernel base address is forced to %lx\n", kernel_start); + boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start, + kernel_start + kernel_size); } else { kernel_start = __NO_KASLR_START_KERNEL; + boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start, + kernel_start + kernel_size); } __kaslr_offset = kernel_start; + boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset); MODULES_END = round_down(kernel_start, _SEGMENT_SIZE); MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; if (IS_ENABLED(CONFIG_KMSAN)) VMALLOC_END -= MODULES_LEN * 2; + boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END); /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ vsize = (VMALLOC_END - FIXMAP_SIZE) / 2; @@ -320,10 +414,15 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) VMALLOC_END -= vmalloc_size * 2; } VMALLOC_START = VMALLOC_END - vmalloc_size; + boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END); __memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE); + boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area, + __memcpy_real_area + MEMCPY_REAL_SIZE); __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); + boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore, + __abs_lowcore + ABS_LOWCORE_MAP_SIZE); /* split remaining virtual space between 1:1 mapping & vmemmap array */ pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page)); @@ -343,8 +442,11 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); max_mappable = max(ident_map_size, MAX_DCSS_ADDR); max_mappable = min(max_mappable, vmemmap_start); - if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE)) - __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#endif + boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base, + __identity_base + ident_map_size); return asce_limit; } @@ -403,6 +505,10 @@ void startup_kernel(void) psw_t psw; setup_lpp(); + store_ipl_parmblock(); + uv_query_info(); + setup_boot_command_line(); + parse_boot_command_line(); /* * Non-randomized kernel physical start address must be _SEGMENT_SIZE @@ -422,13 +528,14 @@ void startup_kernel(void) oldmem_data.start = parmarea.oldmem_base; oldmem_data.size = parmarea.oldmem_size; - store_ipl_parmblock(); read_ipl_report(); - uv_query_info(); sclp_early_read_info(); - setup_boot_command_line(); - parse_boot_command_line(); + sclp_early_detect_machine_features(); detect_facilities(); + detect_diag9c(); + detect_machine_type(); + /* detect_diag288() needs machine type */ + detect_diag288(); cmma_init(); sanitize_prot_virt_host(); max_physmem_end = detect_max_physmem_end(); @@ -517,6 +624,7 @@ void startup_kernel(void) __kaslr_offset, __kaslr_offset_phys); kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); + dump_physmem_reserved(); copy_bootdata(); __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, (struct alt_instr *)_vmlinux_info.alt_instructions_end, @@ -533,5 +641,6 @@ void startup_kernel(void) */ psw.addr = __kaslr_offset + vmlinux.entry; psw.mask = PSW_KERNEL_BITS; - __load_psw(psw); + boot_debug("Starting kernel at: 0x%016lx\n", psw.addr); + jump_to_kernel(&psw); } diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c index f6b9b1df48a8..bd68161434a6 100644 --- a/arch/s390/boot/string.c +++ b/arch/s390/boot/string.c @@ -29,6 +29,18 @@ int strncmp(const char *cs, const char *ct, size_t count) return 0; } +ssize_t sized_strscpy(char *dst, const char *src, size_t count) +{ + size_t len; + + if (count == 0) + return -E2BIG; + len = strnlen(src, count - 1); + memcpy(dst, src, len); + dst[len] = '\0'; + return src[len] ? -E2BIG : len; +} + void *memset64(uint64_t *s, uint64_t v, size_t count) { uint64_t *xs = s; diff --git a/arch/s390/boot/trampoline.S b/arch/s390/boot/trampoline.S new file mode 100644 index 000000000000..1cb5adf005ea --- /dev/null +++ b/arch/s390/boot/trampoline.S @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> + +# This function is identical to __load_psw(), but the lx-symbols GDB command +# puts a breakpoint on it, so it needs to be kept separate. +SYM_CODE_START(jump_to_kernel) + lpswe 0(%r2) +SYM_CODE_END(jump_to_kernel) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 3fa28db2fe59..cea3de4dce8c 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "vmem: " fmt +#include <linux/cpufeature.h> #include <linux/sched/task.h> #include <linux/pgtable.h> #include <linux/kasan.h> @@ -9,10 +11,12 @@ #include <asm/ctlreg.h> #include <asm/physmem_info.h> #include <asm/maccess.h> +#include <asm/machine.h> #include <asm/abs_lowcore.h> #include "decompressor.h" #include "boot.h" +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) struct ctlreg __bootdata_preserved(s390_invalid_asce); #ifdef CONFIG_PROC_FS @@ -31,12 +35,42 @@ enum populate_mode { POPULATE_IDENTITY, POPULATE_KERNEL, #ifdef CONFIG_KASAN + /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */ POPULATE_KASAN_MAP_SHADOW, POPULATE_KASAN_ZERO_SHADOW, POPULATE_KASAN_SHALLOW #endif }; +#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t +static inline const char *get_populate_mode_name(enum populate_mode t) +{ + switch (t) { + POPULATE_MODE_NAME(NONE); + POPULATE_MODE_NAME(DIRECT); + POPULATE_MODE_NAME(LOWCORE); + POPULATE_MODE_NAME(ABS_LOWCORE); + POPULATE_MODE_NAME(IDENTITY); + POPULATE_MODE_NAME(KERNEL); +#ifdef CONFIG_KASAN + POPULATE_MODE_NAME(KASAN_MAP_SHADOW); + POPULATE_MODE_NAME(KASAN_ZERO_SHADOW); + POPULATE_MODE_NAME(KASAN_SHALLOW); +#endif + default: + return "UNKNOWN"; + } +} + +static bool is_kasan_populate_mode(enum populate_mode mode) +{ +#ifdef CONFIG_KASAN + return mode >= POPULATE_KASAN_MAP_SHADOW; +#else + return false; +#endif +} + static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); #ifdef CONFIG_KASAN @@ -52,9 +86,12 @@ static pte_t pte_z; static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) { - start = PAGE_ALIGN_DOWN(__sha(start)); - end = PAGE_ALIGN(__sha(end)); - pgtable_populate(start, end, mode); + unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start)); + unsigned long sha_end = PAGE_ALIGN(__sha(end)); + + boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode), + start, end, sha_start, sha_end); + pgtable_populate(sha_start, sha_end, mode); } static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end) @@ -63,13 +100,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); unsigned long memgap_start = 0; - unsigned long untracked_end; unsigned long start, end; int i; pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); - if (!machine.has_nx) - pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); @@ -93,15 +127,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW); kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW); kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_end = VMALLOC_START; - /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); - } else { - untracked_end = MODULES_VADDR; - } + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); /* populate kasan shadow for untracked memory */ - kasan_populate((unsigned long)__identity_va(ident_map_size), untracked_end, + kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START, POPULATE_KASAN_ZERO_SHADOW); kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); } @@ -208,7 +237,7 @@ static void *boot_crst_alloc(unsigned long val) unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; unsigned long *table; - table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size); crst_table_init(table, val); __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; @@ -224,7 +253,7 @@ static pte_t *boot_pte_alloc(void) * during POPULATE_KASAN_MAP_SHADOW when EDAT is off */ if (!pte_leftover) { - pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; __arch_set_page_dat(pte, 1); } else { @@ -236,11 +265,12 @@ static pte_t *boot_pte_alloc(void) return pte; } -static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) +static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size, + enum populate_mode mode) { switch (mode) { case POPULATE_NONE: - return -1; + return INVALID_PHYS_ADDR; case POPULATE_DIRECT: return addr; case POPULATE_LOWCORE: @@ -253,38 +283,64 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m return __identity_pa(addr); #ifdef CONFIG_KASAN case POPULATE_KASAN_MAP_SHADOW: - addr = physmem_alloc_top_down(RR_VMEM, size, size); - memset((void *)addr, 0, size); - return addr; + /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */ + addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE); + if (addr) { + memset((void *)addr, 0, size); + return addr; + } + return INVALID_PHYS_ADDR; #endif default: - return -1; + return INVALID_PHYS_ADDR; } } -static bool large_allowed(enum populate_mode mode) +static bool large_page_mapping_allowed(enum populate_mode mode) { - return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL); + switch (mode) { + case POPULATE_DIRECT: + case POPULATE_IDENTITY: + case POPULATE_KERNEL: +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: +#endif + return true; + default: + return false; + } } -static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; + + if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PUD_SIZE)) + return INVALID_PHYS_ADDR; - return machine.has_edat2 && large_allowed(mode) && - IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE); + return pa; } -static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; - return machine.has_edat1 && large_allowed(mode) && - IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE); + if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PMD_SIZE)) + return INVALID_PHYS_ADDR; + + return pa; } static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, @@ -298,10 +354,8 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e if (pte_none(*pte)) { if (kasan_pte_populate_zero_shadow(pte, mode)) continue; - entry = __pte(_pa(addr, PAGE_SIZE, mode)); + entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL); - if (!machine.has_nx) - entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); set_pte(pte, entry); pages++; } @@ -313,7 +367,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pmd_t *pmd, entry; pte_t *pte; @@ -323,11 +377,10 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e if (pmd_none(*pmd)) { if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) continue; - if (can_large_pmd(pmd, addr, next, mode)) { - entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); + pa = try_get_large_pmd_pa(pmd, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pmd(pa); entry = set_pmd_bit(entry, SEGMENT_KERNEL); - if (!machine.has_nx) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmd, entry); pages++; continue; @@ -346,7 +399,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pud_t *pud, entry; pmd_t *pmd; @@ -356,11 +409,10 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e if (pud_none(*pud)) { if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) continue; - if (can_large_pud(pud, addr, next, mode)) { - entry = __pud(_pa(addr, _REGION3_SIZE, mode)); + pa = try_get_large_pud_pa(pud, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pud(pa); entry = set_pud_bit(entry, REGION3_KERNEL); - if (!machine.has_nx) - entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); set_pud(pud, entry); pages++; continue; @@ -402,6 +454,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat pgd_t *pgd; p4d_t *p4d; + if (!is_kasan_populate_mode(mode)) { + boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n", + get_populate_mode_name(mode), addr, end, + resolve_pa_may_alloc(addr, 0, mode), + resolve_pa_may_alloc(end - 1, 0, mode) + 1); + } + pgd = pgd_offset(&init_mm, addr); for (; addr < end; addr = next, pgd++) { next = pgd_addr_end(addr, end); @@ -459,7 +518,7 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); - if (relocate_lowcore) + if (machine_has_relocated_lowcore()) lowcore_address = LOWCORE_ALT_ADDRESS; /* @@ -471,6 +530,9 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l lowcore_address + sizeof(struct lowcore), POPULATE_LOWCORE); for_each_physmem_usable_range(i, &start, &end) { + /* Do not map lowcore with identity mapping */ + if (!start) + start = sizeof(struct lowcore); pgtable_populate((unsigned long)__identity_va(start), (unsigned long)__identity_va(end), POPULATE_IDENTITY); diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index 66670212a361..50988022f9ea 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -40,6 +40,7 @@ SECTIONS *(.rodata.*) _erodata = . ; } + EXCEPTION_TABLE(16) .got : { *(.got) } @@ -165,7 +166,6 @@ SECTIONS /DISCARD/ : { COMMON_DISCARDS *(.eh_frame) - *(__ex_table) *(*__ksymtab*) *(___kcrctab*) } diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index d8d227ab82de..5e616bc988ac 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -5,6 +5,7 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y @@ -19,6 +20,7 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_SCHED_PROXY_EXEC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y @@ -38,11 +40,11 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DM_CRYPT=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 @@ -92,7 +94,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y @@ -107,6 +108,7 @@ CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_ZONE_DEVICE=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y CONFIG_ANON_VMA_NAME=y @@ -225,17 +227,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_CT=m CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HL=m CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m @@ -321,16 +325,8 @@ CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -343,15 +339,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m +CONFIG_IP_SCTP=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -386,6 +376,7 @@ CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m @@ -395,6 +386,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -405,6 +399,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y @@ -469,6 +466,7 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set +CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -500,6 +498,7 @@ CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_OVPN=m CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m @@ -627,8 +626,17 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m +CONFIG_DEV_DAX=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -723,11 +731,10 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y @@ -740,13 +747,16 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" +CONFIG_FORTIFY_SOURCE=y +CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_SELFTESTS=y +CONFIG_CRYPTO_SELFTESTS_FULL=y +CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m @@ -756,7 +766,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -770,11 +779,9 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m @@ -782,7 +789,6 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m @@ -795,16 +801,11 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3_256_S390=m CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -813,15 +814,13 @@ CONFIG_PKEY_EP11=m CONFIG_PKEY_PCKMO=m CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m +CONFIG_CRYPTO_PHMAC_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC32_SELFTEST=y -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m +CONFIG_TRACE_MMIO_ACCESS=y CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y @@ -879,6 +878,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_GRAPH_RETVAL=y +CONFIG_FUNCTION_GRAPH_RETADDR=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y @@ -891,12 +891,14 @@ CONFIG_USER_EVENTS=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set +CONFIG_FTRACE_SORT_STARTUP_TEST=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m CONFIG_SAMPLE_FTRACE_OPS=m CONFIG_DEBUG_ENTRY=y +CONFIG_STRICT_MM_TYPECHECKS=y CONFIG_CIO_INJECT=y CONFIG_KUNIT=m CONFIG_KUNIT_DEBUGFS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 6c2f2bb4fbf8..094599cdaf4d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -4,6 +4,7 @@ CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y @@ -17,6 +18,7 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_SCHED_PROXY_EXEC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y @@ -36,16 +38,16 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y +CONFIG_CRASH_DM_CRYPT=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y -CONFIG_HZ_100=y +CONFIG_HZ_1000=y CONFIG_CERT_STORE=y CONFIG_EXPOLINE=y CONFIG_EXPOLINE_AUTO=y @@ -86,7 +88,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set @@ -99,6 +100,7 @@ CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_ZONE_DEVICE=y CONFIG_PERCPU_STATS=y CONFIG_ANON_VMA_NAME=y CONFIG_USERFAULTFD=y @@ -216,17 +218,19 @@ CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_CT=m CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HL=m CONFIG_NETFILTER_XT_TARGET_HMARK=m CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_NETMAP=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m CONFIG_NETFILTER_XT_TARGET_TEE=m CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m @@ -312,16 +316,8 @@ CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_FIB_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -334,15 +330,9 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m +CONFIG_IP_SCTP=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -376,6 +366,7 @@ CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m @@ -385,6 +376,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -395,6 +389,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y @@ -459,6 +456,7 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set +CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -490,6 +488,7 @@ CONFIG_DM_VDO=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_OVPN=m CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m @@ -617,8 +616,17 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m +CONFIG_DEV_DAX=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -672,7 +680,6 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_INODE64=y CONFIG_TMPFS_QUOTA=y CONFIG_HUGETLBFS=y -CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m @@ -710,6 +717,7 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y @@ -725,14 +733,14 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_SELFTESTS=y +CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m @@ -742,7 +750,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -756,11 +763,9 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m -CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_MD4=m @@ -768,7 +773,6 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m @@ -782,16 +786,11 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3_256_S390=m CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -800,15 +799,13 @@ CONFIG_PKEY_EP11=m CONFIG_PKEY_PCKMO=m CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m +CONFIG_CRYPTO_PHMAC_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 @@ -829,6 +826,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_GRAPH_RETVAL=y +CONFIG_FUNCTION_GRAPH_RETADDR=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config index 84c2b551e992..cefbe2ba1228 100644 --- a/arch/s390/configs/kasan.config +++ b/arch/s390/configs/kasan.config @@ -1,4 +1,4 @@ # Help: Enable KASan for debugging CONFIG_KASAN=y CONFIG_KASAN_INLINE=y -CONFIG_KASAN_VMALLOC=y +CONFIG_KERNEL_IMAGE_BASE=0x7FFFE0000000 diff --git a/arch/s390/configs/mmtypes.config b/arch/s390/configs/mmtypes.config new file mode 100644 index 000000000000..fe32b442d789 --- /dev/null +++ b/arch/s390/configs/mmtypes.config @@ -0,0 +1,2 @@ +# Help: Enable strict memory management typechecks +CONFIG_STRICT_MM_TYPECHECKS=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index bcbaa069de96..ed0b137353ad 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,5 +1,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_POSIX_AUX_CLOCKS=y CONFIG_BPF_SYSCALL=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set @@ -11,7 +12,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_KEXEC=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=2 -CONFIG_HZ_100=y +CONFIG_HZ_1000=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set # CONFIG_AP is not set @@ -62,7 +63,6 @@ CONFIG_ZFCP=y # CONFIG_INOTIFY_USER is not set # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_ZLIB_DFLTCC is not set CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y @@ -71,7 +71,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y -# CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig index d3eb3a233693..03f73fbd38b6 100644 --- a/arch/s390/crypto/Kconfig +++ b/arch/s390/crypto/Kconfig @@ -2,54 +2,8 @@ menu "Accelerated Cryptographic Algorithms for CPU (s390)" -config CRYPTO_CRC32_S390 - tristate "CRC32c and CRC32" - depends on S390 - select CRYPTO_HASH - select CRC32 - help - CRC32c and CRC32 CRC algorithms - - Architecture: s390 - - It is available with IBM z13 or later. - -config CRYPTO_SHA512_S390 - tristate "Hash functions: SHA-384 and SHA-512" - depends on S390 - select CRYPTO_HASH - help - SHA-384 and SHA-512 secure hash algorithms (FIPS 180) - - Architecture: s390 - - It is available as of z10. - -config CRYPTO_SHA1_S390 - tristate "Hash functions: SHA-1" - depends on S390 - select CRYPTO_HASH - help - SHA-1 secure hash algorithm (FIPS 180) - - Architecture: s390 - - It is available as of z990. - -config CRYPTO_SHA256_S390 - tristate "Hash functions: SHA-224 and SHA-256" - depends on S390 - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: s390 - - It is available as of z9. - config CRYPTO_SHA3_256_S390 tristate "Hash functions: SHA3-224 and SHA3-256" - depends on S390 select CRYPTO_HASH help SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202) @@ -60,7 +14,6 @@ config CRYPTO_SHA3_256_S390 config CRYPTO_SHA3_512_S390 tristate "Hash functions: SHA3-384 and SHA3-512" - depends on S390 select CRYPTO_HASH help SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202) @@ -71,7 +24,6 @@ config CRYPTO_SHA3_512_S390 config CRYPTO_GHASH_S390 tristate "Hash functions: GHASH" - depends on S390 select CRYPTO_HASH help GCM GHASH hash function (NIST SP800-38D) @@ -82,7 +34,6 @@ config CRYPTO_GHASH_S390 config CRYPTO_AES_S390 tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM" - depends on S390 select CRYPTO_ALGAPI select CRYPTO_SKCIPHER help @@ -104,7 +55,6 @@ config CRYPTO_AES_S390 config CRYPTO_DES_S390 tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR" - depends on S390 select CRYPTO_ALGAPI select CRYPTO_SKCIPHER select CRYPTO_LIB_DES @@ -119,22 +69,8 @@ config CRYPTO_DES_S390 As of z990 the ECB and CBC mode are hardware accelerated. As of z196 the CTR mode is hardware accelerated. -config CRYPTO_CHACHA_S390 - tristate "Ciphers: ChaCha20" - depends on S390 - select CRYPTO_SKCIPHER - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA - help - Length-preserving cipher: ChaCha20 stream cipher (RFC 7539) - - Architecture: s390 - - It is available as of z13. - config CRYPTO_HMAC_S390 tristate "Keyed-hash message authentication code: HMAC" - depends on S390 select CRYPTO_HASH help s390 specific HMAC hardware support for SHA224, SHA256, SHA384 and diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index a0cb96937c3d..998f4b656b18 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -3,20 +3,13 @@ # Cryptographic API # -obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o -obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o -obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o -obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o -obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o +obj-$(CONFIG_CRYPTO_PHMAC_S390) += phmac_s390.o obj-y += arch_random.o - -crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o -chacha_s390-y := chacha-glue.o chacha-s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 9c46b1b630b1..5d36f4020dfa 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -66,7 +66,6 @@ struct s390_xts_ctx { struct gcm_sg_walk { struct scatter_walk walk; unsigned int walk_bytes; - u8 *walk_ptr; unsigned int walk_bytes_remain; u8 buf[AES_BLOCK_SIZE]; unsigned int buf_bytes; @@ -787,29 +786,20 @@ static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg, static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw) { - struct scatterlist *nextsg; - - gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain); - while (!gw->walk_bytes) { - nextsg = sg_next(gw->walk.sg); - if (!nextsg) - return 0; - scatterwalk_start(&gw->walk, nextsg); - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); - } - gw->walk_ptr = scatterwalk_map(&gw->walk); + if (gw->walk_bytes_remain == 0) + return 0; + gw->walk_bytes = scatterwalk_next(&gw->walk, gw->walk_bytes_remain); return gw->walk_bytes; } static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, - unsigned int nbytes) + unsigned int nbytes, bool out) { gw->walk_bytes_remain -= nbytes; - scatterwalk_unmap(gw->walk_ptr); - scatterwalk_advance(&gw->walk, nbytes); - scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); - gw->walk_ptr = NULL; + if (out) + scatterwalk_done_dst(&gw->walk, nbytes); + else + scatterwalk_done_src(&gw->walk, nbytes); } static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) @@ -835,16 +825,16 @@ static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) } if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) { - gw->ptr = gw->walk_ptr; + gw->ptr = gw->walk.addr; gw->nbytes = gw->walk_bytes; goto out; } while (1) { n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes); - memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n); + memcpy(gw->buf + gw->buf_bytes, gw->walk.addr, n); gw->buf_bytes += n; - _gcm_sg_unmap_and_advance(gw, n); + _gcm_sg_unmap_and_advance(gw, n, false); if (gw->buf_bytes >= minbytesneeded) { gw->ptr = gw->buf; gw->nbytes = gw->buf_bytes; @@ -876,13 +866,12 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) } if (gw->walk_bytes >= minbytesneeded) { - gw->ptr = gw->walk_ptr; + gw->ptr = gw->walk.addr; gw->nbytes = gw->walk_bytes; goto out; } - scatterwalk_unmap(gw->walk_ptr); - gw->walk_ptr = NULL; + scatterwalk_unmap(&gw->walk); gw->ptr = gw->buf; gw->nbytes = sizeof(gw->buf); @@ -904,7 +893,7 @@ static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) } else gw->buf_bytes = 0; } else - _gcm_sg_unmap_and_advance(gw, bytesdone); + _gcm_sg_unmap_and_advance(gw, bytesdone, false); return bytesdone; } @@ -921,11 +910,11 @@ static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) if (!_gcm_sg_clamp_and_map(gw)) return i; n = min(gw->walk_bytes, bytesdone - i); - memcpy(gw->walk_ptr, gw->buf + i, n); - _gcm_sg_unmap_and_advance(gw, n); + memcpy(gw->walk.addr, gw->buf + i, n); + _gcm_sg_unmap_and_advance(gw, n, true); } } else - _gcm_sg_unmap_and_advance(gw, bytesdone); + _gcm_sg_unmap_and_advance(gw, bytesdone, true); return bytesdone; } diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index a8a2407381af..083e8d5eada2 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -6,6 +6,7 @@ * Author(s): Harald Freudenberger */ +#include <linux/export.h> #include <linux/kernel.h> #include <linux/atomic.h> #include <linux/random.h> diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c deleted file mode 100644 index f8b0c52e77a4..000000000000 --- a/arch/s390/crypto/chacha-glue.c +++ /dev/null @@ -1,130 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * s390 ChaCha stream cipher. - * - * Copyright IBM Corp. 2021 - */ - -#define KMSG_COMPONENT "chacha_s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <crypto/internal/chacha.h> -#include <crypto/internal/skcipher.h> -#include <crypto/algapi.h> -#include <linux/cpufeature.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/sizes.h> -#include <asm/fpu.h> -#include "chacha-s390.h" - -static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src, - unsigned int nbytes, const u32 *key, - u32 *counter) -{ - DECLARE_KERNEL_FPU_ONSTACK32(vxstate); - - kernel_fpu_begin(&vxstate, KERNEL_VXR); - chacha20_vx(dst, src, nbytes, key, counter); - kernel_fpu_end(&vxstate, KERNEL_VXR); - - *counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; -} - -static int chacha20_s390(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 state[CHACHA_STATE_WORDS] __aligned(16); - struct skcipher_walk walk; - unsigned int nbytes; - int rc; - - rc = skcipher_walk_virt(&walk, req, false); - chacha_init_generic(state, ctx->key, req->iv); - - while (walk.nbytes > 0) { - nbytes = walk.nbytes; - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - if (nbytes <= CHACHA_BLOCK_SIZE) { - chacha_crypt_generic(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - ctx->nrounds); - } else { - chacha20_crypt_s390(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - &state[4], &state[12]); - } - rc = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - return rc; -} - -void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) -{ - /* TODO: implement hchacha_block_arch() in assembly */ - hchacha_block_generic(state, stream, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) -{ - chacha_init_generic(state, key, iv); -} -EXPORT_SYMBOL(chacha_init_arch); - -void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - /* s390 chacha20 implementation has 20 rounds hard-coded, - * it cannot handle a block of data or less, but otherwise - * it can handle data of arbitrary size - */ - if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) - chacha_crypt_generic(state, dst, src, bytes, nrounds); - else - chacha20_crypt_s390(state, dst, src, bytes, - &state[4], &state[12]); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static struct skcipher_alg chacha_algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-s390", - .base.cra_priority = 900, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = chacha20_s390, - .decrypt = chacha20_s390, - } -}; - -static int __init chacha_mod_init(void) -{ - return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? - crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0; -} - -static void __exit chacha_mod_fini(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) - crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)); -} - -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init); -module_exit(chacha_mod_fini); - -MODULE_DESCRIPTION("ChaCha20 stream cipher"); -MODULE_LICENSE("GPL v2"); - -MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S deleted file mode 100644 index 63f3102678c0..000000000000 --- a/arch/s390/crypto/chacha-s390.S +++ /dev/null @@ -1,908 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Original implementation written by Andy Polyakov, @dot-asm. - * This is an adaptation of the original code for kernel use. - * - * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. - */ - -#include <linux/linkage.h> -#include <asm/nospec-insn.h> -#include <asm/fpu-insn.h> - -#define SP %r15 -#define FRAME (16 * 8 + 4 * 8) - - .data - .balign 32 - -SYM_DATA_START_LOCAL(sigma) - .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral - .long 1,0,0,0 - .long 2,0,0,0 - .long 3,0,0,0 - .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap - - .long 0,1,2,3 - .long 0x61707865,0x61707865,0x61707865,0x61707865 # smashed sigma - .long 0x3320646e,0x3320646e,0x3320646e,0x3320646e - .long 0x79622d32,0x79622d32,0x79622d32,0x79622d32 - .long 0x6b206574,0x6b206574,0x6b206574,0x6b206574 -SYM_DATA_END(sigma) - - .previous - - GEN_BR_THUNK %r14 - - .text - -############################################################################# -# void chacha20_vx_4x(u8 *out, counst u8 *inp, size_t len, -# counst u32 *key, const u32 *counter) - -#define OUT %r2 -#define INP %r3 -#define LEN %r4 -#define KEY %r5 -#define COUNTER %r6 - -#define BEPERM %v31 -#define CTR %v26 - -#define K0 %v16 -#define K1 %v17 -#define K2 %v18 -#define K3 %v19 - -#define XA0 %v0 -#define XA1 %v1 -#define XA2 %v2 -#define XA3 %v3 - -#define XB0 %v4 -#define XB1 %v5 -#define XB2 %v6 -#define XB3 %v7 - -#define XC0 %v8 -#define XC1 %v9 -#define XC2 %v10 -#define XC3 %v11 - -#define XD0 %v12 -#define XD1 %v13 -#define XD2 %v14 -#define XD3 %v15 - -#define XT0 %v27 -#define XT1 %v28 -#define XT2 %v29 -#define XT3 %v30 - -SYM_FUNC_START(chacha20_vx_4x) - stmg %r6,%r7,6*8(SP) - - larl %r7,sigma - lhi %r0,10 - lhi %r1,0 - - VL K0,0,,%r7 # load sigma - VL K1,0,,KEY # load key - VL K2,16,,KEY - VL K3,0,,COUNTER # load counter - - VL BEPERM,0x40,,%r7 - VL CTR,0x50,,%r7 - - VLM XA0,XA3,0x60,%r7,4 # load [smashed] sigma - - VREPF XB0,K1,0 # smash the key - VREPF XB1,K1,1 - VREPF XB2,K1,2 - VREPF XB3,K1,3 - - VREPF XD0,K3,0 - VREPF XD1,K3,1 - VREPF XD2,K3,2 - VREPF XD3,K3,3 - VAF XD0,XD0,CTR - - VREPF XC0,K2,0 - VREPF XC1,K2,1 - VREPF XC2,K2,2 - VREPF XC3,K2,3 - -.Loop_4x: - VAF XA0,XA0,XB0 - VX XD0,XD0,XA0 - VERLLF XD0,XD0,16 - - VAF XA1,XA1,XB1 - VX XD1,XD1,XA1 - VERLLF XD1,XD1,16 - - VAF XA2,XA2,XB2 - VX XD2,XD2,XA2 - VERLLF XD2,XD2,16 - - VAF XA3,XA3,XB3 - VX XD3,XD3,XA3 - VERLLF XD3,XD3,16 - - VAF XC0,XC0,XD0 - VX XB0,XB0,XC0 - VERLLF XB0,XB0,12 - - VAF XC1,XC1,XD1 - VX XB1,XB1,XC1 - VERLLF XB1,XB1,12 - - VAF XC2,XC2,XD2 - VX XB2,XB2,XC2 - VERLLF XB2,XB2,12 - - VAF XC3,XC3,XD3 - VX XB3,XB3,XC3 - VERLLF XB3,XB3,12 - - VAF XA0,XA0,XB0 - VX XD0,XD0,XA0 - VERLLF XD0,XD0,8 - - VAF XA1,XA1,XB1 - VX XD1,XD1,XA1 - VERLLF XD1,XD1,8 - - VAF XA2,XA2,XB2 - VX XD2,XD2,XA2 - VERLLF XD2,XD2,8 - - VAF XA3,XA3,XB3 - VX XD3,XD3,XA3 - VERLLF XD3,XD3,8 - - VAF XC0,XC0,XD0 - VX XB0,XB0,XC0 - VERLLF XB0,XB0,7 - - VAF XC1,XC1,XD1 - VX XB1,XB1,XC1 - VERLLF XB1,XB1,7 - - VAF XC2,XC2,XD2 - VX XB2,XB2,XC2 - VERLLF XB2,XB2,7 - - VAF XC3,XC3,XD3 - VX XB3,XB3,XC3 - VERLLF XB3,XB3,7 - - VAF XA0,XA0,XB1 - VX XD3,XD3,XA0 - VERLLF XD3,XD3,16 - - VAF XA1,XA1,XB2 - VX XD0,XD0,XA1 - VERLLF XD0,XD0,16 - - VAF XA2,XA2,XB3 - VX XD1,XD1,XA2 - VERLLF XD1,XD1,16 - - VAF XA3,XA3,XB0 - VX XD2,XD2,XA3 - VERLLF XD2,XD2,16 - - VAF XC2,XC2,XD3 - VX XB1,XB1,XC2 - VERLLF XB1,XB1,12 - - VAF XC3,XC3,XD0 - VX XB2,XB2,XC3 - VERLLF XB2,XB2,12 - - VAF XC0,XC0,XD1 - VX XB3,XB3,XC0 - VERLLF XB3,XB3,12 - - VAF XC1,XC1,XD2 - VX XB0,XB0,XC1 - VERLLF XB0,XB0,12 - - VAF XA0,XA0,XB1 - VX XD3,XD3,XA0 - VERLLF XD3,XD3,8 - - VAF XA1,XA1,XB2 - VX XD0,XD0,XA1 - VERLLF XD0,XD0,8 - - VAF XA2,XA2,XB3 - VX XD1,XD1,XA2 - VERLLF XD1,XD1,8 - - VAF XA3,XA3,XB0 - VX XD2,XD2,XA3 - VERLLF XD2,XD2,8 - - VAF XC2,XC2,XD3 - VX XB1,XB1,XC2 - VERLLF XB1,XB1,7 - - VAF XC3,XC3,XD0 - VX XB2,XB2,XC3 - VERLLF XB2,XB2,7 - - VAF XC0,XC0,XD1 - VX XB3,XB3,XC0 - VERLLF XB3,XB3,7 - - VAF XC1,XC1,XD2 - VX XB0,XB0,XC1 - VERLLF XB0,XB0,7 - brct %r0,.Loop_4x - - VAF XD0,XD0,CTR - - VMRHF XT0,XA0,XA1 # transpose data - VMRHF XT1,XA2,XA3 - VMRLF XT2,XA0,XA1 - VMRLF XT3,XA2,XA3 - VPDI XA0,XT0,XT1,0b0000 - VPDI XA1,XT0,XT1,0b0101 - VPDI XA2,XT2,XT3,0b0000 - VPDI XA3,XT2,XT3,0b0101 - - VMRHF XT0,XB0,XB1 - VMRHF XT1,XB2,XB3 - VMRLF XT2,XB0,XB1 - VMRLF XT3,XB2,XB3 - VPDI XB0,XT0,XT1,0b0000 - VPDI XB1,XT0,XT1,0b0101 - VPDI XB2,XT2,XT3,0b0000 - VPDI XB3,XT2,XT3,0b0101 - - VMRHF XT0,XC0,XC1 - VMRHF XT1,XC2,XC3 - VMRLF XT2,XC0,XC1 - VMRLF XT3,XC2,XC3 - VPDI XC0,XT0,XT1,0b0000 - VPDI XC1,XT0,XT1,0b0101 - VPDI XC2,XT2,XT3,0b0000 - VPDI XC3,XT2,XT3,0b0101 - - VMRHF XT0,XD0,XD1 - VMRHF XT1,XD2,XD3 - VMRLF XT2,XD0,XD1 - VMRLF XT3,XD2,XD3 - VPDI XD0,XT0,XT1,0b0000 - VPDI XD1,XT0,XT1,0b0101 - VPDI XD2,XT2,XT3,0b0000 - VPDI XD3,XT2,XT3,0b0101 - - VAF XA0,XA0,K0 - VAF XB0,XB0,K1 - VAF XC0,XC0,K2 - VAF XD0,XD0,K3 - - VPERM XA0,XA0,XA0,BEPERM - VPERM XB0,XB0,XB0,BEPERM - VPERM XC0,XC0,XC0,BEPERM - VPERM XD0,XD0,XD0,BEPERM - - VLM XT0,XT3,0,INP,0 - - VX XT0,XT0,XA0 - VX XT1,XT1,XB0 - VX XT2,XT2,XC0 - VX XT3,XT3,XD0 - - VSTM XT0,XT3,0,OUT,0 - - la INP,0x40(INP) - la OUT,0x40(OUT) - aghi LEN,-0x40 - - VAF XA0,XA1,K0 - VAF XB0,XB1,K1 - VAF XC0,XC1,K2 - VAF XD0,XD1,K3 - - VPERM XA0,XA0,XA0,BEPERM - VPERM XB0,XB0,XB0,BEPERM - VPERM XC0,XC0,XC0,BEPERM - VPERM XD0,XD0,XD0,BEPERM - - clgfi LEN,0x40 - jl .Ltail_4x - - VLM XT0,XT3,0,INP,0 - - VX XT0,XT0,XA0 - VX XT1,XT1,XB0 - VX XT2,XT2,XC0 - VX XT3,XT3,XD0 - - VSTM XT0,XT3,0,OUT,0 - - la INP,0x40(INP) - la OUT,0x40(OUT) - aghi LEN,-0x40 - je .Ldone_4x - - VAF XA0,XA2,K0 - VAF XB0,XB2,K1 - VAF XC0,XC2,K2 - VAF XD0,XD2,K3 - - VPERM XA0,XA0,XA0,BEPERM - VPERM XB0,XB0,XB0,BEPERM - VPERM XC0,XC0,XC0,BEPERM - VPERM XD0,XD0,XD0,BEPERM - - clgfi LEN,0x40 - jl .Ltail_4x - - VLM XT0,XT3,0,INP,0 - - VX XT0,XT0,XA0 - VX XT1,XT1,XB0 - VX XT2,XT2,XC0 - VX XT3,XT3,XD0 - - VSTM XT0,XT3,0,OUT,0 - - la INP,0x40(INP) - la OUT,0x40(OUT) - aghi LEN,-0x40 - je .Ldone_4x - - VAF XA0,XA3,K0 - VAF XB0,XB3,K1 - VAF XC0,XC3,K2 - VAF XD0,XD3,K3 - - VPERM XA0,XA0,XA0,BEPERM - VPERM XB0,XB0,XB0,BEPERM - VPERM XC0,XC0,XC0,BEPERM - VPERM XD0,XD0,XD0,BEPERM - - clgfi LEN,0x40 - jl .Ltail_4x - - VLM XT0,XT3,0,INP,0 - - VX XT0,XT0,XA0 - VX XT1,XT1,XB0 - VX XT2,XT2,XC0 - VX XT3,XT3,XD0 - - VSTM XT0,XT3,0,OUT,0 - -.Ldone_4x: - lmg %r6,%r7,6*8(SP) - BR_EX %r14 - -.Ltail_4x: - VLR XT0,XC0 - VLR XT1,XD0 - - VST XA0,8*8+0x00,,SP - VST XB0,8*8+0x10,,SP - VST XT0,8*8+0x20,,SP - VST XT1,8*8+0x30,,SP - - lghi %r1,0 - -.Loop_tail_4x: - llgc %r5,0(%r1,INP) - llgc %r6,8*8(%r1,SP) - xr %r6,%r5 - stc %r6,0(%r1,OUT) - la %r1,1(%r1) - brct LEN,.Loop_tail_4x - - lmg %r6,%r7,6*8(SP) - BR_EX %r14 -SYM_FUNC_END(chacha20_vx_4x) - -#undef OUT -#undef INP -#undef LEN -#undef KEY -#undef COUNTER - -#undef BEPERM - -#undef K0 -#undef K1 -#undef K2 -#undef K3 - - -############################################################################# -# void chacha20_vx(u8 *out, counst u8 *inp, size_t len, -# counst u32 *key, const u32 *counter) - -#define OUT %r2 -#define INP %r3 -#define LEN %r4 -#define KEY %r5 -#define COUNTER %r6 - -#define BEPERM %v31 - -#define K0 %v27 -#define K1 %v24 -#define K2 %v25 -#define K3 %v26 - -#define A0 %v0 -#define B0 %v1 -#define C0 %v2 -#define D0 %v3 - -#define A1 %v4 -#define B1 %v5 -#define C1 %v6 -#define D1 %v7 - -#define A2 %v8 -#define B2 %v9 -#define C2 %v10 -#define D2 %v11 - -#define A3 %v12 -#define B3 %v13 -#define C3 %v14 -#define D3 %v15 - -#define A4 %v16 -#define B4 %v17 -#define C4 %v18 -#define D4 %v19 - -#define A5 %v20 -#define B5 %v21 -#define C5 %v22 -#define D5 %v23 - -#define T0 %v27 -#define T1 %v28 -#define T2 %v29 -#define T3 %v30 - -SYM_FUNC_START(chacha20_vx) - clgfi LEN,256 - jle chacha20_vx_4x - stmg %r6,%r7,6*8(SP) - - lghi %r1,-FRAME - lgr %r0,SP - la SP,0(%r1,SP) - stg %r0,0(SP) # back-chain - - larl %r7,sigma - lhi %r0,10 - - VLM K1,K2,0,KEY,0 # load key - VL K3,0,,COUNTER # load counter - - VLM K0,BEPERM,0,%r7,4 # load sigma, increments, ... - -.Loop_outer_vx: - VLR A0,K0 - VLR B0,K1 - VLR A1,K0 - VLR B1,K1 - VLR A2,K0 - VLR B2,K1 - VLR A3,K0 - VLR B3,K1 - VLR A4,K0 - VLR B4,K1 - VLR A5,K0 - VLR B5,K1 - - VLR D0,K3 - VAF D1,K3,T1 # K[3]+1 - VAF D2,K3,T2 # K[3]+2 - VAF D3,K3,T3 # K[3]+3 - VAF D4,D2,T2 # K[3]+4 - VAF D5,D2,T3 # K[3]+5 - - VLR C0,K2 - VLR C1,K2 - VLR C2,K2 - VLR C3,K2 - VLR C4,K2 - VLR C5,K2 - - VLR T1,D1 - VLR T2,D2 - VLR T3,D3 - -.Loop_vx: - VAF A0,A0,B0 - VAF A1,A1,B1 - VAF A2,A2,B2 - VAF A3,A3,B3 - VAF A4,A4,B4 - VAF A5,A5,B5 - VX D0,D0,A0 - VX D1,D1,A1 - VX D2,D2,A2 - VX D3,D3,A3 - VX D4,D4,A4 - VX D5,D5,A5 - VERLLF D0,D0,16 - VERLLF D1,D1,16 - VERLLF D2,D2,16 - VERLLF D3,D3,16 - VERLLF D4,D4,16 - VERLLF D5,D5,16 - - VAF C0,C0,D0 - VAF C1,C1,D1 - VAF C2,C2,D2 - VAF C3,C3,D3 - VAF C4,C4,D4 - VAF C5,C5,D5 - VX B0,B0,C0 - VX B1,B1,C1 - VX B2,B2,C2 - VX B3,B3,C3 - VX B4,B4,C4 - VX B5,B5,C5 - VERLLF B0,B0,12 - VERLLF B1,B1,12 - VERLLF B2,B2,12 - VERLLF B3,B3,12 - VERLLF B4,B4,12 - VERLLF B5,B5,12 - - VAF A0,A0,B0 - VAF A1,A1,B1 - VAF A2,A2,B2 - VAF A3,A3,B3 - VAF A4,A4,B4 - VAF A5,A5,B5 - VX D0,D0,A0 - VX D1,D1,A1 - VX D2,D2,A2 - VX D3,D3,A3 - VX D4,D4,A4 - VX D5,D5,A5 - VERLLF D0,D0,8 - VERLLF D1,D1,8 - VERLLF D2,D2,8 - VERLLF D3,D3,8 - VERLLF D4,D4,8 - VERLLF D5,D5,8 - - VAF C0,C0,D0 - VAF C1,C1,D1 - VAF C2,C2,D2 - VAF C3,C3,D3 - VAF C4,C4,D4 - VAF C5,C5,D5 - VX B0,B0,C0 - VX B1,B1,C1 - VX B2,B2,C2 - VX B3,B3,C3 - VX B4,B4,C4 - VX B5,B5,C5 - VERLLF B0,B0,7 - VERLLF B1,B1,7 - VERLLF B2,B2,7 - VERLLF B3,B3,7 - VERLLF B4,B4,7 - VERLLF B5,B5,7 - - VSLDB C0,C0,C0,8 - VSLDB C1,C1,C1,8 - VSLDB C2,C2,C2,8 - VSLDB C3,C3,C3,8 - VSLDB C4,C4,C4,8 - VSLDB C5,C5,C5,8 - VSLDB B0,B0,B0,4 - VSLDB B1,B1,B1,4 - VSLDB B2,B2,B2,4 - VSLDB B3,B3,B3,4 - VSLDB B4,B4,B4,4 - VSLDB B5,B5,B5,4 - VSLDB D0,D0,D0,12 - VSLDB D1,D1,D1,12 - VSLDB D2,D2,D2,12 - VSLDB D3,D3,D3,12 - VSLDB D4,D4,D4,12 - VSLDB D5,D5,D5,12 - - VAF A0,A0,B0 - VAF A1,A1,B1 - VAF A2,A2,B2 - VAF A3,A3,B3 - VAF A4,A4,B4 - VAF A5,A5,B5 - VX D0,D0,A0 - VX D1,D1,A1 - VX D2,D2,A2 - VX D3,D3,A3 - VX D4,D4,A4 - VX D5,D5,A5 - VERLLF D0,D0,16 - VERLLF D1,D1,16 - VERLLF D2,D2,16 - VERLLF D3,D3,16 - VERLLF D4,D4,16 - VERLLF D5,D5,16 - - VAF C0,C0,D0 - VAF C1,C1,D1 - VAF C2,C2,D2 - VAF C3,C3,D3 - VAF C4,C4,D4 - VAF C5,C5,D5 - VX B0,B0,C0 - VX B1,B1,C1 - VX B2,B2,C2 - VX B3,B3,C3 - VX B4,B4,C4 - VX B5,B5,C5 - VERLLF B0,B0,12 - VERLLF B1,B1,12 - VERLLF B2,B2,12 - VERLLF B3,B3,12 - VERLLF B4,B4,12 - VERLLF B5,B5,12 - - VAF A0,A0,B0 - VAF A1,A1,B1 - VAF A2,A2,B2 - VAF A3,A3,B3 - VAF A4,A4,B4 - VAF A5,A5,B5 - VX D0,D0,A0 - VX D1,D1,A1 - VX D2,D2,A2 - VX D3,D3,A3 - VX D4,D4,A4 - VX D5,D5,A5 - VERLLF D0,D0,8 - VERLLF D1,D1,8 - VERLLF D2,D2,8 - VERLLF D3,D3,8 - VERLLF D4,D4,8 - VERLLF D5,D5,8 - - VAF C0,C0,D0 - VAF C1,C1,D1 - VAF C2,C2,D2 - VAF C3,C3,D3 - VAF C4,C4,D4 - VAF C5,C5,D5 - VX B0,B0,C0 - VX B1,B1,C1 - VX B2,B2,C2 - VX B3,B3,C3 - VX B4,B4,C4 - VX B5,B5,C5 - VERLLF B0,B0,7 - VERLLF B1,B1,7 - VERLLF B2,B2,7 - VERLLF B3,B3,7 - VERLLF B4,B4,7 - VERLLF B5,B5,7 - - VSLDB C0,C0,C0,8 - VSLDB C1,C1,C1,8 - VSLDB C2,C2,C2,8 - VSLDB C3,C3,C3,8 - VSLDB C4,C4,C4,8 - VSLDB C5,C5,C5,8 - VSLDB B0,B0,B0,12 - VSLDB B1,B1,B1,12 - VSLDB B2,B2,B2,12 - VSLDB B3,B3,B3,12 - VSLDB B4,B4,B4,12 - VSLDB B5,B5,B5,12 - VSLDB D0,D0,D0,4 - VSLDB D1,D1,D1,4 - VSLDB D2,D2,D2,4 - VSLDB D3,D3,D3,4 - VSLDB D4,D4,D4,4 - VSLDB D5,D5,D5,4 - brct %r0,.Loop_vx - - VAF A0,A0,K0 - VAF B0,B0,K1 - VAF C0,C0,K2 - VAF D0,D0,K3 - VAF A1,A1,K0 - VAF D1,D1,T1 # +K[3]+1 - - VPERM A0,A0,A0,BEPERM - VPERM B0,B0,B0,BEPERM - VPERM C0,C0,C0,BEPERM - VPERM D0,D0,D0,BEPERM - - clgfi LEN,0x40 - jl .Ltail_vx - - VAF D2,D2,T2 # +K[3]+2 - VAF D3,D3,T3 # +K[3]+3 - VLM T0,T3,0,INP,0 - - VX A0,A0,T0 - VX B0,B0,T1 - VX C0,C0,T2 - VX D0,D0,T3 - - VLM K0,T3,0,%r7,4 # re-load sigma and increments - - VSTM A0,D0,0,OUT,0 - - la INP,0x40(INP) - la OUT,0x40(OUT) - aghi LEN,-0x40 - je .Ldone_vx - - VAF B1,B1,K1 - VAF C1,C1,K2 - - VPERM A0,A1,A1,BEPERM - VPERM B0,B1,B1,BEPERM - VPERM C0,C1,C1,BEPERM - VPERM D0,D1,D1,BEPERM - - clgfi LEN,0x40 - jl .Ltail_vx - - VLM A1,D1,0,INP,0 - - VX A0,A0,A1 - VX B0,B0,B1 - VX C0,C0,C1 - VX D0,D0,D1 - - VSTM A0,D0,0,OUT,0 - - la INP,0x40(INP) - la OUT,0x40(OUT) - aghi LEN,-0x40 - je .Ldone_vx - - VAF A2,A2,K0 - VAF B2,B2,K1 - VAF C2,C2,K2 - - VPERM A0,A2,A2,BEPERM - VPERM B0,B2,B2,BEPERM - VPERM C0,C2,C2,BEPERM - VPERM D0,D2,D2,BEPERM - - clgfi LEN,0x40 - jl .Ltail_vx - - VLM A1,D1,0,INP,0 - - VX A0,A0,A1 - VX B0,B0,B1 - VX C0,C0,C1 - VX D0,D0,D1 - - VSTM A0,D0,0,OUT,0 - - la INP,0x40(INP) - la OUT,0x40(OUT) - aghi LEN,-0x40 - je .Ldone_vx - - VAF A3,A3,K0 - VAF B3,B3,K1 - VAF C3,C3,K2 - VAF D2,K3,T3 # K[3]+3 - - VPERM A0,A3,A3,BEPERM - VPERM B0,B3,B3,BEPERM - VPERM C0,C3,C3,BEPERM - VPERM D0,D3,D3,BEPERM - - clgfi LEN,0x40 - jl .Ltail_vx - - VAF D3,D2,T1 # K[3]+4 - VLM A1,D1,0,INP,0 - - VX A0,A0,A1 - VX B0,B0,B1 - VX C0,C0,C1 - VX D0,D0,D1 - - VSTM A0,D0,0,OUT,0 - - la INP,0x40(INP) - la OUT,0x40(OUT) - aghi LEN,-0x40 - je .Ldone_vx - - VAF A4,A4,K0 - VAF B4,B4,K1 - VAF C4,C4,K2 - VAF D4,D4,D3 # +K[3]+4 - VAF D3,D3,T1 # K[3]+5 - VAF K3,D2,T3 # K[3]+=6 - - VPERM A0,A4,A4,BEPERM - VPERM B0,B4,B4,BEPERM - VPERM C0,C4,C4,BEPERM - VPERM D0,D4,D4,BEPERM - - clgfi LEN,0x40 - jl .Ltail_vx - - VLM A1,D1,0,INP,0 - - VX A0,A0,A1 - VX B0,B0,B1 - VX C0,C0,C1 - VX D0,D0,D1 - - VSTM A0,D0,0,OUT,0 - - la INP,0x40(INP) - la OUT,0x40(OUT) - aghi LEN,-0x40 - je .Ldone_vx - - VAF A5,A5,K0 - VAF B5,B5,K1 - VAF C5,C5,K2 - VAF D5,D5,D3 # +K[3]+5 - - VPERM A0,A5,A5,BEPERM - VPERM B0,B5,B5,BEPERM - VPERM C0,C5,C5,BEPERM - VPERM D0,D5,D5,BEPERM - - clgfi LEN,0x40 - jl .Ltail_vx - - VLM A1,D1,0,INP,0 - - VX A0,A0,A1 - VX B0,B0,B1 - VX C0,C0,C1 - VX D0,D0,D1 - - VSTM A0,D0,0,OUT,0 - - la INP,0x40(INP) - la OUT,0x40(OUT) - lhi %r0,10 - aghi LEN,-0x40 - jne .Loop_outer_vx - -.Ldone_vx: - lmg %r6,%r7,FRAME+6*8(SP) - la SP,FRAME(SP) - BR_EX %r14 - -.Ltail_vx: - VSTM A0,D0,8*8,SP,3 - lghi %r1,0 - -.Loop_tail_vx: - llgc %r5,0(%r1,INP) - llgc %r6,8*8(%r1,SP) - xr %r6,%r5 - stc %r6,0(%r1,OUT) - la %r1,1(%r1) - brct LEN,.Loop_tail_vx - - lmg %r6,%r7,FRAME+6*8(SP) - la SP,FRAME(SP) - BR_EX %r14 -SYM_FUNC_END(chacha20_vx) - -.previous diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/crypto/chacha-s390.h deleted file mode 100644 index 733744ce30f5..000000000000 --- a/arch/s390/crypto/chacha-s390.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * s390 ChaCha stream cipher. - * - * Copyright IBM Corp. 2021 - */ - -#ifndef _CHACHA_S390_H -#define _CHACHA_S390_H - -void chacha20_vx(u8 *out, const u8 *inp, size_t len, const u32 *key, - const u32 *counter); - -#endif /* _CHACHA_S390_H */ diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c deleted file mode 100644 index 89a10337e6ea..000000000000 --- a/arch/s390/crypto/crc32-vx.c +++ /dev/null @@ -1,306 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Crypto-API module for CRC-32 algorithms implemented with the - * z/Architecture Vector Extension Facility. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - */ -#define KMSG_COMPONENT "crc32-vx" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/module.h> -#include <linux/cpufeature.h> -#include <linux/crc32.h> -#include <crypto/internal/hash.h> -#include <asm/fpu.h> -#include "crc32-vx.h" - -#define CRC32_BLOCK_SIZE 1 -#define CRC32_DIGEST_SIZE 4 - -#define VX_MIN_LEN 64 -#define VX_ALIGNMENT 16L -#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) - -struct crc_ctx { - u32 key; -}; - -struct crc_desc_ctx { - u32 crc; -}; - -/* - * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension - * - * Creates a function to perform a particular CRC-32 computation. Depending - * on the message buffer, the hardware-accelerated or software implementation - * is used. Note that the message buffer is aligned to improve fetch - * operations of VECTOR LOAD MULTIPLE instructions. - * - */ -#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ - static u32 __pure ___fname(u32 crc, \ - unsigned char const *data, size_t datalen) \ - { \ - unsigned long prealign, aligned, remaining; \ - DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ - \ - if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ - return ___crc32_sw(crc, data, datalen); \ - \ - if ((unsigned long)data & VX_ALIGN_MASK) { \ - prealign = VX_ALIGNMENT - \ - ((unsigned long)data & VX_ALIGN_MASK); \ - datalen -= prealign; \ - crc = ___crc32_sw(crc, data, prealign); \ - data = (void *)((unsigned long)data + prealign); \ - } \ - \ - aligned = datalen & ~VX_ALIGN_MASK; \ - remaining = datalen & VX_ALIGN_MASK; \ - \ - kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ - crc = ___crc32_vx(crc, data, aligned); \ - kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ - \ - if (remaining) \ - crc = ___crc32_sw(crc, data + aligned, remaining); \ - \ - return crc; \ - } - -DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) -DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) -DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) - - -static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = 0; - return 0; -} - -static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = ~0; - return 0; -} - -static int crc32_vx_init(struct shash_desc *desc) -{ - struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm); - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = mctx->key; - return 0; -} - -static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx = crypto_shash_ctx(tfm); - - if (newkeylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = le32_to_cpu(*(__le32 *)newkey); - return 0; -} - -static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx = crypto_shash_ctx(tfm); - - if (newkeylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = be32_to_cpu(*(__be32 *)newkey); - return 0; -} - -static int crc32le_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__le32 *)out = cpu_to_le32p(&ctx->crc); - return 0; -} - -static int crc32be_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__be32 *)out = cpu_to_be32p(&ctx->crc); - return 0; -} - -static int crc32c_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out = ~cpu_to_le32p(&ctx->crc); - return 0; -} - -static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len)); - return 0; -} - -static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len)); - return 0; -} - -static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); - return 0; -} - - -#define CRC32_VX_FINUP(alg, func) \ - static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ - data, datalen, out); \ - } - -CRC32_VX_FINUP(crc32le, crc32_le_vx) -CRC32_VX_FINUP(crc32be, crc32_be_vx) -CRC32_VX_FINUP(crc32c, crc32c_le_vx) - -#define CRC32_VX_DIGEST(alg, func) \ - static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ - unsigned int len, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ - data, len, out); \ - } - -CRC32_VX_DIGEST(crc32le, crc32_le_vx) -CRC32_VX_DIGEST(crc32be, crc32_be_vx) -CRC32_VX_DIGEST(crc32c, crc32c_le_vx) - -#define CRC32_VX_UPDATE(alg, func) \ - static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen) \ - { \ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \ - ctx->crc = func(ctx->crc, data, datalen); \ - return 0; \ - } - -CRC32_VX_UPDATE(crc32le, crc32_le_vx) -CRC32_VX_UPDATE(crc32be, crc32_be_vx) -CRC32_VX_UPDATE(crc32c, crc32c_le_vx) - - -static struct shash_alg crc32_vx_algs[] = { - /* CRC-32 LE */ - { - .init = crc32_vx_init, - .setkey = crc32_vx_setkey, - .update = crc32le_vx_update, - .final = crc32le_vx_final, - .finup = crc32le_vx_finup, - .digest = crc32le_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32", - .cra_driver_name = "crc32-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_zero, - }, - }, - /* CRC-32 BE */ - { - .init = crc32_vx_init, - .setkey = crc32be_vx_setkey, - .update = crc32be_vx_update, - .final = crc32be_vx_final, - .finup = crc32be_vx_finup, - .digest = crc32be_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32be", - .cra_driver_name = "crc32be-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_zero, - }, - }, - /* CRC-32C LE */ - { - .init = crc32_vx_init, - .setkey = crc32_vx_setkey, - .update = crc32c_vx_update, - .final = crc32c_vx_final, - .finup = crc32c_vx_finup, - .digest = crc32c_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_invert, - }, - }, -}; - - -static int __init crc_vx_mod_init(void) -{ - return crypto_register_shashes(crc32_vx_algs, - ARRAY_SIZE(crc32_vx_algs)); -} - -static void __exit crc_vx_mod_exit(void) -{ - crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); -} - -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init); -module_exit(crc_vx_mod_exit); - -MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>"); -MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility"); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-vx"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/arch/s390/crypto/crc32-vx.h b/arch/s390/crypto/crc32-vx.h deleted file mode 100644 index 652c96e1a822..000000000000 --- a/arch/s390/crypto/crc32-vx.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef _CRC32_VX_S390_H -#define _CRC32_VX_S390_H - -#include <linux/types.h> - -u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size); -u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); -u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); - -#endif /* _CRC32_VX_S390_H */ diff --git a/arch/s390/crypto/crc32be-vx.c b/arch/s390/crypto/crc32be-vx.c deleted file mode 100644 index fed7c9c70d05..000000000000 --- a/arch/s390/crypto/crc32be-vx.c +++ /dev/null @@ -1,174 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Hardware-accelerated CRC-32 variants for Linux on z Systems - * - * Use the z/Architecture Vector Extension Facility to accelerate the - * computing of CRC-32 checksums. - * - * This CRC-32 implementation algorithm processes the most-significant - * bit first (BE). - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - */ - -#include <linux/types.h> -#include <asm/fpu.h> -#include "crc32-vx.h" - -/* Vector register range containing CRC-32 constants */ -#define CONST_R1R2 9 -#define CONST_R3R4 10 -#define CONST_R5 11 -#define CONST_R6 12 -#define CONST_RU_POLY 13 -#define CONST_CRC_POLY 14 - -/* - * The CRC-32 constant block contains reduction constants to fold and - * process particular chunks of the input data stream in parallel. - * - * For the CRC-32 variants, the constants are precomputed according to - * these definitions: - * - * R1 = x4*128+64 mod P(x) - * R2 = x4*128 mod P(x) - * R3 = x128+64 mod P(x) - * R4 = x128 mod P(x) - * R5 = x96 mod P(x) - * R6 = x64 mod P(x) - * - * Barret reduction constant, u, is defined as floor(x**64 / P(x)). - * - * where P(x) is the polynomial in the normal domain and the P'(x) is the - * polynomial in the reversed (bitreflected) domain. - * - * Note that the constant definitions below are extended in order to compute - * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction. - * The rightmost doubleword can be 0 to prevent contribution to the result or - * can be multiplied by 1 to perform an XOR without the need for a separate - * VECTOR EXCLUSIVE OR instruction. - * - * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: - * - * P(x) = 0x04C11DB7 - * P'(x) = 0xEDB88320 - */ - -static unsigned long constants_CRC_32_BE[] = { - 0x08833794c, 0x0e6228b11, /* R1, R2 */ - 0x0c5b9cd4c, 0x0e8a45605, /* R3, R4 */ - 0x0f200aa66, 1UL << 32, /* R5, x32 */ - 0x0490d678d, 1, /* R6, 1 */ - 0x104d101df, 0, /* u */ - 0x104C11DB7, 0, /* P(x) */ -}; - -/** - * crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers - * @crc: Initial CRC value, typically ~0. - * @buf: Input buffer pointer, performance might be improved if the - * buffer is on a doubleword boundary. - * @size: Size of the buffer, must be 64 bytes or greater. - * - * Register usage: - * V0: Initial CRC value and intermediate constants and results. - * V1..V4: Data for CRC computation. - * V5..V8: Next data chunks that are fetched from the input buffer. - * V9..V14: CRC-32 constants. - */ -u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size) -{ - /* Load CRC-32 constants */ - fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE); - fpu_vzero(0); - - /* Load the initial CRC value into the leftmost word of V0. */ - fpu_vlvgf(0, crc, 0); - - /* Load a 64-byte data chunk and XOR with CRC */ - fpu_vlm(1, 4, buf); - fpu_vx(1, 0, 1); - buf += 64; - size -= 64; - - while (size >= 64) { - /* Load the next 64-byte data chunk into V5 to V8 */ - fpu_vlm(5, 8, buf); - - /* - * Perform a GF(2) multiplication of the doublewords in V1 with - * the reduction constants in V0. The intermediate result is - * then folded (accumulated) with the next data chunk in V5 and - * stored in V1. Repeat this step for the register contents - * in V2, V3, and V4 respectively. - */ - fpu_vgfmag(1, CONST_R1R2, 1, 5); - fpu_vgfmag(2, CONST_R1R2, 2, 6); - fpu_vgfmag(3, CONST_R1R2, 3, 7); - fpu_vgfmag(4, CONST_R1R2, 4, 8); - buf += 64; - size -= 64; - } - - /* Fold V1 to V4 into a single 128-bit value in V1 */ - fpu_vgfmag(1, CONST_R3R4, 1, 2); - fpu_vgfmag(1, CONST_R3R4, 1, 3); - fpu_vgfmag(1, CONST_R3R4, 1, 4); - - while (size >= 16) { - fpu_vl(2, buf); - fpu_vgfmag(1, CONST_R3R4, 1, 2); - buf += 16; - size -= 16; - } - - /* - * The R5 constant is used to fold a 128-bit value into an 96-bit value - * that is XORed with the next 96-bit input data chunk. To use a single - * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to - * form an intermediate 96-bit value (with appended zeros) which is then - * XORed with the intermediate reduction result. - */ - fpu_vgfmg(1, CONST_R5, 1); - - /* - * Further reduce the remaining 96-bit value to a 64-bit value using a - * single VGFMG, the rightmost doubleword is multiplied with 0x1. The - * intermediate result is then XORed with the product of the leftmost - * doubleword with R6. The result is a 64-bit value and is subject to - * the Barret reduction. - */ - fpu_vgfmg(1, CONST_R6, 1); - - /* - * The input values to the Barret reduction are the degree-63 polynomial - * in V1 (R(x)), degree-32 generator polynomial, and the reduction - * constant u. The Barret reduction result is the CRC value of R(x) mod - * P(x). - * - * The Barret reduction algorithm is defined as: - * - * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u - * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) - * 3. C(x) = R(x) XOR T2(x) mod x^32 - * - * Note: To compensate the division by x^32, use the vector unpack - * instruction to move the leftmost word into the leftmost doubleword - * of the vector register. The rightmost doubleword is multiplied - * with zero to not contribute to the intermediate results. - */ - - /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ - fpu_vupllf(2, 1); - fpu_vgfmg(2, CONST_RU_POLY, 2); - - /* - * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in - * V2 and XOR the intermediate result, T2(x), with the value in V1. - * The final result is in the rightmost word of V2. - */ - fpu_vupllf(2, 2); - fpu_vgfmag(2, CONST_CRC_POLY, 2, 1); - return fpu_vlgvf(2, 3); -} diff --git a/arch/s390/crypto/crc32le-vx.c b/arch/s390/crypto/crc32le-vx.c deleted file mode 100644 index 2f629f394df7..000000000000 --- a/arch/s390/crypto/crc32le-vx.c +++ /dev/null @@ -1,240 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Hardware-accelerated CRC-32 variants for Linux on z Systems - * - * Use the z/Architecture Vector Extension Facility to accelerate the - * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet - * and Castagnoli. - * - * This CRC-32 implementation algorithm is bitreflected and processes - * the least-significant bit first (Little-Endian). - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - */ - -#include <linux/types.h> -#include <asm/fpu.h> -#include "crc32-vx.h" - -/* Vector register range containing CRC-32 constants */ -#define CONST_PERM_LE2BE 9 -#define CONST_R2R1 10 -#define CONST_R4R3 11 -#define CONST_R5 12 -#define CONST_RU_POLY 13 -#define CONST_CRC_POLY 14 - -/* - * The CRC-32 constant block contains reduction constants to fold and - * process particular chunks of the input data stream in parallel. - * - * For the CRC-32 variants, the constants are precomputed according to - * these definitions: - * - * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 - * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 - * R3 = [(x128+32 mod P'(x) << 32)]' << 1 - * R4 = [(x128-32 mod P'(x) << 32)]' << 1 - * R5 = [(x64 mod P'(x) << 32)]' << 1 - * R6 = [(x32 mod P'(x) << 32)]' << 1 - * - * The bitreflected Barret reduction constant, u', is defined as - * the bit reversal of floor(x**64 / P(x)). - * - * where P(x) is the polynomial in the normal domain and the P'(x) is the - * polynomial in the reversed (bitreflected) domain. - * - * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: - * - * P(x) = 0x04C11DB7 - * P'(x) = 0xEDB88320 - * - * CRC-32C (Castagnoli) polynomials: - * - * P(x) = 0x1EDC6F41 - * P'(x) = 0x82F63B78 - */ - -static unsigned long constants_CRC_32_LE[] = { - 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */ - 0x1c6e41596, 0x154442bd4, /* R2, R1 */ - 0x0ccaa009e, 0x1751997d0, /* R4, R3 */ - 0x0, 0x163cd6124, /* R5 */ - 0x0, 0x1f7011641, /* u' */ - 0x0, 0x1db710641 /* P'(x) << 1 */ -}; - -static unsigned long constants_CRC_32C_LE[] = { - 0x0f0e0d0c0b0a0908, 0x0706050403020100, /* BE->LE mask */ - 0x09e4addf8, 0x740eef02, /* R2, R1 */ - 0x14cd00bd6, 0xf20c0dfe, /* R4, R3 */ - 0x0, 0x0dd45aab8, /* R5 */ - 0x0, 0x0dea713f1, /* u' */ - 0x0, 0x105ec76f0 /* P'(x) << 1 */ -}; - -/** - * crc32_le_vgfm_generic - Compute CRC-32 (LE variant) with vector registers - * @crc: Initial CRC value, typically ~0. - * @buf: Input buffer pointer, performance might be improved if the - * buffer is on a doubleword boundary. - * @size: Size of the buffer, must be 64 bytes or greater. - * @constants: CRC-32 constant pool base pointer. - * - * Register usage: - * V0: Initial CRC value and intermediate constants and results. - * V1..V4: Data for CRC computation. - * V5..V8: Next data chunks that are fetched from the input buffer. - * V9: Constant for BE->LE conversion and shift operations - * V10..V14: CRC-32 constants. - */ -static u32 crc32_le_vgfm_generic(u32 crc, unsigned char const *buf, size_t size, unsigned long *constants) -{ - /* Load CRC-32 constants */ - fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_POLY, constants); - - /* - * Load the initial CRC value. - * - * The CRC value is loaded into the rightmost word of the - * vector register and is later XORed with the LSB portion - * of the loaded input data. - */ - fpu_vzero(0); /* Clear V0 */ - fpu_vlvgf(0, crc, 3); /* Load CRC into rightmost word */ - - /* Load a 64-byte data chunk and XOR with CRC */ - fpu_vlm(1, 4, buf); - fpu_vperm(1, 1, 1, CONST_PERM_LE2BE); - fpu_vperm(2, 2, 2, CONST_PERM_LE2BE); - fpu_vperm(3, 3, 3, CONST_PERM_LE2BE); - fpu_vperm(4, 4, 4, CONST_PERM_LE2BE); - - fpu_vx(1, 0, 1); /* V1 ^= CRC */ - buf += 64; - size -= 64; - - while (size >= 64) { - fpu_vlm(5, 8, buf); - fpu_vperm(5, 5, 5, CONST_PERM_LE2BE); - fpu_vperm(6, 6, 6, CONST_PERM_LE2BE); - fpu_vperm(7, 7, 7, CONST_PERM_LE2BE); - fpu_vperm(8, 8, 8, CONST_PERM_LE2BE); - /* - * Perform a GF(2) multiplication of the doublewords in V1 with - * the R1 and R2 reduction constants in V0. The intermediate - * result is then folded (accumulated) with the next data chunk - * in V5 and stored in V1. Repeat this step for the register - * contents in V2, V3, and V4 respectively. - */ - fpu_vgfmag(1, CONST_R2R1, 1, 5); - fpu_vgfmag(2, CONST_R2R1, 2, 6); - fpu_vgfmag(3, CONST_R2R1, 3, 7); - fpu_vgfmag(4, CONST_R2R1, 4, 8); - buf += 64; - size -= 64; - } - - /* - * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 - * and R4 and accumulating the next 128-bit chunk until a single 128-bit - * value remains. - */ - fpu_vgfmag(1, CONST_R4R3, 1, 2); - fpu_vgfmag(1, CONST_R4R3, 1, 3); - fpu_vgfmag(1, CONST_R4R3, 1, 4); - - while (size >= 16) { - fpu_vl(2, buf); - fpu_vperm(2, 2, 2, CONST_PERM_LE2BE); - fpu_vgfmag(1, CONST_R4R3, 1, 2); - buf += 16; - size -= 16; - } - - /* - * Set up a vector register for byte shifts. The shift value must - * be loaded in bits 1-4 in byte element 7 of a vector register. - * Shift by 8 bytes: 0x40 - * Shift by 4 bytes: 0x20 - */ - fpu_vleib(9, 0x40, 7); - - /* - * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes - * to move R4 into the rightmost doubleword and set the leftmost - * doubleword to 0x1. - */ - fpu_vsrlb(0, CONST_R4R3, 9); - fpu_vleig(0, 1, 0); - - /* - * Compute GF(2) product of V1 and V0. The rightmost doubleword - * of V1 is multiplied with R4. The leftmost doubleword of V1 is - * multiplied by 0x1 and is then XORed with rightmost product. - * Implicitly, the intermediate leftmost product becomes padded - */ - fpu_vgfmg(1, 0, 1); - - /* - * Now do the final 32-bit fold by multiplying the rightmost word - * in V1 with R5 and XOR the result with the remaining bits in V1. - * - * To achieve this by a single VGFMAG, right shift V1 by a word - * and store the result in V2 which is then accumulated. Use the - * vector unpack instruction to load the rightmost half of the - * doubleword into the rightmost doubleword element of V1; the other - * half is loaded in the leftmost doubleword. - * The vector register with CONST_R5 contains the R5 constant in the - * rightmost doubleword and the leftmost doubleword is zero to ignore - * the leftmost product of V1. - */ - fpu_vleib(9, 0x20, 7); /* Shift by words */ - fpu_vsrlb(2, 1, 9); /* Store remaining bits in V2 */ - fpu_vupllf(1, 1); /* Split rightmost doubleword */ - fpu_vgfmag(1, CONST_R5, 1, 2); /* V1 = (V1 * R5) XOR V2 */ - - /* - * Apply a Barret reduction to compute the final 32-bit CRC value. - * - * The input values to the Barret reduction are the degree-63 polynomial - * in V1 (R(x)), degree-32 generator polynomial, and the reduction - * constant u. The Barret reduction result is the CRC value of R(x) mod - * P(x). - * - * The Barret reduction algorithm is defined as: - * - * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u - * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) - * 3. C(x) = R(x) XOR T2(x) mod x^32 - * - * Note: The leftmost doubleword of vector register containing - * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product - * is zero and does not contribute to the final result. - */ - - /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ - fpu_vupllf(2, 1); - fpu_vgfmg(2, CONST_RU_POLY, 2); - - /* - * Compute the GF(2) product of the CRC polynomial with T1(x) in - * V2 and XOR the intermediate result, T2(x), with the value in V1. - * The final result is stored in word element 2 of V2. - */ - fpu_vupllf(2, 2); - fpu_vgfmag(2, CONST_CRC_POLY, 2, 1); - - return fpu_vlgvf(2, 2); -} - -u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size) -{ - return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32_LE[0]); -} - -u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size) -{ - return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32C_LE[0]); -} diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 0800a2a5799f..dcbcee37cb63 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -8,29 +8,28 @@ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> */ +#include <asm/cpacf.h> +#include <crypto/ghash.h> #include <crypto/internal/hash.h> -#include <linux/module.h> #include <linux/cpufeature.h> -#include <asm/cpacf.h> - -#define GHASH_BLOCK_SIZE 16 -#define GHASH_DIGEST_SIZE 16 +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> -struct ghash_ctx { +struct s390_ghash_ctx { u8 key[GHASH_BLOCK_SIZE]; }; -struct ghash_desc_ctx { +struct s390_ghash_desc_ctx { u8 icv[GHASH_BLOCK_SIZE]; u8 key[GHASH_BLOCK_SIZE]; - u8 buffer[GHASH_BLOCK_SIZE]; - u32 bytes; }; static int ghash_init(struct shash_desc *desc) { - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); memset(dctx, 0, sizeof(*dctx)); memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); @@ -41,7 +40,7 @@ static int ghash_init(struct shash_desc *desc) static int ghash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { - struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + struct s390_ghash_ctx *ctx = crypto_shash_ctx(tfm); if (keylen != GHASH_BLOCK_SIZE) return -EINVAL; @@ -54,80 +53,71 @@ static int ghash_setkey(struct crypto_shash *tfm, static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int n; - u8 *buf = dctx->buffer; - - if (dctx->bytes) { - u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); - n = min(srclen, dctx->bytes); - dctx->bytes -= n; - srclen -= n; - - memcpy(pos, src, n); - src += n; + n = srclen & ~(GHASH_BLOCK_SIZE - 1); + cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n); + return srclen - n; +} - if (!dctx->bytes) { - cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, - GHASH_BLOCK_SIZE); - } - } +static void ghash_flush(struct s390_ghash_desc_ctx *dctx, const u8 *src, + unsigned int len) +{ + if (len) { + u8 buf[GHASH_BLOCK_SIZE] = {}; - n = srclen & ~(GHASH_BLOCK_SIZE - 1); - if (n) { - cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n); - src += n; - srclen -= n; + memcpy(buf, src, len); + cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); + memzero_explicit(buf, sizeof(buf)); } +} - if (srclen) { - dctx->bytes = GHASH_BLOCK_SIZE - srclen; - memcpy(buf, src, srclen); - } +static int ghash_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *dst) +{ + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + ghash_flush(dctx, src, len); + memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); return 0; } -static int ghash_flush(struct ghash_desc_ctx *dctx) +static int ghash_export(struct shash_desc *desc, void *out) { - u8 *buf = dctx->buffer; - - if (dctx->bytes) { - u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); - - memset(pos, 0, dctx->bytes); - cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); - dctx->bytes = 0; - } + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + memcpy(out, dctx->icv, GHASH_DIGEST_SIZE); return 0; } -static int ghash_final(struct shash_desc *desc, u8 *dst) +static int ghash_import(struct shash_desc *desc, const void *in) { - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - int ret; + struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - ret = ghash_flush(dctx); - if (!ret) - memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); - return ret; + memcpy(dctx->icv, in, GHASH_DIGEST_SIZE); + memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); + return 0; } static struct shash_alg ghash_alg = { .digestsize = GHASH_DIGEST_SIZE, .init = ghash_init, .update = ghash_update, - .final = ghash_final, + .finup = ghash_finup, .setkey = ghash_setkey, - .descsize = sizeof(struct ghash_desc_ctx), + .export = ghash_export, + .import = ghash_import, + .statesize = sizeof(struct ghash_desc_ctx), + .descsize = sizeof(struct s390_ghash_desc_ctx), .base = { .cra_name = "ghash", .cra_driver_name = "ghash-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_ctxsize = sizeof(struct s390_ghash_ctx), .cra_module = THIS_MODULE, }, }; diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c index bba9a818dfdc..58444da9b004 100644 --- a/arch/s390/crypto/hmac_s390.c +++ b/arch/s390/crypto/hmac_s390.c @@ -9,10 +9,14 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <asm/cpacf.h> -#include <crypto/sha2.h> #include <crypto/internal/hash.h> +#include <crypto/hmac.h> +#include <crypto/sha2.h> #include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kernel.h> #include <linux/module.h> +#include <linux/string.h> /* * KMAC param block layout for sha2 function codes: @@ -71,32 +75,31 @@ union s390_kmac_gr0 { struct s390_kmac_sha2_ctx { u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + MAX_BLOCK_SIZE]; union s390_kmac_gr0 gr0; - u8 buf[MAX_BLOCK_SIZE]; - unsigned int buflen; + u64 buflen[2]; }; /* * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize */ -static inline void kmac_sha2_set_imbl(u8 *param, unsigned int buflen, - unsigned int blocksize) +static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo, + u64 buflen_hi, unsigned int blocksize) { u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize); switch (blocksize) { case SHA256_BLOCK_SIZE: - *(u64 *)imbl = (u64)buflen * BITS_PER_BYTE; + *(u64 *)imbl = buflen_lo * BITS_PER_BYTE; break; case SHA512_BLOCK_SIZE: - *(u128 *)imbl = (u128)buflen * BITS_PER_BYTE; + *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3; break; default: break; } } -static int hash_key(const u8 *in, unsigned int inlen, - u8 *digest, unsigned int digestsize) +static int hash_data(const u8 *in, unsigned int inlen, + u8 *digest, unsigned int digestsize, bool final) { unsigned long func; union { @@ -123,19 +126,23 @@ static int hash_key(const u8 *in, unsigned int inlen, switch (digestsize) { case SHA224_DIGEST_SIZE: - func = CPACF_KLMD_SHA_256; + func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256; PARAM_INIT(256, 224, inlen * 8); + if (!final) + digestsize = SHA256_DIGEST_SIZE; break; case SHA256_DIGEST_SIZE: - func = CPACF_KLMD_SHA_256; + func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256; PARAM_INIT(256, 256, inlen * 8); break; case SHA384_DIGEST_SIZE: - func = CPACF_KLMD_SHA_512; + func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512; PARAM_INIT(512, 384, inlen * 8); + if (!final) + digestsize = SHA512_DIGEST_SIZE; break; case SHA512_DIGEST_SIZE: - func = CPACF_KLMD_SHA_512; + func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512; PARAM_INIT(512, 512, inlen * 8); break; default: @@ -151,6 +158,12 @@ static int hash_key(const u8 *in, unsigned int inlen, return 0; } +static int hash_key(const u8 *in, unsigned int inlen, + u8 *digest, unsigned int digestsize) +{ + return hash_data(in, inlen, digest, digestsize, true); +} + static int s390_hmac_sha2_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { @@ -176,7 +189,8 @@ static int s390_hmac_sha2_init(struct shash_desc *desc) memcpy(ctx->param + SHA2_KEY_OFFSET(bs), tfm_ctx->key, bs); - ctx->buflen = 0; + ctx->buflen[0] = 0; + ctx->buflen[1] = 0; ctx->gr0.reg = 0; switch (crypto_shash_digestsize(desc->tfm)) { case SHA224_DIGEST_SIZE: @@ -203,48 +217,31 @@ static int s390_hmac_sha2_update(struct shash_desc *desc, { struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); unsigned int bs = crypto_shash_blocksize(desc->tfm); - unsigned int offset, n; - - /* check current buffer */ - offset = ctx->buflen % bs; - ctx->buflen += len; - if (offset + len < bs) - goto store; - - /* process one stored block */ - if (offset) { - n = bs - offset; - memcpy(ctx->buf + offset, data, n); - ctx->gr0.iimp = 1; - _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs); - data += n; - len -= n; - offset = 0; - } - /* process as many blocks as possible */ - if (len >= bs) { - n = (len / bs) * bs; - ctx->gr0.iimp = 1; - _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n); - data += n; - len -= n; - } -store: - /* store incomplete block in buffer */ - if (len) - memcpy(ctx->buf + offset, data, len); + unsigned int n = round_down(len, bs); - return 0; + ctx->buflen[0] += n; + if (ctx->buflen[0] < n) + ctx->buflen[1]++; + + /* process as many blocks as possible */ + ctx->gr0.iimp = 1; + _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n); + return len - n; } -static int s390_hmac_sha2_final(struct shash_desc *desc, u8 *out) +static int s390_hmac_sha2_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); unsigned int bs = crypto_shash_blocksize(desc->tfm); + ctx->buflen[0] += len; + if (ctx->buflen[0] < len) + ctx->buflen[1]++; + ctx->gr0.iimp = 0; - kmac_sha2_set_imbl(ctx->param, ctx->buflen, bs); - _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, ctx->buflen % bs); + kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs); + _cpacf_kmac(&ctx->gr0.reg, ctx->param, src, len); memcpy(out, ctx->param, crypto_shash_digestsize(desc->tfm)); return 0; @@ -262,7 +259,7 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc, return rc; ctx->gr0.iimp = 0; - kmac_sha2_set_imbl(ctx->param, len, + kmac_sha2_set_imbl(ctx->param, len, 0, crypto_shash_blocksize(desc->tfm)); _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, len); memcpy(out, ctx->param, ds); @@ -270,22 +267,93 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc, return 0; } -#define S390_HMAC_SHA2_ALG(x) { \ +static int s390_hmac_export_zero(struct shash_desc *desc, void *out) +{ + struct crypto_shash *tfm = desc->tfm; + u8 ipad[SHA512_BLOCK_SIZE]; + struct s390_hmac_ctx *ctx; + unsigned int bs; + int err, i; + + ctx = crypto_shash_ctx(tfm); + bs = crypto_shash_blocksize(tfm); + for (i = 0; i < bs; i++) + ipad[i] = ctx->key[i] ^ HMAC_IPAD_VALUE; + + err = hash_data(ipad, bs, out, crypto_shash_digestsize(tfm), false); + memzero_explicit(ipad, sizeof(ipad)); + return err; +} + +static int s390_hmac_export(struct shash_desc *desc, void *out) +{ + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int bs = crypto_shash_blocksize(desc->tfm); + unsigned int ds = bs / 2; + u64 lo = ctx->buflen[0]; + union { + u8 *u8; + u64 *u64; + } p = { .u8 = out }; + int err = 0; + + if (!ctx->gr0.ikp) + err = s390_hmac_export_zero(desc, out); + else + memcpy(p.u8, ctx->param, ds); + p.u8 += ds; + lo += bs; + put_unaligned(lo, p.u64++); + if (ds == SHA512_DIGEST_SIZE) + put_unaligned(ctx->buflen[1] + (lo < bs), p.u64); + return err; +} + +static int s390_hmac_import(struct shash_desc *desc, const void *in) +{ + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int bs = crypto_shash_blocksize(desc->tfm); + unsigned int ds = bs / 2; + union { + const u8 *u8; + const u64 *u64; + } p = { .u8 = in }; + u64 lo; + int err; + + err = s390_hmac_sha2_init(desc); + memcpy(ctx->param, p.u8, ds); + p.u8 += ds; + lo = get_unaligned(p.u64++); + ctx->buflen[0] = lo - bs; + if (ds == SHA512_DIGEST_SIZE) + ctx->buflen[1] = get_unaligned(p.u64) - (lo < bs); + if (ctx->buflen[0] | ctx->buflen[1]) + ctx->gr0.ikp = 1; + return err; +} + +#define S390_HMAC_SHA2_ALG(x, ss) { \ .fc = CPACF_KMAC_HMAC_SHA_##x, \ .alg = { \ .init = s390_hmac_sha2_init, \ .update = s390_hmac_sha2_update, \ - .final = s390_hmac_sha2_final, \ + .finup = s390_hmac_sha2_finup, \ .digest = s390_hmac_sha2_digest, \ .setkey = s390_hmac_sha2_setkey, \ + .export = s390_hmac_export, \ + .import = s390_hmac_import, \ .descsize = sizeof(struct s390_kmac_sha2_ctx), \ .halg = { \ + .statesize = ss, \ .digestsize = SHA##x##_DIGEST_SIZE, \ .base = { \ .cra_name = "hmac(sha" #x ")", \ .cra_driver_name = "hmac_s390_sha" #x, \ .cra_blocksize = SHA##x##_BLOCK_SIZE, \ .cra_priority = 400, \ + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | \ + CRYPTO_AHASH_ALG_FINUP_MAX, \ .cra_ctxsize = sizeof(struct s390_hmac_ctx), \ .cra_module = THIS_MODULE, \ }, \ @@ -298,10 +366,10 @@ static struct s390_hmac_alg { unsigned int fc; struct shash_alg alg; } s390_hmac_algs[] = { - S390_HMAC_SHA2_ALG(224), - S390_HMAC_SHA2_ALG(256), - S390_HMAC_SHA2_ALG(384), - S390_HMAC_SHA2_ALG(512), + S390_HMAC_SHA2_ALG(224, sizeof(struct crypto_sha256_state)), + S390_HMAC_SHA2_ALG(256, sizeof(struct crypto_sha256_state)), + S390_HMAC_SHA2_ALG(384, SHA512_STATE_SIZE), + S390_HMAC_SHA2_ALG(512, SHA512_STATE_SIZE), }; static __always_inline void _s390_hmac_algs_unregister(void) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 511093713a6f..a624a43a2b54 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -5,7 +5,7 @@ * s390 implementation of the AES Cipher Algorithm with protected keys. * * s390 Version: - * Copyright IBM Corp. 2017, 2023 + * Copyright IBM Corp. 2017, 2025 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Harald Freudenberger <freude@de.ibm.com> */ @@ -13,16 +13,18 @@ #define KMSG_COMPONENT "paes_s390" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <crypto/aes.h> -#include <crypto/algapi.h> -#include <linux/bug.h> -#include <linux/err.h> -#include <linux/module.h> +#include <linux/atomic.h> #include <linux/cpufeature.h> +#include <linux/delay.h> +#include <linux/err.h> #include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/module.h> #include <linux/mutex.h> #include <linux/spinlock.h> -#include <linux/delay.h> +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/engine.h> #include <crypto/internal/skcipher.h> #include <crypto/xts.h> #include <asm/cpacf.h> @@ -44,23 +46,61 @@ static DEFINE_MUTEX(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; +static struct crypto_engine *paes_crypto_engine; +#define MAX_QLEN 10 + +/* + * protected key specific stuff + */ + struct paes_protkey { u32 type; u32 len; u8 protkey[PXTS_256_PROTKEY_SIZE]; }; -struct key_blob { - /* - * Small keys will be stored in the keybuf. Larger keys are - * stored in extra allocated memory. In both cases does - * key point to the memory where the key is stored. - * The code distinguishes by checking keylen against - * sizeof(keybuf). See the two following helper functions. - */ - u8 *key; - u8 keybuf[128]; +#define PK_STATE_NO_KEY 0 +#define PK_STATE_CONVERT_IN_PROGRESS 1 +#define PK_STATE_VALID 2 + +struct s390_paes_ctx { + /* source key material used to derive a protected key from */ + u8 keybuf[PAES_MAX_KEYSIZE]; + unsigned int keylen; + + /* cpacf function code to use with this protected key type */ + long fc; + + /* nr of requests enqueued via crypto engine which use this tfm ctx */ + atomic_t via_engine_ctr; + + /* spinlock to atomic read/update all the following fields */ + spinlock_t pk_lock; + + /* see PK_STATE* defines above, < 0 holds convert failure rc */ + int pk_state; + /* if state is valid, pk holds the protected key */ + struct paes_protkey pk; +}; + +struct s390_pxts_ctx { + /* source key material used to derive a protected key from */ + u8 keybuf[2 * PAES_MAX_KEYSIZE]; unsigned int keylen; + + /* cpacf function code to use with this protected key type */ + long fc; + + /* nr of requests enqueued via crypto engine which use this tfm ctx */ + atomic_t via_engine_ctr; + + /* spinlock to atomic read/update all the following fields */ + spinlock_t pk_lock; + + /* see PK_STATE* defines above, < 0 holds convert failure rc */ + int pk_state; + /* if state is valid, pk[] hold(s) the protected key(s) */ + struct paes_protkey pk[2]; }; /* @@ -89,214 +129,370 @@ static inline u32 make_clrkey_token(const u8 *ck, size_t cklen, u8 *dest) return sizeof(*token) + cklen; } -static inline int _key_to_kb(struct key_blob *kb, - const u8 *key, - unsigned int keylen) +/* + * paes_ctx_setkey() - Set key value into context, maybe construct + * a clear key token digestible by pkey from a clear key value. + */ +static inline int paes_ctx_setkey(struct s390_paes_ctx *ctx, + const u8 *key, unsigned int keylen) { + if (keylen > sizeof(ctx->keybuf)) + return -EINVAL; + switch (keylen) { case 16: case 24: case 32: /* clear key value, prepare pkey clear key token in keybuf */ - memset(kb->keybuf, 0, sizeof(kb->keybuf)); - kb->keylen = make_clrkey_token(key, keylen, kb->keybuf); - kb->key = kb->keybuf; + memset(ctx->keybuf, 0, sizeof(ctx->keybuf)); + ctx->keylen = make_clrkey_token(key, keylen, ctx->keybuf); break; default: /* other key material, let pkey handle this */ - if (keylen <= sizeof(kb->keybuf)) - kb->key = kb->keybuf; - else { - kb->key = kmalloc(keylen, GFP_KERNEL); - if (!kb->key) - return -ENOMEM; - } - memcpy(kb->key, key, keylen); - kb->keylen = keylen; + memcpy(ctx->keybuf, key, keylen); + ctx->keylen = keylen; break; } return 0; } -static inline int _xts_key_to_kb(struct key_blob *kb, - const u8 *key, - unsigned int keylen) +/* + * pxts_ctx_setkey() - Set key value into context, maybe construct + * a clear key token digestible by pkey from a clear key value. + */ +static inline int pxts_ctx_setkey(struct s390_pxts_ctx *ctx, + const u8 *key, unsigned int keylen) { size_t cklen = keylen / 2; - memset(kb->keybuf, 0, sizeof(kb->keybuf)); + if (keylen > sizeof(ctx->keybuf)) + return -EINVAL; switch (keylen) { case 32: case 64: /* clear key value, prepare pkey clear key tokens in keybuf */ - kb->key = kb->keybuf; - kb->keylen = make_clrkey_token(key, cklen, kb->key); - kb->keylen += make_clrkey_token(key + cklen, cklen, - kb->key + kb->keylen); + memset(ctx->keybuf, 0, sizeof(ctx->keybuf)); + ctx->keylen = make_clrkey_token(key, cklen, ctx->keybuf); + ctx->keylen += make_clrkey_token(key + cklen, cklen, + ctx->keybuf + ctx->keylen); break; default: /* other key material, let pkey handle this */ - if (keylen <= sizeof(kb->keybuf)) { - kb->key = kb->keybuf; - } else { - kb->key = kmalloc(keylen, GFP_KERNEL); - if (!kb->key) - return -ENOMEM; - } - memcpy(kb->key, key, keylen); - kb->keylen = keylen; + memcpy(ctx->keybuf, key, keylen); + ctx->keylen = keylen; break; } return 0; } -static inline void _free_kb_keybuf(struct key_blob *kb) +/* + * Convert the raw key material into a protected key via PKEY api. + * This function may sleep - don't call in non-sleeping context. + */ +static inline int convert_key(const u8 *key, unsigned int keylen, + struct paes_protkey *pk) { - if (kb->key && kb->key != kb->keybuf - && kb->keylen > sizeof(kb->keybuf)) { - kfree_sensitive(kb->key); - kb->key = NULL; + int rc, i; + + pk->len = sizeof(pk->protkey); + + /* + * In case of a busy card retry with increasing delay + * of 200, 400, 800 and 1600 ms - in total 3 s. + */ + for (rc = -EIO, i = 0; rc && i < 5; i++) { + if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) { + rc = -EINTR; + goto out; + } + rc = pkey_key2protkey(key, keylen, + pk->protkey, &pk->len, &pk->type, + PKEY_XFLAG_NOMEMALLOC); } - memzero_explicit(kb->keybuf, sizeof(kb->keybuf)); + +out: + pr_debug("rc=%d\n", rc); + return rc; } -struct s390_paes_ctx { - struct key_blob kb; +/* + * (Re-)Convert the raw key material from the ctx into a protected key + * via convert_key() function. Update the pk_state, pk_type, pk_len + * and the protected key in the tfm context. + * Please note this function may be invoked concurrently with the very + * same tfm context. The pk_lock spinlock in the context ensures an + * atomic update of the pk and the pk state but does not guarantee any + * order of update. So a fresh converted valid protected key may get + * updated with an 'old' expired key value. As the cpacf instructions + * detect this, refuse to operate with an invalid key and the calling + * code triggers a (re-)conversion this does no harm. This may lead to + * unnecessary additional conversion but never to invalid data on en- + * or decrypt operations. + */ +static int paes_convert_key(struct s390_paes_ctx *ctx) +{ struct paes_protkey pk; - spinlock_t pk_lock; - unsigned long fc; -}; + int rc; -struct s390_pxts_ctx { - struct key_blob kb; - struct paes_protkey pk[2]; - spinlock_t pk_lock; - unsigned long fc; -}; + spin_lock_bh(&ctx->pk_lock); + ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS; + spin_unlock_bh(&ctx->pk_lock); -static inline int __paes_keyblob2pkey(const u8 *key, unsigned int keylen, - struct paes_protkey *pk) -{ - int i, rc = -EIO; + rc = convert_key(ctx->keybuf, ctx->keylen, &pk); - /* try three times in case of busy card */ - for (i = 0; rc && i < 3; i++) { - if (rc == -EBUSY && in_task()) { - if (msleep_interruptible(1000)) - return -EINTR; - } - rc = pkey_key2protkey(key, keylen, pk->protkey, &pk->len, - &pk->type); + /* update context */ + spin_lock_bh(&ctx->pk_lock); + if (rc) { + ctx->pk_state = rc; + } else { + ctx->pk_state = PK_STATE_VALID; + ctx->pk = pk; } + spin_unlock_bh(&ctx->pk_lock); + memzero_explicit(&pk, sizeof(pk)); + pr_debug("rc=%d\n", rc); return rc; } -static inline int __paes_convert_key(struct s390_paes_ctx *ctx) +/* + * (Re-)Convert the raw xts key material from the ctx into a + * protected key via convert_key() function. Update the pk_state, + * pk_type, pk_len and the protected key in the tfm context. + * See also comments on function paes_convert_key. + */ +static int pxts_convert_key(struct s390_pxts_ctx *ctx) { - struct paes_protkey pk; + struct paes_protkey pk0, pk1; + size_t split_keylen; int rc; - pk.len = sizeof(pk.protkey); - rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk); + spin_lock_bh(&ctx->pk_lock); + ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS; + spin_unlock_bh(&ctx->pk_lock); + + rc = convert_key(ctx->keybuf, ctx->keylen, &pk0); if (rc) - return rc; + goto out; + + switch (pk0.type) { + case PKEY_KEYTYPE_AES_128: + case PKEY_KEYTYPE_AES_256: + /* second keytoken required */ + if (ctx->keylen % 2) { + rc = -EINVAL; + goto out; + } + split_keylen = ctx->keylen / 2; + rc = convert_key(ctx->keybuf + split_keylen, + split_keylen, &pk1); + if (rc) + goto out; + if (pk0.type != pk1.type) { + rc = -EINVAL; + goto out; + } + break; + case PKEY_KEYTYPE_AES_XTS_128: + case PKEY_KEYTYPE_AES_XTS_256: + /* single key */ + pk1.type = 0; + break; + default: + /* unsupported protected keytype */ + rc = -EINVAL; + goto out; + } +out: + /* update context */ spin_lock_bh(&ctx->pk_lock); - memcpy(&ctx->pk, &pk, sizeof(pk)); + if (rc) { + ctx->pk_state = rc; + } else { + ctx->pk_state = PK_STATE_VALID; + ctx->pk[0] = pk0; + ctx->pk[1] = pk1; + } spin_unlock_bh(&ctx->pk_lock); - return 0; + memzero_explicit(&pk0, sizeof(pk0)); + memzero_explicit(&pk1, sizeof(pk1)); + pr_debug("rc=%d\n", rc); + return rc; } -static int ecb_paes_init(struct crypto_skcipher *tfm) -{ - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); +/* + * PAES ECB implementation + */ - ctx->kb.key = NULL; - spin_lock_init(&ctx->pk_lock); +struct ecb_param { + u8 key[PAES_256_PROTKEY_SIZE]; +} __packed; - return 0; -} +struct s390_pecb_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + struct ecb_param param; +}; -static void ecb_paes_exit(struct crypto_skcipher *tfm) +static int ecb_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - - _free_kb_keybuf(&ctx->kb); -} - -static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx) -{ - unsigned long fc; + long fc; int rc; - rc = __paes_convert_key(ctx); + /* set raw key into context */ + rc = paes_ctx_setkey(ctx, in_key, key_len); if (rc) - return rc; + goto out; - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0; + /* convert key into protected key */ + rc = paes_convert_key(ctx); + if (rc) + goto out; - /* Check if the function code is available */ + /* Pick the correct function code based on the protected key type */ + switch (ctx->pk.type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KM_PAES_128; + break; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KM_PAES_192; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KM_PAES_256; + break; + default: + fc = 0; + break; + } ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; - return ctx->fc ? 0 : -EINVAL; + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int ecb_paes_do_crypt(struct s390_paes_ctx *ctx, + struct s390_pecb_req_ctx *req_ctx, + bool maysleep) { - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - int rc; - - _free_kb_keybuf(&ctx->kb); - rc = _key_to_kb(&ctx->kb, in_key, key_len); + struct ecb_param *param = &req_ctx->param; + struct skcipher_walk *walk = &req_ctx->walk; + unsigned int nbytes, n, k; + int pk_state, rc = 0; + + if (!req_ctx->param_init_done) { + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + req_ctx->param_init_done = true; + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + } if (rc) - return rc; + goto out; - return __ecb_paes_set_key(ctx); + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) != 0) { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + k = cpacf_km(ctx->fc | req_ctx->modifier, param, + walk->dst.virt.addr, walk->src.virt.addr, n); + if (k) + rc = skcipher_walk_done(walk, nbytes - k); + if (k < n) { + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) + goto out; + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + spin_unlock_bh(&ctx->pk_lock); + } + } + +out: + pr_debug("rc=%d\n", rc); + return rc; } static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier) { + struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - } param; - struct skcipher_walk walk; - unsigned int nbytes, n, k; + struct skcipher_walk *walk = &req_ctx->walk; int rc; - rc = skcipher_walk_virt(&walk, req, false); + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ + + rc = skcipher_walk_virt(walk, req, false); if (rc) - return rc; + goto out; - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); + req_ctx->modifier = modifier; + req_ctx->param_init_done = false; - while ((nbytes = walk.nbytes) != 0) { - /* only use complete blocks */ - n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, ¶m, - walk.dst.virt.addr, walk.src.virt.addr, n); - if (k) - rc = skcipher_walk_done(&walk, nbytes - k); - if (k < n) { - if (__paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - } + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = ecb_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); return rc; } @@ -310,112 +506,256 @@ static int ecb_paes_decrypt(struct skcipher_request *req) return ecb_paes_crypt(req, CPACF_DECRYPT); } -static struct skcipher_alg ecb_paes_alg = { - .base.cra_name = "ecb(paes)", - .base.cra_driver_name = "ecb-paes-s390", - .base.cra_priority = 401, /* combo: aes + ecb + 1 */ - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct s390_paes_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list), - .init = ecb_paes_init, - .exit = ecb_paes_exit, - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .setkey = ecb_paes_set_key, - .encrypt = ecb_paes_encrypt, - .decrypt = ecb_paes_decrypt, -}; - -static int cbc_paes_init(struct crypto_skcipher *tfm) +static int ecb_paes_init(struct crypto_skcipher *tfm) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - ctx->kb.key = NULL; + memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->pk_lock); + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pecb_req_ctx)); + return 0; } -static void cbc_paes_exit(struct crypto_skcipher *tfm) +static void ecb_paes_exit(struct crypto_skcipher *tfm) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb); + memzero_explicit(ctx, sizeof(*ctx)); } -static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx) +static int ecb_paes_do_one_request(struct crypto_engine *engine, void *areq) { - unsigned long fc; + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; int rc; - rc = __paes_convert_key(ctx); - if (rc) - return rc; + /* walk has already been prepared */ + + rc = ecb_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); + } - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0; + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); + return rc; +} - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0; +static struct skcipher_engine_alg ecb_paes_alg = { + .base = { + .base.cra_name = "ecb(paes)", + .base.cra_driver_name = "ecb-paes-s390", + .base.cra_priority = 401, /* combo: aes + ecb + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.base.cra_list), + .init = ecb_paes_init, + .exit = ecb_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .setkey = ecb_paes_setkey, + .encrypt = ecb_paes_encrypt, + .decrypt = ecb_paes_decrypt, + }, + .op = { + .do_one_request = ecb_paes_do_one_request, + }, +}; - return ctx->fc ? 0 : -EINVAL; -} +/* + * PAES CBC implementation + */ + +struct cbc_param { + u8 iv[AES_BLOCK_SIZE]; + u8 key[PAES_256_PROTKEY_SIZE]; +} __packed; + +struct s390_pcbc_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + struct cbc_param param; +}; -static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int cbc_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + long fc; int rc; - _free_kb_keybuf(&ctx->kb); - rc = _key_to_kb(&ctx->kb, in_key, key_len); + /* set raw key into context */ + rc = paes_ctx_setkey(ctx, in_key, key_len); if (rc) - return rc; + goto out; - return __cbc_paes_set_key(ctx); + /* convert raw key into protected key */ + rc = paes_convert_key(ctx); + if (rc) + goto out; + + /* Pick the correct function code based on the protected key type */ + switch (ctx->pk.type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KMC_PAES_128; + break; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KMC_PAES_192; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KMC_PAES_256; + break; + default: + fc = 0; + break; + } + ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0; + + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) +static int cbc_paes_do_crypt(struct s390_paes_ctx *ctx, + struct s390_pcbc_req_ctx *req_ctx, + bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct { - u8 iv[AES_BLOCK_SIZE]; - u8 key[PAES_256_PROTKEY_SIZE]; - } param; - struct skcipher_walk walk; + struct cbc_param *param = &req_ctx->param; + struct skcipher_walk *walk = &req_ctx->walk; unsigned int nbytes, n, k; - int rc; - - rc = skcipher_walk_virt(&walk, req, false); + int pk_state, rc = 0; + + if (!req_ctx->param_init_done) { + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + req_ctx->param_init_done = true; + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + } if (rc) - return rc; + goto out; - memcpy(param.iv, walk.iv, AES_BLOCK_SIZE); - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); + memcpy(param->iv, walk->iv, AES_BLOCK_SIZE); - while ((nbytes = walk.nbytes) != 0) { + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_kmc(ctx->fc | modifier, ¶m, - walk.dst.virt.addr, walk.src.virt.addr, n); + k = cpacf_kmc(ctx->fc | req_ctx->modifier, param, + walk->dst.virt.addr, walk->src.virt.addr, n); if (k) { - memcpy(walk.iv, param.iv, AES_BLOCK_SIZE); - rc = skcipher_walk_done(&walk, nbytes - k); + memcpy(walk->iv, param->iv, AES_BLOCK_SIZE); + rc = skcipher_walk_done(walk, nbytes - k); } if (k < n) { - if (__paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) + goto out; spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); spin_unlock_bh(&ctx->pk_lock); } } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) +{ + struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; + + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ + + rc = skcipher_walk_virt(walk, req, false); + if (rc) + goto out; + + req_ctx->modifier = modifier; + req_ctx->param_init_done = false; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = cbc_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); return rc; } @@ -429,496 +769,882 @@ static int cbc_paes_decrypt(struct skcipher_request *req) return cbc_paes_crypt(req, CPACF_DECRYPT); } -static struct skcipher_alg cbc_paes_alg = { - .base.cra_name = "cbc(paes)", - .base.cra_driver_name = "cbc-paes-s390", - .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct s390_paes_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list), - .init = cbc_paes_init, - .exit = cbc_paes_exit, - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = cbc_paes_set_key, - .encrypt = cbc_paes_encrypt, - .decrypt = cbc_paes_decrypt, -}; - -static int xts_paes_init(struct crypto_skcipher *tfm) +static int cbc_paes_init(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - ctx->kb.key = NULL; + memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->pk_lock); + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pcbc_req_ctx)); + return 0; } -static void xts_paes_exit(struct crypto_skcipher *tfm) +static void cbc_paes_exit(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb); + memzero_explicit(ctx, sizeof(*ctx)); } -static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx) +static int cbc_paes_do_one_request(struct crypto_engine *engine, void *areq) { - struct paes_protkey pk0, pk1; - size_t split_keylen; + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; int rc; - pk0.len = sizeof(pk0.protkey); - pk1.len = sizeof(pk1.protkey); - - rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk0); - if (rc) - return rc; + /* walk has already been prepared */ + + rc = cbc_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); + } - switch (pk0.type) { - case PKEY_KEYTYPE_AES_128: - case PKEY_KEYTYPE_AES_256: - /* second keytoken required */ - if (ctx->kb.keylen % 2) - return -EINVAL; - split_keylen = ctx->kb.keylen / 2; + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); + return rc; +} - rc = __paes_keyblob2pkey(ctx->kb.key + split_keylen, - split_keylen, &pk1); - if (rc) - return rc; +static struct skcipher_engine_alg cbc_paes_alg = { + .base = { + .base.cra_name = "cbc(paes)", + .base.cra_driver_name = "cbc-paes-s390", + .base.cra_priority = 402, /* cbc-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.base.cra_list), + .init = cbc_paes_init, + .exit = cbc_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_paes_setkey, + .encrypt = cbc_paes_encrypt, + .decrypt = cbc_paes_decrypt, + }, + .op = { + .do_one_request = cbc_paes_do_one_request, + }, +}; - if (pk0.type != pk1.type) - return -EINVAL; - break; - case PKEY_KEYTYPE_AES_XTS_128: - case PKEY_KEYTYPE_AES_XTS_256: - /* single key */ - pk1.type = 0; - break; - default: - /* unsupported protected keytype */ - return -EINVAL; - } +/* + * PAES CTR implementation + */ - spin_lock_bh(&ctx->pk_lock); - ctx->pk[0] = pk0; - ctx->pk[1] = pk1; - spin_unlock_bh(&ctx->pk_lock); +struct ctr_param { + u8 key[PAES_256_PROTKEY_SIZE]; +} __packed; - return 0; -} +struct s390_pctr_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + struct ctr_param param; +}; -static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx) +static int ctr_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) { - unsigned long fc; + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + long fc; int rc; - rc = __xts_paes_convert_key(ctx); + /* set raw key into context */ + rc = paes_ctx_setkey(ctx, in_key, key_len); if (rc) - return rc; + goto out; + + /* convert raw key into protected key */ + rc = paes_convert_key(ctx); + if (rc) + goto out; /* Pick the correct function code based on the protected key type */ - switch (ctx->pk[0].type) { + switch (ctx->pk.type) { case PKEY_KEYTYPE_AES_128: - fc = CPACF_KM_PXTS_128; - break; - case PKEY_KEYTYPE_AES_256: - fc = CPACF_KM_PXTS_256; + fc = CPACF_KMCTR_PAES_128; break; - case PKEY_KEYTYPE_AES_XTS_128: - fc = CPACF_KM_PXTS_128_FULL; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KMCTR_PAES_192; break; - case PKEY_KEYTYPE_AES_XTS_256: - fc = CPACF_KM_PXTS_256_FULL; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KMCTR_PAES_256; break; default: fc = 0; break; } + ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0; - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static inline unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) +{ + unsigned int i, n; + + /* only use complete blocks, max. PAGE_SIZE */ + memcpy(ctrptr, iv, AES_BLOCK_SIZE); + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) { + memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE); + crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE); + ctrptr += AES_BLOCK_SIZE; + } + return n; +} + +static int ctr_paes_do_crypt(struct s390_paes_ctx *ctx, + struct s390_pctr_req_ctx *req_ctx, + bool maysleep) +{ + struct ctr_param *param = &req_ctx->param; + struct skcipher_walk *walk = &req_ctx->walk; + u8 buf[AES_BLOCK_SIZE], *ctrptr; + unsigned int nbytes, n, k; + int pk_state, locked, rc = 0; + + if (!req_ctx->param_init_done) { + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + req_ctx->param_init_done = true; + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + } + if (rc) + goto out; + + locked = mutex_trylock(&ctrblk_lock); + + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + n = AES_BLOCK_SIZE; + if (nbytes >= 2 * AES_BLOCK_SIZE && locked) + n = __ctrblk_init(ctrblk, walk->iv, nbytes); + ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv; + k = cpacf_kmctr(ctx->fc, param, walk->dst.virt.addr, + walk->src.virt.addr, n, ctrptr); + if (k) { + if (ctrptr == ctrblk) + memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(walk->iv, AES_BLOCK_SIZE); + rc = skcipher_walk_done(walk, nbytes - k); + } + if (k < n) { + if (!maysleep) { + if (locked) + mutex_unlock(&ctrblk_lock); + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) { + if (locked) + mutex_unlock(&ctrblk_lock); + goto out; + } + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + spin_unlock_bh(&ctx->pk_lock); + } + } + if (locked) + mutex_unlock(&ctrblk_lock); + + /* final block may be < AES_BLOCK_SIZE, copy only nbytes */ + if (nbytes) { + memset(buf, 0, AES_BLOCK_SIZE); + memcpy(buf, walk->src.virt.addr, nbytes); + while (1) { + if (cpacf_kmctr(ctx->fc, param, buf, + buf, AES_BLOCK_SIZE, + walk->iv) == AES_BLOCK_SIZE) + break; + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) + goto out; + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + spin_unlock_bh(&ctx->pk_lock); + } + memcpy(walk->dst.virt.addr, buf, nbytes); + crypto_inc(walk->iv, AES_BLOCK_SIZE); + rc = skcipher_walk_done(walk, 0); + } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int ctr_paes_crypt(struct skcipher_request *req) +{ + struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; + + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ + + rc = skcipher_walk_virt(walk, req, false); + if (rc) + goto out; + + req_ctx->param_init_done = false; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = ctr_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); + return rc; +} + +static int ctr_paes_init(struct crypto_skcipher *tfm) +{ + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->pk_lock); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pctr_req_ctx)); + + return 0; +} + +static void ctr_paes_exit(struct crypto_skcipher *tfm) +{ + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + + memzero_explicit(ctx, sizeof(*ctx)); +} + +static int ctr_paes_do_one_request(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; - return ctx->fc ? 0 : -EINVAL; + /* walk has already been prepared */ + + rc = ctr_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); + } + + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); + return rc; } -static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int in_keylen) +static struct skcipher_engine_alg ctr_paes_alg = { + .base = { + .base.cra_name = "ctr(paes)", + .base.cra_driver_name = "ctr-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.base.cra_list), + .init = ctr_paes_init, + .exit = ctr_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_paes_setkey, + .encrypt = ctr_paes_crypt, + .decrypt = ctr_paes_crypt, + .chunksize = AES_BLOCK_SIZE, + }, + .op = { + .do_one_request = ctr_paes_do_one_request, + }, +}; + +/* + * PAES XTS implementation + */ + +struct xts_full_km_param { + u8 key[64]; + u8 tweak[16]; + u8 nap[16]; + u8 wkvp[32]; +} __packed; + +struct xts_km_param { + u8 key[PAES_256_PROTKEY_SIZE]; + u8 init[16]; +} __packed; + +struct xts_pcc_param { + u8 key[PAES_256_PROTKEY_SIZE]; + u8 tweak[16]; + u8 block[16]; + u8 bit[16]; + u8 xts[16]; +} __packed; + +struct s390_pxts_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + union { + struct xts_full_km_param full_km_param; + struct xts_km_param km_param; + } param; +}; + +static int xts_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int in_keylen) { struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); u8 ckey[2 * AES_MAX_KEY_SIZE]; unsigned int ckey_len; + long fc; int rc; if ((in_keylen == 32 || in_keylen == 64) && xts_verify_key(tfm, in_key, in_keylen)) return -EINVAL; - _free_kb_keybuf(&ctx->kb); - rc = _xts_key_to_kb(&ctx->kb, in_key, in_keylen); + /* set raw key into context */ + rc = pxts_ctx_setkey(ctx, in_key, in_keylen); if (rc) - return rc; + goto out; - rc = __xts_paes_set_key(ctx); + /* convert raw key(s) into protected key(s) */ + rc = pxts_convert_key(ctx); if (rc) - return rc; + goto out; /* - * It is not possible on a single protected key (e.g. full AES-XTS) to - * check, if k1 and k2 are the same. - */ - if (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128 || - ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_256) - return 0; - /* * xts_verify_key verifies the key length is not odd and makes * sure that the two keys are not the same. This can be done - * on the two protected keys as well + * on the two protected keys as well - but not for full xts keys. */ - ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? - AES_KEYSIZE_128 : AES_KEYSIZE_256; - memcpy(ckey, ctx->pk[0].protkey, ckey_len); - memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len); - return xts_verify_key(tfm, ckey, 2*ckey_len); + if (ctx->pk[0].type == PKEY_KEYTYPE_AES_128 || + ctx->pk[0].type == PKEY_KEYTYPE_AES_256) { + ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? + AES_KEYSIZE_128 : AES_KEYSIZE_256; + memcpy(ckey, ctx->pk[0].protkey, ckey_len); + memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len); + rc = xts_verify_key(tfm, ckey, 2 * ckey_len); + memzero_explicit(ckey, sizeof(ckey)); + if (rc) + goto out; + } + + /* Pick the correct function code based on the protected key type */ + switch (ctx->pk[0].type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KM_PXTS_128; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KM_PXTS_256; + break; + case PKEY_KEYTYPE_AES_XTS_128: + fc = CPACF_KM_PXTS_128_FULL; + break; + case PKEY_KEYTYPE_AES_XTS_256: + fc = CPACF_KM_PXTS_256_FULL; + break; + default: + fc = 0; + break; + } + ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int paes_xts_crypt_full(struct skcipher_request *req, - unsigned long modifier) +static int xts_paes_do_crypt_fullkey(struct s390_pxts_ctx *ctx, + struct s390_pxts_req_ctx *req_ctx, + bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct xts_full_km_param *param = &req_ctx->param.full_km_param; + struct skcipher_walk *walk = &req_ctx->walk; unsigned int keylen, offset, nbytes, n, k; - struct { - u8 key[64]; - u8 tweak[16]; - u8 nap[16]; - u8 wkvp[32]; - } fxts_param = { - .nap = {0}, - }; - struct skcipher_walk walk; - int rc; + int rc = 0; - rc = skcipher_walk_virt(&walk, req, false); - if (rc) - return rc; + /* + * The calling function xts_paes_do_crypt() ensures the + * protected key state is always PK_STATE_VALID when this + * function is invoked. + */ keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 0; - spin_lock_bh(&ctx->pk_lock); - memcpy(fxts_param.key + offset, ctx->pk[0].protkey, keylen); - memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen, - sizeof(fxts_param.wkvp)); - spin_unlock_bh(&ctx->pk_lock); - memcpy(fxts_param.tweak, walk.iv, sizeof(fxts_param.tweak)); - fxts_param.nap[0] = 0x01; /* initial alpha power (1, little-endian) */ + if (!req_ctx->param_init_done) { + memset(param, 0, sizeof(*param)); + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); + memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp)); + spin_unlock_bh(&ctx->pk_lock); + memcpy(param->tweak, walk->iv, sizeof(param->tweak)); + param->nap[0] = 0x01; /* initial alpha power (1, little-endian) */ + req_ctx->param_init_done = true; + } - while ((nbytes = walk.nbytes) != 0) { + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, fxts_param.key + offset, - walk.dst.virt.addr, walk.src.virt.addr, n); + k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset, + walk->dst.virt.addr, walk->src.virt.addr, n); if (k) - rc = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(walk, nbytes - k); if (k < n) { - if (__xts_paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = pxts_convert_key(ctx); + if (rc) + goto out; spin_lock_bh(&ctx->pk_lock); - memcpy(fxts_param.key + offset, ctx->pk[0].protkey, - keylen); - memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen, - sizeof(fxts_param.wkvp)); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); + memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp)); spin_unlock_bh(&ctx->pk_lock); } } +out: + pr_debug("rc=%d\n", rc); return rc; } -static int paes_xts_crypt(struct skcipher_request *req, unsigned long modifier) +static inline int __xts_2keys_prep_param(struct s390_pxts_ctx *ctx, + struct xts_km_param *param, + struct skcipher_walk *walk, + unsigned int keylen, + unsigned int offset, bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct xts_pcc_param pcc_param; + unsigned long cc = 1; + int rc = 0; + + while (cc) { + memset(&pcc_param, 0, sizeof(pcc_param)); + memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + spin_lock_bh(&ctx->pk_lock); + memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); + spin_unlock_bh(&ctx->pk_lock); + cc = cpacf_pcc(ctx->fc, pcc_param.key + offset); + if (cc) { + if (!maysleep) { + rc = -EKEYEXPIRED; + break; + } + rc = pxts_convert_key(ctx); + if (rc) + break; + continue; + } + memcpy(param->init, pcc_param.xts, 16); + } + + memzero_explicit(pcc_param.key, sizeof(pcc_param.key)); + return rc; +} + +static int xts_paes_do_crypt_2keys(struct s390_pxts_ctx *ctx, + struct s390_pxts_req_ctx *req_ctx, + bool maysleep) +{ + struct xts_km_param *param = &req_ctx->param.km_param; + struct skcipher_walk *walk = &req_ctx->walk; unsigned int keylen, offset, nbytes, n, k; - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - u8 tweak[16]; - u8 block[16]; - u8 bit[16]; - u8 xts[16]; - } pcc_param; - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - u8 init[16]; - } xts_param; - struct skcipher_walk walk; - int rc; + int rc = 0; - rc = skcipher_walk_virt(&walk, req, false); - if (rc) - return rc; + /* + * The calling function xts_paes_do_crypt() ensures the + * protected key state is always PK_STATE_VALID when this + * function is invoked. + */ keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0; - memset(&pcc_param, 0, sizeof(pcc_param)); - memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak)); - spin_lock_bh(&ctx->pk_lock); - memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen); - memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen); - spin_unlock_bh(&ctx->pk_lock); - cpacf_pcc(ctx->fc, pcc_param.key + offset); - memcpy(xts_param.init, pcc_param.xts, 16); + if (!req_ctx->param_init_done) { + rc = __xts_2keys_prep_param(ctx, param, walk, + keylen, offset, maysleep); + if (rc) + goto out; + req_ctx->param_init_done = true; + } - while ((nbytes = walk.nbytes) != 0) { + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, xts_param.key + offset, - walk.dst.virt.addr, walk.src.virt.addr, n); + k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset, + walk->dst.virt.addr, walk->src.virt.addr, n); if (k) - rc = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(walk, nbytes - k); if (k < n) { - if (__xts_paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = pxts_convert_key(ctx); + if (rc) + goto out; spin_lock_bh(&ctx->pk_lock); - memcpy(xts_param.key + offset, - ctx->pk[0].protkey, keylen); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); spin_unlock_bh(&ctx->pk_lock); } } +out: + pr_debug("rc=%d\n", rc); return rc; } -static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) +static int xts_paes_do_crypt(struct s390_pxts_ctx *ctx, + struct s390_pxts_req_ctx *req_ctx, + bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + int pk_state, rc = 0; + + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + if (rc) + goto out; + /* Call the 'real' crypt function based on the xts prot key type. */ switch (ctx->fc) { case CPACF_KM_PXTS_128: case CPACF_KM_PXTS_256: - return paes_xts_crypt(req, modifier); + rc = xts_paes_do_crypt_2keys(ctx, req_ctx, maysleep); + break; case CPACF_KM_PXTS_128_FULL: case CPACF_KM_PXTS_256_FULL: - return paes_xts_crypt_full(req, modifier); + rc = xts_paes_do_crypt_fullkey(ctx, req_ctx, maysleep); + break; default: - return -EINVAL; + rc = -EINVAL; } -} -static int xts_paes_encrypt(struct skcipher_request *req) -{ - return xts_paes_crypt(req, 0); +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int xts_paes_decrypt(struct skcipher_request *req) +static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) { - return xts_paes_crypt(req, CPACF_DECRYPT); -} + struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; -static struct skcipher_alg xts_paes_alg = { - .base.cra_name = "xts(paes)", - .base.cra_driver_name = "xts-paes-s390", - .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct s390_pxts_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list), - .init = xts_paes_init, - .exit = xts_paes_exit, - .min_keysize = 2 * PAES_MIN_KEYSIZE, - .max_keysize = 2 * PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_paes_set_key, - .encrypt = xts_paes_encrypt, - .decrypt = xts_paes_decrypt, -}; + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ -static int ctr_paes_init(struct crypto_skcipher *tfm) -{ - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + rc = skcipher_walk_virt(walk, req, false); + if (rc) + goto out; - ctx->kb.key = NULL; - spin_lock_init(&ctx->pk_lock); + req_ctx->modifier = modifier; + req_ctx->param_init_done = false; - return 0; -} + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = xts_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } -static void ctr_paes_exit(struct crypto_skcipher *tfm) -{ - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); - _free_kb_keybuf(&ctx->kb); +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); + return rc; } -static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx) +static int xts_paes_encrypt(struct skcipher_request *req) { - unsigned long fc; - int rc; - - rc = __paes_convert_key(ctx); - if (rc) - return rc; - - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? - CPACF_KMCTR_PAES_256 : 0; - - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0; + return xts_paes_crypt(req, 0); +} - return ctx->fc ? 0 : -EINVAL; +static int xts_paes_decrypt(struct skcipher_request *req) +{ + return xts_paes_crypt(req, CPACF_DECRYPT); } -static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int xts_paes_init(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - int rc; + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb); - rc = _key_to_kb(&ctx->kb, in_key, key_len); - if (rc) - return rc; + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->pk_lock); - return __ctr_paes_set_key(ctx); + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pxts_req_ctx)); + + return 0; } -static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) +static void xts_paes_exit(struct crypto_skcipher *tfm) { - unsigned int i, n; + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); - /* only use complete blocks, max. PAGE_SIZE */ - memcpy(ctrptr, iv, AES_BLOCK_SIZE); - n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); - for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) { - memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE); - crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE); - ctrptr += AES_BLOCK_SIZE; - } - return n; + memzero_explicit(ctx, sizeof(*ctx)); } -static int ctr_paes_crypt(struct skcipher_request *req) +static int xts_paes_do_one_request(struct crypto_engine *engine, void *areq) { + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - u8 buf[AES_BLOCK_SIZE], *ctrptr; - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - } param; - struct skcipher_walk walk; - unsigned int nbytes, n, k; - int rc, locked; - - rc = skcipher_walk_virt(&walk, req, false); - if (rc) - return rc; - - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - - locked = mutex_trylock(&ctrblk_lock); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; - while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - n = AES_BLOCK_SIZE; - if (nbytes >= 2*AES_BLOCK_SIZE && locked) - n = __ctrblk_init(ctrblk, walk.iv, nbytes); - ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv; - k = cpacf_kmctr(ctx->fc, ¶m, walk.dst.virt.addr, - walk.src.virt.addr, n, ctrptr); - if (k) { - if (ctrptr == ctrblk) - memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE, - AES_BLOCK_SIZE); - crypto_inc(walk.iv, AES_BLOCK_SIZE); - rc = skcipher_walk_done(&walk, nbytes - k); - } - if (k < n) { - if (__paes_convert_key(ctx)) { - if (locked) - mutex_unlock(&ctrblk_lock); - return skcipher_walk_done(&walk, -EIO); - } - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - } - } - if (locked) - mutex_unlock(&ctrblk_lock); - /* - * final block may be < AES_BLOCK_SIZE, copy only nbytes - */ - if (nbytes) { - memset(buf, 0, AES_BLOCK_SIZE); - memcpy(buf, walk.src.virt.addr, nbytes); - while (1) { - if (cpacf_kmctr(ctx->fc, ¶m, buf, - buf, AES_BLOCK_SIZE, - walk.iv) == AES_BLOCK_SIZE) - break; - if (__paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - } - memcpy(walk.dst.virt.addr, buf, nbytes); - crypto_inc(walk.iv, AES_BLOCK_SIZE); - rc = skcipher_walk_done(&walk, nbytes); + /* walk has already been prepared */ + + rc = xts_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); } + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); return rc; } -static struct skcipher_alg ctr_paes_alg = { - .base.cra_name = "ctr(paes)", - .base.cra_driver_name = "ctr-paes-s390", - .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct s390_paes_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list), - .init = ctr_paes_init, - .exit = ctr_paes_exit, - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ctr_paes_set_key, - .encrypt = ctr_paes_crypt, - .decrypt = ctr_paes_crypt, - .chunksize = AES_BLOCK_SIZE, +static struct skcipher_engine_alg xts_paes_alg = { + .base = { + .base.cra_name = "xts(paes)", + .base.cra_driver_name = "xts-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_pxts_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.base.cra_list), + .init = xts_paes_init, + .exit = xts_paes_exit, + .min_keysize = 2 * PAES_MIN_KEYSIZE, + .max_keysize = 2 * PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_paes_setkey, + .encrypt = xts_paes_encrypt, + .decrypt = xts_paes_decrypt, + }, + .op = { + .do_one_request = xts_paes_do_one_request, + }, }; -static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg) +/* + * alg register, unregister, module init, exit + */ + +static struct miscdevice paes_dev = { + .name = "paes", + .minor = MISC_DYNAMIC_MINOR, +}; + +static inline void __crypto_unregister_skcipher(struct skcipher_engine_alg *alg) { - if (!list_empty(&alg->base.cra_list)) - crypto_unregister_skcipher(alg); + if (!list_empty(&alg->base.base.cra_list)) + crypto_engine_unregister_skcipher(alg); } static void paes_s390_fini(void) { + if (paes_crypto_engine) { + crypto_engine_stop(paes_crypto_engine); + crypto_engine_exit(paes_crypto_engine); + } __crypto_unregister_skcipher(&ctr_paes_alg); __crypto_unregister_skcipher(&xts_paes_alg); __crypto_unregister_skcipher(&cbc_paes_alg); __crypto_unregister_skcipher(&ecb_paes_alg); if (ctrblk) - free_page((unsigned long) ctrblk); + free_page((unsigned long)ctrblk); + misc_deregister(&paes_dev); } static int __init paes_s390_init(void) { int rc; + /* register a simple paes pseudo misc device */ + rc = misc_register(&paes_dev); + if (rc) + return rc; + + /* with this pseudo devie alloc and start a crypto engine */ + paes_crypto_engine = + crypto_engine_alloc_init_and_set(paes_dev.this_device, + true, false, MAX_QLEN); + if (!paes_crypto_engine) { + rc = -ENOMEM; + goto out_err; + } + rc = crypto_engine_start(paes_crypto_engine); + if (rc) { + crypto_engine_exit(paes_crypto_engine); + paes_crypto_engine = NULL; + goto out_err; + } + /* Query available functions for KM, KMC and KMCTR */ cpacf_query(CPACF_KM, &km_functions); cpacf_query(CPACF_KMC, &kmc_functions); @@ -927,40 +1653,45 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) || cpacf_test_func(&km_functions, CPACF_KM_PAES_192) || cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) { - rc = crypto_register_skcipher(&ecb_paes_alg); + rc = crypto_engine_register_skcipher(&ecb_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", ecb_paes_alg.base.base.cra_driver_name); } if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) { - rc = crypto_register_skcipher(&cbc_paes_alg); + rc = crypto_engine_register_skcipher(&cbc_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", cbc_paes_alg.base.base.cra_driver_name); } if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) || cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) { - rc = crypto_register_skcipher(&xts_paes_alg); + rc = crypto_engine_register_skcipher(&xts_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", xts_paes_alg.base.base.cra_driver_name); } if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) { - ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + ctrblk = (u8 *)__get_free_page(GFP_KERNEL); if (!ctrblk) { rc = -ENOMEM; goto out_err; } - rc = crypto_register_skcipher(&ctr_paes_alg); + rc = crypto_engine_register_skcipher(&ctr_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", ctr_paes_alg.base.base.cra_driver_name); } return 0; + out_err: paes_s390_fini(); return rc; diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c new file mode 100644 index 000000000000..7ecfdc4fba2d --- /dev/null +++ b/arch/s390/crypto/phmac_s390.c @@ -0,0 +1,1048 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright IBM Corp. 2025 + * + * s390 specific HMAC support for protected keys. + */ + +#define KMSG_COMPONENT "phmac_s390" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <asm/cpacf.h> +#include <asm/pkey.h> +#include <crypto/engine.h> +#include <crypto/hash.h> +#include <crypto/internal/hash.h> +#include <crypto/sha2.h> +#include <linux/atomic.h> +#include <linux/cpufeature.h> +#include <linux/delay.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/spinlock.h> + +static struct crypto_engine *phmac_crypto_engine; +#define MAX_QLEN 10 + +/* + * A simple hash walk helper + */ + +struct hash_walk_helper { + struct crypto_hash_walk walk; + const u8 *walkaddr; + int walkbytes; +}; + +/* + * Prepare hash walk helper. + * Set up the base hash walk, fill walkaddr and walkbytes. + * Returns 0 on success or negative value on error. + */ +static inline int hwh_prepare(struct ahash_request *req, + struct hash_walk_helper *hwh) +{ + hwh->walkbytes = crypto_hash_walk_first(req, &hwh->walk); + if (hwh->walkbytes < 0) + return hwh->walkbytes; + hwh->walkaddr = hwh->walk.data; + return 0; +} + +/* + * Advance hash walk helper by n bytes. + * Progress the walkbytes and walkaddr fields by n bytes. + * If walkbytes is then 0, pull next hunk from hash walk + * and update walkbytes and walkaddr. + * If n is negative, unmap hash walk and return error. + * Returns 0 on success or negative value on error. + */ +static inline int hwh_advance(struct hash_walk_helper *hwh, int n) +{ + if (n < 0) + return crypto_hash_walk_done(&hwh->walk, n); + + hwh->walkbytes -= n; + hwh->walkaddr += n; + if (hwh->walkbytes > 0) + return 0; + + hwh->walkbytes = crypto_hash_walk_done(&hwh->walk, 0); + if (hwh->walkbytes < 0) + return hwh->walkbytes; + + hwh->walkaddr = hwh->walk.data; + return 0; +} + +/* + * KMAC param block layout for sha2 function codes: + * The layout of the param block for the KMAC instruction depends on the + * blocksize of the used hashing sha2-algorithm function codes. The param block + * contains the hash chaining value (cv), the input message bit-length (imbl) + * and the hmac-secret (key). To prevent code duplication, the sizes of all + * these are calculated based on the blocksize. + * + * param-block: + * +-------+ + * | cv | + * +-------+ + * | imbl | + * +-------+ + * | key | + * +-------+ + * + * sizes: + * part | sh2-alg | calculation | size | type + * -----+---------+-------------+------+-------- + * cv | 224/256 | blocksize/2 | 32 | u64[8] + * | 384/512 | | 64 | u128[8] + * imbl | 224/256 | blocksize/8 | 8 | u64 + * | 384/512 | | 16 | u128 + * key | 224/256 | blocksize | 96 | u8[96] + * | 384/512 | | 160 | u8[160] + */ + +#define MAX_DIGEST_SIZE SHA512_DIGEST_SIZE +#define MAX_IMBL_SIZE sizeof(u128) +#define MAX_BLOCK_SIZE SHA512_BLOCK_SIZE + +#define SHA2_CV_SIZE(bs) ((bs) >> 1) +#define SHA2_IMBL_SIZE(bs) ((bs) >> 3) + +#define SHA2_IMBL_OFFSET(bs) (SHA2_CV_SIZE(bs)) +#define SHA2_KEY_OFFSET(bs) (SHA2_CV_SIZE(bs) + SHA2_IMBL_SIZE(bs)) + +#define PHMAC_MAX_KEYSIZE 256 +#define PHMAC_SHA256_PK_SIZE (SHA256_BLOCK_SIZE + 32) +#define PHMAC_SHA512_PK_SIZE (SHA512_BLOCK_SIZE + 32) +#define PHMAC_MAX_PK_SIZE PHMAC_SHA512_PK_SIZE + +/* phmac protected key struct */ +struct phmac_protkey { + u32 type; + u32 len; + u8 protkey[PHMAC_MAX_PK_SIZE]; +}; + +#define PK_STATE_NO_KEY 0 +#define PK_STATE_CONVERT_IN_PROGRESS 1 +#define PK_STATE_VALID 2 + +/* phmac tfm context */ +struct phmac_tfm_ctx { + /* source key material used to derive a protected key from */ + u8 keybuf[PHMAC_MAX_KEYSIZE]; + unsigned int keylen; + + /* cpacf function code to use with this protected key type */ + long fc; + + /* nr of requests enqueued via crypto engine which use this tfm ctx */ + atomic_t via_engine_ctr; + + /* spinlock to atomic read/update all the following fields */ + spinlock_t pk_lock; + + /* see PK_STATE* defines above, < 0 holds convert failure rc */ + int pk_state; + /* if state is valid, pk holds the protected key */ + struct phmac_protkey pk; +}; + +union kmac_gr0 { + unsigned long reg; + struct { + unsigned long : 48; + unsigned long ikp : 1; + unsigned long iimp : 1; + unsigned long ccup : 1; + unsigned long : 6; + unsigned long fc : 7; + }; +}; + +struct kmac_sha2_ctx { + u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + PHMAC_MAX_PK_SIZE]; + union kmac_gr0 gr0; + u8 buf[MAX_BLOCK_SIZE]; + u64 buflen[2]; +}; + +/* phmac request context */ +struct phmac_req_ctx { + struct hash_walk_helper hwh; + struct kmac_sha2_ctx kmac_ctx; + bool final; +}; + +/* + * Pkey 'token' struct used to derive a protected key value from a clear key. + */ +struct hmac_clrkey_token { + u8 type; + u8 res0[3]; + u8 version; + u8 res1[3]; + u32 keytype; + u32 len; + u8 key[]; +} __packed; + +static int hash_key(const u8 *in, unsigned int inlen, + u8 *digest, unsigned int digestsize) +{ + unsigned long func; + union { + struct sha256_paramblock { + u32 h[8]; + u64 mbl; + } sha256; + struct sha512_paramblock { + u64 h[8]; + u128 mbl; + } sha512; + } __packed param; + +#define PARAM_INIT(x, y, z) \ + param.sha##x.h[0] = SHA##y ## _H0; \ + param.sha##x.h[1] = SHA##y ## _H1; \ + param.sha##x.h[2] = SHA##y ## _H2; \ + param.sha##x.h[3] = SHA##y ## _H3; \ + param.sha##x.h[4] = SHA##y ## _H4; \ + param.sha##x.h[5] = SHA##y ## _H5; \ + param.sha##x.h[6] = SHA##y ## _H6; \ + param.sha##x.h[7] = SHA##y ## _H7; \ + param.sha##x.mbl = (z) + + switch (digestsize) { + case SHA224_DIGEST_SIZE: + func = CPACF_KLMD_SHA_256; + PARAM_INIT(256, 224, inlen * 8); + break; + case SHA256_DIGEST_SIZE: + func = CPACF_KLMD_SHA_256; + PARAM_INIT(256, 256, inlen * 8); + break; + case SHA384_DIGEST_SIZE: + func = CPACF_KLMD_SHA_512; + PARAM_INIT(512, 384, inlen * 8); + break; + case SHA512_DIGEST_SIZE: + func = CPACF_KLMD_SHA_512; + PARAM_INIT(512, 512, inlen * 8); + break; + default: + return -EINVAL; + } + +#undef PARAM_INIT + + cpacf_klmd(func, ¶m, in, inlen); + + memcpy(digest, ¶m, digestsize); + + return 0; +} + +/* + * make_clrkey_token() - wrap the clear key into a pkey clearkey token. + */ +static inline int make_clrkey_token(const u8 *clrkey, size_t clrkeylen, + unsigned int digestsize, u8 *dest) +{ + struct hmac_clrkey_token *token = (struct hmac_clrkey_token *)dest; + unsigned int blocksize; + int rc; + + token->type = 0x00; + token->version = 0x02; + switch (digestsize) { + case SHA224_DIGEST_SIZE: + case SHA256_DIGEST_SIZE: + token->keytype = PKEY_KEYTYPE_HMAC_512; + blocksize = 64; + break; + case SHA384_DIGEST_SIZE: + case SHA512_DIGEST_SIZE: + token->keytype = PKEY_KEYTYPE_HMAC_1024; + blocksize = 128; + break; + default: + return -EINVAL; + } + token->len = blocksize; + + if (clrkeylen > blocksize) { + rc = hash_key(clrkey, clrkeylen, token->key, digestsize); + if (rc) + return rc; + } else { + memcpy(token->key, clrkey, clrkeylen); + } + + return 0; +} + +/* + * phmac_tfm_ctx_setkey() - Set key value into tfm context, maybe construct + * a clear key token digestible by pkey from a clear key value. + */ +static inline int phmac_tfm_ctx_setkey(struct phmac_tfm_ctx *tfm_ctx, + const u8 *key, unsigned int keylen) +{ + if (keylen > sizeof(tfm_ctx->keybuf)) + return -EINVAL; + + memcpy(tfm_ctx->keybuf, key, keylen); + tfm_ctx->keylen = keylen; + + return 0; +} + +/* + * Convert the raw key material into a protected key via PKEY api. + * This function may sleep - don't call in non-sleeping context. + */ +static inline int convert_key(const u8 *key, unsigned int keylen, + struct phmac_protkey *pk) +{ + int rc, i; + + pk->len = sizeof(pk->protkey); + + /* + * In case of a busy card retry with increasing delay + * of 200, 400, 800 and 1600 ms - in total 3 s. + */ + for (rc = -EIO, i = 0; rc && i < 5; i++) { + if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) { + rc = -EINTR; + goto out; + } + rc = pkey_key2protkey(key, keylen, + pk->protkey, &pk->len, &pk->type, + PKEY_XFLAG_NOMEMALLOC); + } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +/* + * (Re-)Convert the raw key material from the tfm ctx into a protected + * key via convert_key() function. Update the pk_state, pk_type, pk_len + * and the protected key in the tfm context. + * Please note this function may be invoked concurrently with the very + * same tfm context. The pk_lock spinlock in the context ensures an + * atomic update of the pk and the pk state but does not guarantee any + * order of update. So a fresh converted valid protected key may get + * updated with an 'old' expired key value. As the cpacf instructions + * detect this, refuse to operate with an invalid key and the calling + * code triggers a (re-)conversion this does no harm. This may lead to + * unnecessary additional conversion but never to invalid data on the + * hash operation. + */ +static int phmac_convert_key(struct phmac_tfm_ctx *tfm_ctx) +{ + struct phmac_protkey pk; + int rc; + + spin_lock_bh(&tfm_ctx->pk_lock); + tfm_ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS; + spin_unlock_bh(&tfm_ctx->pk_lock); + + rc = convert_key(tfm_ctx->keybuf, tfm_ctx->keylen, &pk); + + /* update context */ + spin_lock_bh(&tfm_ctx->pk_lock); + if (rc) { + tfm_ctx->pk_state = rc; + } else { + tfm_ctx->pk_state = PK_STATE_VALID; + tfm_ctx->pk = pk; + } + spin_unlock_bh(&tfm_ctx->pk_lock); + + memzero_explicit(&pk, sizeof(pk)); + pr_debug("rc=%d\n", rc); + return rc; +} + +/* + * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize + */ +static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo, + u64 buflen_hi, unsigned int blocksize) +{ + u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize); + + switch (blocksize) { + case SHA256_BLOCK_SIZE: + *(u64 *)imbl = buflen_lo * BITS_PER_BYTE; + break; + case SHA512_BLOCK_SIZE: + *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3; + break; + default: + break; + } +} + +static int phmac_kmac_update(struct ahash_request *req, bool maysleep) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx; + struct hash_walk_helper *hwh = &req_ctx->hwh; + unsigned int bs = crypto_ahash_blocksize(tfm); + unsigned int offset, k, n; + int rc = 0; + + /* + * The walk is always mapped when this function is called. + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + + while (hwh->walkbytes > 0) { + /* check sha2 context buffer */ + offset = ctx->buflen[0] % bs; + if (offset + hwh->walkbytes < bs) + goto store; + + if (offset) { + /* fill ctx buffer up to blocksize and process this block */ + n = bs - offset; + memcpy(ctx->buf + offset, hwh->walkaddr, n); + ctx->gr0.iimp = 1; + for (;;) { + k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs); + if (likely(k == bs)) + break; + if (unlikely(k > 0)) { + /* + * Can't deal with hunks smaller than blocksize. + * And kmac should always return the nr of + * processed bytes as 0 or a multiple of the + * blocksize. + */ + rc = -EIO; + goto out; + } + /* protected key is invalid and needs re-conversion */ + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = phmac_convert_key(tfm_ctx); + if (rc) + goto out; + spin_lock_bh(&tfm_ctx->pk_lock); + memcpy(ctx->param + SHA2_KEY_OFFSET(bs), + tfm_ctx->pk.protkey, tfm_ctx->pk.len); + spin_unlock_bh(&tfm_ctx->pk_lock); + } + ctx->buflen[0] += n; + if (ctx->buflen[0] < n) + ctx->buflen[1]++; + rc = hwh_advance(hwh, n); + if (unlikely(rc)) + goto out; + offset = 0; + } + + /* process as many blocks as possible from the walk */ + while (hwh->walkbytes >= bs) { + n = (hwh->walkbytes / bs) * bs; + ctx->gr0.iimp = 1; + k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, hwh->walkaddr, n); + if (likely(k > 0)) { + ctx->buflen[0] += k; + if (ctx->buflen[0] < k) + ctx->buflen[1]++; + rc = hwh_advance(hwh, k); + if (unlikely(rc)) + goto out; + } + if (unlikely(k < n)) { + /* protected key is invalid and needs re-conversion */ + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = phmac_convert_key(tfm_ctx); + if (rc) + goto out; + spin_lock_bh(&tfm_ctx->pk_lock); + memcpy(ctx->param + SHA2_KEY_OFFSET(bs), + tfm_ctx->pk.protkey, tfm_ctx->pk.len); + spin_unlock_bh(&tfm_ctx->pk_lock); + } + } + +store: + /* store incomplete block in context buffer */ + if (hwh->walkbytes) { + memcpy(ctx->buf + offset, hwh->walkaddr, hwh->walkbytes); + ctx->buflen[0] += hwh->walkbytes; + if (ctx->buflen[0] < hwh->walkbytes) + ctx->buflen[1]++; + rc = hwh_advance(hwh, hwh->walkbytes); + if (unlikely(rc)) + goto out; + } + + } /* end of while (hwh->walkbytes > 0) */ + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_kmac_final(struct ahash_request *req, bool maysleep) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx; + unsigned int ds = crypto_ahash_digestsize(tfm); + unsigned int bs = crypto_ahash_blocksize(tfm); + unsigned int k, n; + int rc = 0; + + n = ctx->buflen[0] % bs; + ctx->gr0.iimp = 0; + kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs); + for (;;) { + k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, n); + if (likely(k == n)) + break; + if (unlikely(k > 0)) { + /* Can't deal with hunks smaller than blocksize. */ + rc = -EIO; + goto out; + } + /* protected key is invalid and needs re-conversion */ + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = phmac_convert_key(tfm_ctx); + if (rc) + goto out; + spin_lock_bh(&tfm_ctx->pk_lock); + memcpy(ctx->param + SHA2_KEY_OFFSET(bs), + tfm_ctx->pk.protkey, tfm_ctx->pk.len); + spin_unlock_bh(&tfm_ctx->pk_lock); + } + + memcpy(req->result, ctx->param, ds); + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx; + unsigned int bs = crypto_ahash_blocksize(tfm); + int rc = 0; + + /* zero request context (includes the kmac sha2 context) */ + memset(req_ctx, 0, sizeof(*req_ctx)); + + /* + * setkey() should have set a valid fc into the tfm context. + * Copy this function code into the gr0 field of the kmac context. + */ + if (!tfm_ctx->fc) { + rc = -ENOKEY; + goto out; + } + kmac_ctx->gr0.fc = tfm_ctx->fc; + + /* + * Copy the pk from tfm ctx into kmac ctx. The protected key + * may be outdated but update() and final() will handle this. + */ + spin_lock_bh(&tfm_ctx->pk_lock); + memcpy(kmac_ctx->param + SHA2_KEY_OFFSET(bs), + tfm_ctx->pk.protkey, tfm_ctx->pk.len); + spin_unlock_bh(&tfm_ctx->pk_lock); + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_update(struct ahash_request *req) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx; + struct hash_walk_helper *hwh = &req_ctx->hwh; + int rc; + + /* prep the walk in the request context */ + rc = hwh_prepare(req, hwh); + if (rc) + goto out; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&tfm_ctx->via_engine_ctr)) { + rc = phmac_kmac_update(req, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&tfm_ctx->via_engine_ctr); + rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&tfm_ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) { + hwh_advance(hwh, rc); + memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); + } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_final(struct ahash_request *req) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx; + int rc = 0; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&tfm_ctx->via_engine_ctr)) { + rc = phmac_kmac_final(req, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + req->nbytes = 0; + req_ctx->final = true; + atomic_inc(&tfm_ctx->via_engine_ctr); + rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&tfm_ctx->via_engine_ctr); + } + +out: + if (rc != -EINPROGRESS) + memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_finup(struct ahash_request *req) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx; + struct hash_walk_helper *hwh = &req_ctx->hwh; + int rc; + + /* prep the walk in the request context */ + rc = hwh_prepare(req, hwh); + if (rc) + goto out; + + /* Try synchronous operations if no active engine usage */ + if (!atomic_read(&tfm_ctx->via_engine_ctr)) { + rc = phmac_kmac_update(req, false); + if (rc == 0) + req->nbytes = 0; + } + if (!rc && !req->nbytes && !atomic_read(&tfm_ctx->via_engine_ctr)) { + rc = phmac_kmac_final(req, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + req_ctx->final = true; + atomic_inc(&tfm_ctx->via_engine_ctr); + rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&tfm_ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + hwh_advance(hwh, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_digest(struct ahash_request *req) +{ + int rc; + + rc = phmac_init(req); + if (rc) + goto out; + + rc = phmac_finup(req); + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_setkey(struct crypto_ahash *tfm, + const u8 *key, unsigned int keylen) +{ + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + unsigned int ds = crypto_ahash_digestsize(tfm); + unsigned int bs = crypto_ahash_blocksize(tfm); + unsigned int tmpkeylen; + u8 *tmpkey = NULL; + int rc = 0; + + if (!crypto_ahash_tested(tfm)) { + /* + * selftest running: key is a raw hmac clear key and needs + * to get embedded into a 'clear key token' in order to have + * it correctly processed by the pkey module. + */ + tmpkeylen = sizeof(struct hmac_clrkey_token) + bs; + tmpkey = kzalloc(tmpkeylen, GFP_KERNEL); + if (!tmpkey) { + rc = -ENOMEM; + goto out; + } + rc = make_clrkey_token(key, keylen, ds, tmpkey); + if (rc) + goto out; + keylen = tmpkeylen; + key = tmpkey; + } + + /* copy raw key into tfm context */ + rc = phmac_tfm_ctx_setkey(tfm_ctx, key, keylen); + if (rc) + goto out; + + /* convert raw key into protected key */ + rc = phmac_convert_key(tfm_ctx); + if (rc) + goto out; + + /* set function code in tfm context, check for valid pk type */ + switch (ds) { + case SHA224_DIGEST_SIZE: + if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512) + rc = -EINVAL; + else + tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_224; + break; + case SHA256_DIGEST_SIZE: + if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512) + rc = -EINVAL; + else + tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_256; + break; + case SHA384_DIGEST_SIZE: + if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024) + rc = -EINVAL; + else + tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_384; + break; + case SHA512_DIGEST_SIZE: + if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024) + rc = -EINVAL; + else + tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_512; + break; + default: + tfm_ctx->fc = 0; + rc = -EINVAL; + } + +out: + kfree(tmpkey); + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_export(struct ahash_request *req, void *out) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx; + + memcpy(out, ctx, sizeof(*ctx)); + + return 0; +} + +static int phmac_import(struct ahash_request *req, const void *in) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx; + + memset(req_ctx, 0, sizeof(*req_ctx)); + memcpy(ctx, in, sizeof(*ctx)); + + return 0; +} + +static int phmac_init_tfm(struct crypto_ahash *tfm) +{ + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + + memset(tfm_ctx, 0, sizeof(*tfm_ctx)); + spin_lock_init(&tfm_ctx->pk_lock); + + crypto_ahash_set_reqsize(tfm, sizeof(struct phmac_req_ctx)); + + return 0; +} + +static void phmac_exit_tfm(struct crypto_ahash *tfm) +{ + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + + memzero_explicit(tfm_ctx->keybuf, sizeof(tfm_ctx->keybuf)); + memzero_explicit(&tfm_ctx->pk, sizeof(tfm_ctx->pk)); +} + +static int phmac_do_one_request(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx; + struct hash_walk_helper *hwh = &req_ctx->hwh; + int rc = -EINVAL; + + /* + * Three kinds of requests come in here: + * update when req->nbytes > 0 and req_ctx->final is false + * final when req->nbytes = 0 and req_ctx->final is true + * finup when req->nbytes > 0 and req_ctx->final is true + * For update and finup the hwh walk needs to be prepared and + * up to date but the actual nr of bytes in req->nbytes may be + * any non zero number. For final there is no hwh walk needed. + */ + + if (req->nbytes) { + rc = phmac_kmac_update(req, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell scheduler to voluntarily give up the CPU here. + */ + pr_debug("rescheduling request\n"); + cond_resched(); + return -ENOSPC; + } else if (rc) { + hwh_advance(hwh, rc); + goto out; + } + req->nbytes = 0; + } + + if (req_ctx->final) { + rc = phmac_kmac_final(req, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell scheduler to voluntarily give up the CPU here. + */ + pr_debug("rescheduling request\n"); + cond_resched(); + return -ENOSPC; + } + } + +out: + if (rc || req_ctx->final) + memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&tfm_ctx->via_engine_ctr); + crypto_finalize_hash_request(engine, req, rc); + local_bh_enable(); + return rc; +} + +#define S390_ASYNC_PHMAC_ALG(x) \ +{ \ + .base = { \ + .init = phmac_init, \ + .update = phmac_update, \ + .final = phmac_final, \ + .finup = phmac_finup, \ + .digest = phmac_digest, \ + .setkey = phmac_setkey, \ + .import = phmac_import, \ + .export = phmac_export, \ + .init_tfm = phmac_init_tfm, \ + .exit_tfm = phmac_exit_tfm, \ + .halg = { \ + .digestsize = SHA##x##_DIGEST_SIZE, \ + .statesize = sizeof(struct kmac_sha2_ctx), \ + .base = { \ + .cra_name = "phmac(sha" #x ")", \ + .cra_driver_name = "phmac_s390_sha" #x, \ + .cra_blocksize = SHA##x##_BLOCK_SIZE, \ + .cra_priority = 400, \ + .cra_flags = CRYPTO_ALG_ASYNC | \ + CRYPTO_ALG_NO_FALLBACK, \ + .cra_ctxsize = sizeof(struct phmac_tfm_ctx), \ + .cra_module = THIS_MODULE, \ + }, \ + }, \ + }, \ + .op = { \ + .do_one_request = phmac_do_one_request, \ + }, \ +} + +static struct phmac_alg { + unsigned int fc; + struct ahash_engine_alg alg; + bool registered; +} phmac_algs[] = { + { + .fc = CPACF_KMAC_PHMAC_SHA_224, + .alg = S390_ASYNC_PHMAC_ALG(224), + }, { + .fc = CPACF_KMAC_PHMAC_SHA_256, + .alg = S390_ASYNC_PHMAC_ALG(256), + }, { + .fc = CPACF_KMAC_PHMAC_SHA_384, + .alg = S390_ASYNC_PHMAC_ALG(384), + }, { + .fc = CPACF_KMAC_PHMAC_SHA_512, + .alg = S390_ASYNC_PHMAC_ALG(512), + } +}; + +static struct miscdevice phmac_dev = { + .name = "phmac", + .minor = MISC_DYNAMIC_MINOR, +}; + +static void s390_phmac_exit(void) +{ + struct phmac_alg *phmac; + int i; + + if (phmac_crypto_engine) { + crypto_engine_stop(phmac_crypto_engine); + crypto_engine_exit(phmac_crypto_engine); + } + + for (i = ARRAY_SIZE(phmac_algs) - 1; i >= 0; i--) { + phmac = &phmac_algs[i]; + if (phmac->registered) + crypto_engine_unregister_ahash(&phmac->alg); + } + + misc_deregister(&phmac_dev); +} + +static int __init s390_phmac_init(void) +{ + struct phmac_alg *phmac; + int i, rc; + + /* for selftest cpacf klmd subfunction is needed */ + if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_256)) + return -ENODEV; + if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_512)) + return -ENODEV; + + /* register a simple phmac pseudo misc device */ + rc = misc_register(&phmac_dev); + if (rc) + return rc; + + /* with this pseudo device alloc and start a crypto engine */ + phmac_crypto_engine = + crypto_engine_alloc_init_and_set(phmac_dev.this_device, + true, false, MAX_QLEN); + if (!phmac_crypto_engine) { + rc = -ENOMEM; + goto out_err; + } + rc = crypto_engine_start(phmac_crypto_engine); + if (rc) { + crypto_engine_exit(phmac_crypto_engine); + phmac_crypto_engine = NULL; + goto out_err; + } + + for (i = 0; i < ARRAY_SIZE(phmac_algs); i++) { + phmac = &phmac_algs[i]; + if (!cpacf_query_func(CPACF_KMAC, phmac->fc)) + continue; + rc = crypto_engine_register_ahash(&phmac->alg); + if (rc) + goto out_err; + phmac->registered = true; + pr_debug("%s registered\n", phmac->alg.base.halg.base.cra_name); + } + + return 0; + +out_err: + s390_phmac_exit(); + return rc; +} + +module_init(s390_phmac_init); +module_exit(s390_phmac_exit); + +MODULE_ALIAS_CRYPTO("phmac(sha224)"); +MODULE_ALIAS_CRYPTO("phmac(sha256)"); +MODULE_ALIAS_CRYPTO("phmac(sha384)"); +MODULE_ALIAS_CRYPTO("phmac(sha512)"); + +MODULE_DESCRIPTION("S390 HMAC driver for protected keys"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 2bb22db54c31..cadb4b13622a 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -10,27 +10,36 @@ #ifndef _CRYPTO_ARCH_S390_SHA_H #define _CRYPTO_ARCH_S390_SHA_H -#include <linux/crypto.h> -#include <crypto/sha1.h> #include <crypto/sha2.h> #include <crypto/sha3.h> +#include <linux/types.h> /* must be big enough for the largest SHA variant */ -#define SHA3_STATE_SIZE 200 #define CPACF_MAX_PARMBLOCK_SIZE SHA3_STATE_SIZE #define SHA_MAX_BLOCK_SIZE SHA3_224_BLOCK_SIZE +#define S390_SHA_CTX_SIZE sizeof(struct s390_sha_ctx) struct s390_sha_ctx { u64 count; /* message length in bytes */ - u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)]; - u8 buf[SHA_MAX_BLOCK_SIZE]; + union { + u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)]; + struct { + u64 state[SHA512_DIGEST_SIZE / sizeof(u64)]; + u64 count_hi; + } sha512; + struct { + __le64 state[SHA3_STATE_SIZE / sizeof(u64)]; + } sha3; + }; int func; /* KIMD function to use */ - int first_message_part; + bool first_message_part; }; struct shash_desc; -int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len); -int s390_sha_final(struct shash_desc *desc, u8 *out); +int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, + unsigned int len); +int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, + u8 *out); #endif diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c deleted file mode 100644 index bc3a22704e09..000000000000 --- a/arch/s390/crypto/sha1_s390.c +++ /dev/null @@ -1,103 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Cryptographic API. - * - * s390 implementation of the SHA1 Secure Hash Algorithm. - * - * Derived from cryptoapi implementation, adapted for in-place - * scatterlist interface. Originally based on the public domain - * implementation written by Steve Reid. - * - * s390 Version: - * Copyright IBM Corp. 2003, 2007 - * Author(s): Thomas Spatzier - * Jan Glauber (jan.glauber@de.ibm.com) - * - * Derived from "crypto/sha1_generic.c" - * Copyright (c) Alan Smithee. - * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> - * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> - */ -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> -#include <crypto/sha1.h> -#include <asm/cpacf.h> - -#include "sha.h" - -static int s390_sha1_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA1_H0; - sctx->state[1] = SHA1_H1; - sctx->state[2] = SHA1_H2; - sctx->state[3] = SHA1_H3; - sctx->state[4] = SHA1_H4; - sctx->count = 0; - sctx->func = CPACF_KIMD_SHA_1; - - return 0; -} - -static int s390_sha1_export(struct shash_desc *desc, void *out) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha1_state *octx = out; - - octx->count = sctx->count; - memcpy(octx->state, sctx->state, sizeof(octx->state)); - memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer)); - return 0; -} - -static int s390_sha1_import(struct shash_desc *desc, const void *in) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha1_state *ictx = in; - - sctx->count = ictx->count; - memcpy(sctx->state, ictx->state, sizeof(ictx->state)); - memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer)); - sctx->func = CPACF_KIMD_SHA_1; - return 0; -} - -static struct shash_alg alg = { - .digestsize = SHA1_DIGEST_SIZE, - .init = s390_sha1_init, - .update = s390_sha_update, - .final = s390_sha_final, - .export = s390_sha1_export, - .import = s390_sha1_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha1_state), - .base = { - .cra_name = "sha1", - .cra_driver_name= "sha1-s390", - .cra_priority = 300, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init sha1_s390_init(void) -{ - if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1)) - return -ENODEV; - return crypto_register_shash(&alg); -} - -static void __exit sha1_s390_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init); -module_exit(sha1_s390_fini); - -MODULE_ALIAS_CRYPTO("sha1"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c deleted file mode 100644 index 6f1ccdf93d3e..000000000000 --- a/arch/s390/crypto/sha256_s390.c +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Cryptographic API. - * - * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm. - * - * s390 Version: - * Copyright IBM Corp. 2005, 2011 - * Author(s): Jan Glauber (jang@de.ibm.com) - */ -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> -#include <crypto/sha2.h> -#include <asm/cpacf.h> - -#include "sha.h" - -static int s390_sha256_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA256_H0; - sctx->state[1] = SHA256_H1; - sctx->state[2] = SHA256_H2; - sctx->state[3] = SHA256_H3; - sctx->state[4] = SHA256_H4; - sctx->state[5] = SHA256_H5; - sctx->state[6] = SHA256_H6; - sctx->state[7] = SHA256_H7; - sctx->count = 0; - sctx->func = CPACF_KIMD_SHA_256; - - return 0; -} - -static int sha256_export(struct shash_desc *desc, void *out) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha256_state *octx = out; - - octx->count = sctx->count; - memcpy(octx->state, sctx->state, sizeof(octx->state)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - return 0; -} - -static int sha256_import(struct shash_desc *desc, const void *in) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha256_state *ictx = in; - - sctx->count = ictx->count; - memcpy(sctx->state, ictx->state, sizeof(ictx->state)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->func = CPACF_KIMD_SHA_256; - return 0; -} - -static struct shash_alg sha256_alg = { - .digestsize = SHA256_DIGEST_SIZE, - .init = s390_sha256_init, - .update = s390_sha_update, - .final = s390_sha_final, - .export = sha256_export, - .import = sha256_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name= "sha256-s390", - .cra_priority = 300, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int s390_sha224_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA224_H0; - sctx->state[1] = SHA224_H1; - sctx->state[2] = SHA224_H2; - sctx->state[3] = SHA224_H3; - sctx->state[4] = SHA224_H4; - sctx->state[5] = SHA224_H5; - sctx->state[6] = SHA224_H6; - sctx->state[7] = SHA224_H7; - sctx->count = 0; - sctx->func = CPACF_KIMD_SHA_256; - - return 0; -} - -static struct shash_alg sha224_alg = { - .digestsize = SHA224_DIGEST_SIZE, - .init = s390_sha224_init, - .update = s390_sha_update, - .final = s390_sha_final, - .export = sha256_export, - .import = sha256_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name= "sha224-s390", - .cra_priority = 300, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init sha256_s390_init(void) -{ - int ret; - - if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) - return -ENODEV; - ret = crypto_register_shash(&sha256_alg); - if (ret < 0) - goto out; - ret = crypto_register_shash(&sha224_alg); - if (ret < 0) - crypto_unregister_shash(&sha256_alg); -out: - return ret; -} - -static void __exit sha256_s390_fini(void) -{ - crypto_unregister_shash(&sha224_alg); - crypto_unregister_shash(&sha256_alg); -} - -module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init); -module_exit(sha256_s390_fini); - -MODULE_ALIAS_CRYPTO("sha256"); -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c index a84ef692f572..03bb4f4bab70 100644 --- a/arch/s390/crypto/sha3_256_s390.c +++ b/arch/s390/crypto/sha3_256_s390.c @@ -8,12 +8,14 @@ * Copyright IBM Corp. 2019 * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com) */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> #include <crypto/sha3.h> -#include <asm/cpacf.h> +#include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> #include "sha.h" @@ -21,11 +23,11 @@ static int sha3_256_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ + sctx->first_message_part = test_facility(86); + if (!sctx->first_message_part) memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_256; - sctx->first_message_part = 1; return 0; } @@ -33,25 +35,34 @@ static int sha3_256_init(struct shash_desc *desc) static int sha3_256_export(struct shash_desc *desc, void *out) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha3_state *octx = out; - - octx->rsiz = sctx->count; - memcpy(octx->st, sctx->state, sizeof(octx->st)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - octx->partial = sctx->first_message_part; - + union { + u8 *u8; + u64 *u64; + } p = { .u8 = out }; + int i; + + if (sctx->first_message_part) { + memset(out, 0, SHA3_STATE_SIZE); + return 0; + } + for (i = 0; i < SHA3_STATE_SIZE / 8; i++) + put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++); return 0; } static int sha3_256_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; - - sctx->count = ictx->rsiz; - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + union { + const u8 *u8; + const u64 *u64; + } p = { .u8 = in }; + int i; + + for (i = 0; i < SHA3_STATE_SIZE / 8; i++) + sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++)); + sctx->count = 0; + sctx->first_message_part = 0; sctx->func = CPACF_KIMD_SHA3_256; return 0; @@ -60,30 +71,26 @@ static int sha3_256_import(struct shash_desc *desc, const void *in) static int sha3_224_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; - sctx->count = ictx->rsiz; - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + sha3_256_import(desc, in); sctx->func = CPACF_KIMD_SHA3_224; - return 0; } static struct shash_alg sha3_256_alg = { .digestsize = SHA3_256_DIGEST_SIZE, /* = 32 */ .init = sha3_256_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_256_export, .import = sha3_256_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-256", .cra_driver_name = "sha3-256-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_256_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -93,28 +100,25 @@ static int sha3_224_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->count = 0; + sha3_256_init(desc); sctx->func = CPACF_KIMD_SHA3_224; - sctx->first_message_part = 1; - return 0; } static struct shash_alg sha3_224_alg = { .digestsize = SHA3_224_DIGEST_SIZE, .init = sha3_224_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_256_export, /* same as for 256 */ .import = sha3_224_import, /* function code different! */ - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-224", .cra_driver_name = "sha3-224-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_224_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c index 07528fc98ff7..a5c9690eecb1 100644 --- a/arch/s390/crypto/sha3_512_s390.c +++ b/arch/s390/crypto/sha3_512_s390.c @@ -7,12 +7,14 @@ * Copyright IBM Corp. 2019 * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com) */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> #include <crypto/sha3.h> -#include <asm/cpacf.h> +#include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> #include "sha.h" @@ -20,11 +22,11 @@ static int sha3_512_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ + sctx->first_message_part = test_facility(86); + if (!sctx->first_message_part) memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_512; - sctx->first_message_part = 1; return 0; } @@ -32,30 +34,34 @@ static int sha3_512_init(struct shash_desc *desc) static int sha3_512_export(struct shash_desc *desc, void *out) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha3_state *octx = out; - - octx->rsiz = sctx->count; - octx->rsizw = sctx->count >> 32; - - memcpy(octx->st, sctx->state, sizeof(octx->st)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - octx->partial = sctx->first_message_part; - + union { + u8 *u8; + u64 *u64; + } p = { .u8 = out }; + int i; + + if (sctx->first_message_part) { + memset(out, 0, SHA3_STATE_SIZE); + return 0; + } + for (i = 0; i < SHA3_STATE_SIZE / 8; i++) + put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++); return 0; } static int sha3_512_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; - - if (unlikely(ictx->rsizw)) - return -ERANGE; - sctx->count = ictx->rsiz; - - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + union { + const u8 *u8; + const u64 *u64; + } p = { .u8 = in }; + int i; + + for (i = 0; i < SHA3_STATE_SIZE / 8; i++) + sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++)); + sctx->count = 0; + sctx->first_message_part = 0; sctx->func = CPACF_KIMD_SHA3_512; return 0; @@ -64,33 +70,26 @@ static int sha3_512_import(struct shash_desc *desc, const void *in) static int sha3_384_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; - - if (unlikely(ictx->rsizw)) - return -ERANGE; - sctx->count = ictx->rsiz; - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + sha3_512_import(desc, in); sctx->func = CPACF_KIMD_SHA3_384; - return 0; } static struct shash_alg sha3_512_alg = { .digestsize = SHA3_512_DIGEST_SIZE, .init = sha3_512_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_512_export, .import = sha3_512_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-512", .cra_driver_name = "sha3-512-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -102,28 +101,25 @@ static int sha3_384_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->count = 0; + sha3_512_init(desc); sctx->func = CPACF_KIMD_SHA3_384; - sctx->first_message_part = 1; - return 0; } static struct shash_alg sha3_384_alg = { .digestsize = SHA3_384_DIGEST_SIZE, .init = sha3_384_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_512_export, /* same as for 512 */ .import = sha3_384_import, /* function code different! */ - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-384", .cra_driver_name = "sha3-384-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_sha_ctx), .cra_module = THIS_MODULE, diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c deleted file mode 100644 index 04f11c407763..000000000000 --- a/arch/s390/crypto/sha512_s390.c +++ /dev/null @@ -1,149 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Cryptographic API. - * - * s390 implementation of the SHA512 and SHA38 Secure Hash Algorithm. - * - * Copyright IBM Corp. 2007 - * Author(s): Jan Glauber (jang@de.ibm.com) - */ -#include <crypto/internal/hash.h> -#include <crypto/sha2.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/cpufeature.h> -#include <asm/cpacf.h> - -#include "sha.h" - -static int sha512_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - - *(__u64 *)&ctx->state[0] = SHA512_H0; - *(__u64 *)&ctx->state[2] = SHA512_H1; - *(__u64 *)&ctx->state[4] = SHA512_H2; - *(__u64 *)&ctx->state[6] = SHA512_H3; - *(__u64 *)&ctx->state[8] = SHA512_H4; - *(__u64 *)&ctx->state[10] = SHA512_H5; - *(__u64 *)&ctx->state[12] = SHA512_H6; - *(__u64 *)&ctx->state[14] = SHA512_H7; - ctx->count = 0; - ctx->func = CPACF_KIMD_SHA_512; - - return 0; -} - -static int sha512_export(struct shash_desc *desc, void *out) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha512_state *octx = out; - - octx->count[0] = sctx->count; - octx->count[1] = 0; - memcpy(octx->state, sctx->state, sizeof(octx->state)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - return 0; -} - -static int sha512_import(struct shash_desc *desc, const void *in) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha512_state *ictx = in; - - if (unlikely(ictx->count[1])) - return -ERANGE; - sctx->count = ictx->count[0]; - - memcpy(sctx->state, ictx->state, sizeof(ictx->state)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->func = CPACF_KIMD_SHA_512; - return 0; -} - -static struct shash_alg sha512_alg = { - .digestsize = SHA512_DIGEST_SIZE, - .init = sha512_init, - .update = s390_sha_update, - .final = s390_sha_final, - .export = sha512_export, - .import = sha512_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha512_state), - .base = { - .cra_name = "sha512", - .cra_driver_name= "sha512-s390", - .cra_priority = 300, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -MODULE_ALIAS_CRYPTO("sha512"); - -static int sha384_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - - *(__u64 *)&ctx->state[0] = SHA384_H0; - *(__u64 *)&ctx->state[2] = SHA384_H1; - *(__u64 *)&ctx->state[4] = SHA384_H2; - *(__u64 *)&ctx->state[6] = SHA384_H3; - *(__u64 *)&ctx->state[8] = SHA384_H4; - *(__u64 *)&ctx->state[10] = SHA384_H5; - *(__u64 *)&ctx->state[12] = SHA384_H6; - *(__u64 *)&ctx->state[14] = SHA384_H7; - ctx->count = 0; - ctx->func = CPACF_KIMD_SHA_512; - - return 0; -} - -static struct shash_alg sha384_alg = { - .digestsize = SHA384_DIGEST_SIZE, - .init = sha384_init, - .update = s390_sha_update, - .final = s390_sha_final, - .export = sha512_export, - .import = sha512_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha512_state), - .base = { - .cra_name = "sha384", - .cra_driver_name= "sha384-s390", - .cra_priority = 300, - .cra_blocksize = SHA384_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_sha_ctx), - .cra_module = THIS_MODULE, - } -}; - -MODULE_ALIAS_CRYPTO("sha384"); - -static int __init init(void) -{ - int ret; - - if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512)) - return -ENODEV; - if ((ret = crypto_register_shash(&sha512_alg)) < 0) - goto out; - if ((ret = crypto_register_shash(&sha384_alg)) < 0) - crypto_unregister_shash(&sha512_alg); -out: - return ret; -} - -static void __exit fini(void) -{ - crypto_unregister_shash(&sha512_alg); - crypto_unregister_shash(&sha384_alg); -} - -module_cpu_feature_match(S390_CPU_FEATURE_MSA, init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index 961d7d522af1..d6f839618794 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -9,54 +9,38 @@ */ #include <crypto/internal/hash.h> +#include <linux/export.h> #include <linux/module.h> #include <asm/cpacf.h> #include "sha.h" -int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) +int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct s390_sha_ctx *ctx = shash_desc_ctx(desc); unsigned int bsize = crypto_shash_blocksize(desc->tfm); - unsigned int index, n; + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + unsigned int n; int fc; - /* how much is already in the buffer? */ - index = ctx->count % bsize; - ctx->count += len; - - if ((index + len) < bsize) - goto store; - fc = ctx->func; if (ctx->first_message_part) - fc |= test_facility(86) ? CPACF_KIMD_NIP : 0; - - /* process one stored block */ - if (index) { - memcpy(ctx->buf + index, data, bsize - index); - cpacf_kimd(fc, ctx->state, ctx->buf, bsize); - ctx->first_message_part = 0; - fc &= ~CPACF_KIMD_NIP; - data += bsize - index; - len -= bsize - index; - index = 0; - } + fc |= CPACF_KIMD_NIP; /* process as many blocks as possible */ - if (len >= bsize) { - n = (len / bsize) * bsize; - cpacf_kimd(fc, ctx->state, data, n); - ctx->first_message_part = 0; - data += n; - len -= n; + n = (len / bsize) * bsize; + ctx->count += n; + switch (ctx->func) { + case CPACF_KLMD_SHA_512: + case CPACF_KLMD_SHA3_384: + if (ctx->count < n) + ctx->sha512.count_hi++; + break; } -store: - if (len) - memcpy(ctx->buf + index , data, len); - - return 0; + cpacf_kimd(fc, ctx->state, data, n); + ctx->first_message_part = 0; + return len - n; } -EXPORT_SYMBOL_GPL(s390_sha_update); +EXPORT_SYMBOL_GPL(s390_sha_update_blocks); static int s390_crypto_shash_parmsize(int func) { @@ -77,15 +61,15 @@ static int s390_crypto_shash_parmsize(int func) } } -int s390_sha_final(struct shash_desc *desc, u8 *out) +int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, + u8 *out) { struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - unsigned int bsize = crypto_shash_blocksize(desc->tfm); - u64 bits; - unsigned int n; int mbl_offset, fc; + u64 bits; + + ctx->count += len; - n = ctx->count % bsize; bits = ctx->count * 8; mbl_offset = s390_crypto_shash_parmsize(ctx->func); if (mbl_offset < 0) @@ -95,17 +79,16 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) /* set total msg bit length (mbl) in CPACF parmblock */ switch (ctx->func) { - case CPACF_KLMD_SHA_1: - case CPACF_KLMD_SHA_256: - memcpy(ctx->state + mbl_offset, &bits, sizeof(bits)); - break; case CPACF_KLMD_SHA_512: - /* - * the SHA512 parmblock has a 128-bit mbl field, clear - * high-order u64 field, copy bits to low-order u64 field - */ - memset(ctx->state + mbl_offset, 0x00, sizeof(bits)); + /* The SHA512 parmblock has a 128-bit mbl field. */ + if (ctx->count < len) + ctx->sha512.count_hi++; + ctx->sha512.count_hi <<= 3; + ctx->sha512.count_hi |= ctx->count >> 61; mbl_offset += sizeof(u64) / sizeof(u32); + fallthrough; + case CPACF_KLMD_SHA_1: + case CPACF_KLMD_SHA_256: memcpy(ctx->state + mbl_offset, &bits, sizeof(bits)); break; case CPACF_KLMD_SHA3_224: @@ -121,16 +104,14 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) fc |= test_facility(86) ? CPACF_KLMD_DUFOP : 0; if (ctx->first_message_part) fc |= CPACF_KLMD_NIP; - cpacf_klmd(fc, ctx->state, ctx->buf, n); + cpacf_klmd(fc, ctx->state, src, len); /* copy digest to out */ memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm)); - /* wipe context */ - memset(ctx, 0, sizeof *ctx); return 0; } -EXPORT_SYMBOL_GPL(s390_sha_final); +EXPORT_SYMBOL_GPL(s390_sha_finup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("s390 SHA cipher common functions"); diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index 83ebf54cca6b..4dc2e068e0ff 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -48,7 +48,7 @@ void hypfs_sprp_exit(void); int __hypfs_fs_init(void); -static inline int hypfs_fs_init(void) +static __always_inline int hypfs_fs_init(void) { if (IS_ENABLED(CONFIG_S390_HYPFS_FS)) return __hypfs_fs_init(); diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 5d9effb0867c..41a0d2066fa0 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -6,6 +6,7 @@ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> */ +#include <linux/security.h> #include <linux/slab.h> #include "hypfs.h" @@ -66,23 +67,27 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) long rc; mutex_lock(&df->lock); - if (df->unlocked_ioctl) - rc = df->unlocked_ioctl(file, cmd, arg); - else - rc = -ENOTTY; + rc = df->unlocked_ioctl(file, cmd, arg); mutex_unlock(&df->lock); return rc; } -static const struct file_operations dbfs_ops = { +static const struct file_operations dbfs_ops_ioctl = { .read = dbfs_read, .unlocked_ioctl = dbfs_ioctl, }; +static const struct file_operations dbfs_ops = { + .read = dbfs_read, +}; + void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) { - df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, - &dbfs_ops); + const struct file_operations *fops = &dbfs_ops; + + if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS)) + fops = &dbfs_ops_ioctl; + df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops); mutex_init(&df->lock); } diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h index 7090eff27fef..b5218135b8fe 100644 --- a/arch/s390/hypfs/hypfs_diag.h +++ b/arch/s390/hypfs/hypfs_diag.h @@ -19,7 +19,7 @@ int diag204_store(void *buf, int pages); int __hypfs_diag_fs_init(void); void __hypfs_diag_fs_exit(void); -static inline int hypfs_diag_fs_init(void) +static __always_inline int hypfs_diag_fs_init(void) { if (IS_ENABLED(CONFIG_S390_HYPFS_FS)) return __hypfs_diag_fs_init(); diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index 4131f0daa5ea..61220e717af0 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -9,6 +9,7 @@ #include <linux/slab.h> #include <linux/cpu.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/hypfs.h> #include "hypfs.h" @@ -107,7 +108,7 @@ static struct hypfs_dbfs_file dbfs_file_0c = { */ int __init hypfs_diag0c_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; hypfs_dbfs_create_file(&dbfs_file_0c); return 0; @@ -118,7 +119,7 @@ int __init hypfs_diag0c_init(void) */ void hypfs_diag0c_exit(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; hypfs_dbfs_remove_file(&dbfs_file_0c); } diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c index 00a6d370a280..ede951dc0085 100644 --- a/arch/s390/hypfs/hypfs_diag_fs.c +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include "hypfs_diag.h" @@ -208,6 +209,8 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(), cpu_info)); cpu_dir = hypfs_mkdir(cpus_dir, buffer); + if (IS_ERR(cpu_dir)) + return PTR_ERR(cpu_dir); rc = hypfs_create_u64(cpu_dir, "mgmtime", cpu_info__acc_time(diag204_get_info_type(), cpu_info) - cpu_info__lp_time(diag204_get_info_type(), cpu_info)); @@ -382,7 +385,7 @@ static void diag224_delete_name_table(void) int __init __hypfs_diag_fs_init(void) { - if (MACHINE_IS_LPAR) + if (machine_is_lpar()) return diag224_get_name_table(); return 0; } diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 3db40ad853e0..4db2895e4da3 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -11,6 +11,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <asm/extable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/timex.h> @@ -121,7 +122,7 @@ static struct hypfs_dbfs_file dbfs_file_2fc = { int hypfs_vm_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; if (diag2fc(0, all_guests, NULL) > 0) diag2fc_guest_query = all_guests; @@ -135,7 +136,7 @@ int hypfs_vm_init(void) void hypfs_vm_exit(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; hypfs_dbfs_remove_file(&dbfs_file_2fc); } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d428635abf08..96409573c75d 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -24,6 +24,7 @@ #include <linux/kobject.h> #include <linux/seq_file.h> #include <linux/uio.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include "hypfs.h" @@ -184,7 +185,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } hypfs_delete_tree(sb->s_root); - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = hypfs_vm_create_files(sb->s_root); else rc = hypfs_diag_create_files(sb->s_root); @@ -273,7 +274,7 @@ static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_root = root_dentry = d_make_root(root_inode); if (!root_dentry) return -ENOMEM; - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = hypfs_vm_create_files(root_dentry); else rc = hypfs_diag_create_files(root_dentry); @@ -341,7 +342,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, struct inode *inode; inode_lock(d_inode(parent)); - dentry = lookup_one_len(name, parent, strlen(name)); + dentry = lookup_noperm(&QSTR(name), parent); if (IS_ERR(dentry)) { dentry = ERR_PTR(-ENOMEM); goto fail; diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index d20df8c923fc..317c07c09ae4 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -2,7 +2,7 @@ #ifndef _ASM_S390_ABS_LOWCORE_H #define _ASM_S390_ABS_LOWCORE_H -#include <asm/sections.h> +#include <linux/smp.h> #include <asm/lowcore.h> #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) @@ -25,11 +25,4 @@ static inline void put_abs_lowcore(struct lowcore *lc) put_cpu(); } -extern int __bootdata_preserved(relocate_lowcore); - -static inline int have_relocated_lowcore(void) -{ - return relocate_lowcore; -} - #endif /* _ASM_S390_ABS_LOWCORE_H */ diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 73e781b56bfe..1c56480def9e 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -32,8 +32,8 @@ #define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE) #define ALT_TYPE_FACILITY 0 -#define ALT_TYPE_SPEC 1 -#define ALT_TYPE_LOWCORE 2 +#define ALT_TYPE_FEATURE 1 +#define ALT_TYPE_SPEC 2 #define ALT_DATA_SHIFT 0 #define ALT_TYPE_SHIFT 20 @@ -43,14 +43,15 @@ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) +#define ALT_FEATURE(feature) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_FEATURE << ALT_TYPE_SHIFT | \ + (feature) << ALT_DATA_SHIFT) + #define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) -#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ - ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT) - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/stddef.h> @@ -182,7 +183,7 @@ static inline void apply_alternatives(struct alt_instr *start, struct alt_instr /* Use this macro if clobbers are needed without inputs. */ #define ASM_NO_INPUT_CLOBBER(clobber...) : clobber -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ /* * Issue one struct alt_instr descriptor entry (need to put it into @@ -232,6 +233,6 @@ static inline void apply_alternatives(struct alt_instr *start, struct alt_instr .popsection .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_ALTERNATIVE_H */ diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 395b02d6a133..352108727d7e 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -103,7 +103,7 @@ struct ap_tapq_hwinfo { unsigned int accel : 1; /* A */ unsigned int ep11 : 1; /* X */ unsigned int apxa : 1; /* APXA */ - unsigned int : 1; + unsigned int slcf : 1; /* Cmd filtering avail. */ unsigned int class : 8; unsigned int bs : 2; /* SE bind/assoc */ unsigned int : 14; diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h index a92ebbc7aa7a..99b2902c10fd 100644 --- a/arch/s390/include/asm/appldata.h +++ b/arch/s390/include/asm/appldata.h @@ -9,6 +9,7 @@ #define _ASM_S390_APPLDATA_H #include <linux/io.h> +#include <asm/machine.h> #include <asm/diag.h> #define APPLDATA_START_INTERVAL_REC 0x80 @@ -48,7 +49,7 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list, { int ry; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; parm_list->diag = 0xdc; parm_list->function = fn; diff --git a/arch/s390/include/asm/asce.h b/arch/s390/include/asm/asce.h new file mode 100644 index 000000000000..f6dfaaba735a --- /dev/null +++ b/arch/s390/include/asm/asce.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_ASCE_H +#define _ASM_S390_ASCE_H + +#include <linux/thread_info.h> +#include <linux/irqflags.h> +#include <asm/lowcore.h> +#include <asm/ctlreg.h> + +static inline bool enable_sacf_uaccess(void) +{ + unsigned long flags; + + if (test_thread_flag(TIF_ASCE_PRIMARY)) + return true; + local_irq_save(flags); + local_ctl_load(1, &get_lowcore()->kernel_asce); + set_thread_flag(TIF_ASCE_PRIMARY); + local_irq_restore(flags); + return false; +} + +static inline void disable_sacf_uaccess(bool previous) +{ + unsigned long flags; + + if (previous) + return; + local_irq_save(flags); + local_ctl_load(1, &get_lowcore()->user_asce); + clear_thread_flag(TIF_ASCE_PRIMARY); + local_irq_restore(flags); +} + +#endif /* _ASM_S390_ASCE_H */ diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h index 11f615eb0066..1cfffad9eea0 100644 --- a/arch/s390/include/asm/asm-const.h +++ b/arch/s390/include/asm/asm-const.h @@ -2,7 +2,7 @@ #ifndef _ASM_S390_ASM_CONST_H #define _ASM_S390_ASM_CONST_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define stringify_in_c(...) __VA_ARGS__ #else /* This version of stringify will deal with commas... */ diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 4a6b0a8b6412..d23ea0c94e4e 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -9,11 +9,13 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 -#define EX_TYPE_UA_STORE 3 -#define EX_TYPE_UA_LOAD_MEM 4 +#define EX_TYPE_UA_FAULT 3 #define EX_TYPE_UA_LOAD_REG 5 #define EX_TYPE_UA_LOAD_REGPAIR 6 #define EX_TYPE_ZEROPAD 7 +#define EX_TYPE_FPC 8 +#define EX_TYPE_UA_MVCOS_TO 9 +#define EX_TYPE_UA_MVCOS_FROM 10 #define EX_DATA_REG_ERR_SHIFT 0 #define EX_DATA_REG_ERR GENMASK(3, 0) @@ -69,11 +71,8 @@ #define EX_TABLE_AMODE31(_fault, _target) \ __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0) -#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \ - __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0) - -#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \ - __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len) +#define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0) #define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0) @@ -84,4 +83,13 @@ #define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0) +#define EX_TABLE_FPC(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0) + +#define EX_TABLE_UA_MVCOS_TO(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_TO, __stringify(%%r0), __stringify(%%r0), 0) + +#define EX_TABLE_UA_MVCOS_FROM(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_FROM, __stringify(%%r0), __stringify(%%r0), 0) + #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h index ec011b94af2a..e9062b01e2a2 100644 --- a/arch/s390/include/asm/asm.h +++ b/arch/s390/include/asm/asm.h @@ -28,7 +28,7 @@ * [var] also contains the program mask. CC_TRANSFORM() moves the condition * code to the two least significant bits and sets all other bits to zero. */ -#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_GCC_ASM_FLAG_OUTPUT_BROKEN)) +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN)) #define __HAVE_ASM_FLAG_OUTPUTS__ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 6723fca64018..b36dd6a1d652 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -17,13 +17,13 @@ static __always_inline int arch_atomic_read(const atomic_t *v) { - return __atomic_read(v); + return __atomic_read(&v->counter); } #define arch_atomic_read arch_atomic_read static __always_inline void arch_atomic_set(atomic_t *v, int i) { - __atomic_set(v, i); + __atomic_set(&v->counter, i); } #define arch_atomic_set arch_atomic_set @@ -45,6 +45,36 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v) } #define arch_atomic_add arch_atomic_add +static __always_inline void arch_atomic_inc(atomic_t *v) +{ + __atomic_add_const(1, &v->counter); +} +#define arch_atomic_inc arch_atomic_inc + +static __always_inline void arch_atomic_dec(atomic_t *v) +{ + __atomic_add_const(-1, &v->counter); +} +#define arch_atomic_dec arch_atomic_dec + +static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) +{ + return __atomic_add_and_test_barrier(-i, &v->counter); +} +#define arch_atomic_sub_and_test arch_atomic_sub_and_test + +static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) +{ + return __atomic_add_const_and_test_barrier(-1, &v->counter); +} +#define arch_atomic_dec_and_test arch_atomic_dec_and_test + +static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) +{ + return __atomic_add_const_and_test_barrier(1, &v->counter); +} +#define arch_atomic_inc_and_test arch_atomic_inc_and_test + #define arch_atomic_sub(_i, _v) arch_atomic_add(-(int)(_i), _v) #define arch_atomic_sub_return(_i, _v) arch_atomic_add_return(-(int)(_i), _v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) @@ -94,13 +124,13 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { - return __atomic64_read(v); + return __atomic64_read((long *)&v->counter); } #define arch_atomic64_read arch_atomic64_read static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { - __atomic64_set(v, i); + __atomic64_set((long *)&v->counter, i); } #define arch_atomic64_set arch_atomic64_set @@ -122,6 +152,36 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) } #define arch_atomic64_add arch_atomic64_add +static __always_inline void arch_atomic64_inc(atomic64_t *v) +{ + __atomic64_add_const(1, (long *)&v->counter); +} +#define arch_atomic64_inc arch_atomic64_inc + +static __always_inline void arch_atomic64_dec(atomic64_t *v) +{ + __atomic64_add_const(-1, (long *)&v->counter); +} +#define arch_atomic64_dec arch_atomic64_dec + +static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) +{ + return __atomic64_add_and_test_barrier(-i, (long *)&v->counter); +} +#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test + +static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) +{ + return __atomic64_add_const_and_test_barrier(-1, (long *)&v->counter); +} +#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test + +static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) +{ + return __atomic64_add_const_and_test_barrier(1, (long *)&v->counter); +} +#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test + static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 1d6b2056fad8..21c26d842832 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -10,50 +10,51 @@ #include <linux/limits.h> #include <asm/march.h> +#include <asm/asm.h> -static __always_inline int __atomic_read(const atomic_t *v) +static __always_inline int __atomic_read(const int *ptr) { - int c; + int val; asm volatile( - " l %[c],%[counter]\n" - : [c] "=d" (c) : [counter] "R" (v->counter)); - return c; + " l %[val],%[ptr]\n" + : [val] "=d" (val) : [ptr] "R" (*ptr)); + return val; } -static __always_inline void __atomic_set(atomic_t *v, int i) +static __always_inline void __atomic_set(int *ptr, int val) { - if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvhi %[counter], %[i]\n" - : [counter] "=Q" (v->counter) : [i] "K" (i)); + " mvhi %[ptr],%[val]\n" + : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " st %[i],%[counter]\n" - : [counter] "=R" (v->counter) : [i] "d" (i)); + " st %[val],%[ptr]\n" + : [ptr] "=R" (*ptr) : [val] "d" (val)); } } -static __always_inline s64 __atomic64_read(const atomic64_t *v) +static __always_inline long __atomic64_read(const long *ptr) { - s64 c; + long val; asm volatile( - " lg %[c],%[counter]\n" - : [c] "=d" (c) : [counter] "RT" (v->counter)); - return c; + " lg %[val],%[ptr]\n" + : [val] "=d" (val) : [ptr] "RT" (*ptr)); + return val; } -static __always_inline void __atomic64_set(atomic64_t *v, s64 i) +static __always_inline void __atomic64_set(long *ptr, long val) { - if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvghi %[counter], %[i]\n" - : [counter] "=Q" (v->counter) : [i] "K" (i)); + " mvghi %[ptr],%[val]\n" + : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " stg %[i],%[counter]\n" - : [counter] "=RT" (v->counter) : [i] "d" (i)); + " stg %[val],%[ptr]\n" + : [ptr] "=RT" (*ptr) : [val] "d" (val)); } } @@ -73,7 +74,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \ } \ #define __ATOMIC_OPS(op_name, op_type, op_string) \ - __ATOMIC_OP(op_name, op_type, op_string, "\n") \ + __ATOMIC_OP(op_name, op_type, op_string, "") \ __ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") __ATOMIC_OPS(__atomic_add, int, "laa") @@ -99,7 +100,7 @@ static __always_inline void op_name(op_type val, op_type *ptr) \ } #define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \ - __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \ + __ATOMIC_CONST_OP(op_name, op_type, op_string, "") \ __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") @@ -162,11 +163,83 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #undef __ATOMIC64_OPS -#define __atomic_add_const(val, ptr) __atomic_add(val, ptr) -#define __atomic_add_const_barrier(val, ptr) __atomic_add(val, ptr) -#define __atomic64_add_const(val, ptr) __atomic64_add(val, ptr) -#define __atomic64_add_const_barrier(val, ptr) __atomic64_add(val, ptr) +#define __atomic_add_const(val, ptr) ((void)__atomic_add(val, ptr)) +#define __atomic_add_const_barrier(val, ptr) ((void)__atomic_add(val, ptr)) +#define __atomic64_add_const(val, ptr) ((void)__atomic64_add(val, ptr)) +#define __atomic64_add_const_barrier(val, ptr) ((void)__atomic64_add(val, ptr)) #endif /* MARCH_HAS_Z196_FEATURES */ +#if defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) + +#define __ATOMIC_TEST_OP(op_name, op_type, op_string, op_barrier) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + op_type tmp; \ + int cc; \ + \ + asm volatile( \ + op_string " %[tmp],%[val],%[ptr]\n" \ + op_barrier \ + : "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr) \ + : [val] "d" (val) \ + : "memory"); \ + return (cc == 0) || (cc == 2); \ +} \ + +#define __ATOMIC_TEST_OPS(op_name, op_type, op_string) \ + __ATOMIC_TEST_OP(op_name, op_type, op_string, "") \ + __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + +__ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal") +__ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg") + +#undef __ATOMIC_TEST_OPS +#undef __ATOMIC_TEST_OP + +#define __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, op_barrier) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + int cc; \ + \ + asm volatile( \ + op_string " %[ptr],%[val]\n" \ + op_barrier \ + : "=@cc" (cc), [ptr] "+QS" (*ptr) \ + : [val] "i" (val) \ + : "memory"); \ + return (cc == 0) || (cc == 2); \ +} + +#define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string) \ + __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "") \ + __ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + +__ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi") +__ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi") + +#undef __ATOMIC_CONST_TEST_OPS +#undef __ATOMIC_CONST_TEST_OP + +#else /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */ + +#define __ATOMIC_TEST_OP(op_name, op_func, op_type) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + return op_func(val, ptr) == -val; \ +} + +__ATOMIC_TEST_OP(__atomic_add_and_test, __atomic_add, int) +__ATOMIC_TEST_OP(__atomic_add_and_test_barrier, __atomic_add_barrier, int) +__ATOMIC_TEST_OP(__atomic_add_const_and_test, __atomic_add, int) +__ATOMIC_TEST_OP(__atomic_add_const_and_test_barrier, __atomic_add_barrier, int) +__ATOMIC_TEST_OP(__atomic64_add_and_test, __atomic64_add, long) +__ATOMIC_TEST_OP(__atomic64_add_and_test_barrier, __atomic64_add_barrier, long) +__ATOMIC_TEST_OP(__atomic64_add_const_and_test, __atomic64_add, long) +__ATOMIC_TEST_OP(__atomic64_add_const_and_test_barrier, __atomic64_add_barrier, long) + +#undef __ATOMIC_TEST_OP + +#endif /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */ + #endif /* __ARCH_S390_ATOMIC_OPS__ */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 54a079cd39ed..a5ca0a947691 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -36,184 +36,45 @@ #include <linux/typecheck.h> #include <linux/compiler.h> #include <linux/types.h> -#include <asm/atomic_ops.h> -#include <asm/barrier.h> - -#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) - -static inline unsigned long * -__bitops_word(unsigned long nr, const volatile unsigned long *ptr) -{ - unsigned long addr; - - addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3); - return (unsigned long *)addr; -} - -static inline unsigned long __bitops_mask(unsigned long nr) -{ - return 1UL << (nr & (BITS_PER_LONG - 1)); -} - -static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_or(mask, (long *)addr); -} - -static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_and(~mask, (long *)addr); -} - -static __always_inline void arch_change_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_xor(mask, (long *)addr); -} - -static inline bool arch_test_and_set_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_or_barrier(mask, (long *)addr); - return old & mask; -} - -static inline bool arch_test_and_clear_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_and_barrier(~mask, (long *)addr); - return old & mask; -} - -static inline bool arch_test_and_change_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_xor_barrier(mask, (long *)addr); - return old & mask; -} - -static __always_inline void -arch___set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p |= mask; -} - -static __always_inline void -arch___clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p &= ~mask; -} - -static __always_inline void -arch___change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p ^= mask; -} - -static __always_inline bool -arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p |= mask; - return old & mask; -} - -static __always_inline bool -arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p &= ~mask; - return old & mask; -} - -static __always_inline bool -arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p ^= mask; - return old & mask; -} - -#define arch_test_bit generic_test_bit -#define arch_test_bit_acquire generic_test_bit_acquire - -static inline bool arch_test_and_set_bit_lock(unsigned long nr, - volatile unsigned long *ptr) -{ - if (arch_test_bit(nr, ptr)) - return true; - return arch_test_and_set_bit(nr, ptr); -} - -static inline void arch_clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) -{ - smp_mb__before_atomic(); - arch_clear_bit(nr, ptr); -} - -static inline void arch___clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) -{ - smp_mb(); - arch___clear_bit(nr, ptr); -} - -static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, - volatile unsigned long *ptr) -{ - unsigned long old; - - old = __atomic64_xor_barrier(mask, (long *)ptr); - return old & BIT(7); +#include <asm/asm.h> + +#define arch___set_bit generic___set_bit +#define arch___clear_bit generic___clear_bit +#define arch___change_bit generic___change_bit +#define arch___test_and_set_bit generic___test_and_set_bit +#define arch___test_and_clear_bit generic___test_and_clear_bit +#define arch___test_and_change_bit generic___test_and_change_bit +#define arch_test_bit_acquire generic_test_bit_acquire + +static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsigned long *ptr) +{ +#ifdef __HAVE_ASM_FLAG_OUTPUTS__ + const volatile unsigned char *addr; + unsigned long mask; + int cc; + + /* + * With CONFIG_PROFILE_ALL_BRANCHES enabled gcc fails to + * handle __builtin_constant_p() in some cases. + */ + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && __builtin_constant_p(nr)) { + addr = (const volatile unsigned char *)ptr; + addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE; + mask = 1UL << (nr & (BITS_PER_BYTE - 1)); + asm volatile( + " tm %[addr],%[mask]\n" + : "=@cc" (cc) + : [addr] "Q" (*addr), [mask] "I" (mask) + ); + return cc == 3; + } +#endif + return generic_test_bit(nr, ptr); } -#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte -#include <asm-generic/bitops/instrumented-atomic.h> -#include <asm-generic/bitops/instrumented-non-atomic.h> -#include <asm-generic/bitops/instrumented-lock.h> +#include <asm-generic/bitops/atomic.h> +#include <asm-generic/bitops/non-instrumented-non-atomic.h> +#include <asm-generic/bitops/lock.h> /* * Functions which use MSB0 bit numbering. diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index f7eed27b3220..f55f8227058e 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_S390_BOOT_DATA_H +#include <linux/string.h> #include <asm/setup.h> #include <asm/ipl.h> @@ -15,4 +16,54 @@ extern unsigned long ipl_cert_list_size; extern unsigned long early_ipl_comp_list_addr; extern unsigned long early_ipl_comp_list_size; +extern char boot_rb[PAGE_SIZE * 2]; +extern bool boot_earlyprintk; +extern size_t boot_rb_off; +extern char bootdebug_filter[128]; +extern bool bootdebug; + +#define boot_rb_foreach(cb) \ + do { \ + size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1; \ + size_t len; \ + for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + } while (0) + +/* + * bootdebug_filter is a comma separated list of strings, + * where each string can be a prefix of the message. + */ +static inline bool bootdebug_filter_match(const char *buf) +{ + char *p = bootdebug_filter, *s; + char *end; + + if (!*p) + return true; + + end = p + strlen(p); + while (p < end) { + p = skip_spaces(p); + s = memscan(p, ',', end - p); + if (!strncmp(p, buf, s - p)) + return true; + p = s + 1; + } + return false; +} + +static inline const char *skip_timestamp(const char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + const char *p = memchr(buf, ']', strlen(buf)); + + if (p && p[1] == ' ') + return p + 2; +#endif + return buf; +} + #endif /* _ASM_S390_BOOT_DATA_H */ diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 46f5c9660616..d86dea5900e7 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -25,7 +25,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum) instrument_read(buff, len); kmsan_check_memory(buff, len); - asm volatile("\n" + asm volatile( "0: cksm %[sum],%[rp]\n" " jo 0b\n" : [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory"); diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 59ab1192e2d5..4bc5317fbb12 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -129,6 +129,10 @@ #define CPACF_KMAC_HMAC_SHA_256 0x71 #define CPACF_KMAC_HMAC_SHA_384 0x72 #define CPACF_KMAC_HMAC_SHA_512 0x73 +#define CPACF_KMAC_PHMAC_SHA_224 0x78 +#define CPACF_KMAC_PHMAC_SHA_256 0x79 +#define CPACF_KMAC_PHMAC_SHA_384 0x7a +#define CPACF_KMAC_PHMAC_SHA_512 0x7b /* * Function codes for the PCKMO (PERFORM CRYPTOGRAPHIC KEY MANAGEMENT) @@ -649,18 +653,30 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len, * instruction * @func: the function code passed to PCC; see CPACF_KM_xxx defines * @param: address of parameter block; see POP for details on each func + * + * Returns the condition code, this is + * 0 - cc code 0 (normal completion) + * 1 - cc code 1 (protected key wkvp mismatch or src operand out of range) + * 2 - cc code 2 (something invalid, scalar multiply infinity, ...) + * Condition code 3 (partial completion) is handled within the asm code + * and never returned. */ -static inline void cpacf_pcc(unsigned long func, void *param) +static inline int cpacf_pcc(unsigned long func, void *param) { + int cc; + asm volatile( " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */ " brc 1,0b\n" /* handle partial completion */ - : + CC_IPM(cc) + : CC_OUT(cc, cc) : [fc] "d" (func), [pba] "d" ((unsigned long)param), [opc] "i" (CPACF_PCC) - : "cc", "memory", "0", "1"); + : CC_CLOBBER_LIST("memory", "0", "1")); + + return CC_TRANSFORM(cc); } /** diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index 26c710cd3485..5672e3fab52b 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -9,7 +9,7 @@ #ifndef _ASM_S390_CPU_H #define _ASM_S390_CPU_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/jump_label.h> @@ -24,5 +24,5 @@ struct cpuid DECLARE_STATIC_KEY_FALSE(cpu_has_bear); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_CPU_H */ diff --git a/arch/s390/include/asm/cpu_mf-insn.h b/arch/s390/include/asm/cpu_mf-insn.h index a68b362e0964..941663939cc7 100644 --- a/arch/s390/include/asm/cpu_mf-insn.h +++ b/arch/s390/include/asm/cpu_mf-insn.h @@ -8,7 +8,7 @@ #ifndef _ASM_S390_CPU_MF_INSN_H #define _ASM_S390_CPU_MF_INSN_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* Macro to generate the STCCTM instruction with a customized * M3 field designating the counter set. @@ -17,6 +17,6 @@ .insn rsy,0xeb0000000017,\r1,\m3 & 0xf,\db2 .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index e1a279e0d6a6..1798fbd59068 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -171,7 +171,7 @@ static inline int qctri(struct cpumf_ctr_info *info) { int rc = -EINVAL; - asm volatile ( + asm_inline volatile ( "0: qctri %1\n" "1: lhi %0,0\n" "2:\n" @@ -185,7 +185,7 @@ static inline int lcctl(u64 ctl) { int cc; - asm volatile ( + asm_inline volatile ( " lcctl %[ctl]\n" CC_IPM(cc) : CC_OUT(cc, cc) @@ -200,7 +200,7 @@ static inline int __ecctr(u64 ctr, u64 *content) u64 _content; int cc; - asm volatile ( + asm_inline volatile ( " ecctr %[_content],%[ctr]\n" CC_IPM(cc) : CC_OUT(cc, cc), [_content] "=d" (_content) diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h index 931204613753..6c6a99660e78 100644 --- a/arch/s390/include/asm/cpufeature.h +++ b/arch/s390/include/asm/cpufeature.h @@ -9,10 +9,13 @@ #ifndef __ASM_S390_CPUFEATURE_H #define __ASM_S390_CPUFEATURE_H +#include <asm/facility.h> + enum { S390_CPU_FEATURE_MSA, S390_CPU_FEATURE_VXRS, S390_CPU_FEATURE_UV, + S390_CPU_FEATURE_D288, MAX_CPU_FEATURES }; @@ -20,4 +23,16 @@ enum { int cpu_have_feature(unsigned int nr); +#define cpu_has_bear() test_facility(193) +#define cpu_has_edat1() test_facility(8) +#define cpu_has_edat2() test_facility(78) +#define cpu_has_gs() test_facility(133) +#define cpu_has_idte() test_facility(3) +#define cpu_has_nx() test_facility(130) +#define cpu_has_rdp() test_facility(194) +#define cpu_has_seq_insn() test_facility(85) +#define cpu_has_tlb_lc() test_facility(51) +#define cpu_has_topology() test_facility(11) +#define cpu_has_vx() test_facility(129) + #endif /* __ASM_S390_CPUFEATURE_H */ diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index 638137d46c85..a03f64033760 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -25,7 +25,7 @@ struct css_general_char { u64 : 2; u64 : 3; - u64 aif_osa : 1; /* bit 67 */ + u64 aif_qdio : 1;/* bit 67 */ u64 : 12; u64 eadm_rf : 1; /* bit 80 */ u64 : 1; diff --git a/arch/s390/include/asm/ctlreg.h b/arch/s390/include/asm/ctlreg.h index e6527f51ad0b..e93cc240a1ed 100644 --- a/arch/s390/include/asm/ctlreg.h +++ b/arch/s390/include/asm/ctlreg.h @@ -80,7 +80,7 @@ #define CR14_EXTERNAL_DAMAGE_SUBMASK BIT(CR14_EXTERNAL_DAMAGE_SUBMASK_BIT) #define CR14_WARNING_SUBMASK BIT(CR14_WARNING_SUBMASK_BIT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/bug.h> @@ -252,5 +252,5 @@ union ctlreg15 { }; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_S390_CTLREG_H */ diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h index d03a922c641e..f9529f7cf62c 100644 --- a/arch/s390/include/asm/current.h +++ b/arch/s390/include/asm/current.h @@ -11,9 +11,25 @@ #define _S390_CURRENT_H #include <asm/lowcore.h> +#include <asm/machine.h> struct task_struct; -#define current ((struct task_struct *const)get_lowcore()->current_task) +static __always_inline struct task_struct *get_current(void) +{ + unsigned long ptr, lc_current; + + lc_current = offsetof(struct lowcore, current_task); + asm_inline( + ALTERNATIVE(" lg %[ptr],%[offzero](%%r0)\n", + " lg %[ptr],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [ptr] "=d" (ptr) + : [offzero] "i" (lc_current), + [offalt] "i" (lc_current + LOWCORE_ALT_ADDRESS)); + return (struct task_struct *)ptr; +} + +#define current get_current() #endif /* !(_S390_CURRENT_H) */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index a7f7bdc9e19c..6375276d94ea 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -85,6 +85,10 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf, size_t out_buf_size); +#define DEBUG_SPRINTF_MAX_ARGS 10 +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, + const char *inbuf); struct debug_view { char name[DEBUG_MAX_NAME_LEN]; debug_prolog_proc_t *prolog_proc; @@ -114,6 +118,9 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid); +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size, bool reverse); + void debug_unregister(debug_info_t *id); void debug_set_level(debug_info_t *id, int new_level); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index e1316e181230..8db8db3b1018 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -36,8 +36,10 @@ enum diag_stat_enum { DIAG_STAT_X2FC, DIAG_STAT_X304, DIAG_STAT_X308, + DIAG_STAT_X310, DIAG_STAT_X318, DIAG_STAT_X320, + DIAG_STAT_X324, DIAG_STAT_X49C, DIAG_STAT_X500, NR_DIAG_STAT @@ -64,7 +66,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) end_addr = pfn_to_phys(start_pfn + num_pfn - 1); diag_stat_inc(DIAG_STAT_X010); - asm volatile( + asm_inline volatile( "0: diag %0,%1,0x10\n" "1: nopr %%r7\n" EX_TABLE(0b, 1b) diff --git a/arch/s390/include/asm/diag288.h b/arch/s390/include/asm/diag288.h new file mode 100644 index 000000000000..5e1b43cea9d6 --- /dev/null +++ b/arch/s390/include/asm/diag288.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_DIAG288_H +#define _ASM_S390_DIAG288_H + +#include <asm/asm-extable.h> +#include <asm/types.h> + +#define MIN_INTERVAL 15 /* Minimal time supported by diag288 */ +#define MAX_INTERVAL 3600 /* One hour should be enough - pure estimation */ + +#define WDT_DEFAULT_TIMEOUT 30 + +/* Function codes - init, change, cancel */ +#define WDT_FUNC_INIT 0 +#define WDT_FUNC_CHANGE 1 +#define WDT_FUNC_CANCEL 2 +#define WDT_FUNC_CONCEAL 0x80000000 + +/* Action codes for LPAR watchdog */ +#define LPARWDT_RESTART 0 + +static inline int __diag288(unsigned int func, unsigned int timeout, + unsigned long action, unsigned int len) +{ + union register_pair r1 = { .even = func, .odd = timeout, }; + union register_pair r3 = { .even = action, .odd = len, }; + int rc = -EINVAL; + + asm volatile( + " diag %[r1],%[r3],0x288\n" + "0: lhi %[rc],0\n" + "1:" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc) + : [r1] "d" (r1.pair), [r3] "d" (r3.pair) + : "cc", "memory"); + return rc; +} + +#endif /* _ASM_S390_DIAG288_H */ diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h index 390906b8e386..e3ad6798d0cd 100644 --- a/arch/s390/include/asm/dwarf.h +++ b/arch/s390/include/asm/dwarf.h @@ -2,7 +2,7 @@ #ifndef _ASM_S390_DWARF_H #define _ASM_S390_DWARF_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc @@ -33,6 +33,6 @@ .cfi_sections .eh_frame, .debug_frame #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_DWARF_H */ diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h index efb50fc6866c..7164cb658435 100644 --- a/arch/s390/include/asm/ebcdic.h +++ b/arch/s390/include/asm/ebcdic.h @@ -22,18 +22,18 @@ extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */ static inline void codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr) { - if (nr-- <= 0) + if (!nr--) return; asm volatile( - " bras 1,1f\n" - " tr 0(1,%0),0(%2)\n" - "0: tr 0(256,%0),0(%2)\n" + " j 2f\n" + "0: tr 0(1,%0),0(%2)\n" + "1: tr 0(256,%0),0(%2)\n" " la %0,256(%0)\n" - "1: ahi %1,-256\n" - " jnm 0b\n" - " ex %1,0(1)" + "2: aghi %1,-256\n" + " jnm 1b\n" + " exrl %1,0b" : "+&a" (addr), "+&a" (nr) - : "a" (codepage) : "cc", "memory", "1"); + : "a" (codepage) : "cc", "memory"); } #define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 8f2c23cc52b6..a03df312081e 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -158,9 +158,6 @@ enum { #define ELF_DATA ELFDATA2MSB #define ELF_ARCH EM_S390 -/* s390 specific phdr types */ -#define PT_S390_PGSTE 0x70000000 - /* * ELF register definitions.. */ @@ -191,35 +188,6 @@ typedef s390_compat_regs compat_elf_gregset_t; && (x)->e_ident[EI_CLASS] == ELF_CLASS) #define compat_start_thread start_thread31 -struct arch_elf_state { - int rc; -}; - -#define INIT_ARCH_ELF_STATE { .rc = 0 } - -#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0) -#ifdef CONFIG_PGSTE -#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ -({ \ - struct arch_elf_state *_state = state; \ - if ((phdr)->p_type == PT_S390_PGSTE && \ - !page_table_allocate_pgste && \ - !test_thread_flag(TIF_PGSTE) && \ - !current->mm->context.alloc_pgste) { \ - set_thread_flag(TIF_PGSTE); \ - set_pt_regs_flag(task_pt_regs(current), \ - PIF_EXECVE_PGSTE_RESTART); \ - _state->rc = -EAGAIN; \ - } \ - _state->rc; \ -}) -#else -#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ -({ \ - (state)->rc; \ -}) -#endif - /* For SVR4/S390 the function pointer to be registered with `atexit` is passed in R14. */ #define ELF_PLAT_INIT(_r, load_addr) \ diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index 35555c944630..979af986a8fe 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -59,4 +59,14 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare +static __always_inline bool arch_in_rcu_eqs(void) +{ + if (IS_ENABLED(CONFIG_KVM)) + return current->flags & PF_VCPU; + + return false; +} + +#define arch_in_rcu_eqs arch_in_rcu_eqs + #endif diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h index e0a06060afdd..225ee89c3f5e 100644 --- a/arch/s390/include/asm/extmem.h +++ b/arch/s390/include/asm/extmem.h @@ -6,7 +6,7 @@ #ifndef _ASM_S390X_DCSS_H #define _ASM_S390X_DCSS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * DCSS segment is defined as a contiguous range of pages using DEFSEG command. diff --git a/arch/s390/include/asm/fprobe.h b/arch/s390/include/asm/fprobe.h new file mode 100644 index 000000000000..5ef600b372f4 --- /dev/null +++ b/arch/s390/include/asm/fprobe.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_FPROBE_H +#define _ASM_S390_FPROBE_H + +#include <asm-generic/fprobe.h> + +#undef FPROBE_HEADER_MSB_PATTERN +#define FPROBE_HEADER_MSB_PATTERN 0 + +#endif /* _ASM_S390_FPROBE_H */ diff --git a/arch/s390/include/asm/fpu-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h index d296322be4bc..cc0468fdf2d0 100644 --- a/arch/s390/include/asm/fpu-insn-asm.h +++ b/arch/s390/include/asm/fpu-insn-asm.h @@ -16,7 +16,7 @@ #error only <asm/fpu-insn.h> can be included directly #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* Macros to generate vector instruction byte code */ @@ -750,5 +750,5 @@ MRXBOPC 0, 0x77, v1, v2, v3 .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_S390_FPU_INSN_ASM_H */ diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index c1e2e521d9af..135bb89c0a89 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -9,7 +9,7 @@ #include <asm/fpu-insn-asm.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/instrumented.h> #include <asm/asm-extable.h> @@ -100,19 +100,12 @@ static __always_inline void fpu_lfpc(unsigned int *fpc) */ static inline void fpu_lfpc_safe(unsigned int *fpc) { - u32 tmp; - instrument_read(fpc, sizeof(*fpc)); - asm volatile("\n" - "0: lfpc %[fpc]\n" - "1: nopr %%r7\n" - ".pushsection .fixup, \"ax\"\n" - "2: lghi %[tmp],0\n" - " sfpc %[tmp]\n" - " jg 1b\n" - ".popsection\n" - EX_TABLE(1b, 2b) - : [tmp] "=d" (tmp) + asm_inline volatile( + " lfpc %[fpc]\n" + "0: nopr %%r7\n" + EX_TABLE_FPC(0b, 0b) + : : [fpc] "Q" (*fpc) : "memory"); } @@ -183,33 +176,33 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vl(u8 v1, const void *vxr) { instrument_read(vxr, sizeof(__vector128)); - asm volatile("\n" - " la 1,%[vxr]\n" - " VL %[v1],0,,1\n" - : - : [vxr] "R" (*(__vector128 *)vxr), - [v1] "I" (v1) - : "memory", "1"); + asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" + : + : [vxr] "Q" (*(__vector128 *)vxr), + [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vl(u8 v1, const void *vxr) { instrument_read(vxr, sizeof(__vector128)); - asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" - : - : [vxr] "Q" (*(__vector128 *)vxr), - [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VL %[v1],0,,1\n" + : + : [vxr] "R" (*(__vector128 *)vxr), + [v1] "I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vleib(u8 v, s16 val, u8 index) { @@ -238,7 +231,7 @@ static __always_inline u64 fpu_vlgvf(u8 v, u16 index) return val; } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -246,17 +239,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("\n" - " la 1,%[vxr]\n" - " VLL %[v1],%[index],0,1\n" - : - : [vxr] "R" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory", "1"); + asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" + : + : [vxr] "Q" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -264,17 +255,19 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" - : - : [vxr] "Q" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VLL %[v1],%[index],0,1\n" + : + : [vxr] "R" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -284,17 +277,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("\n" \ - " la 1,%[vxrs]\n" \ - " VLM %[v1],%[v3],0,1\n" \ - : \ - : [vxrs] "R" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : \ + : [vxrs] "Q" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -304,15 +295,17 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : \ - : [vxrs] "Q" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VLM %[v1],%[v3],0,1\n" \ + : \ + : [vxrs] "R" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vlr(u8 v1, u8 v2) { @@ -362,33 +355,33 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vst(u8 v1, const void *vxr) { instrument_write(vxr, sizeof(__vector128)); - asm volatile("\n" - " la 1,%[vxr]\n" - " VST %[v1],0,,1\n" - : [vxr] "=R" (*(__vector128 *)vxr) - : [v1] "I" (v1) - : "memory", "1"); + asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" + : [vxr] "=Q" (*(__vector128 *)vxr) + : [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vst(u8 v1, const void *vxr) { instrument_write(vxr, sizeof(__vector128)); - asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" - : [vxr] "=Q" (*(__vector128 *)vxr) - : [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VST %[v1],0,,1\n" + : [vxr] "=R" (*(__vector128 *)vxr) + : [v1] "I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { @@ -396,15 +389,13 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_write(vxr, size); - asm volatile("\n" - " la 1,%[vxr]\n" - " VSTL %[v1],%[index],0,1\n" - : [vxr] "=R" (*(u8 *)vxr) - : [index] "d" (index), [v1] "I" (v1) - : "memory", "1"); + asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" + : [vxr] "=Q" (*(u8 *)vxr) + : [index] "d" (index), [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { @@ -412,15 +403,17 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_write(vxr, size); - asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" - : [vxr] "=Q" (*(u8 *)vxr) - : [index] "d" (index), [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VSTL %[v1],%[index],0,1\n" + : [vxr] "=R" (*(u8 *)vxr) + : [index] "d" (index), [v1] "I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -430,16 +423,14 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("\n" \ - " la 1,%[vxrs]\n" \ - " VSTM %[v1],%[v3],0,1\n" \ - : [vxrs] "=R" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : [vxrs] "=Q" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -449,14 +440,16 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : [vxrs] "=Q" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VSTM %[v1],%[v3],0,1\n" \ + : [vxrs] "=R" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vupllf(u8 v1, u8 v2) { @@ -482,5 +475,5 @@ static __always_inline void fpu_vzero(u8 v) : "memory"); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_S390_FPU_INSN_H */ diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h index c84cb33913e2..960c6c67ad6c 100644 --- a/arch/s390/include/asm/fpu.h +++ b/arch/s390/include/asm/fpu.h @@ -44,6 +44,7 @@ #ifndef _ASM_S390_FPU_H #define _ASM_S390_FPU_H +#include <linux/cpufeature.h> #include <linux/processor.h> #include <linux/preempt.h> #include <linux/string.h> @@ -51,12 +52,6 @@ #include <asm/sigcontext.h> #include <asm/fpu-types.h> #include <asm/fpu-insn.h> -#include <asm/facility.h> - -static inline bool cpu_has_vx(void) -{ - return likely(test_facility(129)); -} enum { KERNEL_FPC_BIT = 0, diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index fc97d75dc752..bee2d16c2951 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -5,7 +5,7 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #define MCOUNT_INSN_SIZE 6 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/stacktrace.h> static __always_inline unsigned long return_address(unsigned int n) @@ -39,6 +39,7 @@ struct dyn_arch_ftrace { }; struct module; struct dyn_ftrace; +struct ftrace_ops; bool ftrace_need_init_nop(void); #define ftrace_need_init_nop ftrace_need_init_nop @@ -50,6 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } +#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip)) #include <linux/ftrace_regs.h> @@ -62,30 +64,32 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs * return NULL; } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -struct fgraph_ret_regs { - unsigned long gpr2; - unsigned long fp; -}; - -static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs) +static __always_inline void +ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, + unsigned long ip) { - return ret_regs->gpr2; + arch_ftrace_regs(fregs)->regs.psw.addr = ip; } -static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs) +#undef ftrace_regs_get_frame_pointer +static __always_inline unsigned long +ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs) { - return ret_regs->fp; + return ftrace_regs_get_stack_pointer(fregs); } -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static __always_inline void -ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, - unsigned long ip) +static __always_inline unsigned long +ftrace_regs_get_return_address(const struct ftrace_regs *fregs) { - arch_ftrace_regs(fregs)->regs.psw.addr = ip; + return arch_ftrace_regs(fregs)->regs.gprs[14]; } +#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ + (_regs)->psw.mask = 0; \ + (_regs)->psw.addr = arch_ftrace_regs(fregs)->regs.psw.addr; \ + (_regs)->gprs[15] = arch_ftrace_regs(fregs)->regs.gprs[15]; \ + } while (0) + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* * When an ftrace registered caller is tracing a function that is @@ -126,7 +130,11 @@ static inline bool arch_syscall_match_sym_name(const char *sym, return !strcmp(sym + 7, name) || !strcmp(sym + 8, name); } -#endif /* __ASSEMBLY__ */ +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#define ftrace_graph_func ftrace_graph_func + +#endif /* __ASSEMBLER__ */ #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index eaeaeb3ff0be..942f21c39697 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -2,80 +2,101 @@ #ifndef _ASM_S390_FUTEX_H #define _ASM_S390_FUTEX_H +#include <linux/instrumented.h> #include <linux/uaccess.h> #include <linux/futex.h> #include <asm/asm-extable.h> #include <asm/mmu_context.h> #include <asm/errno.h> -#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ - asm volatile( \ - " sacf 256\n" \ - "0: l %1,0(%6)\n" \ - "1:"insn \ - "2: cs %1,%2,0(%6)\n" \ - "3: jl 1b\n" \ - " lhi %0,0\n" \ - "4: sacf 768\n" \ - EX_TABLE(0b,4b) EX_TABLE(1b,4b) \ - EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ - : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ - "=m" (*uaddr) \ - : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ - "m" (*uaddr) : "cc"); +#define FUTEX_OP_FUNC(name, insn) \ +static uaccess_kmsan_or_inline int \ +__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \ +{ \ + bool sacf_flag; \ + int rc, new; \ + \ + instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \ + sacf_flag = enable_sacf_uaccess(); \ + asm_inline volatile( \ + " sacf 256\n" \ + "0: l %[old],%[uaddr]\n" \ + "1:"insn \ + "2: cs %[old],%[new],%[uaddr]\n" \ + "3: jl 1b\n" \ + " lhi %[rc],0\n" \ + "4: sacf 768\n" \ + EX_TABLE_UA_FAULT(0b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(2b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(3b, 4b, %[rc]) \ + : [rc] "=d" (rc), [old] "=&d" (*old), \ + [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \ + : [oparg] "d" (oparg) \ + : "cc"); \ + disable_sacf_uaccess(sacf_flag); \ + if (!rc) \ + instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \ + return rc; \ +} + +FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n") +FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n") +FUTEX_OP_FUNC(or, "lr %[new],%[old]\n or %[new],%[oparg]\n") +FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n") +FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n") -static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, - u32 __user *uaddr) +static inline +int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { - int oldval = 0, newval, ret; + int old, rc; switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("lr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_set(oparg, &old, uaddr); break; case FUTEX_OP_ADD: - __futex_atomic_op("lr %2,%1\nar %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_add(oparg, &old, uaddr); break; case FUTEX_OP_OR: - __futex_atomic_op("lr %2,%1\nor %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_or(oparg, &old, uaddr); break; case FUTEX_OP_ANDN: - __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_and(~oparg, &old, uaddr); break; case FUTEX_OP_XOR: - __futex_atomic_op("lr %2,%1\nxr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_xor(oparg, &old, uaddr); break; default: - ret = -ENOSYS; + rc = -ENOSYS; } - - if (!ret) - *oval = oldval; - - return ret; + if (!rc) + *oval = old; + return rc; } -static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) +static uaccess_kmsan_or_inline +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - int ret; + bool sacf_flag; + int rc; - asm volatile( - " sacf 256\n" - "0: cs %1,%4,0(%5)\n" - "1: la %0,0\n" - "2: sacf 768\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) - : "=d" (ret), "+d" (oldval), "=m" (*uaddr) - : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + instrument_copy_from_user_before(uval, uaddr, sizeof(*uval)); + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( + " sacf 256\n" + "0: cs %[old],%[new],%[uaddr]\n" + "1: lhi %[rc],0\n" + "2: sacf 768\n" + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) + : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr) + : [new] "d" (newval) : "cc", "memory"); + disable_sacf_uaccess(sacf_flag); *uval = oldval; - return ret; + instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0); + return rc; } #endif /* _ASM_S390_FUTEX_H */ diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 13f51a6a5bb1..66c5808fd011 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -23,7 +23,6 @@ /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list - * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct * @guest_to_host: radix tree with guest to host address translation * @host_to_guest: radix tree with pointer to segment table entries @@ -35,7 +34,6 @@ * @guest_handle: protected virtual machine handle for the ultravisor * @host_to_rmap: radix tree with gmap_rmap lists * @children: list of shadow gmap structures - * @pt_list: list of all page tables used in the shadow guest address space * @shadow_lock: spinlock to protect the shadow gmap list * @parent: pointer to the parent gmap for shadow guest address spaces * @orig_asce: ASCE for which the shadow page table has been created @@ -45,7 +43,6 @@ */ struct gmap { struct list_head list; - struct list_head crst_list; struct mm_struct *mm; struct radix_tree_root guest_to_host; struct radix_tree_root host_to_guest; @@ -61,7 +58,6 @@ struct gmap { /* Additional data for shadow guest address spaces */ struct radix_tree_root host_to_rmap; struct list_head children; - struct list_head pt_list; spinlock_t shadow_lock; struct gmap *parent; unsigned long orig_asce; @@ -106,23 +102,20 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit); void gmap_remove(struct gmap *gmap); struct gmap *gmap_get(struct gmap *gmap); void gmap_put(struct gmap *gmap); +void gmap_free(struct gmap *gmap); +struct gmap *gmap_alloc(unsigned long limit); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long gmap_translate(struct gmap *, unsigned long gaddr); int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); -void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val); -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level); -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level); +void gmap_unshadow(struct gmap *sg); int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake); int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, @@ -131,24 +124,20 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake); int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake); -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, int *fake); int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte); void gmap_register_pte_notifier(struct gmap_notifier *); void gmap_unregister_pte_notifier(struct gmap_notifier *); -int gmap_mprotect_notify(struct gmap *, unsigned long start, - unsigned long len, int prot); +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits); void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); -int s390_disable_cow_sharing(void); -void s390_unlist_old_asce(struct gmap *gmap); int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool interruptible); +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); /** * s390_uv_destroy_range - Destroy a range of pages in the given mm. diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h new file mode 100644 index 000000000000..5356446a61c4 --- /dev/null +++ b/arch/s390/include/asm/gmap_helpers.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Helper functions for KVM guest address space mapping code + * + * Copyright IBM Corp. 2025 + */ + +#ifndef _ASM_S390_GMAP_HELPERS_H +#define _ASM_S390_GMAP_HELPERS_H + +void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr); +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end); +int gmap_helper_disable_cow_sharing(void); + +#endif /* _ASM_S390_GMAP_HELPERS_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index a40664b236e9..931fcc413598 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -9,23 +9,33 @@ #ifndef _ASM_S390_HUGETLB_H #define _ASM_S390_HUGETLB_H +#include <linux/cpufeature.h> #include <linux/pgtable.h> #include <linux/swap.h> #include <linux/swapops.h> #include <asm/page.h> -#define hugepages_supported() (MACHINE_HAS_EDAT1) +#define hugepages_supported() cpu_has_edat1() #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte); + #define __HAVE_ARCH_HUGE_PTEP_GET -extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned long sz) +{ + return __huge_ptep_get_and_clear(mm, addr, ptep); +} static inline void arch_clear_hugetlb_flags(struct folio *folio) { @@ -47,7 +57,7 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, address, ptep); + return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS @@ -56,8 +66,9 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t pte, int dirty) { int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte); + if (changed) { - huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + __huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } return changed; @@ -67,20 +78,9 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); - __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); -} - -#define __HAVE_ARCH_HUGE_PTE_NONE -static inline int huge_pte_none(pte_t pte) -{ - return pte_none(pte); -} + pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep); -#define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY -static inline int huge_pte_none_mostly(pte_t pte) -{ - return huge_pte_none(pte) || is_pte_marker(pte); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } #define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index fc9933a743d6..faddb9aef3b8 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -33,9 +33,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(pgprot_writecombine(PAGE_KERNEL))) -#define ioremap_wt(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(pgprot_writethrough(PAGE_KERNEL))) + ioremap_prot((addr), (size), pgprot_writecombine(PAGE_KERNEL)) static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) { diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index d9e705f4a697..697497e7d13e 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -25,7 +25,7 @@ #define EXT_IRQ_CP_SERVICE 0x2603 #define EXT_IRQ_IUCV 0x4000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/hardirq.h> #include <linux/percpu.h> @@ -54,7 +54,6 @@ enum interruption_class { IRQIO_C70, IRQIO_TAP, IRQIO_VMR, - IRQIO_LCS, IRQIO_CTC, IRQIO_ADM, IRQIO_CSC, @@ -121,6 +120,6 @@ void irq_subclass_unregister(enum irq_subclass subclass); #define irq_canonicalize(irq) (irq) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index bf78cf381dfc..d9cbc18f6b2e 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -4,7 +4,7 @@ #define HAVE_JUMP_LABEL_BATCH -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/stringify.h> @@ -51,5 +51,5 @@ label: return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h index e47fd8cbe701..e95e35eb8a3f 100644 --- a/arch/s390/include/asm/kfence.h +++ b/arch/s390/include/asm/kfence.h @@ -12,27 +12,16 @@ void __kernel_map_pages(struct page *page, int numpages, int enable); static __always_inline bool arch_kfence_init_pool(void) { - return true; -} - -#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK) - -/* - * Do not split kfence pool to 4k mapping with arch_kfence_init_pool(), - * but earlier where page table allocations still happen with memblock. - * Reason is that arch_kfence_init_pool() gets called when the system - * is still in a limbo state - disabling and enabling bottom halves is - * not yet allowed, but that is what our page_table_alloc() would do. - */ -static __always_inline void kfence_split_mapping(void) -{ #ifdef CONFIG_KFENCE unsigned long pool_pages = KFENCE_POOL_SIZE >> PAGE_SHIFT; set_memory_4k((unsigned long)__kfence_pool, pool_pages); #endif + return true; } +#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK) + static inline bool kfence_protect_page(unsigned long addr, bool protect) { __kernel_map_pages(virt_to_page((void *)addr), 1, !protect); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 97c7c8127543..f870d09515cc 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -20,16 +20,17 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/mmu_notifier.h> +#include <asm/kvm_host_types.h> #include <asm/debug.h> #include <asm/cpu.h> #include <asm/fpu.h> #include <asm/isc.h> #include <asm/guarded_storage.h> -#define KVM_S390_BSCA_CPU_SLOTS 64 -#define KVM_S390_ESCA_CPU_SLOTS 248 #define KVM_MAX_VCPUS 255 +#define KVM_INTERNAL_MEM_SLOTS 1 + /* * These seem to be used for allocating ->chip in the routing table, which we * don't use. 1 is as small as we can get to reduce the needed memory. If we @@ -49,342 +50,6 @@ #define KVM_REQ_REFRESH_GUEST_PREFIX \ KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define SIGP_CTRL_C 0x80 -#define SIGP_CTRL_SCN_MASK 0x3f - -union bsca_sigp_ctrl { - __u8 value; - struct { - __u8 c : 1; - __u8 r : 1; - __u8 scn : 6; - }; -}; - -union esca_sigp_ctrl { - __u16 value; - struct { - __u8 c : 1; - __u8 reserved: 7; - __u8 scn; - }; -}; - -struct esca_entry { - union esca_sigp_ctrl sigp_ctrl; - __u16 reserved1[3]; - __u64 sda; - __u64 reserved2[6]; -}; - -struct bsca_entry { - __u8 reserved0; - union bsca_sigp_ctrl sigp_ctrl; - __u16 reserved[3]; - __u64 sda; - __u64 reserved2[2]; -}; - -union ipte_control { - unsigned long val; - struct { - unsigned long k : 1; - unsigned long kh : 31; - unsigned long kg : 32; - }; -}; - -/* - * Utility is defined as two bytes but having it four bytes wide - * generates more efficient code. Since the following bytes are - * reserved this makes no functional difference. - */ -union sca_utility { - __u32 val; - struct { - __u32 mtcr : 1; - __u32 : 31; - }; -}; - -struct bsca_block { - union ipte_control ipte_control; - __u64 reserved[5]; - __u64 mcn; - union sca_utility utility; - __u8 reserved2[4]; - struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; -}; - -struct esca_block { - union ipte_control ipte_control; - __u64 reserved1[6]; - union sca_utility utility; - __u8 reserved2[4]; - __u64 mcn[4]; - __u64 reserved3[20]; - struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; -}; - -/* - * This struct is used to store some machine check info from lowcore - * for machine checks that happen while the guest is running. - * This info in host's lowcore might be overwritten by a second machine - * check from host when host is in the machine check's high-level handling. - * The size is 24 bytes. - */ -struct mcck_volatile_info { - __u64 mcic; - __u64 failing_storage_address; - __u32 ext_damage_code; - __u32 reserved; -}; - -#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ - CR0_MEASUREMENT_ALERT_SUBMASK) -#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ - CR14_EXTERNAL_DAMAGE_SUBMASK) - -#define SIDAD_SIZE_MASK 0xff -#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) -#define sida_size(sie_block) \ - ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) - -#define CPUSTAT_STOPPED 0x80000000 -#define CPUSTAT_WAIT 0x10000000 -#define CPUSTAT_ECALL_PEND 0x08000000 -#define CPUSTAT_STOP_INT 0x04000000 -#define CPUSTAT_IO_INT 0x02000000 -#define CPUSTAT_EXT_INT 0x01000000 -#define CPUSTAT_RUNNING 0x00800000 -#define CPUSTAT_RETAINED 0x00400000 -#define CPUSTAT_TIMING_SUB 0x00020000 -#define CPUSTAT_SIE_SUB 0x00010000 -#define CPUSTAT_RRF 0x00008000 -#define CPUSTAT_SLSV 0x00004000 -#define CPUSTAT_SLSR 0x00002000 -#define CPUSTAT_ZARCH 0x00000800 -#define CPUSTAT_MCDS 0x00000100 -#define CPUSTAT_KSS 0x00000200 -#define CPUSTAT_SM 0x00000080 -#define CPUSTAT_IBS 0x00000040 -#define CPUSTAT_GED2 0x00000010 -#define CPUSTAT_G 0x00000008 -#define CPUSTAT_GED 0x00000004 -#define CPUSTAT_J 0x00000002 -#define CPUSTAT_P 0x00000001 - -struct kvm_s390_sie_block { - atomic_t cpuflags; /* 0x0000 */ - __u32 : 1; /* 0x0004 */ - __u32 prefix : 18; - __u32 : 1; - __u32 ibc : 12; - __u8 reserved08[4]; /* 0x0008 */ -#define PROG_IN_SIE (1<<0) - __u32 prog0c; /* 0x000c */ - union { - __u8 reserved10[16]; /* 0x0010 */ - struct { - __u64 pv_handle_cpu; - __u64 pv_handle_config; - }; - }; -#define PROG_BLOCK_SIE (1<<0) -#define PROG_REQUEST (1<<1) - atomic_t prog20; /* 0x0020 */ - __u8 reserved24[4]; /* 0x0024 */ - __u64 cputm; /* 0x0028 */ - __u64 ckc; /* 0x0030 */ - __u64 epoch; /* 0x0038 */ - __u32 svcc; /* 0x0040 */ -#define LCTL_CR0 0x8000 -#define LCTL_CR6 0x0200 -#define LCTL_CR9 0x0040 -#define LCTL_CR10 0x0020 -#define LCTL_CR11 0x0010 -#define LCTL_CR14 0x0002 - __u16 lctl; /* 0x0044 */ - __s16 icpua; /* 0x0046 */ -#define ICTL_OPEREXC 0x80000000 -#define ICTL_PINT 0x20000000 -#define ICTL_LPSW 0x00400000 -#define ICTL_STCTL 0x00040000 -#define ICTL_ISKE 0x00004000 -#define ICTL_SSKE 0x00002000 -#define ICTL_RRBE 0x00001000 -#define ICTL_TPROT 0x00000200 - __u32 ictl; /* 0x0048 */ -#define ECA_CEI 0x80000000 -#define ECA_IB 0x40000000 -#define ECA_SIGPI 0x10000000 -#define ECA_MVPGI 0x01000000 -#define ECA_AIV 0x00200000 -#define ECA_VX 0x00020000 -#define ECA_PROTEXCI 0x00002000 -#define ECA_APIE 0x00000008 -#define ECA_SII 0x00000001 - __u32 eca; /* 0x004c */ -#define ICPT_INST 0x04 -#define ICPT_PROGI 0x08 -#define ICPT_INSTPROGI 0x0C -#define ICPT_EXTREQ 0x10 -#define ICPT_EXTINT 0x14 -#define ICPT_IOREQ 0x18 -#define ICPT_WAIT 0x1c -#define ICPT_VALIDITY 0x20 -#define ICPT_STOP 0x28 -#define ICPT_OPEREXC 0x2C -#define ICPT_PARTEXEC 0x38 -#define ICPT_IOINST 0x40 -#define ICPT_KSS 0x5c -#define ICPT_MCHKREQ 0x60 -#define ICPT_INT_ENABLE 0x64 -#define ICPT_PV_INSTR 0x68 -#define ICPT_PV_NOTIFY 0x6c -#define ICPT_PV_PREF 0x70 - __u8 icptcode; /* 0x0050 */ - __u8 icptstatus; /* 0x0051 */ - __u16 ihcpu; /* 0x0052 */ - __u8 reserved54; /* 0x0054 */ -#define IICTL_CODE_NONE 0x00 -#define IICTL_CODE_MCHK 0x01 -#define IICTL_CODE_EXT 0x02 -#define IICTL_CODE_IO 0x03 -#define IICTL_CODE_RESTART 0x04 -#define IICTL_CODE_SPECIFICATION 0x10 -#define IICTL_CODE_OPERAND 0x11 - __u8 iictl; /* 0x0055 */ - __u16 ipa; /* 0x0056 */ - __u32 ipb; /* 0x0058 */ - __u32 scaoh; /* 0x005c */ -#define FPF_BPBC 0x20 - __u8 fpf; /* 0x0060 */ -#define ECB_GS 0x40 -#define ECB_TE 0x10 -#define ECB_SPECI 0x08 -#define ECB_SRSI 0x04 -#define ECB_HOSTPROTINT 0x02 -#define ECB_PTF 0x01 - __u8 ecb; /* 0x0061 */ -#define ECB2_CMMA 0x80 -#define ECB2_IEP 0x20 -#define ECB2_PFMFI 0x08 -#define ECB2_ESCA 0x04 -#define ECB2_ZPCI_LSI 0x02 - __u8 ecb2; /* 0x0062 */ -#define ECB3_AISI 0x20 -#define ECB3_AISII 0x10 -#define ECB3_DEA 0x08 -#define ECB3_AES 0x04 -#define ECB3_RI 0x01 - __u8 ecb3; /* 0x0063 */ -#define ESCA_SCAOL_MASK ~0x3fU - __u32 scaol; /* 0x0064 */ - __u8 sdf; /* 0x0068 */ - __u8 epdx; /* 0x0069 */ - __u8 cpnc; /* 0x006a */ - __u8 reserved6b; /* 0x006b */ - __u32 todpr; /* 0x006c */ -#define GISA_FORMAT1 0x00000001 - __u32 gd; /* 0x0070 */ - __u8 reserved74[12]; /* 0x0074 */ - __u64 mso; /* 0x0080 */ - __u64 msl; /* 0x0088 */ - psw_t gpsw; /* 0x0090 */ - __u64 gg14; /* 0x00a0 */ - __u64 gg15; /* 0x00a8 */ - __u8 reservedb0[8]; /* 0x00b0 */ -#define HPID_KVM 0x4 -#define HPID_VSIE 0x5 - __u8 hpid; /* 0x00b8 */ - __u8 reservedb9[7]; /* 0x00b9 */ - union { - struct { - __u32 eiparams; /* 0x00c0 */ - __u16 extcpuaddr; /* 0x00c4 */ - __u16 eic; /* 0x00c6 */ - }; - __u64 mcic; /* 0x00c0 */ - } __packed; - __u32 reservedc8; /* 0x00c8 */ - union { - struct { - __u16 pgmilc; /* 0x00cc */ - __u16 iprcc; /* 0x00ce */ - }; - __u32 edc; /* 0x00cc */ - } __packed; - union { - struct { - __u32 dxc; /* 0x00d0 */ - __u16 mcn; /* 0x00d4 */ - __u8 perc; /* 0x00d6 */ - __u8 peratmid; /* 0x00d7 */ - }; - __u64 faddr; /* 0x00d0 */ - } __packed; - __u64 peraddr; /* 0x00d8 */ - __u8 eai; /* 0x00e0 */ - __u8 peraid; /* 0x00e1 */ - __u8 oai; /* 0x00e2 */ - __u8 armid; /* 0x00e3 */ - __u8 reservede4[4]; /* 0x00e4 */ - union { - __u64 tecmc; /* 0x00e8 */ - struct { - __u16 subchannel_id; /* 0x00e8 */ - __u16 subchannel_nr; /* 0x00ea */ - __u32 io_int_parm; /* 0x00ec */ - __u32 io_int_word; /* 0x00f0 */ - }; - } __packed; - __u8 reservedf4[8]; /* 0x00f4 */ -#define CRYCB_FORMAT_MASK 0x00000003 -#define CRYCB_FORMAT0 0x00000000 -#define CRYCB_FORMAT1 0x00000001 -#define CRYCB_FORMAT2 0x00000003 - __u32 crycbd; /* 0x00fc */ - __u64 gcr[16]; /* 0x0100 */ - union { - __u64 gbea; /* 0x0180 */ - __u64 sidad; - }; - __u8 reserved188[8]; /* 0x0188 */ - __u64 sdnxo; /* 0x0190 */ - __u8 reserved198[8]; /* 0x0198 */ - __u32 fac; /* 0x01a0 */ - __u8 reserved1a4[20]; /* 0x01a4 */ - __u64 cbrlo; /* 0x01b8 */ - __u8 reserved1c0[8]; /* 0x01c0 */ -#define ECD_HOSTREGMGMT 0x20000000 -#define ECD_MEF 0x08000000 -#define ECD_ETOKENF 0x02000000 -#define ECD_ECC 0x00200000 -#define ECD_HMAC 0x00004000 - __u32 ecd; /* 0x01c8 */ - __u8 reserved1cc[18]; /* 0x01cc */ - __u64 pp; /* 0x01de */ - __u8 reserved1e6[2]; /* 0x01e6 */ - __u64 itdba; /* 0x01e8 */ - __u64 riccbd; /* 0x01f0 */ - __u64 gvrd; /* 0x01f8 */ -} __packed __aligned(512); - -struct kvm_s390_itdb { - __u8 data[256]; -}; - -struct sie_page { - struct kvm_s390_sie_block sie_block; - struct mcck_volatile_info mcck_info; /* 0x0200 */ - __u8 reserved218[360]; /* 0x0218 */ - __u64 pv_grregs[16]; /* 0x0380 */ - __u8 reserved400[512]; /* 0x0400 */ - struct kvm_s390_itdb itdb; /* 0x0600 */ - __u8 reserved700[2304]; /* 0x0700 */ -}; - struct kvm_vcpu_stat { struct kvm_vcpu_stat_generic generic; u64 exit_userspace; @@ -931,12 +596,14 @@ struct sie_page2 { u8 reserved928[0x1000 - 0x928]; /* 0x0928 */ }; +struct vsie_page; + struct kvm_s390_vsie { struct mutex mutex; struct radix_tree_root addr_to_page; int page_count; int next; - struct page *pages[KVM_MAX_VCPUS]; + struct vsie_page *pages[KVM_MAX_VCPUS]; }; struct kvm_s390_gisa_iam { @@ -1049,10 +716,12 @@ extern char sie_exit; bool kvm_s390_pv_is_protected(struct kvm *kvm); bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu); +extern int kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb, + u64 *gprs, unsigned long gasce); + extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); -static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} diff --git a/arch/s390/include/asm/kvm_host_types.h b/arch/s390/include/asm/kvm_host_types.h new file mode 100644 index 000000000000..1394d3fb648f --- /dev/null +++ b/arch/s390/include/asm/kvm_host_types.h @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_KVM_HOST_TYPES_H +#define _ASM_KVM_HOST_TYPES_H + +#include <linux/atomic.h> +#include <linux/types.h> + +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 + +#define SIGP_CTRL_C 0x80 +#define SIGP_CTRL_SCN_MASK 0x3f + +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +}; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +}; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +}; + +struct bsca_entry { + __u8 reserved0; + union bsca_sigp_ctrl sigp_ctrl; + __u16 reserved[3]; + __u64 sda; + __u64 reserved2[2]; +}; + +union ipte_control { + unsigned long val; + struct { + unsigned long k : 1; + unsigned long kh : 31; + unsigned long kg : 32; + }; +}; + +/* + * Utility is defined as two bytes but having it four bytes wide + * generates more efficient code. Since the following bytes are + * reserved this makes no functional difference. + */ +union sca_utility { + __u32 val; + struct { + __u32 mtcr : 1; + __u32 : 31; + }; +}; + +struct bsca_block { + union ipte_control ipte_control; + __u64 reserved[5]; + __u64 mcn; + union sca_utility utility; + __u8 reserved2[4]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; +}; + +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[6]; + union sca_utility utility; + __u8 reserved2[4]; + __u64 mcn[4]; + __u64 reserved3[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +}; + +/* + * This struct is used to store some machine check info from lowcore + * for machine checks that happen while the guest is running. + * This info in host's lowcore might be overwritten by a second machine + * check from host when host is in the machine check's high-level handling. + * The size is 24 bytes. + */ +struct mcck_volatile_info { + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 reserved; +}; + +#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ + CR0_MEASUREMENT_ALERT_SUBMASK) +#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ + CR14_EXTERNAL_DAMAGE_SUBMASK) + +#define SIDAD_SIZE_MASK 0xff +#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) +#define sida_size(sie_block) \ + ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) + +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + +struct kvm_s390_sie_block { + atomic_t cpuflags; /* 0x0000 */ + __u32 : 1; /* 0x0004 */ + __u32 prefix : 18; + __u32 : 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE (1<<0) +#define PROG_REQUEST (1<<1) + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + psw_t gpsw; /* 0x0090 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 +#define ECD_HMAC 0x00004000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +struct kvm_s390_itdb { + __u8 data[256]; +}; + +struct sie_page { + struct kvm_s390_sie_block sie_block; + struct mcck_volatile_info mcck_info; /* 0x0200 */ + __u8 reserved218[360]; /* 0x0218 */ + __u64 pv_grregs[16]; /* 0x0380 */ + __u8 reserved400[512]; /* 0x0400 */ + struct kvm_s390_itdb itdb; /* 0x0600 */ + __u8 reserved700[2304]; /* 0x0700 */ +}; + +#endif /* _ASM_KVM_HOST_TYPES_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 42a092fa1029..d9c853db9a40 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -10,6 +10,7 @@ #define _ASM_S390_LOWCORE_H #include <linux/types.h> +#include <asm/machine.h> #include <asm/ptrace.h> #include <asm/ctlreg.h> #include <asm/cpu.h> @@ -21,7 +22,7 @@ #define LOWCORE_ALT_ADDRESS _AC(0x70000, UL) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct pgm_tdb { u64 data[32]; @@ -126,7 +127,7 @@ struct lowcore { __u64 int_clock; /* 0x0318 */ __u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */ __u64 clock_comparator; /* 0x0328 */ - __u64 boot_clock[2]; /* 0x0330 */ + __u8 pad_0x0330[0x0340-0x0330]; /* 0x0330 */ /* Current process. */ __u64 current_task; /* 0x0340 */ @@ -163,9 +164,7 @@ struct lowcore { __u32 spinlock_index; /* 0x03b0 */ __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b8 */ - __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */ - __u64 machine_flags; /* 0x03c8 */ - __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */ + __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */ __u32 return_lpswe; /* 0x0400 */ __u32 return_mcck_lpswe; /* 0x0404 */ @@ -222,9 +221,12 @@ static __always_inline struct lowcore *get_lowcore(void) if (__is_defined(__DECOMPRESSOR)) return NULL; - asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE) - : [lc] "=d" (lc) - : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); + asm_inline( + ALTERNATIVE(" lghi %[lc],0", + " llilh %[lc],%[alt]", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [lc] "=d" (lc) + : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); return lc; } @@ -235,19 +237,19 @@ static inline void set_prefix(__u32 address) asm volatile("spx %0" : : "Q" (address) : "memory"); } -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ .macro GET_LC reg - ALTERNATIVE "llilh \reg,0", \ + ALTERNATIVE "lghi \reg,0", \ __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm .macro STMG_LC start, end, savearea ALTERNATIVE "stmg \start, \end, \savearea", \ __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_LOWCORE_H */ diff --git a/arch/s390/include/asm/machine.h b/arch/s390/include/asm/machine.h new file mode 100644 index 000000000000..9bd4a9dc7778 --- /dev/null +++ b/arch/s390/include/asm/machine.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2024 + */ + +#ifndef __ASM_S390_MACHINE_H +#define __ASM_S390_MACHINE_H + +#include <linux/const.h> + +#define MFEATURE_LOWCORE 0 +#define MFEATURE_PCI_MIO 1 +#define MFEATURE_SCC 2 +#define MFEATURE_TLB_GUEST 3 +#define MFEATURE_TX 4 +#define MFEATURE_ESOP 5 +#define MFEATURE_DIAG9C 6 +#define MFEATURE_VM 7 +#define MFEATURE_KVM 8 +#define MFEATURE_LPAR 9 +#define MFEATURE_DIAG288 10 + +#ifndef __ASSEMBLER__ + +#include <linux/bitops.h> +#include <asm/alternative.h> + +extern unsigned long machine_features[1]; + +#define MAX_MFEATURE_BIT (sizeof(machine_features) * BITS_PER_BYTE) + +static inline void __set_machine_feature(unsigned int nr, unsigned long *mfeatures) +{ + if (nr >= MAX_MFEATURE_BIT) + return; + __set_bit(nr, mfeatures); +} + +static inline void set_machine_feature(unsigned int nr) +{ + __set_machine_feature(nr, machine_features); +} + +static inline void __clear_machine_feature(unsigned int nr, unsigned long *mfeatures) +{ + if (nr >= MAX_MFEATURE_BIT) + return; + __clear_bit(nr, mfeatures); +} + +static inline void clear_machine_feature(unsigned int nr) +{ + __clear_machine_feature(nr, machine_features); +} + +static bool __test_machine_feature(unsigned int nr, unsigned long *mfeatures) +{ + if (nr >= MAX_MFEATURE_BIT) + return false; + return test_bit(nr, mfeatures); +} + +static bool test_machine_feature(unsigned int nr) +{ + return __test_machine_feature(nr, machine_features); +} + +static __always_inline bool __test_machine_feature_constant(unsigned int nr) +{ + asm goto( + ALTERNATIVE("brcl 15,%l[l_no]", "brcl 0,0", ALT_FEATURE(%[nr])) + : + : [nr] "i" (nr) + : + : l_no); + return true; +l_no: + return false; +} + +#define DEFINE_MACHINE_HAS_FEATURE(name, feature) \ +static __always_inline bool machine_has_##name(void) \ +{ \ + if (!__is_defined(__DECOMPRESSOR) && __builtin_constant_p(feature)) \ + return __test_machine_feature_constant(feature); \ + return test_machine_feature(feature); \ +} + +DEFINE_MACHINE_HAS_FEATURE(relocated_lowcore, MFEATURE_LOWCORE) +DEFINE_MACHINE_HAS_FEATURE(scc, MFEATURE_SCC) +DEFINE_MACHINE_HAS_FEATURE(tlb_guest, MFEATURE_TLB_GUEST) +DEFINE_MACHINE_HAS_FEATURE(tx, MFEATURE_TX) +DEFINE_MACHINE_HAS_FEATURE(esop, MFEATURE_ESOP) +DEFINE_MACHINE_HAS_FEATURE(diag9c, MFEATURE_DIAG9C) +DEFINE_MACHINE_HAS_FEATURE(vm, MFEATURE_VM) +DEFINE_MACHINE_HAS_FEATURE(kvm, MFEATURE_KVM) +DEFINE_MACHINE_HAS_FEATURE(lpar, MFEATURE_LPAR) + +#define machine_is_vm machine_has_vm +#define machine_is_kvm machine_has_kvm +#define machine_is_lpar machine_has_lpar + +#endif /* __ASSEMBLER__ */ +#endif /* __ASM_S390_MACHINE_H */ diff --git a/arch/s390/include/asm/march.h b/arch/s390/include/asm/march.h index fd9eef3be44c..11a71bd14954 100644 --- a/arch/s390/include/asm/march.h +++ b/arch/s390/include/asm/march.h @@ -33,6 +33,10 @@ #define MARCH_HAS_Z16_FEATURES 1 #endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES +#define MARCH_HAS_Z17_FEATURES 1 +#endif + #endif /* __DECOMPRESSOR */ #endif /* __ASM_S390_MARCH_H */ diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h index b85e13505a0f..28c83ec1f243 100644 --- a/arch/s390/include/asm/mem_encrypt.h +++ b/arch/s390/include/asm/mem_encrypt.h @@ -2,11 +2,11 @@ #ifndef S390_MEM_ENCRYPT_H__ #define S390_MEM_ENCRYPT_H__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ int set_memory_encrypted(unsigned long vaddr, int numpages); int set_memory_decrypted(unsigned long vaddr, int numpages); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* S390_MEM_ENCRYPT_H__ */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 4c2dc7abc285..f07e49b419ab 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -22,10 +22,7 @@ typedef struct { * The following bitfields need a down_write on the mm * semaphore when they are written to. As they are only * written once, they can be read without a lock. - * - * The mmu context allocates 4K page tables. */ - unsigned int alloc_pgste:1; /* The mmu context uses extended page tables. */ unsigned int has_pgste:1; /* The mmu context uses storage keys. */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d56eb0a1f37b..d9b8501bc93d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -13,6 +13,7 @@ #include <linux/mm_types.h> #include <asm/tlbflush.h> #include <asm/ctlreg.h> +#include <asm/asce.h> #include <asm-generic/mm_hooks.h> #define init_new_context init_new_context @@ -29,9 +30,6 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.gmap_asce = 0; mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE - mm->context.alloc_pgste = page_table_allocate_pgste || - test_thread_flag(TIF_PGSTE) || - (current->mm && current->mm->context.alloc_pgste); mm->context.has_pgste = 0; mm->context.uses_skeys = 0; mm->context.uses_cmm = 0; @@ -80,7 +78,8 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct * else get_lowcore()->user_asce.val = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - /* Clear previous user-ASCE from CR7 */ + /* Clear previous user-ASCE from CR1 and CR7 */ + local_ctl_load(1, &s390_invalid_asce); local_ctl_load(7, &s390_invalid_asce); if (prev != next) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); @@ -102,6 +101,7 @@ static inline void finish_arch_post_lock_switch(void) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; + unsigned long flags; if (mm) { preempt_disable(); @@ -111,15 +111,25 @@ static inline void finish_arch_post_lock_switch(void) __tlb_flush_mm_lazy(mm); preempt_enable(); } + local_irq_save(flags); + if (test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &get_lowcore()->kernel_asce); + else + local_ctl_load(1, &get_lowcore()->user_asce); local_ctl_load(7, &get_lowcore()->user_asce); + local_irq_restore(flags); } #define activate_mm activate_mm static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + if (test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &get_lowcore()->kernel_asce); + else + local_ctl_load(1, &get_lowcore()->user_asce); local_ctl_load(7, &get_lowcore()->user_asce); } diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 227466ce9e41..6454c1531854 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -33,7 +33,7 @@ #define MCCK_CODE_FC_VALID BIT(63 - 43) #define MCCK_CODE_CPU_TIMER_VALID BIT(63 - 46) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ union mci { unsigned long val; @@ -104,5 +104,5 @@ void nmi_free_mcesa(u64 *mcesad); void s390_handle_mcck(void); void s390_do_machine_check(struct pt_regs *regs); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_NMI_H */ diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index 192835a3e24d..81c4813cff18 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -2,7 +2,7 @@ #ifndef _ASM_S390_EXPOLINE_H #define _ASM_S390_EXPOLINE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <asm/facility.h> @@ -26,8 +26,6 @@ static inline bool nospec_uses_trampoline(void) return __is_defined(CC_USING_EXPOLINE) && !nospec_disable; } -#ifdef CONFIG_EXPOLINE_EXTERN - void __s390_indirect_jump_r1(void); void __s390_indirect_jump_r2(void); void __s390_indirect_jump_r3(void); @@ -44,8 +42,6 @@ void __s390_indirect_jump_r13(void); void __s390_indirect_jump_r14(void); void __s390_indirect_jump_r15(void); -#endif - -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_EXPOLINE_H */ diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index cb15dd25bf21..6ce6b56e282b 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -3,9 +3,10 @@ #define _ASM_S390_NOSPEC_ASM_H #include <linux/linkage.h> +#include <linux/export.h> #include <asm/dwarf.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef CC_USING_EXPOLINE @@ -128,6 +129,6 @@ .endm #endif /* CC_USING_EXPOLINE */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_NOSPEC_ASM_H */ diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index 08fcbd628120..794fdb21500a 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -7,7 +7,6 @@ #ifndef PAGE_STATES_H #define PAGE_STATES_H -#include <asm/sections.h> #include <asm/page.h> #define ESSA_GET_STATE 0 @@ -21,7 +20,7 @@ #define ESSA_MAX ESSA_SET_STABLE_NODAT -extern int __bootdata_preserved(cmma_flag); +extern int cmma_flag; static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd) { diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 4f43cdd9835b..9240a363c893 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -33,7 +33,7 @@ #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #include <asm/setup.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ void __storage_key_init_range(unsigned long start, unsigned long end); @@ -71,9 +71,11 @@ static inline void copy_page(void *to, void *from) #define vma_alloc_zeroed_movable_folio(vma, vaddr) \ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr) -/* - * These are used to make use of C type-checking.. - */ +#ifdef CONFIG_STRICT_MM_TYPECHECKS +#define STRICT_MM_TYPECHECKS +#endif + +#ifdef STRICT_MM_TYPECHECKS typedef struct { unsigned long pgprot; } pgprot_t; typedef struct { unsigned long pgste; } pgste_t; @@ -82,52 +84,65 @@ typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; typedef struct { unsigned long p4d; } p4d_t; typedef struct { unsigned long pgd; } pgd_t; -typedef pte_t *pgtable_t; -#define pgprot_val(x) ((x).pgprot) -#define pgste_val(x) ((x).pgste) - -static inline unsigned long pte_val(pte_t pte) -{ - return pte.pte; +#define DEFINE_PGVAL_FUNC(name) \ +static __always_inline unsigned long name ## _val(name ## _t name) \ +{ \ + return name.name; \ } -static inline unsigned long pmd_val(pmd_t pmd) -{ - return pmd.pmd; -} +#else /* STRICT_MM_TYPECHECKS */ -static inline unsigned long pud_val(pud_t pud) -{ - return pud.pud; -} +typedef unsigned long pgprot_t; +typedef unsigned long pgste_t; +typedef unsigned long pte_t; +typedef unsigned long pmd_t; +typedef unsigned long pud_t; +typedef unsigned long p4d_t; +typedef unsigned long pgd_t; -static inline unsigned long p4d_val(p4d_t p4d) -{ - return p4d.p4d; +#define DEFINE_PGVAL_FUNC(name) \ +static __always_inline unsigned long name ## _val(name ## _t name) \ +{ \ + return name; \ } -static inline unsigned long pgd_val(pgd_t pgd) -{ - return pgd.pgd; -} +#endif /* STRICT_MM_TYPECHECKS */ + +DEFINE_PGVAL_FUNC(pgprot) +DEFINE_PGVAL_FUNC(pgste) +DEFINE_PGVAL_FUNC(pte) +DEFINE_PGVAL_FUNC(pmd) +DEFINE_PGVAL_FUNC(pud) +DEFINE_PGVAL_FUNC(p4d) +DEFINE_PGVAL_FUNC(pgd) + +typedef pte_t *pgtable_t; +#define __pgprot(x) ((pgprot_t) { (x) } ) #define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) #define __pud(x) ((pud_t) { (x) } ) #define __p4d(x) ((p4d_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) static inline void page_set_storage_key(unsigned long addr, unsigned char skey, int mapped) { - if (!mapped) - asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" - : : "d" (skey), "a" (addr)); - else - asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); + if (!mapped) { + asm volatile( + " .insn rrf,0xb22b0000,%[skey],%[addr],8,0" + : + : [skey] "d" (skey), [addr] "a" (addr) + : "memory"); + } else { + asm volatile( + " sske %[skey],%[addr]" + : + : [skey] "d" (skey), [addr] "a" (addr) + : "memory"); + } } static inline unsigned char page_get_storage_key(unsigned long addr) @@ -184,7 +199,11 @@ extern struct vm_layout vm_layout; #define __kaslr_offset vm_layout.kaslr_offset #define __kaslr_offset_phys vm_layout.kaslr_offset_phys +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE #define __identity_base vm_layout.identity_base +#else +#define __identity_base 0UL +#endif #define ident_map_size vm_layout.identity_size static inline unsigned long kaslr_offset(void) @@ -263,7 +282,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 474e1f8d1d3c..41f900f693d9 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -11,6 +11,9 @@ #include <asm/pci_insn.h> #include <asm/sclp.h> +#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 +#define arch_can_pci_mmap_wc() 1 + #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 @@ -144,7 +147,7 @@ struct zpci_dev { u8 util_str_avail : 1; u8 irqs_registered : 1; u8 tid_avail : 1; - u8 reserved : 1; + u8 rtr_avail : 1; /* Relaxed translation allowed */ unsigned int devfn; /* DEVFN part of the RID*/ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ @@ -217,6 +220,7 @@ extern struct airq_iv *zpci_aif_sbv; struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state); int zpci_add_device(struct zpci_dev *zdev); int zpci_enable_device(struct zpci_dev *); +int zpci_reenable_device(struct zpci_dev *zdev); int zpci_disable_device(struct zpci_dev *); int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh); int zpci_deconfigure_device(struct zpci_dev *zdev); @@ -245,6 +249,7 @@ void update_uid_checking(bool new); /* IOMMU Interface */ int zpci_init_iommu(struct zpci_dev *zdev); void zpci_destroy_iommu(struct zpci_dev *zdev); +int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status); #ifdef CONFIG_PCI static inline bool zpci_use_mio(struct zpci_dev *zdev) diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index 3fff2f7095c8..7ebff39c84b3 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -156,7 +156,9 @@ struct clp_rsp_query_pci_grp { u16 : 4; u16 noi : 12; /* number of interrupts */ u8 version; - u8 : 6; + u8 : 2; + u8 rtr : 1; /* Relaxed translation requirement */ + u8 : 3; u8 frame : 1; u8 refresh : 1; /* TLB refresh mode */ u16 : 3; diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 42d7cc4262ca..d12e17201661 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -25,6 +25,7 @@ enum zpci_ioat_dtype { #define ZPCI_KEY (PAGE_DEFAULT_KEY << 5) #define ZPCI_TABLE_SIZE_RT (1UL << 42) +#define ZPCI_TABLE_SIZE_RS (1UL << 53) #define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST) #define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT) @@ -55,6 +56,8 @@ enum zpci_ioat_dtype { #define ZPCI_PT_BITS 8 #define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT) #define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS) +#define ZPCI_RS_SHIFT (ZPCI_RT_SHIFT + ZPCI_TABLE_BITS) +#define ZPCI_RF_SHIFT (ZPCI_RS_SHIFT + ZPCI_TABLE_BITS) #define ZPCI_RTE_FLAG_MASK 0x3fffUL #define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK) diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 84f6b8357b45..96af7d964014 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -16,10 +16,9 @@ * For 64 bit module code, the module may be more than 4G above the * per cpu area, use weak definitions to force the compiler to * generate external references. + * Therefore, we have enabled CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU + * in the Kconfig. */ -#if defined(MODULE) -#define ARCH_NEEDS_WEAK_PER_CPU -#endif /* * We use a compare-and-swap loop since that uses less cpu cycles than diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 7b84ef6dc4b6..5345398df653 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -26,7 +26,6 @@ unsigned long *page_table_alloc(struct mm_struct *); struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_pgste(struct ptdesc *ptdesc); -extern int page_table_allocate_pgste; static inline void crst_table_init(unsigned long *crst, unsigned long entry) { @@ -53,29 +52,42 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) { unsigned long *table = crst_table_alloc(mm); - if (table) - crst_table_init(table, _REGION2_ENTRY_EMPTY); + if (!table) + return NULL; + crst_table_init(table, _REGION2_ENTRY_EMPTY); + pagetable_p4d_ctor(virt_to_ptdesc(table)); + return (p4d_t *) table; } static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) { - if (!mm_p4d_folded(mm)) - crst_table_free(mm, (unsigned long *) p4d); + if (mm_p4d_folded(mm)) + return; + + pagetable_dtor(virt_to_ptdesc(p4d)); + crst_table_free(mm, (unsigned long *) p4d); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { unsigned long *table = crst_table_alloc(mm); - if (table) - crst_table_init(table, _REGION3_ENTRY_EMPTY); + + if (!table) + return NULL; + crst_table_init(table, _REGION3_ENTRY_EMPTY); + pagetable_pud_ctor(virt_to_ptdesc(table)); + return (pud_t *) table; } static inline void pud_free(struct mm_struct *mm, pud_t *pud) { - if (!mm_pud_folded(mm)) - crst_table_free(mm, (unsigned long *) pud); + if (mm_pud_folded(mm)) + return; + + pagetable_dtor(virt_to_ptdesc(pud)); + crst_table_free(mm, (unsigned long *) pud); } static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) @@ -85,7 +97,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) if (!table) return NULL; crst_table_init(table, _SEGMENT_ENTRY_EMPTY); - if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) { + if (!pagetable_pmd_ctor(mm, virt_to_ptdesc(table))) { crst_table_free(mm, table); return NULL; } @@ -96,7 +108,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { if (mm_pmd_folded(mm)) return; - pagetable_pmd_dtor(virt_to_ptdesc(pmd)); + pagetable_dtor(virt_to_ptdesc(pmd)); crst_table_free(mm, (unsigned long *) pmd); } @@ -117,11 +129,18 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return (pgd_t *) crst_table_alloc(mm); + unsigned long *table = crst_table_alloc(mm); + + if (!table) + return NULL; + pagetable_pgd_ctor(virt_to_ptdesc(table)); + + return (pgd_t *) table; } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { + pagetable_dtor(virt_to_ptdesc(pgd)); crst_table_free(mm, (unsigned long *) pgd); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 48268095b0a3..c1a7a92f0575 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -14,10 +14,10 @@ #include <linux/sched.h> #include <linux/mm_types.h> +#include <linux/cpufeature.h> #include <linux/page-flags.h> #include <linux/radix-tree.h> #include <linux/atomic.h> -#include <asm/sections.h> #include <asm/ctlreg.h> #include <asm/bug.h> #include <asm/page.h> @@ -35,7 +35,7 @@ enum { PG_DIRECT_MAP_MAX }; -extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); +extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; static inline void update_page_count(int level, long count) { @@ -85,14 +85,14 @@ extern unsigned long zero_page_mask; * happen without trampolines and in addition the placement within a * 2GB frame is branch prediction unit friendly. */ -extern unsigned long __bootdata_preserved(VMALLOC_START); -extern unsigned long __bootdata_preserved(VMALLOC_END); +extern unsigned long VMALLOC_START; +extern unsigned long VMALLOC_END; #define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN) -extern struct page *__bootdata_preserved(vmemmap); -extern unsigned long __bootdata_preserved(vmemmap_size); +extern struct page *vmemmap; +extern unsigned long vmemmap_size; -extern unsigned long __bootdata_preserved(MODULES_VADDR); -extern unsigned long __bootdata_preserved(MODULES_END); +extern unsigned long MODULES_VADDR; +extern unsigned long MODULES_END; #define MODULES_VADDR MODULES_VADDR #define MODULES_END MODULES_END #define MODULES_LEN (1UL << 31) @@ -125,6 +125,8 @@ static inline int is_module_addr(void *addr) #define KASLR_LEN 0UL #endif +void setup_protection_map(void); + /* * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | @@ -419,9 +421,10 @@ static inline int is_module_addr(void *addr) #define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL -#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ -#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ +#define PGSTE_ST2_MASK 0x0000ffff00000000UL +#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */ +#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */ /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL @@ -443,98 +446,107 @@ static inline int is_module_addr(void *addr) /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_NONE (_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) +#define __PAGE_RO (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RX __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_RX (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RW __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RW (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RWX (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_INVALID | _PAGE_PROTECT) - -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_SHARED (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_KERNEL (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ +#define __PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ _PAGE_PROTECT | _PAGE_NOEXEC) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ - _PAGE_YOUNG | _PAGE_DIRTY) -/* - * On s390 the page table entry has an invalid bit and a read-only bit. - * Read permission implies execute permission and write permission - * implies read permission. - */ - /*xwr*/ +extern unsigned long page_noexec_mask; + +#define __pgprot_page_mask(x) __pgprot((x) & page_noexec_mask) + +#define PAGE_NONE __pgprot_page_mask(__PAGE_NONE) +#define PAGE_RO __pgprot_page_mask(__PAGE_RO) +#define PAGE_RX __pgprot_page_mask(__PAGE_RX) +#define PAGE_RW __pgprot_page_mask(__PAGE_RW) +#define PAGE_RWX __pgprot_page_mask(__PAGE_RWX) +#define PAGE_SHARED __pgprot_page_mask(__PAGE_SHARED) +#define PAGE_KERNEL __pgprot_page_mask(__PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot_page_mask(__PAGE_KERNEL_RO) /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_NONE (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RO (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ) -#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RW (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RWX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) -#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_DIRTY | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL_RO (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \ - _SEGMENT_ENTRY_LARGE | \ - _SEGMENT_ENTRY_READ | \ - _SEGMENT_ENTRY_WRITE | \ - _SEGMENT_ENTRY_YOUNG | \ - _SEGMENT_ENTRY_DIRTY) + +extern unsigned long segment_noexec_mask; + +#define __pgprot_segment_mask(x) __pgprot((x) & segment_noexec_mask) + +#define SEGMENT_NONE __pgprot_segment_mask(__SEGMENT_NONE) +#define SEGMENT_RO __pgprot_segment_mask(__SEGMENT_RO) +#define SEGMENT_RX __pgprot_segment_mask(__SEGMENT_RX) +#define SEGMENT_RW __pgprot_segment_mask(__SEGMENT_RW) +#define SEGMENT_RWX __pgprot_segment_mask(__SEGMENT_RWX) +#define SEGMENT_KERNEL __pgprot_segment_mask(__SEGMENT_KERNEL) +#define SEGMENT_KERNEL_RO __pgprot_segment_mask(__SEGMENT_KERNEL_RO) /* * Region3 entry (large page) protection definitions. */ -#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ +#define __REGION3_KERNEL (_REGION_ENTRY_TYPE_R3 | \ _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_WRITE | \ + _REGION3_ENTRY_YOUNG | \ _REGION3_ENTRY_DIRTY | \ _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ - _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_YOUNG | \ - _REGION_ENTRY_PROTECT | \ - _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \ +#define __REGION3_KERNEL_RO (_REGION_ENTRY_TYPE_R3 | \ _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ - _REGION3_ENTRY_DIRTY) + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_YOUNG | \ + _REGION_ENTRY_PROTECT | \ + _REGION_ENTRY_NOEXEC) + +extern unsigned long region_noexec_mask; + +#define __pgprot_region_mask(x) __pgprot((x) & region_noexec_mask) + +#define REGION3_KERNEL __pgprot_region_mask(__REGION3_KERNEL) +#define REGION3_KERNEL_RO __pgprot_region_mask(__REGION3_KERNEL_RO) static inline bool mm_p4d_folded(struct mm_struct *mm) { @@ -572,13 +584,14 @@ static inline int mm_is_protected(struct mm_struct *mm) return 0; } -static inline int mm_alloc_pgste(struct mm_struct *mm) +static inline pgste_t clear_pgste_bit(pgste_t pgste, unsigned long mask) { -#ifdef CONFIG_PGSTE - if (unlikely(mm->context.alloc_pgste)) - return 1; -#endif - return 0; + return __pgste(pgste_val(pgste) & ~mask); +} + +static inline pgste_t set_pgste_bit(pgste_t pgste, unsigned long mask) +{ + return __pgste(pgste_val(pgste) | mask); } static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) @@ -902,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } @@ -950,6 +963,12 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY)); } +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +#define pmd_swp_soft_dirty(pmd) pmd_soft_dirty(pmd) +#define pmd_swp_mksoft_dirty(pmd) pmd_mksoft_dirty(pmd) +#define pmd_swp_clear_soft_dirty(pmd) pmd_clear_soft_dirty(pmd) +#endif + /* * query functions pte_write/pte_dirty/pte_young only work if * pte_present() is true. Undefined behaviour if not.. @@ -1328,7 +1347,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, * PTE does not have _PAGE_PROTECT set, to avoid unnecessary overhead. * A local RDP can be used to do the flush. */ - if (MACHINE_HAS_RDP && !(pte_val(*ptep) & _PAGE_PROTECT)) + if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT)) __ptep_rdp(address, ptep, 0, 0, 1); } #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault @@ -1343,7 +1362,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, { if (pte_same(*ptep, entry)) return 0; - if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry)) + if (cpu_has_rdp() && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry)) ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry); else ptep_xchg_direct(vma->vm_mm, addr, ptep, entry); @@ -1391,9 +1410,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); #define pgprot_writecombine pgprot_writecombine pgprot_t pgprot_writecombine(pgprot_t prot); -#define pgprot_writethrough pgprot_writethrough -pgprot_t pgprot_writethrough(pgprot_t prot); - #define PFN_PTE_SHIFT PAGE_SHIFT /* @@ -1435,21 +1451,9 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) pte_t __pte; __pte = __pte(physpage | pgprot_val(pgprot)); - if (!MACHINE_HAS_NX) - __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC)); return pte_mkyoung(__pte); } -static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) -{ - unsigned long physpage = page_to_phys(page); - pte_t __pte = mk_pte_phys(physpage, pgprot); - - if (pte_write(__pte) && PageDirty(page)) - __pte = pte_mkdirty(__pte); - return __pte; -} - #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) @@ -1804,8 +1808,6 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!MACHINE_HAS_NX) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmdp, entry); } @@ -1873,7 +1875,6 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, #define pmdp_collapse_flush pmdp_collapse_flush #define pfn_pmd(pfn, pgprot) mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot)) -#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) static inline int pmd_trans_huge(pmd_t pmd) { @@ -1883,7 +1884,7 @@ static inline int pmd_trans_huge(pmd_t pmd) #define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { - return MACHINE_HAS_EDAT1 ? 1 : 0; + return cpu_has_edat1() ? 1 : 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -1984,6 +1985,45 @@ static inline unsigned long __swp_offset_rste(swp_entry_t entry) #define __rste_to_swp_entry(rste) ((swp_entry_t) { rste }) +/* + * s390 has different layout for PTE and region / segment table entries (RSTE). + * This is also true for swap entries, and their swap type and offset encoding. + * For hugetlbfs PTE_MARKER support, s390 has internal __swp_type_rste() and + * __swp_offset_rste() helpers to correctly handle RSTE swap entries. + * + * But common swap code does not know about this difference, and only uses + * __swp_type(), __swp_offset() and __swp_entry() helpers for conversion between + * arch-dependent and arch-independent representation of swp_entry_t for all + * pagetable levels. On s390, those helpers only work for PTE swap entries. + * + * Therefore, implement __pmd_to_swp_entry() to build a fake PTE swap entry + * and return the arch-dependent representation of that. Correspondingly, + * implement __swp_entry_to_pmd() to convert that into a proper PMD swap + * entry again. With this, the arch-dependent swp_entry_t representation will + * always look like a PTE swap entry in common code. + * + * This is somewhat similar to fake PTEs in hugetlbfs code for s390, but only + * requires conversion of the swap type and offset, and not all the possible + * PTE bits. + */ +static inline swp_entry_t __pmd_to_swp_entry(pmd_t pmd) +{ + swp_entry_t arch_entry; + pte_t pte; + + arch_entry = __rste_to_swp_entry(pmd_val(pmd)); + pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry)); + return __pte_to_swp_entry(pte); +} + +static inline pmd_t __swp_entry_to_pmd(swp_entry_t arch_entry) +{ + pmd_t pmd; + + pmd = __pmd(mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry))); + return pmd; +} + extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); @@ -2001,4 +2041,18 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define pmd_pgtable(pmd) \ ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) +static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt) +{ + unsigned long *pgstes, res; + + pgstes = pgt + _PAGE_ENTRIES; + + res = (pgstes[0] & PGSTE_ST2_MASK) << 16; + res |= pgstes[1] & PGSTE_ST2_MASK; + res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16; + res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32; + + return res; +} + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 51b68a43e195..7ef3bbec98b0 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -26,7 +26,7 @@ enum reserved_range_type { RR_AMODE31, RR_IPLREPORT, RR_CERT_COMP_LIST, - RR_MEM_DETECT_EXTENDED, + RR_MEM_DETECT_EXT, RR_VMEM, RR_MAX }; @@ -128,7 +128,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) RR_TYPE_NAME(AMODE31); RR_TYPE_NAME(IPLREPORT); RR_TYPE_NAME(CERT_COMP_LIST); - RR_TYPE_NAME(MEM_DETECT_EXTENDED); + RR_TYPE_NAME(MEM_DETECT_EXT); RR_TYPE_NAME(VMEM); default: return "UNKNOWN"; diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index 5dca1a46a9f6..b7b59faf16f4 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -20,9 +20,22 @@ * @param key pointer to a buffer containing the key blob * @param keylen size of the key blob in bytes * @param protkey pointer to buffer receiving the protected key + * @param xflags additional execution flags (see PKEY_XFLAG_* definitions below) + * As of now the only supported flag is PKEY_XFLAG_NOMEMALLOC. * @return 0 on success, negative errno value on failure */ int pkey_key2protkey(const u8 *key, u32 keylen, - u8 *protkey, u32 *protkeylen, u32 *protkeytype); + u8 *protkey, u32 *protkeylen, u32 *protkeytype, + u32 xflags); + +/* + * If this flag is given in the xflags parameter, the pkey implementation + * is not allowed to allocate memory but instead should fall back to use + * preallocated memory or simple fail with -ENOMEM. + * This flag is for protected key derive within a cipher or similar + * which must not allocate memory which would cause io operations - see + * also the CRYPTO_ALG_ALLOCATES_MEMORY flag in crypto.h. + */ +#define PKEY_XFLAG_NOMEMALLOC 0x0001 #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 2c29bdf12127..6ccd033acfe5 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -8,12 +8,19 @@ #include <asm/cmpxchg.h> #include <asm/march.h> -#ifdef MARCH_HAS_Z196_FEATURES - /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 + +/* + * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such + * that a decrement hitting 0 means we can and should reschedule. + */ #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ static __always_inline int preempt_count(void) { return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED; @@ -29,6 +36,15 @@ static __always_inline void preempt_count_set(int pc) } while (!arch_try_cmpxchg(&get_lowcore()->preempt_count, &old, new)); } +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * short instruction sequence. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + static __always_inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count); @@ -64,67 +80,24 @@ static __always_inline void __preempt_count_sub(int val) __preempt_count_add(-val); } +/* + * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ static __always_inline bool __preempt_count_dec_and_test(void) { - return __atomic_add(-1, &get_lowcore()->preempt_count) == 1; -} - -static __always_inline bool should_resched(int preempt_offset) -{ - return unlikely(READ_ONCE(get_lowcore()->preempt_count) == - preempt_offset); -} - -#else /* MARCH_HAS_Z196_FEATURES */ - -#define PREEMPT_ENABLED (0) - -static __always_inline int preempt_count(void) -{ - return READ_ONCE(get_lowcore()->preempt_count); -} - -static __always_inline void preempt_count_set(int pc) -{ - get_lowcore()->preempt_count = pc; -} - -static __always_inline void set_preempt_need_resched(void) -{ -} - -static __always_inline void clear_preempt_need_resched(void) -{ -} - -static __always_inline bool test_preempt_need_resched(void) -{ - return false; -} - -static __always_inline void __preempt_count_add(int val) -{ - get_lowcore()->preempt_count += val; -} - -static __always_inline void __preempt_count_sub(int val) -{ - get_lowcore()->preempt_count -= val; -} - -static __always_inline bool __preempt_count_dec_and_test(void) -{ - return !--get_lowcore()->preempt_count && tif_need_resched(); + return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count); } +/* + * Returns true when we need to resched and can (barring IRQ state). + */ static __always_inline bool should_resched(int preempt_offset) { - return unlikely(preempt_count() == preempt_offset && - tif_need_resched()); + return unlikely(READ_ONCE(get_lowcore()->preempt_count) == preempt_offset); } -#endif /* MARCH_HAS_Z196_FEATURES */ - #define init_task_preempt_count(p) do { } while (0) /* Deferred to CPU bringup time */ #define init_idle_preempt_count(p, cpu) do { } while (0) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 8761fd01a9f0..6a9c08b80eda 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -26,11 +26,12 @@ #define RESTART_FLAG_CTLREGS _AC(1 << 0, U) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/cpumask.h> #include <linux/linkage.h> #include <linux/irqflags.h> +#include <linux/bitops.h> #include <asm/fpu-types.h> #include <asm/cpu.h> #include <asm/page.h> @@ -62,33 +63,27 @@ static __always_inline struct pcpu *this_pcpu(void) static __always_inline void set_cpu_flag(int flag) { - this_pcpu()->flags |= (1UL << flag); + set_bit(flag, &this_pcpu()->flags); } static __always_inline void clear_cpu_flag(int flag) { - this_pcpu()->flags &= ~(1UL << flag); + clear_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_cpu_flag(int flag) { - return this_pcpu()->flags & (1UL << flag); + return test_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_and_set_cpu_flag(int flag) { - if (test_cpu_flag(flag)) - return true; - set_cpu_flag(flag); - return false; + return test_and_set_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_and_clear_cpu_flag(int flag) { - if (!test_cpu_flag(flag)) - return false; - clear_cpu_flag(flag); - return true; + return test_and_clear_bit(flag, &this_pcpu()->flags); } /* @@ -97,7 +92,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag) */ static __always_inline bool test_cpu_flag_of(int flag, int cpu) { - return per_cpu(pcpu_devices, cpu).flags & (1UL << flag); + return test_bit(flag, &per_cpu(pcpu_devices, cpu).flags); } #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) @@ -163,8 +158,7 @@ static __always_inline void __stackleak_poison(unsigned long erase_low, " la %[addr],256(%[addr])\n" " brctg %[tmp],0b\n" "1: stg %[poison],0(%[addr])\n" - " larl %[tmp],3f\n" - " ex %[count],0(%[tmp])\n" + " exrl %[count],3f\n" " j 4f\n" "2: stg %[poison],0(%[addr])\n" " j 4f\n" @@ -417,9 +411,13 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) static __always_inline void bpon(void) { - asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82))); + asm_inline volatile( + ALTERNATIVE(" nop\n", + " .insn rrf,0xb2e80000,0,0,13,0\n", + ALT_SPEC(82)) + ); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_S390_PROCESSOR_H */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 788bc4467445..dfa770b15fad 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -9,16 +9,15 @@ #include <linux/bits.h> #include <uapi/asm/ptrace.h> +#include <asm/thread_info.h> #include <asm/tpi.h> #define PIF_SYSCALL 0 /* inside a system call */ -#define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */ #define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) -#define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART) #define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS) @@ -55,7 +54,7 @@ PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \ PSW_MASK_PSTATE | PSW_ASC_PRIMARY) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct psw_bits { unsigned long : 1; @@ -128,7 +127,6 @@ struct pt_regs { struct tpi_info tpi_info; }; unsigned long flags; - unsigned long cr1; unsigned long last_break; }; @@ -231,8 +229,44 @@ static inline void instruction_pointer_set(struct pt_regs *regs, int regs_query_register_offset(const char *name); const char *regs_query_register_name(unsigned int offset); -unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); -unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); + +static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->gprs[15]; +} + +static __always_inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + if (offset >= NUM_GPRS) + return 0; + return regs->gprs[offset]; +} + +static __always_inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + unsigned long ksp = kernel_stack_pointer(regs); + + return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs:pt_regs which contains kernel stack pointer. + * @n:stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long addr; + + addr = kernel_stack_pointer(regs) + n * sizeof(long); + if (!regs_within_kernel_stack(regs, addr)) + return 0; + return READ_ONCE_NOCHECK(*(unsigned long *)addr); +} /** * regs_get_kernel_argument() - get Nth function argument in kernel @@ -253,15 +287,10 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, return regs_get_kernel_stack_nth(regs, argoffset + n); } -static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) -{ - return regs->gprs[15]; -} - static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->gprs[2] = rc; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _S390_PTRACE_H */ diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h index e297bcfc476f..4c7a43bc43a1 100644 --- a/arch/s390/include/asm/purgatory.h +++ b/arch/s390/include/asm/purgatory.h @@ -7,11 +7,11 @@ #ifndef _S390_PURGATORY_H_ #define _S390_PURGATORY_H_ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/purgatory.h> int verify_sha256_digest(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _S390_PURGATORY_H_ */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index eb00fa1771da..0f184dbdbe5e 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -16,7 +16,12 @@ /* 24 + 16 * SCLP_MAX_CORES */ #define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) -#ifndef __ASSEMBLY__ +#define SCLP_ERRNOTIFY_AQ_RESET 0 +#define SCLP_ERRNOTIFY_AQ_REPAIR 1 +#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2 +#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3 + +#ifndef __ASSEMBLER__ #include <linux/uio.h> #include <asm/chpid.h> #include <asm/cpu.h> @@ -87,8 +92,10 @@ struct sclp_info { unsigned char has_kss : 1; unsigned char has_diag204_bif : 1; unsigned char has_gisaf : 1; + unsigned char has_diag310 : 1; unsigned char has_diag318 : 1; unsigned char has_diag320 : 1; + unsigned char has_diag324 : 1; unsigned char has_sipl : 1; unsigned char has_sipl_eckd : 1; unsigned char has_dirq : 1; @@ -111,6 +118,34 @@ struct sclp_info { }; extern struct sclp_info sclp; +struct sccb_header { + u16 length; + u8 function_code; + u8 control_mask[3]; + u16 response_code; +} __packed; + +struct evbuf_header { + u16 length; + u8 type; + u8 flags; + u16 _reserved; +} __packed; + +struct err_notify_evbuf { + struct evbuf_header header; + u8 action; + u8 atype; + u32 fh; + u32 fid; + u8 data[]; +} __packed; + +struct err_notify_sccb { + struct sccb_header header; + struct err_notify_evbuf evbuf; +} __packed; + struct zpci_report_error_header { u8 version; /* Interface version byte */ u8 action; /* Action qualifier byte @@ -133,10 +168,12 @@ int sclp_early_read_storage_info(void); int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); +void sclp_early_detect_machine_features(void); void sclp_early_printk(const char *s); void __sclp_early_printk(const char *s, unsigned int len); void sclp_emergency_printk(const char *s); +int sclp_init(void); int sclp_early_get_memsize(unsigned long *mem); int sclp_early_get_hsa_size(unsigned long *hsa_size); int _sclp_get_core_info(struct sclp_core_info *info); @@ -162,5 +199,5 @@ static inline int sclp_get_core_info(struct sclp_core_info *info, int early) return _sclp_get_core_info(info); } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 70b920b32827..7c57ac968bf6 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -13,28 +13,6 @@ #define PARMAREA 0x10400 #define COMMAND_LINE_SIZE CONFIG_COMMAND_LINE_SIZE -/* - * Machine features detected in early.c - */ - -#define MACHINE_FLAG_VM BIT(0) -#define MACHINE_FLAG_KVM BIT(1) -#define MACHINE_FLAG_LPAR BIT(2) -#define MACHINE_FLAG_DIAG9C BIT(3) -#define MACHINE_FLAG_ESOP BIT(4) -#define MACHINE_FLAG_IDTE BIT(5) -#define MACHINE_FLAG_EDAT1 BIT(7) -#define MACHINE_FLAG_EDAT2 BIT(8) -#define MACHINE_FLAG_TOPOLOGY BIT(10) -#define MACHINE_FLAG_TE BIT(11) -#define MACHINE_FLAG_TLB_LC BIT(12) -#define MACHINE_FLAG_TLB_GUEST BIT(14) -#define MACHINE_FLAG_NX BIT(15) -#define MACHINE_FLAG_GS BIT(16) -#define MACHINE_FLAG_SCC BIT(17) -#define MACHINE_FLAG_PCI_MIO BIT(18) -#define MACHINE_FLAG_RDP BIT(19) -#define MACHINE_FLAG_SEQ_INSN BIT(20) #define LPP_MAGIC BIT(31) #define LPP_PID_MASK _AC(0xffffffff, UL) @@ -46,7 +24,7 @@ #define LEGACY_COMMAND_LINE_SIZE 896 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/lowcore.h> #include <asm/types.h> @@ -63,6 +41,8 @@ struct parmarea { char command_line[COMMAND_LINE_SIZE]; /* 0x10480 */ }; +extern char arch_hw_string[128]; + extern struct parmarea parmarea; extern unsigned int zlib_dfltcc_support; @@ -78,26 +58,6 @@ extern unsigned long max_mappable; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; -#define MACHINE_IS_VM (get_lowcore()->machine_flags & MACHINE_FLAG_VM) -#define MACHINE_IS_KVM (get_lowcore()->machine_flags & MACHINE_FLAG_KVM) -#define MACHINE_IS_LPAR (get_lowcore()->machine_flags & MACHINE_FLAG_LPAR) - -#define MACHINE_HAS_DIAG9C (get_lowcore()->machine_flags & MACHINE_FLAG_DIAG9C) -#define MACHINE_HAS_ESOP (get_lowcore()->machine_flags & MACHINE_FLAG_ESOP) -#define MACHINE_HAS_IDTE (get_lowcore()->machine_flags & MACHINE_FLAG_IDTE) -#define MACHINE_HAS_EDAT1 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT1) -#define MACHINE_HAS_EDAT2 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT2) -#define MACHINE_HAS_TOPOLOGY (get_lowcore()->machine_flags & MACHINE_FLAG_TOPOLOGY) -#define MACHINE_HAS_TE (get_lowcore()->machine_flags & MACHINE_FLAG_TE) -#define MACHINE_HAS_TLB_LC (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_LC) -#define MACHINE_HAS_TLB_GUEST (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_GUEST) -#define MACHINE_HAS_NX (get_lowcore()->machine_flags & MACHINE_FLAG_NX) -#define MACHINE_HAS_GS (get_lowcore()->machine_flags & MACHINE_FLAG_GS) -#define MACHINE_HAS_SCC (get_lowcore()->machine_flags & MACHINE_FLAG_SCC) -#define MACHINE_HAS_PCI_MIO (get_lowcore()->machine_flags & MACHINE_FLAG_PCI_MIO) -#define MACHINE_HAS_RDP (get_lowcore()->machine_flags & MACHINE_FLAG_RDP) -#define MACHINE_HAS_SEQ_INSN (get_lowcore()->machine_flags & MACHINE_FLAG_SEQ_INSN) - /* * Console mode. Override with conmode= */ @@ -142,5 +102,5 @@ static __always_inline u32 gen_lpswe(unsigned long addr) BUILD_BUG_ON(addr > 0xfff); return 0xb2b20000 | addr; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_SETUP_H */ diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 472943b77066..97d77868f83c 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -36,7 +36,7 @@ #define SIGP_STATUS_INCORRECT_STATE 0x00000200UL #define SIGP_STATUS_NOT_RUNNING 0x00000400UL -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> @@ -68,6 +68,6 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, return cc; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __S390_ASM_SIGP_H */ diff --git a/arch/s390/include/asm/skey.h b/arch/s390/include/asm/skey.h new file mode 100644 index 000000000000..84e7cf28b712 --- /dev/null +++ b/arch/s390/include/asm/skey.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_SKEY_H +#define __ASM_SKEY_H + +#include <asm/rwonce.h> + +struct skey_region { + unsigned long start; + unsigned long end; +}; + +#define SKEY_REGION(_start, _end) \ + stringify_in_c(.section .skey_region,"a";) \ + stringify_in_c(.balign 8;) \ + stringify_in_c(.quad (_start);) \ + stringify_in_c(.quad (_end);) \ + stringify_in_c(.previous) + +extern int skey_regions_initialized; +extern struct skey_region __skey_region_start[]; +extern struct skey_region __skey_region_end[]; + +void __skey_regions_initialize(void); + +static inline void skey_regions_initialize(void) +{ + if (READ_ONCE(skey_regions_initialized)) + return; + __skey_regions_initialize(); +} + +#endif /* __ASM_SKEY_H */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 7feca96c48c6..03f4d01664f8 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -7,11 +7,29 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H -#include <asm/sigp.h> -#include <asm/lowcore.h> #include <asm/processor.h> +#include <asm/lowcore.h> +#include <asm/machine.h> +#include <asm/sigp.h> + +static __always_inline unsigned int raw_smp_processor_id(void) +{ + unsigned long lc_cpu_nr; + unsigned int cpu; + + BUILD_BUG_ON(sizeof_field(struct lowcore, cpu_nr) != sizeof(cpu)); + lc_cpu_nr = offsetof(struct lowcore, cpu_nr); + asm_inline( + ALTERNATIVE(" ly %[cpu],%[offzero](%%r0)\n", + " ly %[cpu],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [cpu] "=d" (cpu) + : [offzero] "i" (lc_cpu_nr), + [offalt] "i" (lc_cpu_nr + LOWCORE_ALT_ADDRESS), + "m" (((struct lowcore *)0)->cpu_nr)); + return cpu; +} -#define raw_smp_processor_id() (get_lowcore()->cpu_nr) #define arch_scale_cpu_capacity smp_cpu_get_capacity extern struct mutex smp_cpu_state_mutex; diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index f87dd0a84855..f9935db9fd76 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -16,7 +16,23 @@ #include <asm/processor.h> #include <asm/alternative.h> -#define SPINLOCK_LOCKVAL (get_lowcore()->spinlock_lockval) +static __always_inline unsigned int spinlock_lockval(void) +{ + unsigned long lc_lockval; + unsigned int lockval; + + BUILD_BUG_ON(sizeof_field(struct lowcore, spinlock_lockval) != sizeof(lockval)); + lc_lockval = offsetof(struct lowcore, spinlock_lockval); + asm_inline( + ALTERNATIVE(" ly %[lockval],%[offzero](%%r0)\n", + " ly %[lockval],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [lockval] "=d" (lockval) + : [offzero] "i" (lc_lockval), + [offalt] "i" (lc_lockval + LOWCORE_ALT_ADDRESS), + "m" (((struct lowcore *)0)->spinlock_lockval)); + return lockval; +} extern int spin_retry; @@ -60,7 +76,7 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) int old = 0; barrier(); - return likely(arch_try_cmpxchg(&lp->lock, &old, SPINLOCK_LOCKVAL)); + return likely(arch_try_cmpxchg(&lp->lock, &old, spinlock_lockval())); } static inline void arch_spin_lock(arch_spinlock_t *lp) diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 2ab868cbae6c..f8f68f4ef255 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -26,11 +26,9 @@ void *memmove(void *dest, const void *src, size_t n); #define __HAVE_ARCH_MEMSCAN /* inline & arch function */ #define __HAVE_ARCH_STRCAT /* inline & arch function */ #define __HAVE_ARCH_STRCMP /* arch function */ -#define __HAVE_ARCH_STRCPY /* inline & arch function */ #define __HAVE_ARCH_STRLCAT /* arch function */ #define __HAVE_ARCH_STRLEN /* inline & arch function */ #define __HAVE_ARCH_STRNCAT /* arch function */ -#define __HAVE_ARCH_STRNCPY /* arch function */ #define __HAVE_ARCH_STRNLEN /* inline & arch function */ #define __HAVE_ARCH_STRSTR /* arch function */ #define __HAVE_ARCH_MEMSET16 /* arch function */ @@ -42,7 +40,6 @@ int memcmp(const void *s1, const void *s2, size_t n); int strcmp(const char *s1, const char *s2); size_t strlcat(char *dest, const char *src, size_t n); char *strncat(char *dest, const char *src, size_t n); -char *strncpy(char *dest, const char *src, size_t n); char *strstr(const char *s1, const char *s2); #endif /* !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN) */ @@ -155,22 +152,6 @@ static inline char *strcat(char *dst, const char *src) } #endif -#ifdef __HAVE_ARCH_STRCPY -static inline char *strcpy(char *dst, const char *src) -{ - char *ret = dst; - - asm volatile( - " lghi 0,0\n" - "0: mvst %[dst],%[src]\n" - " jo 0b" - : [dst] "+&a" (dst), [src] "+&a" (src) - : - : "cc", "memory", "0"); - return ret; -} -#endif - #if defined(__HAVE_ARCH_STRLEN) || (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)) static inline size_t __no_sanitize_prefix_strfunc(strlen)(const char *s) { @@ -208,7 +189,6 @@ static inline size_t strnlen(const char * s, size_t n) void *memchr(const void * s, int c, size_t n); void *memscan(void *s, int c, size_t n); char *strcat(char *dst, const char *src); -char *strcpy(char *dst, const char *src); size_t strlen(const char *s); size_t strnlen(const char * s, size_t n); #endif /* !IN_ARCH_STRING_C */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 27e3d804b311..bd4cb00ccd5e 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -24,6 +24,18 @@ static inline long syscall_get_nr(struct task_struct *task, (regs->int_code & 0xffff) : -1; } +static inline void syscall_set_nr(struct task_struct *task, + struct pt_regs *regs, + int nr) +{ + /* + * Unlike syscall_get_nr(), syscall_set_nr() can be called only when + * the target task is stopped for tracing on entering syscall, so + * there is no need to have the same check syscall_get_nr() has. + */ + regs->int_code = (regs->int_code & ~0xffff) | (nr & 0xffff); +} + static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { @@ -65,19 +77,26 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { unsigned long mask = -1UL; - unsigned int n = 6; #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) mask = 0xffffffff; #endif - while (n-- > 0) - if (n > 0) - args[n] = regs->gprs[2 + n] & mask; + for (int i = 1; i < 6; i++) + args[i] = regs->gprs[2 + i] & mask; args[0] = regs->orig_gpr2 & mask; } +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + const unsigned long *args) +{ + regs->orig_gpr2 = args[0]; + for (int n = 1; n < 6; n++) + regs->gprs[2 + n] = args[n]; +} + static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_COMPAT diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index edca5a751df4..9088c5267f35 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -11,8 +11,34 @@ #ifndef __ASM_S390_SYSINFO_H #define __ASM_S390_SYSINFO_H -#include <asm/bitsperlong.h> #include <linux/uuid.h> +#include <asm/bitsperlong.h> +#include <asm/asm.h> + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +static inline int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + int r0 = (fc << 28) | sel1; + int cc; + + asm volatile( + " lr %%r0,%[r0]\n" + " lr %%r1,%[r1]\n" + " stsi %[sysinfo]\n" + " lr %[r0],%%r0\n" + CC_IPM(cc) + : CC_OUT(cc, cc), [r0] "+d" (r0), [sysinfo] "=Q" (*(char *)sysinfo) + : [r1] "d" (sel2) + : CC_CLOBBER_LIST("0", "1", "memory")); + if (cc == 3) + return -EOPNOTSUPP; + return fc ? 0 : (unsigned int)r0 >> 28; +} struct sysinfo_1_1_1 { unsigned char p:1; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index c33f7144d1b9..f6ed2c8192c8 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -9,9 +9,7 @@ #define _ASM_THREAD_INFO_H #include <linux/bits.h> -#ifndef ASM_OFFSETS_C -#include <asm/asm-offsets.h> -#endif +#include <vdso/page.h> /* * General size of kernel stacks @@ -26,9 +24,7 @@ #define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) -#ifndef __ASSEMBLY__ -#include <asm/lowcore.h> -#include <asm/page.h> +#ifndef __ASSEMBLER__ /* * low level task data that entry.S needs immediate access to @@ -67,7 +63,7 @@ void arch_setup_new_exec(void); #define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */ #define TIF_UPROBE 4 /* breakpointed or single-stepping */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ -#define TIF_PGSTE 6 /* New mm's will use 4K page tables */ +#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */ #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ @@ -89,7 +85,7 @@ void arch_setup_new_exec(void); #define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY) #define _TIF_UPROBE BIT(TIF_UPROBE) #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) -#define _TIF_PGSTE BIT(TIF_PGSTE) +#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY) #define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) #define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index a9460bd6555b..59dfb8780f62 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -13,6 +13,7 @@ #include <linux/preempt.h> #include <linux/time64.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/asm.h> /* The value of the TOD clock for 1.1.1970. */ @@ -195,13 +196,6 @@ static inline unsigned long get_tod_clock_fast(void) asm volatile("stckf %0" : "=Q" (clk) : : "cc"); return clk; } - -static inline cycles_t get_cycles(void) -{ - return (cycles_t) get_tod_clock() >> 2; -} -#define get_cycles get_cycles - int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); @@ -229,6 +223,12 @@ static inline unsigned long get_tod_clock_monotonic(void) return tod; } +static inline cycles_t get_cycles(void) +{ + return (cycles_t)get_tod_clock_monotonic() >> 2; +} +#define get_cycles get_cycles + /** * tod_to_ns - convert a TOD format value to nanoseconds * @todval: to be converted TOD format value @@ -267,7 +267,7 @@ static __always_inline u128 eitod_to_ns(u128 todval) */ static inline int tod_after(unsigned long a, unsigned long b) { - if (MACHINE_HAS_SCC) + if (machine_has_scc()) return (long) a > (long) b; return a > b; } @@ -281,7 +281,7 @@ static inline int tod_after(unsigned long a, unsigned long b) */ static inline int tod_after_eq(unsigned long a, unsigned long b) { - if (MACHINE_HAS_SCC) + if (machine_has_scc()) return (long) a >= (long) b; return a >= b; } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index e95b2c8081eb..1e50f6f1ad9d 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -22,7 +22,6 @@ * Pages used for the page tables is a different story. FIXME: more */ -void __tlb_remove_table(void *_table); static inline void tlb_flush(struct mmu_gather *tlb); static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, bool delay_rmap, int page_size); @@ -37,11 +36,12 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, #include <asm/tlbflush.h> #include <asm-generic/tlb.h> +#include <asm/gmap.h> /* * Release the page cache reference for a pte removed by * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page - * has already been freed, so just do free_page_and_swap_cache. + * has already been freed, so just do free_folio_and_swap_cache. * * s390 doesn't delay rmap removal. */ @@ -50,7 +50,7 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, { VM_WARN_ON_ONCE(delay_rmap); - free_page_and_swap_cache(page); + free_folio_and_swap_cache(page_folio(page)); return false; } @@ -85,9 +85,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_pmds = 1; - if (mm_alloc_pgste(tlb->mm)) + if (mm_has_pgste(tlb->mm)) gmap_unlink(tlb->mm, (unsigned long *)pte, address); - tlb_remove_ptdesc(tlb, pte); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte)); } /* @@ -102,12 +102,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, { if (mm_pmd_folded(tlb->mm)) return; - pagetable_pmd_dtor(virt_to_ptdesc(pmd)); __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_puds = 1; - tlb_remove_ptdesc(tlb, pmd); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); } /* @@ -125,7 +124,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb_remove_ptdesc(tlb, p4d); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); } /* @@ -140,11 +139,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, { if (mm_pud_folded(tlb->mm)) return; + __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_p4ds = 1; - tlb_remove_ptdesc(tlb, pud); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); } - #endif /* _S390_TLB_H */ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 9dfd46dd03c6..75491baa2197 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -2,9 +2,11 @@ #ifndef _S390_TLBFLUSH_H #define _S390_TLBFLUSH_H +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/sched.h> #include <asm/processor.h> +#include <asm/machine.h> /* * Flush all TLB entries on the local CPU. @@ -22,7 +24,7 @@ static inline void __tlb_flush_idte(unsigned long asce) unsigned long opt; opt = IDTE_PTOA; - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) opt |= IDTE_GUEST_ASCE; /* Global TLB flush for the mm */ asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc"); @@ -52,7 +54,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); barrier(); gmap_asce = READ_ONCE(mm->context.gmap_asce); - if (MACHINE_HAS_IDTE && gmap_asce != -1UL) { + if (cpu_has_idte() && gmap_asce != -1UL) { if (gmap_asce) __tlb_flush_idte(gmap_asce); __tlb_flush_idte(mm->context.asce); @@ -66,7 +68,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) static inline void __tlb_flush_kernel(void) { - if (MACHINE_HAS_IDTE) + if (cpu_has_idte()) __tlb_flush_idte(init_mm.context.asce); else __tlb_flush_global(); diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index cef06bffad80..44110847342a 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -61,6 +61,12 @@ static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return smp_get_base_cpu(cpu) == cpu; +} +#define topology_is_primary_thread topology_is_primary_thread + #define POLARIZATION_UNKNOWN (-1) #define POLARIZATION_HRZ (0) #define POLARIZATION_VL (1) diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h index f76e5fdff23a..71c8b6f76cdd 100644 --- a/arch/s390/include/asm/tpi.h +++ b/arch/s390/include/asm/tpi.h @@ -5,7 +5,7 @@ #include <linux/types.h> #include <uapi/asm/schid.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* I/O-Interruption Code as stored by TEST PENDING INTERRUPTION (TPI). */ struct tpi_info { @@ -32,6 +32,6 @@ struct tpi_adapter_info { u32 :27; } __packed __aligned(4); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_TPI_H */ diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h index 0b5d550a0478..53695b2196f7 100644 --- a/arch/s390/include/asm/types.h +++ b/arch/s390/include/asm/types.h @@ -5,7 +5,7 @@ #include <uapi/asm/types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ union register_pair { unsigned __int128 pair; @@ -15,5 +15,5 @@ union register_pair { }; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_S390_TYPES_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a81f897a81ce..3e5b8b677057 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -13,25 +13,81 @@ /* * User space memory access functions */ +#include <linux/pgtable.h> #include <asm/asm-extable.h> #include <asm/processor.h> #include <asm/extable.h> #include <asm/facility.h> #include <asm-generic/access_ok.h> +#include <asm/asce.h> #include <linux/instrumented.h> void debug_user_asce(int exit); -unsigned long __must_check -raw_copy_from_user(void *to, const void __user *from, unsigned long n); - -unsigned long __must_check -raw_copy_to_user(void __user *to, const void *from, unsigned long n); +#ifdef CONFIG_KMSAN +#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory +#else +#define uaccess_kmsan_or_inline __always_inline +#endif -#ifndef CONFIG_KASAN #define INLINE_COPY_FROM_USER #define INLINE_COPY_TO_USER -#endif + +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long size) +{ + unsigned long osize; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " lhi %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_FROM(0b, 0b) + EX_TABLE_UA_MVCOS_FROM(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to) + : [spec] "I" (0x81), [from] "Q" (*(const char __user *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (__builtin_constant_p(osize) && osize <= 4096) + return osize - size; + if (likely(CC_TRANSFORM(cc) == 0)) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } +} + +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long size) +{ + unsigned long osize; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " llilh %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "I" (0x81), [from] "Q" (*(const char *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (__builtin_constant_p(osize) && osize <= 4096) + return osize - size; + if (likely(CC_TRANSFORM(cc) == 0)) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } +} unsigned long __must_check _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); @@ -55,63 +111,65 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo return n; } -union oac { - unsigned int val; - struct { - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac1; - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac2; - }; -}; - int __noreturn __put_user_bad(void); -#ifdef CONFIG_KMSAN -#define get_put_user_noinstr_attributes \ - noinline __maybe_unused __no_sanitize_memory -#else -#define get_put_user_noinstr_attributes __always_inline -#endif +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define DEFINE_PUT_USER(type) \ -static get_put_user_noinstr_attributes int \ +#define DEFINE_PUT_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ +__put_user_##type##_noinstr(unsigned type __user *to, \ + unsigned type *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " llilh %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_PUT_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ __put_user_##type##_noinstr(unsigned type __user *to, \ unsigned type *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac1.as = PSW_BITS_AS_SECONDARY, \ - .oac1.a = 1, \ - }; \ int rc; \ \ - asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos %[_to],%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + asm_inline volatile( \ + " llilh %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ - : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [spec] "d" (__oac_spec.val) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ + : [rc] "=d" (rc), [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ : "cc", "0"); \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_PUT_USER_NOINSTR(char); +DEFINE_PUT_USER_NOINSTR(short); +DEFINE_PUT_USER_NOINSTR(int); +DEFINE_PUT_USER_NOINSTR(long); + +#define DEFINE_PUT_USER(type) \ static __always_inline int \ __put_user_##type(unsigned type __user *to, unsigned type *from, \ unsigned long size) \ @@ -128,69 +186,111 @@ DEFINE_PUT_USER(short); DEFINE_PUT_USER(int); DEFINE_PUT_USER(long); -static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) -{ - int rc; +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __x = (x); \ + int __prc; \ + \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __prc = __put_user_char((unsigned char __user *)(ptr), \ + (unsigned char *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 2: \ + __prc = __put_user_short((unsigned short __user *)(ptr),\ + (unsigned short *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 4: \ + __prc = __put_user_int((unsigned int __user *)(ptr), \ + (unsigned int *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 8: \ + __prc = __put_user_long((unsigned long __user *)(ptr), \ + (unsigned long *)&__x, \ + sizeof(*(ptr))); \ + break; \ + default: \ + __prc = __put_user_bad(); \ + break; \ + } \ + __builtin_expect(__prc, 0); \ +}) - switch (size) { - case 1: - rc = __put_user_char((unsigned char __user *)ptr, - (unsigned char *)x, - size); - break; - case 2: - rc = __put_user_short((unsigned short __user *)ptr, - (unsigned short *)x, - size); - break; - case 4: - rc = __put_user_int((unsigned int __user *)ptr, - (unsigned int *)x, - size); - break; - case 8: - rc = __put_user_long((unsigned long __user *)ptr, - (unsigned long *)x, - size); - break; - default: - __put_user_bad(); - break; - } - return rc; -} +#define put_user(x, ptr) \ +({ \ + might_fault(); \ + __put_user(x, ptr); \ +}) int __noreturn __get_user_bad(void); -#define DEFINE_GET_USER(type) \ -static get_put_user_noinstr_attributes int \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define DEFINE_GET_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ __get_user_##type##_noinstr(unsigned type *to, \ - unsigned type __user *from, \ + const unsigned type __user *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " lhi %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + *to = 0; \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_GET_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ +__get_user_##type##_noinstr(unsigned type *to, \ + const unsigned type __user *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac2.as = PSW_BITS_AS_SECONDARY, \ - .oac2.a = 1, \ - }; \ int rc; \ \ - asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + asm_inline volatile( \ + " lhi %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \ - EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \ - : [rc] "=&d" (rc), "=Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [spec] "d" (__oac_spec.val), [_to] "a" (to), \ - [_ksize] "K" (size) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ + : [rc] "=d" (rc), [to] "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ : "cc", "0"); \ + if (likely(!rc)) \ + return 0; \ + *to = 0; \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_GET_USER_NOINSTR(char); +DEFINE_GET_USER_NOINSTR(short); +DEFINE_GET_USER_NOINSTR(int); +DEFINE_GET_USER_NOINSTR(long); + +#define DEFINE_GET_USER(type) \ static __always_inline int \ -__get_user_##type(unsigned type *to, unsigned type __user *from, \ +__get_user_##type(unsigned type *to, const unsigned type __user *from, \ unsigned long size) \ { \ int rc; \ @@ -205,107 +305,50 @@ DEFINE_GET_USER(short); DEFINE_GET_USER(int); DEFINE_GET_USER(long); -static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) -{ - int rc; - - switch (size) { - case 1: - rc = __get_user_char((unsigned char *)x, - (unsigned char __user *)ptr, - size); - break; - case 2: - rc = __get_user_short((unsigned short *)x, - (unsigned short __user *)ptr, - size); - break; - case 4: - rc = __get_user_int((unsigned int *)x, - (unsigned int __user *)ptr, - size); - break; - case 8: - rc = __get_user_long((unsigned long *)x, - (unsigned long __user *)ptr, - size); - break; - default: - __get_user_bad(); - break; - } - return rc; -} - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - */ -#define __put_user(x, ptr) \ -({ \ - __typeof__(*(ptr)) __x = (x); \ - int __pu_err = -EFAULT; \ - \ - __chk_user_ptr(ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - case 2: \ - case 4: \ - case 8: \ - __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \ - break; \ - default: \ - __put_user_bad(); \ - break; \ - } \ - __builtin_expect(__pu_err, 0); \ -}) - -#define put_user(x, ptr) \ -({ \ - might_fault(); \ - __put_user(x, ptr); \ -}) - #define __get_user(x, ptr) \ ({ \ - int __gu_err = -EFAULT; \ + const __user void *____guptr = (ptr); \ + int __grc; \ \ __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ + const unsigned char __user *__guptr = ____guptr; \ unsigned char __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_char(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 2: { \ + const unsigned short __user *__guptr = ____guptr; \ unsigned short __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 4: { \ + const unsigned int __user *__guptr = ____guptr; \ unsigned int __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_int(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 8: { \ + const unsigned long __user *__guptr = ____guptr; \ unsigned long __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_long(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ default: \ - __get_user_bad(); \ + __grc = __get_user_bad(); \ break; \ } \ - __builtin_expect(__gu_err, 0); \ + __builtin_expect(__grc, 0); \ }) #define get_user(x, ptr) \ @@ -321,12 +364,34 @@ long __must_check strncpy_from_user(char *dst, const char __user *src, long coun long __must_check strnlen_user(const char __user *src, long count); -/* - * Zero Userspace - */ -unsigned long __must_check __clear_user(void __user *to, unsigned long size); +static uaccess_kmsan_or_inline __must_check unsigned long +__clear_user(void __user *to, unsigned long size) +{ + unsigned long osize; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " llilh %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "I" (0x81), [from] "Q" (*(const char *)empty_zero_page) + : CC_CLOBBER_LIST("memory", "0")); + if (__builtin_constant_p(osize) && osize <= 4096) + return osize - size; + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + } +} -static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) +static __always_inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { might_fault(); return __clear_user(to, n); @@ -341,282 +406,97 @@ static inline void *s390_kernel_write(void *dst, const void *src, size_t size) return __s390_kernel_write(dst, src, size); } -int __noreturn __put_kernel_bad(void); +void __noreturn __mvc_kernel_nofault_bad(void); -#define __put_kernel_asm(val, to, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_to]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ - : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \ - : [_val] "d" (val) \ - : "cc"); \ - __rc; \ -}) +#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS) -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ - unsigned long __x = (unsigned long)(*((type *)(src))); \ - int __pk_err; \ - \ switch (sizeof(type)) { \ case 1: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \ - break; \ case 2: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \ - break; \ case 4: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \ - break; \ case 8: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \ + asm goto( \ + "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[err_label]) \ + EX_TABLE(1b, %l[err_label]) \ + : [_dst] "=Q" (*(type *)dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_len] "I" (sizeof(type)) \ + : \ + : err_label); \ break; \ default: \ - __pk_err = __put_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__pk_err)) \ - goto err_label; \ } while (0) -int __noreturn __get_kernel_bad(void); +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#define __get_kernel_asm(val, from, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_from]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \ - EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \ - : [rc] "=d" (__rc), [_val] "=d" (val) \ - : [_from] "Q" (*(from)) \ - : "cc"); \ - __rc; \ -}) - -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ - int __gk_err; \ + type *(__dst) = (type *)(dst); \ + int __rc; \ \ switch (sizeof(type)) { \ - case 1: { \ - unsigned char __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 2: { \ - unsigned short __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 4: { \ - unsigned int __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 8: { \ - unsigned long __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \ - *((type *)(dst)) = (type)__x; \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + asm_inline volatile( \ + "0: mvc 0(%[_len],%[_dst]),%[_src]\n" \ + "1: lhi %[_rc],0\n" \ + "2:\n" \ + EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \ + : [_rc] "=d" (__rc), \ + "=m" (*__dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_dst] "a" (__dst), \ + [_len] "I" (sizeof(type))); \ + if (__rc) \ + goto err_label; \ break; \ - }; \ default: \ - __gk_err = __get_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__gk_err)) \ - goto err_label; \ } while (0) -void __cmpxchg_user_key_called_with_bad_pointer(void); +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#define CMPXCHG_USER_KEY_MAX_LOOPS 128 +#define __get_kernel_nofault __mvc_kernel_nofault +#define __put_kernel_nofault __mvc_kernel_nofault -static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, - __uint128_t old, __uint128_t new, - unsigned long key, int size) -{ - int rc = 0; +void __cmpxchg_user_key_called_with_bad_pointer(void); +int __cmpxchg_user_key1(unsigned long address, unsigned char *uval, + unsigned char old, unsigned char new, unsigned long key); +int __cmpxchg_user_key2(unsigned long address, unsigned short *uval, + unsigned short old, unsigned short new, unsigned long key); +int __cmpxchg_user_key4(unsigned long address, unsigned int *uval, + unsigned int old, unsigned int new, unsigned long key); +int __cmpxchg_user_key8(unsigned long address, unsigned long *uval, + unsigned long old, unsigned long new, unsigned long key); +int __cmpxchg_user_key16(unsigned long address, __uint128_t *uval, + __uint128_t old, __uint128_t new, unsigned long key); + +static __always_inline int _cmpxchg_user_key(unsigned long address, void *uval, + __uint128_t old, __uint128_t new, + unsigned long key, int size) +{ switch (size) { - case 1: { - unsigned int prev, shift, mask, _old, _new; - unsigned long count; - - shift = (3 ^ (address & 3)) << 3; - address ^= address & 3; - _old = ((unsigned int)old & 0xff) << shift; - _new = ((unsigned int)new & 0xff) << shift; - mask = ~(0xff << shift); - asm volatile( - " spka 0(%[key])\n" - " sacf 256\n" - " llill %[count],%[max_loops]\n" - "0: l %[prev],%[address]\n" - "1: nr %[prev],%[mask]\n" - " xilf %[mask],0xffffffff\n" - " or %[new],%[prev]\n" - " or %[prev],%[tmp]\n" - "2: lr %[tmp],%[prev]\n" - "3: cs %[prev],%[new],%[address]\n" - "4: jnl 5f\n" - " xr %[tmp],%[prev]\n" - " xr %[new],%[tmp]\n" - " nr %[tmp],%[mask]\n" - " jnz 5f\n" - " brct %[count],2b\n" - "5: sacf 768\n" - " spka %[default_key]\n" - EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev]) - : [rc] "+&d" (rc), - [prev] "=&d" (prev), - [address] "+Q" (*(int *)address), - [tmp] "+&d" (_old), - [new] "+&d" (_new), - [mask] "+&d" (mask), - [count] "=a" (count) - : [key] "%[count]" (key << 4), - [default_key] "J" (PAGE_DEFAULT_KEY), - [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS) - : "memory", "cc"); - *(unsigned char *)uval = prev >> shift; - if (!count) - rc = -EAGAIN; - return rc; - } - case 2: { - unsigned int prev, shift, mask, _old, _new; - unsigned long count; - - shift = (2 ^ (address & 2)) << 3; - address ^= address & 2; - _old = ((unsigned int)old & 0xffff) << shift; - _new = ((unsigned int)new & 0xffff) << shift; - mask = ~(0xffff << shift); - asm volatile( - " spka 0(%[key])\n" - " sacf 256\n" - " llill %[count],%[max_loops]\n" - "0: l %[prev],%[address]\n" - "1: nr %[prev],%[mask]\n" - " xilf %[mask],0xffffffff\n" - " or %[new],%[prev]\n" - " or %[prev],%[tmp]\n" - "2: lr %[tmp],%[prev]\n" - "3: cs %[prev],%[new],%[address]\n" - "4: jnl 5f\n" - " xr %[tmp],%[prev]\n" - " xr %[new],%[tmp]\n" - " nr %[tmp],%[mask]\n" - " jnz 5f\n" - " brct %[count],2b\n" - "5: sacf 768\n" - " spka %[default_key]\n" - EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev]) - : [rc] "+&d" (rc), - [prev] "=&d" (prev), - [address] "+Q" (*(int *)address), - [tmp] "+&d" (_old), - [new] "+&d" (_new), - [mask] "+&d" (mask), - [count] "=a" (count) - : [key] "%[count]" (key << 4), - [default_key] "J" (PAGE_DEFAULT_KEY), - [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS) - : "memory", "cc"); - *(unsigned short *)uval = prev >> shift; - if (!count) - rc = -EAGAIN; - return rc; - } - case 4: { - unsigned int prev = old; - - asm volatile( - " spka 0(%[key])\n" - " sacf 256\n" - "0: cs %[prev],%[new],%[address]\n" - "1: sacf 768\n" - " spka %[default_key]\n" - EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev]) - : [rc] "+&d" (rc), - [prev] "+&d" (prev), - [address] "+Q" (*(int *)address) - : [new] "d" ((unsigned int)new), - [key] "a" (key << 4), - [default_key] "J" (PAGE_DEFAULT_KEY) - : "memory", "cc"); - *(unsigned int *)uval = prev; - return rc; - } - case 8: { - unsigned long prev = old; - - asm volatile( - " spka 0(%[key])\n" - " sacf 256\n" - "0: csg %[prev],%[new],%[address]\n" - "1: sacf 768\n" - " spka %[default_key]\n" - EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev]) - : [rc] "+&d" (rc), - [prev] "+&d" (prev), - [address] "+QS" (*(long *)address) - : [new] "d" ((unsigned long)new), - [key] "a" (key << 4), - [default_key] "J" (PAGE_DEFAULT_KEY) - : "memory", "cc"); - *(unsigned long *)uval = prev; - return rc; - } - case 16: { - __uint128_t prev = old; - - asm volatile( - " spka 0(%[key])\n" - " sacf 256\n" - "0: cdsg %[prev],%[new],%[address]\n" - "1: sacf 768\n" - " spka %[default_key]\n" - EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev]) - : [rc] "+&d" (rc), - [prev] "+&d" (prev), - [address] "+QS" (*(__int128_t *)address) - : [new] "d" (new), - [key] "a" (key << 4), - [default_key] "J" (PAGE_DEFAULT_KEY) - : "memory", "cc"); - *(__uint128_t *)uval = prev; - return rc; - } + case 1: return __cmpxchg_user_key1(address, uval, old, new, key); + case 2: return __cmpxchg_user_key2(address, uval, old, new, key); + case 4: return __cmpxchg_user_key4(address, uval, old, new, key); + case 8: return __cmpxchg_user_key8(address, uval, old, new, key); + case 16: return __cmpxchg_user_key16(address, uval, old, new, key); + default: __cmpxchg_user_key_called_with_bad_pointer(); } - __cmpxchg_user_key_called_with_bad_pointer(); - return rc; + return 0; } /** @@ -648,8 +528,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \ might_fault(); \ __chk_user_ptr(__ptr); \ - __cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \ - (old), (new), (key), sizeof(*(__ptr))); \ + _cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \ + (old), (new), (key), sizeof(*(__ptr))); \ }) #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index dc332609f2c3..8018549a1ad2 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -16,7 +16,6 @@ #include <linux/bug.h> #include <linux/sched.h> #include <asm/page.h> -#include <asm/gmap.h> #include <asm/asm.h> #define UVC_CC_OK 0 @@ -616,8 +615,9 @@ static inline int uv_remove_shared(unsigned long addr) return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); } -int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list_item_hdr *secret); +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret); int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size); extern int prot_virt_host; @@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void) } int uv_pin_shared(unsigned long paddr); -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); int uv_convert_from_secure_pte(pte_t pte); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb); +int uv_convert_from_secure(unsigned long paddr); +int uv_convert_from_secure_folio(struct folio *folio); void setup_uv(void); diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 92c73e4d97a9..8e2fffa0ca68 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -4,15 +4,13 @@ #include <vdso/datapage.h> -#ifndef __ASSEMBLY__ - -extern struct vdso_data *vdso_data; +#ifndef __ASSEMBLER__ int vdso_getcpu_init(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #define VDSO_VERSION_STRING LINUX_2.6.29 diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h index 36355af7160b..6741a27199f8 100644 --- a/arch/s390/include/asm/vdso/getrandom.h +++ b/arch/s390/include/asm/vdso/getrandom.h @@ -3,7 +3,7 @@ #ifndef __ASM_VDSO_GETRANDOM_H #define __ASM_VDSO_GETRANDOM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <vdso/datapage.h> #include <asm/vdso/vsyscall.h> @@ -23,18 +23,6 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags); } -static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) -{ - /* - * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace - * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ - * PAGE_OFFSET points to the real VVAR page. - */ - if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) - return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - return &_vdso_rng_data; -} - -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index 7937765ccfa5..c31ac5f61c83 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -14,20 +14,9 @@ #include <linux/compiler.h> -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd) { - return _vdso_data; -} - -static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) -{ - u64 adj, now; - - now = get_tod_clock(); - adj = vd->arch_data.tod_steering_end - now; - if (unlikely((s64) adj > 0)) - now += (vd->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15); - return now; + return get_tod_clock() - vd->arch_data.tod_delta; } static __always_inline @@ -49,12 +38,4 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) return syscall2(__NR_clock_getres, (long)clkid, (long)ts); } -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif - #endif diff --git a/arch/s390/include/asm/vdso/time_data.h b/arch/s390/include/asm/vdso/time_data.h index 8a08752422e6..25c4e0d9f596 100644 --- a/arch/s390/include/asm/vdso/time_data.h +++ b/arch/s390/include/asm/vdso/time_data.h @@ -5,8 +5,7 @@ #include <linux/types.h> struct arch_vdso_time_data { - __s64 tod_steering_delta; - __u64 tod_steering_end; + __s64 tod_delta; }; #endif /* __S390_ASM_VDSO_TIME_DATA_H */ diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h index 3eb576ecd3bd..b00acec8ddbc 100644 --- a/arch/s390/include/asm/vdso/vsyscall.h +++ b/arch/s390/include/asm/vdso/vsyscall.h @@ -2,35 +2,15 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#define __VDSO_RND_DATA_OFFSET 768 - -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/hrtimer.h> #include <vdso/datapage.h> #include <asm/vdso.h> -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES -}; - -static __always_inline struct vdso_data *__s390_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __s390_get_k_vdso_data - -static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void) -{ - return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; -} -#define __arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h index 203acd6e431b..eaa19dee7699 100644 --- a/arch/s390/include/asm/word-at-a-time.h +++ b/arch/s390/include/asm/word-at-a-time.h @@ -52,7 +52,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) { unsigned long data; - asm volatile( + asm_inline volatile( "0: lg %[data],0(%[addr])\n" "1: nopr %%r7\n" EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr]) diff --git a/arch/s390/include/uapi/asm/diag.h b/arch/s390/include/uapi/asm/diag.h new file mode 100644 index 000000000000..b7e6ccb4ff6e --- /dev/null +++ b/arch/s390/include/uapi/asm/diag.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Diag ioctls and its associated structures definitions. + * + * Copyright IBM Corp. 2024 + */ + +#ifndef __S390_UAPI_ASM_DIAG_H +#define __S390_UAPI_ASM_DIAG_H + +#include <linux/types.h> + +#define DIAG_MAGIC_STR 'D' + +struct diag324_pib { + __u64 address; + __u64 sequence; +}; + +struct diag310_memtop { + __u64 address; + __u64 nesting_lvl; +}; + +/* Diag ioctl definitions */ +#define DIAG324_GET_PIBBUF _IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib) +#define DIAG324_GET_PIBLEN _IOR(DIAG_MAGIC_STR, 0x78, size_t) +#define DIAG310_GET_STRIDE _IOR(DIAG_MAGIC_STR, 0x79, size_t) +#define DIAG310_GET_MEMTOPLEN _IOWR(DIAG_MAGIC_STR, 0x7a, size_t) +#define DIAG310_GET_MEMTOPBUF _IOWR(DIAG_MAGIC_STR, 0x7b, struct diag310_memtop) + +#endif /* __S390_UAPI_ASM_DIAG_H */ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index bb0826024bb9..ea202072f1ad 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -242,7 +242,8 @@ #define PTRACE_OLDSETOPTIONS 21 #define PTRACE_SYSEMU 31 #define PTRACE_SYSEMU_SINGLESTEP 32 -#ifndef __ASSEMBLY__ + +#ifndef __ASSEMBLER__ #include <linux/stddef.h> #include <linux/types.h> @@ -450,6 +451,6 @@ struct user_regs_struct { unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_S390_PTRACE_H */ diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h index a3e1cf168553..d804d1a5b1b3 100644 --- a/arch/s390/include/uapi/asm/schid.h +++ b/arch/s390/include/uapi/asm/schid.h @@ -4,7 +4,7 @@ #include <linux/types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct subchannel_id { __u32 cssid : 8; @@ -15,6 +15,6 @@ struct subchannel_id { __u32 sch_no : 16; } __attribute__ ((packed, aligned(4))); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPIASM_SCHID_H */ diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h index 84457dbb26b4..4ab468c5032e 100644 --- a/arch/s390/include/uapi/asm/types.h +++ b/arch/s390/include/uapi/asm/types.h @@ -10,7 +10,7 @@ #include <asm-generic/int-ll64.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef unsigned long addr_t; typedef __signed__ long saddr_t; @@ -25,6 +25,6 @@ typedef struct { }; } __attribute__((packed, aligned(4))) __vector128; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_S390_TYPES_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 48caae8c7e10..eb06ff888314 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -38,14 +38,15 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o +obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o -obj-y += entry.o reipl.o kdebugfs.o alternative.o +obj-y += entry.o reipl.o kdebugfs.o alternative.o skey.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o +obj-y += diag/ -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index 09cd24cbe74e..6252b7d115dd 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -2,9 +2,9 @@ #include <linux/pgtable.h> #include <asm/abs_lowcore.h> +#include <asm/sections.h> unsigned long __bootdata_preserved(__abs_lowcore); -int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 8d5d0de35de0..90c0e6408992 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,41 +1,90 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef pr_fmt +#define pr_fmt(fmt) "alt: " fmt +#endif + #include <linux/uaccess.h> +#include <linux/printk.h> #include <asm/nospec-branch.h> #include <asm/abs_lowcore.h> #include <asm/alternative.h> #include <asm/facility.h> +#include <asm/sections.h> +#include <asm/machine.h> + +#ifndef a_debug +#define a_debug pr_debug +#endif + +#ifndef __kernel_va +#define __kernel_va(x) (void *)(x) +#endif + +unsigned long __bootdata_preserved(machine_features[1]); + +struct alt_debug { + unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG]; + unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG]; + int spec; +}; + +static struct alt_debug __bootdata_preserved(alt_debug); + +static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data) +{ + char oinsn[33], ninsn[33]; + unsigned long kptr; + unsigned int pos; + + for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++) + hex_byte_pack(&oinsn[2 * pos], old[pos]); + oinsn[2 * pos] = 0; + for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++) + hex_byte_pack(&ninsn[2 * pos], new[pos]); + ninsn[2 * pos] = 0; + kptr = (unsigned long)__kernel_va(old); + a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn); +} void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - u8 *instr, *replacement; + struct alt_debug *d; struct alt_instr *a; - bool replace; + bool debug, replace; + u8 *old, *new; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. */ + d = &alt_debug; for (a = start; a < end; a++) { if (!(a->ctx & ctx)) continue; switch (a->type) { case ALT_TYPE_FACILITY: replace = test_facility(a->data); + debug = __test_facility(a->data, d->facilities); + break; + case ALT_TYPE_FEATURE: + replace = test_machine_feature(a->data); + debug = __test_machine_feature(a->data, d->mfeatures); break; case ALT_TYPE_SPEC: replace = nobp_enabled(); - break; - case ALT_TYPE_LOWCORE: - replace = have_relocated_lowcore(); + debug = d->spec; break; default: replace = false; + debug = false; } if (!replace) continue; - instr = (u8 *)&a->instr_offset + a->instr_offset; - replacement = (u8 *)&a->repl_offset + a->repl_offset; - s390_kernel_write(instr, replacement, a->instrlen); + old = (u8 *)&a->instr_offset + a->instr_offset; + new = (u8 *)&a->repl_offset + a->repl_offset; + if (debug) + alternative_dump(old, new, a->instrlen, a->type, a->data); + s390_kernel_write(old, new, a->instrlen); } } diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 862a9140528e..95ecad9c7d7d 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -5,15 +5,14 @@ * and format the required data. */ -#define ASM_OFFSETS_C - #include <linux/kbuild.h> -#include <linux/kvm_host.h> #include <linux/sched.h> #include <linux/purgatory.h> #include <linux/pgtable.h> -#include <linux/ftrace.h> +#include <linux/ftrace_regs.h> +#include <asm/kvm_host_types.h> #include <asm/stacktrace.h> +#include <asm/ptrace.h> int main(void) { @@ -49,8 +48,8 @@ int main(void) OFFSET(__PT_R14, pt_regs, gprs[14]); OFFSET(__PT_R15, pt_regs, gprs[15]); OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); + OFFSET(__PT_INT_CODE, pt_regs, int_code); OFFSET(__PT_FLAGS, pt_regs, flags); - OFFSET(__PT_CR1, pt_regs, cr1); OFFSET(__PT_LAST_BREAK, pt_regs, last_break); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -76,7 +75,8 @@ int main(void) OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); - OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code); OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); OFFSET(__LC_PER_CODE, lowcore, per_code); @@ -122,7 +122,6 @@ int main(void) OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); - OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); @@ -175,12 +174,6 @@ int main(void) DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size)); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* function graph return value tracing */ - OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2); - OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); - DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); -#endif OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs); DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs)); diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index bf983513dd33..c217a5e64094 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -138,7 +138,7 @@ static void cert_store_key_describe(const struct key *key, struct seq_file *m) * First 64 bytes of the key description is key name in EBCDIC CP 500. * Convert it to ASCII for displaying in /proc/keys. */ - strscpy(ascii, key->description, sizeof(ascii)); + strscpy(ascii, key->description); EBCASC_500(ascii, VC_NAME_LEN_BYTES); seq_puts(m, ascii); @@ -235,7 +235,7 @@ static int __diag320(unsigned long subcode, void *addr) { union register_pair rp = { .even = (unsigned long)addr, }; - asm volatile( + asm_inline volatile( " diag %[rp],%[subcode],0x320\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c index c8575dbc890d..3bebc47beeab 100644 --- a/arch/s390/kernel/cpacf.c +++ b/arch/s390/kernel/cpacf.c @@ -14,7 +14,7 @@ #define CPACF_QUERY(name, instruction) \ static ssize_t name##_query_raw_read(struct file *fp, \ struct kobject *kobj, \ - struct bin_attribute *attr, \ + const struct bin_attribute *attr, \ char *buf, loff_t offs, \ size_t count) \ { \ @@ -24,7 +24,7 @@ static ssize_t name##_query_raw_read(struct file *fp, \ return -EOPNOTSUPP; \ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \ } \ -static BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) +static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) CPACF_QUERY(km, KM); CPACF_QUERY(kmc, KMC); @@ -40,20 +40,20 @@ CPACF_QUERY(prno, PRNO); CPACF_QUERY(kma, KMA); CPACF_QUERY(kdsa, KDSA); -#define CPACF_QAI(name, instruction) \ -static ssize_t name##_query_auth_info_raw_read( \ - struct file *fp, struct kobject *kobj, \ - struct bin_attribute *attr, char *buf, loff_t offs, \ - size_t count) \ -{ \ - cpacf_qai_t qai; \ - \ - if (!cpacf_qai(CPACF_##instruction, &qai)) \ - return -EOPNOTSUPP; \ - return memory_read_from_buffer(buf, count, &offs, &qai, \ - sizeof(qai)); \ -} \ -static BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) +#define CPACF_QAI(name, instruction) \ +static ssize_t name##_query_auth_info_raw_read( \ + struct file *fp, struct kobject *kobj, \ + const struct bin_attribute *attr, char *buf, loff_t offs, \ + size_t count) \ +{ \ + cpacf_qai_t qai; \ + \ + if (!cpacf_qai(CPACF_##instruction, &qai)) \ + return -EOPNOTSUPP; \ + return memory_read_from_buffer(buf, count, &offs, &qai, \ + sizeof(qai)); \ +} \ +static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) CPACF_QAI(km, KM); CPACF_QAI(kmc, KMC); @@ -69,7 +69,7 @@ CPACF_QAI(prno, PRNO); CPACF_QAI(kma, KMA); CPACF_QAI(kdsa, KDSA); -static struct bin_attribute *cpacf_attrs[] = { +static const struct bin_attribute *const cpacf_attrs[] = { &bin_attr_km_query_raw, &bin_attr_kmc_query_raw, &bin_attr_kimd_query_raw, diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c index 1b2ae42a0c15..c9eef9ed876b 100644 --- a/arch/s390/kernel/cpufeature.c +++ b/arch/s390/kernel/cpufeature.c @@ -4,12 +4,15 @@ */ #include <linux/cpufeature.h> +#include <linux/export.h> #include <linux/bug.h> +#include <asm/machine.h> #include <asm/elf.h> enum { TYPE_HWCAP, TYPE_FACILITY, + TYPE_MACHINE, }; struct s390_cpu_feature { @@ -21,6 +24,7 @@ static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = { [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA}, [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS}, [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158}, + [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288}, }; /* @@ -38,6 +42,8 @@ int cpu_have_feature(unsigned int num) return !!(elf_hwcap & BIT(feature->num)); case TYPE_FACILITY: return test_facility(feature->num); + case TYPE_MACHINE: + return test_machine_feature(feature->num); default: WARN_ON_ONCE(1); return 0; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index cd0c93a8fb8b..d4839de8ce9d 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -7,6 +7,7 @@ */ #include <linux/crash_dump.h> +#include <linux/export.h> #include <asm/lowcore.h> #include <linux/kernel.h> #include <linux/init.h> @@ -63,9 +64,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu) { struct save_area *sa; - sa = memblock_alloc(sizeof(*sa), 8); - if (!sa) - return NULL; + sa = memblock_alloc_or_panic(sizeof(*sa), 8); if (is_boot_cpu) list_add(&sa->list, &dump_save_areas); @@ -248,15 +247,6 @@ bool is_kdump_kernel(void) } EXPORT_SYMBOL_GPL(is_kdump_kernel); -static const char *nt_name(Elf64_Word type) -{ - const char *name = "LINUX"; - - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - name = KEXEC_CORE_NOTE_NAME; - return name; -} - /* * Initialize ELF note */ @@ -281,10 +271,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, return PTR_ADD(buf, len); } -static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) -{ - return nt_init_name(buf, type, desc, d_len, nt_name(type)); -} +#define nt_init(buf, type, desc) \ + nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type) /* * Calculate the size of ELF note @@ -300,10 +288,7 @@ static size_t nt_size_name(int d_len, const char *name) return size; } -static inline size_t nt_size(Elf64_Word type, int d_len) -{ - return nt_size_name(d_len, nt_name(type)); -} +#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type) /* * Fill ELF notes for one CPU with save area registers @@ -324,18 +309,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); /* Create ELF notes for the CPU */ - ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); - ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); - ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); - ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); - ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); - ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); - ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + ptr = nt_init(ptr, PRSTATUS, nt_prstatus); + ptr = nt_init(ptr, PRFPREG, nt_fpregset); + ptr = nt_init(ptr, S390_TIMER, sa->timer); + ptr = nt_init(ptr, S390_TODCMP, sa->todcmp); + ptr = nt_init(ptr, S390_TODPREG, sa->todpreg); + ptr = nt_init(ptr, S390_CTRS, sa->ctrs); + ptr = nt_init(ptr, S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - ptr = nt_init(ptr, NT_S390_VXRS_HIGH, - &sa->vxrs_high, sizeof(sa->vxrs_high)); - ptr = nt_init(ptr, NT_S390_VXRS_LOW, - &sa->vxrs_low, sizeof(sa->vxrs_low)); + ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high); + ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low); } return ptr; } @@ -348,16 +331,16 @@ static size_t get_cpu_elf_notes_size(void) struct save_area *sa = NULL; size_t size; - size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); - size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); - size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); - size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); - size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); - size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); - size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + size = nt_size(PRSTATUS, struct elf_prstatus); + size += nt_size(PRFPREG, elf_fpregset_t); + size += nt_size(S390_TIMER, sa->timer); + size += nt_size(S390_TODCMP, sa->todcmp); + size += nt_size(S390_TODPREG, sa->todpreg); + size += nt_size(S390_CTRS, sa->ctrs); + size += nt_size(S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); - size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + size += nt_size(S390_VXRS_HIGH, sa->vxrs_high); + size += nt_size(S390_VXRS_LOW, sa->vxrs_low); } return size; @@ -372,8 +355,8 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; - strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); + strscpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, PRPSINFO, prpsinfo); } /* @@ -508,6 +491,19 @@ static int get_mem_chunk_cnt(void) return cnt; } +static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr, + unsigned long vaddr, unsigned long size) +{ + phdr->p_type = PT_LOAD; + phdr->p_vaddr = vaddr; + phdr->p_offset = paddr; + phdr->p_paddr = paddr; + phdr->p_filesz = size; + phdr->p_memsz = size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; +} + /* * Initialize ELF loads (new kernel) */ @@ -520,14 +516,8 @@ static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm) if (os_info_has_vm) old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); for_each_physmem_range(idx, &oldmem_type, &start, &end) { - phdr->p_type = PT_LOAD; - phdr->p_vaddr = old_identity_base + start; - phdr->p_offset = start; - phdr->p_paddr = start; - phdr->p_filesz = end - start; - phdr->p_memsz = end - start; - phdr->p_flags = PF_R | PF_W | PF_X; - phdr->p_align = PAGE_SIZE; + fill_ptload(phdr, start, old_identity_base + start, + end - start); phdr++; } } @@ -537,6 +527,22 @@ static bool os_info_has_vm(void) return os_info_old_value(OS_INFO_KASLR_OFFSET); } +#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM +/* + * Fill PT_LOAD for a physical memory range owned by a device and detected by + * its device driver. + */ +void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr, + unsigned long long paddr, unsigned long long size) +{ + unsigned long old_identity_base = 0; + + if (os_info_has_vm()) + old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); + fill_ptload(phdr, paddr, old_identity_base + paddr, size); +} +#endif + /* * Prepare PT_LOAD type program header for kernel image region */ @@ -589,7 +595,7 @@ static size_t get_elfcorehdr_size(int phdr_count) /* PT_NOTES */ size += sizeof(Elf64_Phdr); /* nt_prpsinfo */ - size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + size += nt_size(PRPSINFO, struct elf_prpsinfo); /* regsets */ size += get_cpu_cnt() * get_cpu_elf_notes_size(); /* nt_vmcoreinfo */ diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c index 8cc26cf2c64a..a0501f4c7e7a 100644 --- a/arch/s390/kernel/ctlreg.c +++ b/arch/s390/kernel/ctlreg.c @@ -5,6 +5,7 @@ #include <linux/irqflags.h> #include <linux/spinlock.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/smp.h> diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index de19fd8a6a95..c62100dc62c8 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/math.h> #include <linux/minmax.h> #include <linux/debugfs.h> @@ -94,9 +95,6 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, size_t out_buf_size, const char *in_buf); -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, - const char *inbuf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -253,7 +251,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area, rc->level = level; rc->buf_size = buf_size; rc->entry_size = sizeof(debug_entry_t) + buf_size; - strscpy(rc->name, name, sizeof(rc->name)); + strscpy(rc->name, name); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); refcount_set(&(rc->ref_count), 0); @@ -354,7 +352,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode) for (i = 0; i < in->nr_areas; i++) { for (j = 0; j < in->pages_per_area; j++) memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + rc->active_pages[i] = in->active_pages[i]; + rc->active_entries[i] = in->active_entries[i]; } + rc->active_area = in->active_area; out: spin_unlock_irqrestore(&in->lock, flags); return rc; @@ -422,11 +423,17 @@ out: return len; } -/* - * debug_next_entry: - * - goto next entry in p_info +/** + * debug_next_entry - Go to the next entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the next entry. If no further entry + * exists the current position is set to one after the end the return value + * indicates that no further entries exist. + * + * Return: True if there are more following entries, false otherwise */ -static inline int debug_next_entry(file_private_info_t *p_info) +static inline bool debug_next_entry(file_private_info_t *p_info) { debug_info_t *id; @@ -434,10 +441,10 @@ static inline int debug_next_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { p_info->act_entry = 0; p_info->act_page = 0; - goto out; + return true; } if (!id->areas) - return 1; + return false; p_info->act_entry += id->entry_size; /* switch to next page, if we reached the end of the page */ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { @@ -450,10 +457,87 @@ static inline int debug_next_entry(file_private_info_t *p_info) p_info->act_page = 0; } if (p_info->act_area >= id->nr_areas) - return 1; + return false; } -out: - return 0; + return true; +} + +/** + * debug_to_act_entry - Go to the currently active entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the currently active + * entry of @p_info->debug_info_snap + */ +static void debug_to_act_entry(file_private_info_t *p_info) +{ + debug_info_t *snap_id; + + snap_id = p_info->debug_info_snap; + p_info->act_area = snap_id->active_area; + p_info->act_page = snap_id->active_pages[snap_id->active_area]; + p_info->act_entry = snap_id->active_entries[snap_id->active_area]; +} + +/** + * debug_prev_entry - Go to the previous entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the previous entry. If no previous entry + * exists the current position is set left as DEBUG_PROLOG_ENTRY and the return value + * indicates that no previous entries exist. + * + * Return: True if there are more previous entries, false otherwise + */ + +static inline bool debug_prev_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) + debug_to_act_entry(p_info); + if (!id->areas) + return false; + p_info->act_entry -= id->entry_size; + /* switch to prev page, if we reached the beginning of the page */ + if (p_info->act_entry < 0) { + /* end of previous page */ + p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size; + p_info->act_page--; + if (p_info->act_page < 0) { + /* previous area */ + p_info->act_area--; + p_info->act_page = id->pages_per_area - 1; + } + if (p_info->act_area < 0) + p_info->act_area = (id->nr_areas - 1) % id->nr_areas; + } + /* check full circle */ + if (id->active_area == p_info->act_area && + id->active_pages[id->active_area] == p_info->act_page && + id->active_entries[id->active_area] == p_info->act_entry) + return false; + return true; +} + +/** + * debug_move_entry - Go to next entry in either the forward or backward direction + * @p_info: Private info that is manipulated + * @reverse: If true go to the next entry in reverse i.e. previous + * + * Sets the current position in @p_info to the next (@reverse == false) or + * previous (@reverse == true) entry. + * + * Return: True if there are further entries in that direction, + * false otherwise. + */ +static bool debug_move_entry(file_private_info_t *p_info, bool reverse) +{ + if (reverse) + return debug_prev_entry(p_info); + else + return debug_next_entry(p_info); } /* @@ -495,7 +579,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */ } if (copy_size == formatted_line_residue) { entry_offset = 0; - if (debug_next_entry(p_info)) + if (!debug_next_entry(p_info)) goto out; } } @@ -530,6 +614,42 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, return rc; /* number of input characters */ } +static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info, + struct debug_view *view) +{ + debug_info_t *debug_info_snapshot; + file_private_info_t *p_info; + + /* + * Make snapshot of current debug areas to get it consistent. + * To copy all the areas is only needed, if we have a view which + * formats the debug areas. + */ + if (!view->format_proc && !view->header_proc) + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + else + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + + if (!debug_info_snapshot) + return NULL; + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { + debug_info_free(debug_info_snapshot); + return NULL; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = view; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + debug_info_get(debug_info); + + return p_info; +} + /* * debug_open: * - called for user open() @@ -538,7 +658,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, */ static int debug_open(struct inode *inode, struct file *file) { - debug_info_t *debug_info, *debug_info_snapshot; + debug_info_t *debug_info; file_private_info_t *p_info; int i, rc = 0; @@ -556,42 +676,26 @@ static int debug_open(struct inode *inode, struct file *file) goto out; found: - - /* Make snapshot of current debug areas to get it consistent. */ - /* To copy all the areas is only needed, if we have a view which */ - /* formats the debug areas. */ - - if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc) - debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); - else - debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); - - if (!debug_info_snapshot) { - rc = -ENOMEM; - goto out; - } - p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + p_info = debug_file_private_alloc(debug_info, debug_info->views[i]); if (!p_info) { - debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } - p_info->offset = 0; - p_info->debug_info_snap = debug_info_snapshot; - p_info->debug_info_org = debug_info; - p_info->view = debug_info->views[i]; - p_info->act_area = 0; - p_info->act_page = 0; - p_info->act_entry = DEBUG_PROLOG_ENTRY; - p_info->act_entry_offset = 0; file->private_data = p_info; - debug_info_get(debug_info); nonseekable_open(inode, file); out: mutex_unlock(&debug_mutex); return rc; } +static void debug_file_private_free(file_private_info_t *p_info) +{ + if (p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); + debug_info_put(p_info->debug_info_org); + kfree(p_info); +} + /* * debug_close: * - called for user close() @@ -602,13 +706,59 @@ static int debug_close(struct inode *inode, struct file *file) file_private_info_t *p_info; p_info = (file_private_info_t *) file->private_data; - if (p_info->debug_info_snap) - debug_info_free(p_info->debug_info_snap); - debug_info_put(p_info->debug_info_org); - kfree(file->private_data); + debug_file_private_free(p_info); + file->private_data = NULL; return 0; /* success */ } +/** + * debug_dump - Get a textual representation of debug info, or as much as fits + * @id: Debug information to use + * @view: View with which to dump the debug information + * @buf: Buffer the textual debug data representation is written to + * @buf_size: Size of the buffer, including the trailing '\0' byte + * @reverse: Go backwards from the last written entry + * + * This function may be used whenever a textual representation of the debug + * information is required without using an s390dbf file. + * + * Note: It is the callers responsibility to supply a view that is compatible + * with the debug information data. + * + * Return: On success returns the number of bytes written to the buffer not + * including the trailing '\0' byte. If bug_size == 0 the function returns 0. + * On failure an error code less than 0 is returned. + */ +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size, bool reverse) +{ + file_private_info_t *p_info; + size_t size, offset = 0; + + /* Need space for '\0' byte */ + if (buf_size < 1) + return 0; + buf_size--; + + p_info = debug_file_private_alloc(id, view); + if (!p_info) + return -ENOMEM; + + /* There is always at least the DEBUG_PROLOG_ENTRY */ + do { + size = debug_format_entry(p_info); + size = min(size, buf_size - offset); + memcpy(buf + offset, p_info->temp_buf, size); + offset += size; + if (offset >= buf_size) + break; + } while (debug_move_entry(p_info, reverse)); + debug_file_private_free(p_info); + buf[offset] = '\0'; + + return offset; +} + /* Create debugfs entries and add to internal list. */ static void _debug_register(debug_info_t *id) { @@ -972,7 +1122,7 @@ static int s390dbf_procactive(const struct ctl_table *table, int write, return 0; } -static struct ctl_table s390dbf_table[] = { +static const struct ctl_table s390dbf_table[] = { { .procname = "debug_stoppable", .data = &debug_stoppable, @@ -1527,13 +1677,13 @@ EXPORT_SYMBOL(debug_dflt_header_fn); /* * prints debug data sprintf-formatted: - * debug_sprinf_event/exception calls must be used together with this view + * debug_sprintf_event/exception calls must be used together with this view */ #define DEBUG_SPRINTF_MAX_ARGS 10 -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, const char *inbuf) +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; @@ -1570,6 +1720,7 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, out: return rc; } +EXPORT_SYMBOL(debug_sprintf_format_fn); /* * debug_init: diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile new file mode 100644 index 000000000000..956aee6c4090 --- /dev/null +++ b/arch/s390/kernel/diag/Makefile @@ -0,0 +1 @@ +obj-y := diag_misc.o diag324.o diag.o diag310.o diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c index cdd6e31344fa..56b862ba9be8 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -17,7 +17,7 @@ #include <asm/trace/diag.h> #include <asm/sections.h> #include <asm/asm.h> -#include "entry.h" +#include "../entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; @@ -51,8 +51,10 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, + [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" }, [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; @@ -193,7 +195,7 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad { union register_pair rp = { .even = *subcode, .odd = size }; - asm volatile( + asm_inline volatile( " diag %[addr],%[rp],0x204\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -284,7 +286,7 @@ int diag224(void *ptr) int rc = -EOPNOTSUPP; diag_stat_inc(DIAG_STAT_X224); - asm volatile("\n" + asm_inline volatile("\n" " diag %[type],%[addr],0x224\n" "0: lhi %[rc],0\n" "1:\n" diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c new file mode 100644 index 000000000000..d6a34454aa5a --- /dev/null +++ b/arch/s390/kernel/diag/diag310.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request memory topology information via diag0x310. + * + * Copyright IBM Corp. 2025 + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <asm/diag.h> +#include <asm/sclp.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +#define DIAG310_LEVELMIN 1 +#define DIAG310_LEVELMAX 6 + +enum diag310_sc { + DIAG310_SUBC_0 = 0, + DIAG310_SUBC_1 = 1, + DIAG310_SUBC_4 = 4, + DIAG310_SUBC_5 = 5 +}; + +enum diag310_retcode { + DIAG310_RET_SUCCESS = 0x0001, + DIAG310_RET_BUSY = 0x0101, + DIAG310_RET_OPNOTSUPP = 0x0102, + DIAG310_RET_SC4_INVAL = 0x0401, + DIAG310_RET_SC4_NODATA = 0x0402, + DIAG310_RET_SC5_INVAL = 0x0501, + DIAG310_RET_SC5_NODATA = 0x0502, + DIAG310_RET_SC5_ESIZE = 0x0503 +}; + +union diag310_response { + u64 response; + struct { + u64 result : 32; + u64 : 16; + u64 rc : 16; + }; +}; + +union diag310_req_subcode { + u64 subcode; + struct { + u64 : 48; + u64 st : 8; + u64 sc : 8; + }; +}; + +union diag310_req_size { + u64 size; + struct { + u64 page_count : 32; + u64 : 32; + }; +}; + +static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, .odd = size }; + + diag_stat_inc(DIAG_STAT_X310); + asm volatile("diag %[rp],%[subcode],0x310\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static int diag310_result_to_errno(unsigned int result) +{ + switch (result) { + case DIAG310_RET_BUSY: + return -EBUSY; + case DIAG310_RET_OPNOTSUPP: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int diag310_get_subcode_mask(unsigned long *mask) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_0, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *mask = res.response; + return 0; +} + +static int diag310_get_memtop_stride(unsigned long *stride) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_1, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *stride = res.result; + return 0; +} + +static int diag310_get_memtop_size(unsigned long *pages, unsigned long level) +{ + union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level }; + union diag310_response res; + + res.response = diag310(req.subcode, 0, NULL); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + *pages = res.result; + return 0; + case DIAG310_RET_SC4_NODATA: + return -ENODATA; + case DIAG310_RET_SC4_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level) +{ + union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level }; + union diag310_req_size req_size = { .page_count = pages }; + union diag310_response res; + + res.response = diag310(req_sc.subcode, req_size.size, buf); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + return 0; + case DIAG310_RET_SC5_NODATA: + return -ENODATA; + case DIAG310_RET_SC5_ESIZE: + return -EOVERFLOW; + case DIAG310_RET_SC5_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_check_features(void) +{ + static int features_available; + unsigned long mask; + int rc; + + if (READ_ONCE(features_available)) + return 0; + if (!sclp.has_diag310) + return -EOPNOTSUPP; + rc = diag310_get_subcode_mask(&mask); + if (rc) + return rc; + if (!test_bit_inv(DIAG310_SUBC_1, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_4, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_5, &mask)) + return -EOPNOTSUPP; + WRITE_ONCE(features_available, 1); + return 0; +} + +static int memtop_get_stride_len(unsigned long *res) +{ + static unsigned long memtop_stride; + unsigned long stride; + int rc; + + stride = READ_ONCE(memtop_stride); + if (!stride) { + rc = diag310_get_memtop_stride(&stride); + if (rc) + return rc; + WRITE_ONCE(memtop_stride, stride); + } + *res = stride; + return 0; +} + +static int memtop_get_page_count(unsigned long *res, unsigned long level) +{ + static unsigned long memtop_pages[DIAG310_LEVELMAX]; + unsigned long pages; + int rc; + + if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN) + return -EINVAL; + pages = READ_ONCE(memtop_pages[level - 1]); + if (!pages) { + rc = diag310_get_memtop_size(&pages, level); + if (rc) + return rc; + WRITE_ONCE(memtop_pages[level - 1], pages); + } + *res = pages; + return 0; +} + +long diag310_memtop_stride(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long stride; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + rc = memtop_get_stride_len(&stride); + if (rc) + return rc; + if (put_user(stride, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_len(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long pages, level; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, argp)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + if (put_user(pages * PAGE_SIZE, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_buf(unsigned long arg) +{ + struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg; + unsigned long level, pages, data_size; + u64 address; + void *buf; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, &udata->nesting_lvl)) + return -EFAULT; + if (get_user(address, &udata->address)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + data_size = pages * PAGE_SIZE; + buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, + NUMA_NO_NODE, __builtin_return_address(0)); + if (!buf) + return -ENOMEM; + rc = diag310_store_topology_map(buf, pages, level); + if (rc) + goto out; + if (copy_to_user((void __user *)address, buf, data_size)) + rc = -EFAULT; +out: + vfree(buf); + return rc; +} diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c new file mode 100644 index 000000000000..7fa4c0b7eb6c --- /dev/null +++ b/arch/s390/kernel/diag/diag324.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request power readings for resources in a computing environment via + * diag 0x324. diag 0x324 stores the power readings in the power information + * block (pib). + * + * Copyright IBM Corp. 2024 + */ + +#define pr_fmt(fmt) "diag324: " fmt +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/ioctl.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/ktime.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#include <asm/diag.h> +#include <asm/sclp.h> +#include <asm/timex.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +enum subcode { + DIAG324_SUBC_0 = 0, + DIAG324_SUBC_1 = 1, + DIAG324_SUBC_2 = 2, +}; + +enum retcode { + DIAG324_RET_SUCCESS = 0x0001, + DIAG324_RET_SUBC_NOTAVAIL = 0x0103, + DIAG324_RET_INSUFFICIENT_SIZE = 0x0104, + DIAG324_RET_READING_UNAVAILABLE = 0x0105, +}; + +union diag324_response { + u64 response; + struct { + u64 installed : 32; + u64 : 16; + u64 rc : 16; + } sc0; + struct { + u64 format : 16; + u64 : 16; + u64 pib_len : 16; + u64 rc : 16; + } sc1; + struct { + u64 : 48; + u64 rc : 16; + } sc2; +}; + +union diag324_request { + u64 request; + struct { + u64 : 32; + u64 allocated : 16; + u64 : 12; + u64 sc : 4; + } sc2; +}; + +struct pib { + u32 : 8; + u32 num : 8; + u32 len : 16; + u32 : 24; + u32 hlen : 8; + u64 : 64; + u64 intv; + u8 r[]; +} __packed; + +struct pibdata { + struct pib *pib; + ktime_t expire; + u64 sequence; + size_t len; + int rc; +}; + +static DEFINE_MUTEX(pibmutex); +static struct pibdata pibdata; + +#define PIBWORK_DELAY (5 * NSEC_PER_SEC) + +static void pibwork_handler(struct work_struct *work); +static DECLARE_DELAYED_WORK(pibwork, pibwork_handler); + +static unsigned long diag324(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr }; + + diag_stat_inc(DIAG_STAT_X324); + asm volatile("diag %[rp],%[subcode],0x324\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static void pibwork_handler(struct work_struct *work) +{ + struct pibdata *data = &pibdata; + ktime_t timedout; + + mutex_lock(&pibmutex); + timedout = ktime_add_ns(data->expire, PIBWORK_DELAY); + if (ktime_before(ktime_get(), timedout)) { + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + goto out; + } + vfree(data->pib); + data->pib = NULL; +out: + mutex_unlock(&pibmutex); +} + +static void pib_update(struct pibdata *data) +{ + union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len }; + union diag324_response res; + int rc; + + memset(data->pib, 0, data->len); + res.response = diag324(req.request, data->pib); + switch (res.sc2.rc) { + case DIAG324_RET_SUCCESS: + rc = 0; + break; + case DIAG324_RET_SUBC_NOTAVAIL: + rc = -ENOENT; + break; + case DIAG324_RET_INSUFFICIENT_SIZE: + rc = -EMSGSIZE; + break; + case DIAG324_RET_READING_UNAVAILABLE: + rc = -EBUSY; + break; + default: + rc = -EINVAL; + } + data->rc = rc; +} + +long diag324_pibbuf(unsigned long arg) +{ + struct diag324_pib __user *udata = (struct diag324_pib __user *)arg; + struct pibdata *data = &pibdata; + static bool first = true; + u64 address; + int rc; + + if (!data->len) + return -EOPNOTSUPP; + if (get_user(address, &udata->address)) + return -EFAULT; + mutex_lock(&pibmutex); + rc = -ENOMEM; + if (!data->pib) + data->pib = vmalloc(data->len); + if (!data->pib) + goto out; + if (first || ktime_after(ktime_get(), data->expire)) { + pib_update(data); + data->sequence++; + data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv)); + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + first = false; + } + rc = data->rc; + if (rc != 0 && rc != -EBUSY) + goto out; + rc = copy_to_user((void __user *)address, data->pib, data->pib->len); + rc |= put_user(data->sequence, &udata->sequence); + if (rc) + rc = -EFAULT; +out: + mutex_unlock(&pibmutex); + return rc; +} + +long diag324_piblen(unsigned long arg) +{ + struct pibdata *data = &pibdata; + + if (!data->len) + return -EOPNOTSUPP; + if (put_user(data->len, (size_t __user *)arg)) + return -EFAULT; + return 0; +} + +static int __init diag324_init(void) +{ + union diag324_response res; + unsigned long installed; + + if (!sclp.has_diag324) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_0, NULL); + if (res.sc0.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + installed = res.response; + if (!test_bit_inv(DIAG324_SUBC_1, &installed)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG324_SUBC_2, &installed)) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_1, NULL); + if (res.sc1.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + pibdata.len = res.sc1.pib_len; + return 0; +} +device_initcall(diag324_init); diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h new file mode 100644 index 000000000000..7080be946785 --- /dev/null +++ b/arch/s390/kernel/diag/diag_ioctl.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DIAG_IOCTL_H +#define _DIAG_IOCTL_H + +#include <linux/types.h> + +long diag324_pibbuf(unsigned long arg); +long diag324_piblen(unsigned long arg); + +long diag310_memtop_stride(unsigned long arg); +long diag310_memtop_len(unsigned long arg); +long diag310_memtop_buf(unsigned long arg); + +#endif /* _DIAG_IOCTL_H */ diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c new file mode 100644 index 000000000000..efffe02ea02e --- /dev/null +++ b/arch/s390/kernel/diag/diag_misc.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide diagnose information via misc device /dev/diag. + * + * Copyright IBM Corp. 2024 + */ + +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/types.h> + +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long rc; + + switch (cmd) { + case DIAG324_GET_PIBLEN: + rc = diag324_piblen(arg); + break; + case DIAG324_GET_PIBBUF: + rc = diag324_pibbuf(arg); + break; + case DIAG310_GET_STRIDE: + rc = diag310_memtop_stride(arg); + break; + case DIAG310_GET_MEMTOPLEN: + rc = diag310_memtop_len(arg); + break; + case DIAG310_GET_MEMTOPBUF: + rc = diag310_memtop_buf(arg); + break; + default: + rc = -ENOIOCTLCMD; + break; + } + return rc; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = diag_ioctl, +}; + +static struct miscdevice diagdev = { + .name = "diag", + .minor = MISC_DYNAMIC_MINOR, + .fops = &fops, + .mode = 0444, +}; + +static int diag_init(void) +{ + return misc_register(&diagdev); +} + +device_initcall(diag_init); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 94eb8168ea44..63a1d4226ff8 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -17,7 +17,6 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/delay.h> -#include <linux/export.h> #include <linux/kallsyms.h> #include <linux/reboot.h> #include <linux/kprobes.h> diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 1ecd0580561f..dd410962ecbe 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -17,6 +17,7 @@ #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> +#include <asm/asm-offsets.h> #include <asm/processor.h> #include <asm/debug.h> #include <asm/dis.h> @@ -198,13 +199,8 @@ void __noreturn die(struct pt_regs *regs, const char *str) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, + printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); -#ifdef CONFIG_PREEMPT - pr_cont("PREEMPT "); -#elif defined(CONFIG_PREEMPT_RT) - pr_cont("PREEMPT_RT "); -#endif pr_cont("SMP "); if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 62f8f5a750a3..9adfbdd377dc 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/sched/debug.h> +#include <linux/cpufeature.h> #include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> @@ -21,6 +22,8 @@ #include <asm/asm-extable.h> #include <linux/memblock.h> #include <asm/access-regs.h> +#include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/fpu.h> @@ -36,12 +39,14 @@ #include <asm/boot_data.h> #include "entry.h" -#define decompressor_handled_param(param) \ -static int __init ignore_decompressor_param_##param(char *s) \ +#define __decompressor_handled_param(func, param) \ +static int __init ignore_decompressor_param_##func(char *s) \ { \ return 0; \ } \ -early_param(#param, ignore_decompressor_param_##param) +early_param(#param, ignore_decompressor_param_##func) + +#define decompressor_handled_param(param) __decompressor_handled_param(param, param) decompressor_handled_param(mem); decompressor_handled_param(vmalloc); @@ -50,6 +55,8 @@ decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); decompressor_handled_param(relocate_lowcore); +decompressor_handled_param(bootdebug); +__decompressor_handled_param(debug_alternative, debug-alternative); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -58,25 +65,10 @@ static void __init kasan_early_init(void) { #ifdef CONFIG_KASAN init_task.kasan_depth = 0; - sclp_early_printk("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized\n"); #endif } -static void __init reset_tod_clock(void) -{ - union tod_clock clk; - - if (store_tod_clock_ext_cc(&clk) == 0) - return; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) - disabled_wait(); - - memset(&tod_clock_base, 0, sizeof(tod_clock_base)); - tod_clock_base.tod = TOD_UNIX_EPOCH; - get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; -} - /* * Initialize storage key for kernel pages */ @@ -95,26 +87,6 @@ static noinline __init void init_kernel_storage_key(void) static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); -static noinline __init void detect_machine_type(void) -{ - struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; - - /* Check current-configuration-level */ - if (stsi(NULL, 0, 0, 0) <= 2) { - get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR; - return; - } - /* Get virtual-machine cpu information. */ - if (stsi(vmms, 3, 2, 2) || !vmms->count) - return; - - /* Detect known hypervisors */ - if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_KVM; - else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) - get_lowcore()->machine_flags |= MACHINE_FLAG_VM; -} - /* Remove leading, trailing and double whitespace. */ static inline void strim_all(char *str) { @@ -133,6 +105,8 @@ static inline void strim_all(char *str) } } +char arch_hw_string[128]; + static noinline __init void setup_arch_string(void) { struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; @@ -155,10 +129,11 @@ static noinline __init void setup_arch_string(void) strim_all(hvstr); } else { sprintf(hvstr, "%s", - MACHINE_IS_LPAR ? "LPAR" : - MACHINE_IS_VM ? "z/VM" : - MACHINE_IS_KVM ? "KVM" : "unknown"); + machine_is_lpar() ? "LPAR" : + machine_is_vm() ? "z/VM" : + machine_is_kvm() ? "KVM" : "unknown"); } + sprintf(arch_hw_string, "HW: %s (%s)", mstr, hvstr); dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } @@ -166,9 +141,8 @@ static __init void setup_topology(void) { int max_mnest; - if (!test_facility(11)) + if (!cpu_has_topology()) return; - get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY; for (max_mnest = 6; max_mnest > 1; max_mnest--) { if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) break; @@ -183,6 +157,7 @@ void __init __do_early_pgm_check(struct pt_regs *regs) regs->int_code = lc->pgm_int_code; regs->int_parm_long = lc->trans_exc_code; + regs->last_break = lc->pgm_last_break; ip = __rewind_psw(regs->psw, regs->int_code >> 16); /* Monitor Event? Might be a warning */ @@ -217,65 +192,10 @@ static noinline __init void setup_lowcore_early(void) lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); } -static __init void detect_diag9c(void) -{ - unsigned int cpu_address; - int rc; - - cpu_address = stap(); - diag_stat_inc(DIAG_STAT_X09C); - asm volatile( - " diag %2,0,0x9c\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); - if (!rc) - get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C; -} - -static __init void detect_machine_facilities(void) -{ - if (test_facility(8)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1; - system_ctl_set_bit(0, CR0_EDAT_BIT); - } - if (test_facility(78)) - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2; - if (test_facility(3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(50) && test_facility(73)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_TE; - system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); - } - if (test_facility(51)) - get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC; - if (test_facility(129)) - system_ctl_set_bit(0, CR0_VECTOR_BIT); - if (test_facility(130)) - get_lowcore()->machine_flags |= MACHINE_FLAG_NX; - if (test_facility(133)) - get_lowcore()->machine_flags |= MACHINE_FLAG_GS; - if (test_facility(139) && (tod_clock_base.tod >> 63)) { - /* Enabled signed clock comparator comparisons */ - get_lowcore()->machine_flags |= MACHINE_FLAG_SCC; - clock_comparator_max = -1ULL >> 1; - system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); - } - if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO; - /* the control bit is set during PCI initialization */ - } - if (test_facility(194)) - get_lowcore()->machine_flags |= MACHINE_FLAG_RDP; - if (test_facility(85)) - get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN; -} - static inline void save_vector_registers(void) { #ifdef CONFIG_CRASH_DUMP - if (test_facility(129)) + if (cpu_has_vx()) save_vx_regs(boot_cpu_vector_save_area); #endif } @@ -307,17 +227,13 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { kasan_early_init(); - reset_tod_clock(); time_early_init(); init_kernel_storage_key(); lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - detect_machine_type(); setup_arch_string(); setup_boot_command_line(); - detect_diag9c(); - detect_machine_facilities(); save_vector_registers(); setup_topology(); sclp_early_detect(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 960c08700cf6..75b0fbb236d0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -29,6 +29,7 @@ #include <asm/nmi.h> #include <asm/nospec-insn.h> #include <asm/lowcore.h> +#include <asm/machine.h> _LPP_OFFSET = __LC_LPP @@ -44,7 +45,7 @@ _LPP_OFFSET = __LC_LPP ALTERNATIVE_2 "b \lpswe;nopr", \ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm .macro MBEAR reg, lowcore @@ -52,16 +53,7 @@ _LPP_OFFSET = __LC_LPP ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea, lowcore -#ifdef CONFIG_CHECK_STACK - tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - la %r14,\savearea(\lowcore) - jz stack_overflow -#endif - .endm - .macro CHECK_VMAP_STACK savearea, lowcore, oklabel -#ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET @@ -76,10 +68,7 @@ _LPP_OFFSET = __LC_LPP clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel la %r14,\savearea(\lowcore) - j stack_overflow -#else - j \oklabel -#endif + j stack_invalid .endm /* @@ -127,7 +116,7 @@ _LPP_OFFSET = __LC_LPP .macro SIEEXIT sie_control,lowcore lg %r9,\sie_control # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce lg %r9,__LC_CURRENT(\lowcore) mvi __TI_sie(%r9),0 larl %r9,sie_exit # skip forward to sie_exit @@ -135,7 +124,7 @@ _LPP_OFFSET = __LC_LPP #endif .macro STACKLEAK_ERASE -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK +#ifdef CONFIG_KSTACK_ERASE brasl %r14,stackleak_erase_on_task_stack #endif .endm @@ -219,7 +208,7 @@ SYM_FUNC_START(__sie64a) lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE GET_LC %r14 - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce lg %r14,__LC_CURRENT(%r14) mvi __TI_sie(%r14),0 SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) @@ -251,7 +240,6 @@ SYM_CODE_START(system_call) lghi %r14,0 .Lsysc_per: STBEAR __LC_LAST_BREAK(%r13) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) lg %r15,__LC_KERNEL_STACK(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) @@ -272,7 +260,6 @@ SYM_CODE_START(system_call) lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -289,7 +276,6 @@ SYM_CODE_START(ret_from_fork) brasl %r14,__ret_from_fork STACKLEAK_ERASE GET_LC %r13 - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -310,10 +296,7 @@ SYM_CODE_START(pgm_check_handler) lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) xgr %r10,%r10 tmhh %r8,0x0001 # coming from user space? - jno .Lpgm_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - j 3f # -> fault in user space -.Lpgm_skip_asce: + jo 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) lg %r11,__LC_CURRENT(%r13) tm __TI_sie(%r11),0xff @@ -326,9 +309,8 @@ SYM_CODE_START(pgm_check_handler) jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + # CHECK_VMAP_STACK branches to stack_invalid or 4f CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f 3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) @@ -352,7 +334,6 @@ SYM_CODE_START(pgm_check_handler) tmhh %r8,0x0001 # returning to user space? jno .Lpgm_exit_kernel STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) .Lpgm_exit_kernel: @@ -394,11 +375,9 @@ SYM_CODE_START(\name) BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - lg %r15,__LC_KERNEL_STACK(%r13) +1: lg %r15,__LC_KERNEL_STACK(%r13) 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -421,7 +400,6 @@ SYM_CODE_START(\name) tmhh %r8,0x0001 # returning to user ? jno 2f STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) 2: LBEAR __PT_LAST_BREAK(%r11) @@ -481,7 +459,7 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - lg %r10,__LC_PCPU + lg %r10,__LC_PCPU(%r13) oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 @@ -489,8 +467,6 @@ SYM_CODE_START(mcck_int_handler) .Lmcck_user: lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) - stctg %c1,%c1,__PT_CR1(%r11) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lay %r14,__LC_GPREGS_SAVE_AREA(%r13) mvc __PT_R0(128,%r11),0(%r14) @@ -508,7 +484,6 @@ SYM_CODE_START(mcck_int_handler) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? @@ -603,25 +578,24 @@ SYM_CODE_END(early_pgm_check_handler) .section .kprobes.text, "ax" -#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK) /* - * The synchronous or the asynchronous stack overflowed. We are dead. + * The synchronous or the asynchronous stack pointer is invalid. We are dead. * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. */ -SYM_CODE_START(stack_overflow) +SYM_CODE_START(stack_invalid) GET_LC %r15 lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs - jg kernel_stack_overflow -SYM_CODE_END(stack_overflow) -#endif + jg kernel_stack_invalid +SYM_CODE_END(stack_invalid) .section .data, "aw" .balign 4 @@ -637,7 +611,7 @@ SYM_DATA_END(daton_psw) .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame SYM_DATA_START(sys_call_table) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table) #undef SYSCALL @@ -645,7 +619,7 @@ SYM_DATA_END(sys_call_table) #define SYSCALL(esame,emu) .quad __s390_ ## emu SYM_DATA_START(sys_call_table_emu) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table_emu) #undef SYSCALL #endif diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 21969520f947..dd55cc6bbc28 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -31,7 +31,7 @@ void do_secure_storage_access(struct pt_regs *regs); void do_non_secure_storage_access(struct pt_regs *regs); void do_secure_storage_violation(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); -void kernel_stack_overflow(struct pt_regs * regs); +void kernel_stack_invalid(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); @@ -41,7 +41,6 @@ void do_restart(void *arg); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); struct s390_mmap_arg_struct; struct fadvise64_64_args; diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c index f02127219a27..d028b0be5c1d 100644 --- a/arch/s390/kernel/facility.c +++ b/arch/s390/kernel/facility.c @@ -3,6 +3,7 @@ * Copyright IBM Corp. 2023 */ +#include <linux/export.h> #include <asm/facility.h> unsigned int stfle_size(void) diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 6f2e87920288..03a8973aec3c 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -5,6 +5,8 @@ * Copyright IBM Corp. 2015 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ + +#include <linux/export.h> #include <linux/kernel.h> #include <linux/cpu.h> #include <linux/sched.h> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 51439a71e392..e94bb98f5231 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/kmsan-checks.h> +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/execmem.h> #include <trace/syscall.h> @@ -69,7 +70,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) bool ftrace_need_init_nop(void) { - return !MACHINE_HAS_SEQ_INSN; + return !cpu_has_seq_insn(); } int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) @@ -189,7 +190,7 @@ static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec, int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, old_addr, addr); else return ftrace_modify_trampoline_call(rec, old_addr, addr); @@ -213,8 +214,8 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - /* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */ - if (MACHINE_HAS_SEQ_INSN) + /* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */ + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, addr, 0); else return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); @@ -234,7 +235,7 @@ static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long add int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, 0, addr); else return ftrace_make_trampoline_call(rec, addr); @@ -261,43 +262,19 @@ void ftrace_arch_code_modify_post_process(void) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* - * Hook the return address and push it in the stack of return addresses - * in current thread info. - */ -unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp, - unsigned long ip) -{ - if (unlikely(ftrace_graph_is_dead())) - goto out; - if (unlikely(atomic_read(¤t->tracing_graph_pause))) - goto out; - ip -= MCOUNT_INSN_SIZE; - if (!function_graph_enter(ra, ip, 0, (void *) sp)) - ra = (unsigned long) return_to_handler; -out: - return ra; -} -NOKPROBE_SYMBOL(prepare_ftrace_return); -/* - * Patch the kernel code at ftrace_graph_caller location. The instruction - * there is branch relative on condition. To enable the ftrace graph code - * block, we simply patch the mask field of the instruction to zero and - * turn the instruction into a nop. - * To disable the ftrace graph code the mask field will be patched to - * all ones, which turns the instruction into an unconditional branch. - */ -int ftrace_enable_ftrace_graph_caller(void) +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { - /* Expect brc 0xf,... */ - return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false); -} + unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15]; -int ftrace_disable_ftrace_graph_caller(void) -{ - /* Expect brc 0x0,... */ - return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true); + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs)) + *parent = (unsigned long)&return_to_handler; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c index 0b68168d9566..cf26d7a37425 100644 --- a/arch/s390/kernel/guarded_storage.c +++ b/arch/s390/kernel/guarded_storage.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/signal.h> @@ -109,7 +110,7 @@ static int gs_broadcast(void) SYSCALL_DEFINE2(s390_guarded_storage, int, command, struct gs_cb __user *, gs_cb) { - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -EOPNOTSUPP; switch (command) { case GS_ENABLE: diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 396034b2fe67..7edb9ded199c 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -18,12 +18,10 @@ __HEAD SYM_CODE_START(startup_continue) - larl %r1,tod_clock_base - GET_LC %r2 - mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # + GET_LC %r2 larl %r14,init_task stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c index 2a99a216ab62..e7b66d046e8d 100644 --- a/arch/s390/kernel/hiperdispatch.c +++ b/arch/s390/kernel/hiperdispatch.c @@ -45,6 +45,7 @@ * therefore delaying the throughput loss caused by using SMP threads. */ +#include <linux/cpufeature.h> #include <linux/cpumask.h> #include <linux/debugfs.h> #include <linux/device.h> @@ -87,7 +88,7 @@ static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn); static int hd_set_hiperdispatch_mode(int enable) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) enable = 0; if (hd_enabled == enable) return 0; @@ -292,7 +293,7 @@ static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write, return 0; } -static struct ctl_table hiperdispatch_ctl_table[] = { +static const struct ctl_table hiperdispatch_ctl_table[] = { { .procname = "hiperdispatch", .mode = 0644, diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 7d12a1305fc9..961a3d60a4dd 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -22,6 +22,7 @@ #include <linux/debug_locks.h> #include <linux/vmalloc.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ipl.h> #include <asm/smp.h> @@ -185,7 +186,7 @@ static inline int __diag308(unsigned long subcode, unsigned long addr) r1.even = addr; r1.odd = 0; - asm volatile( + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -269,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ { \ if (len >= sizeof(_value)) \ return -E2BIG; \ - len = strscpy(_value, buf, sizeof(_value)); \ + len = strscpy(_value, buf); \ if ((ssize_t)len < 0) \ return len; \ strim(_value); \ @@ -280,58 +281,58 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) -#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t size = _ipl_block.scp_data_len; \ - void *scp_data = _ipl_block.scp_data; \ - \ - return memory_read_from_buffer(buf, count, &off, \ - scp_data, size); \ +#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ +static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t size = _ipl_block.scp_data_len; \ + void *scp_data = _ipl_block.scp_data; \ + \ + return memory_read_from_buffer(buf, count, &off, \ + scp_data, size); \ } #define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t scpdata_len = count; \ - size_t padding; \ - \ - if (off) \ - return -EINVAL; \ - \ - memcpy(_ipl_block.scp_data, buf, count); \ - if (scpdata_len % 8) { \ - padding = 8 - (scpdata_len % 8); \ - memset(_ipl_block.scp_data + scpdata_len, \ - 0, padding); \ - scpdata_len += padding; \ - } \ - \ - _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ - _ipl_block.len = _ipl_bp0_len + scpdata_len; \ - _ipl_block.scp_data_len = scpdata_len; \ - \ - return count; \ +static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t scpdata_len = count; \ + size_t padding; \ + \ + if (off) \ + return -EINVAL; \ + \ + memcpy(_ipl_block.scp_data, buf, count); \ + if (scpdata_len % 8) { \ + padding = 8 - (scpdata_len % 8); \ + memset(_ipl_block.scp_data + scpdata_len, \ + 0, padding); \ + scpdata_len += padding; \ + } \ + \ + _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ + _ipl_block.len = _ipl_bp0_len + scpdata_len; \ + _ipl_block.scp_data_len = scpdata_len; \ + \ + return count; \ } #define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \ NULL, _size) #define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \ sys_##_prefix##_scp_data_store, _size) @@ -434,19 +435,19 @@ static struct kobj_attribute sys_ipl_device_attr = __ATTR(device, 0444, sys_ipl_device_show, NULL); static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return memory_read_from_buffer(buf, count, &off, &ipl_block, ipl_block.hdr.len); } -static struct bin_attribute sys_ipl_parameter_attr = +static const struct bin_attribute sys_ipl_parameter_attr = __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL, PAGE_SIZE); DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE); -static struct bin_attribute *ipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const ipl_fcp_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_fcp_scp_data_attr, NULL, @@ -454,7 +455,7 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE); -static struct bin_attribute *ipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const ipl_nvme_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_nvme_scp_data_attr, NULL, @@ -462,7 +463,7 @@ static struct bin_attribute *ipl_nvme_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE); -static struct bin_attribute *ipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const ipl_eckd_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_eckd_scp_data_attr, NULL, @@ -593,7 +594,7 @@ static struct attribute *ipl_fcp_attrs[] = { NULL, }; -static struct attribute_group ipl_fcp_attr_group = { +static const struct attribute_group ipl_fcp_attr_group = { .attrs = ipl_fcp_attrs, .bin_attrs = ipl_fcp_bin_attrs, }; @@ -607,7 +608,7 @@ static struct attribute *ipl_nvme_attrs[] = { NULL, }; -static struct attribute_group ipl_nvme_attr_group = { +static const struct attribute_group ipl_nvme_attr_group = { .attrs = ipl_nvme_attrs, .bin_attrs = ipl_nvme_bin_attrs, }; @@ -620,7 +621,7 @@ static struct attribute *ipl_eckd_attrs[] = { NULL, }; -static struct attribute_group ipl_eckd_attr_group = { +static const struct attribute_group ipl_eckd_attr_group = { .attrs = ipl_eckd_attrs, .bin_attrs = ipl_eckd_bin_attrs, }; @@ -640,11 +641,11 @@ static struct attribute *ipl_ccw_attrs_lpar[] = { NULL, }; -static struct attribute_group ipl_ccw_attr_group_vm = { +static const struct attribute_group ipl_ccw_attr_group_vm = { .attrs = ipl_ccw_attrs_vm, }; -static struct attribute_group ipl_ccw_attr_group_lpar = { +static const struct attribute_group ipl_ccw_attr_group_lpar = { .attrs = ipl_ccw_attrs_lpar }; @@ -655,7 +656,7 @@ static struct attribute *ipl_common_attrs[] = { NULL, }; -static struct attribute_group ipl_common_attr_group = { +static const struct attribute_group ipl_common_attr_group = { .attrs = ipl_common_attrs, }; @@ -685,7 +686,7 @@ static int __init ipl_init(void) goto out; switch (ipl_info.type) { case IPL_TYPE_CCW: - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group_vm); else @@ -808,7 +809,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr, IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const reipl_fcp_bin_attrs[] = { &sys_reipl_fcp_scp_data_attr, NULL, }; @@ -917,7 +918,7 @@ static struct attribute *reipl_fcp_attrs[] = { NULL, }; -static struct attribute_group reipl_fcp_attr_group = { +static const struct attribute_group reipl_fcp_attr_group = { .attrs = reipl_fcp_attrs, .bin_attrs = reipl_fcp_bin_attrs, }; @@ -932,7 +933,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr, IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const reipl_nvme_bin_attrs[] = { &sys_reipl_nvme_scp_data_attr, NULL, }; @@ -955,7 +956,7 @@ static struct attribute *reipl_nvme_attrs[] = { NULL, }; -static struct attribute_group reipl_nvme_attr_group = { +static const struct attribute_group reipl_nvme_attr_group = { .attrs = reipl_nvme_attrs, .bin_attrs = reipl_nvme_bin_attrs }; @@ -1031,7 +1032,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr, IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const reipl_eckd_bin_attrs[] = { &sys_reipl_eckd_scp_data_attr, NULL, }; @@ -1048,7 +1049,7 @@ static struct attribute *reipl_eckd_attrs[] = { NULL, }; -static struct attribute_group reipl_eckd_attr_group = { +static const struct attribute_group reipl_eckd_attr_group = { .attrs = reipl_eckd_attrs, .bin_attrs = reipl_eckd_bin_attrs }; @@ -1272,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM; /* VM PARM */ - if (MACHINE_IS_VM && ipl_block_valid && + if (machine_is_vm() && ipl_block_valid && (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) { ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; @@ -1286,7 +1287,7 @@ static int __init reipl_nss_init(void) { int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); @@ -1311,8 +1312,8 @@ static int __init reipl_ccw_init(void) return -ENOMEM; rc = sysfs_create_group(&reipl_kset->kobj, - MACHINE_IS_VM ? &reipl_ccw_attr_group_vm - : &reipl_ccw_attr_group_lpar); + machine_is_vm() ? &reipl_ccw_attr_group_vm + : &reipl_ccw_attr_group_lpar); if (rc) return rc; @@ -1587,12 +1588,12 @@ static struct attribute *dump_fcp_attrs[] = { NULL, }; -static struct bin_attribute *dump_fcp_bin_attrs[] = { +static const struct bin_attribute *const dump_fcp_bin_attrs[] = { &sys_dump_fcp_scp_data_attr, NULL, }; -static struct attribute_group dump_fcp_attr_group = { +static const struct attribute_group dump_fcp_attr_group = { .name = IPL_FCP_STR, .attrs = dump_fcp_attrs, .bin_attrs = dump_fcp_bin_attrs, @@ -1621,12 +1622,12 @@ static struct attribute *dump_nvme_attrs[] = { NULL, }; -static struct bin_attribute *dump_nvme_bin_attrs[] = { +static const struct bin_attribute *const dump_nvme_bin_attrs[] = { &sys_dump_nvme_scp_data_attr, NULL, }; -static struct attribute_group dump_nvme_attr_group = { +static const struct attribute_group dump_nvme_attr_group = { .name = IPL_NVME_STR, .attrs = dump_nvme_attrs, .bin_attrs = dump_nvme_bin_attrs, @@ -1655,12 +1656,12 @@ static struct attribute *dump_eckd_attrs[] = { NULL, }; -static struct bin_attribute *dump_eckd_bin_attrs[] = { +static const struct bin_attribute *const dump_eckd_bin_attrs[] = { &sys_dump_eckd_scp_data_attr, NULL, }; -static struct attribute_group dump_eckd_attr_group = { +static const struct attribute_group dump_eckd_attr_group = { .name = IPL_ECKD_STR, .attrs = dump_eckd_attrs, .bin_attrs = dump_eckd_bin_attrs, @@ -1987,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger) static int vmcmd_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); if (!vmcmd_kset) @@ -2248,26 +2249,28 @@ static int __init s390_ipl_init(void) __initcall(s390_ipl_init); -static void __init strncpy_skip_quote(char *dst, char *src, int n) +static void __init strscpy_skip_quote(char *dst, char *src, int n) { int sx, dx; - dx = 0; - for (sx = 0; src[sx] != 0; sx++) { + if (!n) + return; + for (sx = 0, dx = 0; src[sx]; sx++) { if (src[sx] == '"') continue; - dst[dx++] = src[sx]; - if (dx >= n) + dst[dx] = src[sx]; + if (dx + 1 == n) break; + dx++; } + dst[dx] = '\0'; } static int __init vmcmd_on_reboot_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE); - vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot)); on_reboot_trigger.action = &vmcmd_action; return 1; } @@ -2275,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup); static int __init vmcmd_on_panic_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE); - vmcmd_on_panic[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic)); on_panic_trigger.action = &vmcmd_action; return 1; } @@ -2286,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup); static int __init vmcmd_on_halt_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE); - vmcmd_on_halt[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt)); on_halt_trigger.action = &vmcmd_action; return 1; } @@ -2297,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup); static int __init vmcmd_on_poff_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_poff, str, VMCMD_MAX_SIZE); - vmcmd_on_poff[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff)); on_poff_trigger.action = &vmcmd_action; return 1; } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ef7be599e1f7..bdf9c7cb5685 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> @@ -25,6 +26,7 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/stacktrace.h> @@ -84,7 +86,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, @@ -149,7 +150,7 @@ void noinstr do_io_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } @@ -164,7 +165,7 @@ void noinstr do_io_irq(struct pt_regs *regs) do_irq_async(regs, THIN_INTERRUPT); else do_irq_async(regs, IO_INTERRUPT); - } while (MACHINE_IS_LPAR && irq_pending(regs)); + } while (machine_is_lpar() && irq_pending(regs)); irq_exit_rcu(); @@ -185,7 +186,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 8b80ea57125f..c450120b4474 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -13,6 +13,7 @@ #include <linux/ptrace.h> #include <linux/preempt.h> #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/kdebug.h> #include <linux/uaccess.h> #include <linux/extable.h> @@ -153,7 +154,7 @@ void arch_arm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { @@ -166,7 +167,7 @@ void arch_disarm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 6652e54cf3db..6d1ffca5f798 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -166,7 +166,7 @@ static struct timer_list lgr_timer; */ static void lgr_timer_set(void) { - mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC)); + mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS)); } /* diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 8f681ccfb83a..baeb3dcfc1c8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,7 +13,9 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/debug_locks.h> +#include <linux/cpufeature.h> #include <asm/guarded_storage.h> +#include <asm/machine.h> #include <asm/pfault.h> #include <asm/cio.h> #include <asm/fpu.h> @@ -94,7 +96,7 @@ static noinline void __machine_kdump(void *image) mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK); if (cpu_has_vx()) save_vx_regs((__vector128 *) mcesa->vector_save_area); - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { local_ctl_store(2, &cr2_old.reg); cr2_new = cr2_old; cr2_new.gse = 1; @@ -178,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void) static int machine_kexec_prepare_kdump(void) { #ifdef CONFIG_CRASH_DUMP - if (MACHINE_IS_VM) + if (machine_is_vm()) diag10_range(PFN_DOWN(crashk_res.start), PFN_DOWN(crashk_res.end - crashk_res.start + 1)); return 0; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 7e267ef63a7f..1fec370fecf4 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -104,17 +104,6 @@ SYM_CODE_START(ftrace_common) lgr %r3,%r14 la %r5,STACK_FREGS(%r15) BASR_EX %r14,%r1 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -# The j instruction gets runtime patched to a nop instruction. -# See ftrace_enable_ftrace_graph_caller. -SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL) - j .Lftrace_graph_caller_end - lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) - lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) -.Lftrace_graph_caller_end: -#endif lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) #ifdef MARCH_HAS_Z196_FEATURES ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) @@ -134,14 +123,14 @@ SYM_CODE_END(ftrace_common) SYM_FUNC_START(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 - aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE) + # allocate ftrace_regs and stack frame for ftrace_return_to_handler + aghi %r15,-STACK_FRAME_SIZE_FREGS stg %r1,__SF_BACKCHAIN(%r15) - la %r3,STACK_FRAME_OVERHEAD(%r15) - stg %r1,__FGRAPH_RET_FP(%r3) - stg %r2,__FGRAPH_RET_GPR2(%r3) - lgr %r2,%r3 + stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) + stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15) + la %r2,STACK_FRAME_OVERHEAD(%r15) brasl %r14,ftrace_return_to_handler - aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE + aghi %r15,STACK_FRAME_SIZE_FREGS lgr %r14,%r2 lmg %r2,%r5,32(%r15) BR_EX %r14 diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index fbd218b6fc8e..11f33243a23f 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -9,6 +9,8 @@ */ #include <linux/kernel_stat.h> +#include <linux/utsname.h> +#include <linux/cpufeature.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/entry-common.h> @@ -20,7 +22,6 @@ #include <linux/module.h> #include <linux/sched/signal.h> #include <linux/kvm_host.h> -#include <linux/export.h> #include <asm/lowcore.h> #include <asm/ctlreg.h> #include <asm/fpu.h> @@ -45,7 +46,7 @@ static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); static inline int nmi_needs_mcesa(void) { - return cpu_has_vx() || MACHINE_HAS_GS; + return cpu_has_vx() || cpu_has_gs(); } /* @@ -61,7 +62,7 @@ void __init nmi_alloc_mcesa_early(u64 *mcesad) if (!nmi_needs_mcesa()) return; *mcesad = __pa(&boot_mcesa); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); } @@ -73,14 +74,14 @@ int nmi_alloc_mcesa(u64 *mcesad) *mcesad = 0; if (!nmi_needs_mcesa()) return 0; - size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; + size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; origin = kmalloc(size, GFP_KERNEL); if (!origin) return -ENOMEM; /* The pointer is stored with mcesa_bits ORed in */ kmemleak_not_leak(origin); *mcesad = __pa(origin); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); return 0; } @@ -115,18 +116,82 @@ static __always_inline char *u64_to_hex(char *dest, u64 val) return dest; } +static notrace void nmi_print_info(void) +{ + struct lowcore *lc = get_lowcore(); + char message[100]; + char *ptr; + int i; + + ptr = nmi_puts(message, "Unrecoverable machine check, code: "); + ptr = u64_to_hex(ptr, lc->mcck_interruption_code); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, init_utsname()->release); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, arch_hw_string); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, "PSW: "); + ptr = u64_to_hex(ptr, lc->mcck_old_psw.mask); + ptr = nmi_puts(ptr, " "); + ptr = u64_to_hex(ptr, lc->mcck_old_psw.addr); + ptr = nmi_puts(ptr, " PFX: "); + ptr = u64_to_hex(ptr, (u64)get_lowcore()); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, "LBA: "); + ptr = u64_to_hex(ptr, lc->last_break_save_area); + ptr = nmi_puts(ptr, " EDC: "); + ptr = u64_to_hex(ptr, lc->external_damage_code); + ptr = nmi_puts(ptr, " FSA: "); + ptr = u64_to_hex(ptr, lc->failing_storage_address); + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + + ptr = nmi_puts(message, "CRS:\n"); + sclp_emergency_printk(message); + ptr = message; + for (i = 0; i < 16; i++) { + ptr = u64_to_hex(ptr, lc->cregs_save_area[i].val); + ptr = nmi_puts(ptr, " "); + if ((i + 1) % 4 == 0) { + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + ptr = message; + } + } + + ptr = nmi_puts(message, "GPRS:\n"); + sclp_emergency_printk(message); + ptr = message; + for (i = 0; i < 16; i++) { + ptr = u64_to_hex(ptr, lc->gpregs_save_area[i]); + ptr = nmi_puts(ptr, " "); + if ((i + 1) % 4 == 0) { + ptr = nmi_puts(ptr, "\n"); + sclp_emergency_printk(message); + ptr = message; + } + } + + ptr = nmi_puts(message, "System stopped\n"); + sclp_emergency_printk(message); +} + static notrace void s390_handle_damage(void) { struct lowcore *lc = get_lowcore(); union ctlreg0 cr0, cr0_new; - char message[100]; psw_t psw_save; - char *ptr; smp_emergency_stop(); diag_amode31_ops.diag308_reset(); - ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x"); - u64_to_hex(ptr, lc->mcck_interruption_code); /* * Disable low address protection and make machine check new PSW a @@ -140,7 +205,7 @@ static notrace void s390_handle_damage(void) psw_bits(lc->mcck_new_psw).io = 0; psw_bits(lc->mcck_new_psw).ext = 0; psw_bits(lc->mcck_new_psw).wait = 1; - sclp_emergency_printk(message); + nmi_print_info(); /* * Restore machine check new PSW and control register 0 to original diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c index ddc1448ea2e1..2fc40f97c0ad 100644 --- a/arch/s390/kernel/numa.c +++ b/arch/s390/kernel/numa.c @@ -21,12 +21,8 @@ void __init numa_setup(void) nodes_clear(node_possible_map); node_set(0, node_possible_map); node_set_online(0); - for (nid = 0; nid < MAX_NUMNODES; nid++) { - NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); - if (!NODE_DATA(nid)) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(pg_data_t), 8); - } + for (nid = 0; nid < MAX_NUMNODES; nid++) + NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8); NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; NODE_DATA(0)->node_id = 0; } diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 29080d6d5d8d..c2a468986212 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -18,6 +18,7 @@ #include <asm/physmem_info.h> #include <asm/maccess.h> #include <asm/asm-offsets.h> +#include <asm/sections.h> #include <asm/ipl.h> /* diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index b0bc68da6a11..4d09954ebf49 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -14,7 +14,6 @@ #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/init.h> -#include <linux/export.h> #include <linux/miscdevice.h> #include <linux/perf_event.h> @@ -442,7 +441,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) ctrset_size = 48; else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) ctrset_size = 128; - else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) + else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8) ctrset_size = 160; break; case CPUMF_CTR_SET_MT_DIAG: @@ -858,18 +857,13 @@ static int cpumf_pmu_event_type(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; - int err; + int err = -ENOENT; if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); - else - return -ENOENT; - - if (unlikely(err) && event->destroy) - event->destroy(event); return err; } @@ -981,12 +975,10 @@ static int cfdiag_push_sample(struct perf_event *event, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = cpuhw->usedss; raw.frag.data = cpuhw->stop; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); - if (overflow) - event->pmu->stop(event, 0); perf_event_update_userpage(event); return overflow; @@ -1819,8 +1811,6 @@ static int cfdiag_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); - if (unlikely(err)) - event->destroy(event); out: return err; } diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index e4a6bfc91080..7ace1f9e4ccf 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); - CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); @@ -291,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080); @@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb); +CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc); +CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd); +CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce); +CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112); +CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), @@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { +static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), @@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z17, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION), + CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z17, SORTL), + CPUMF_EVENT_PTR(cf_z17, DFLT_CC), + CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF), + CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -859,7 +1016,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void) if (ci.csvn >= 1 && ci.csvn <= 5) csvn = cpumcf_svn_12345_pmu_event_attr; else if (ci.csvn >= 6) - csvn = cpumcf_svn_67_pmu_event_attr; + csvn = cpumcf_svn_678_pmu_event_attr; /* Determine model-specific counter set(s) */ get_cpu_id(&cpu_id); @@ -892,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x3932: model = cpumcf_z16_pmu_event_attr; break; + case 0x9175: + case 0x9176: + model = cpumcf_z17_pmu_event_attr; + break; default: model = none; break; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1e99514fb7ae..f432869f8921 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -14,7 +14,6 @@ #include <linux/percpu.h> #include <linux/pid.h> #include <linux/notifier.h> -#include <linux/export.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/moduleparam.h> @@ -885,9 +884,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event) event->attr.exclude_idle = 0; err = __hw_perf_event_init(event); - if (unlikely(err)) - if (event->destroy) - event->destroy(event); return err; } @@ -981,7 +977,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } -/* perf_exclude_event() - Filter event +/* perf_event_exclude() - Filter event * @event: The perf event * @regs: pt_regs structure * @sde_regs: Sample-data-entry (sde) regs structure @@ -990,7 +986,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) * * Return non-zero if the event shall be excluded. */ -static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, +static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs, struct perf_sf_sde_regs *sde_regs) { if (event->attr.exclude_user && user_mode(regs)) @@ -1073,12 +1069,9 @@ static int perf_push_sample(struct perf_event *event, data.tid_entry.pid = basic->hpp & LPP_PID_MASK; overflow = 0; - if (perf_exclude_event(event, ®s, sde_regs)) + if (perf_event_exclude(event, ®s, sde_regs)) goto out; - if (perf_event_overflow(event, &data, ®s)) { - overflow = 1; - event->pmu->stop(event, 0); - } + overflow = perf_event_overflow(event, &data, ®s); perf_event_update_userpage(event); out: return overflow; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 2b9611c4718e..91b8716c883a 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -12,7 +12,6 @@ #include <linux/perf_event.h> #include <linux/kvm_host.h> #include <linux/percpu.h> -#include <linux/export.h> #include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/uaccess.h> diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index fa7325454266..f373a1009c45 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -13,7 +13,6 @@ #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/init.h> -#include <linux/export.h> #include <linux/io.h> #include <linux/perf_event.h> #include <asm/ctlreg.h> @@ -478,7 +477,7 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); @@ -518,7 +517,8 @@ static void paicrypt_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ -static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. @@ -695,7 +695,7 @@ static const char * const paicrypt_ctrnames[] = { [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256", [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128", [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192", - [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A", + [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256", [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128", [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256", [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128", diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 7f462bef1fc0..d827473e7f87 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -14,7 +14,6 @@ #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/init.h> -#include <linux/export.h> #include <linux/io.h> #include <linux/perf_event.h> #include <asm/ctlreg.h> @@ -503,7 +502,7 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); @@ -542,7 +541,8 @@ static void paiext_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ -static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9637aee43c40..f55f09cda6f8 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -27,7 +27,6 @@ #include <linux/compat.h> #include <linux/kprobes.h> #include <linux/random.h> -#include <linux/export.h> #include <linux/init_task.h> #include <linux/entry-common.h> #include <linux/io.h> diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5ce9a795a0fe..11f70c1e2797 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/random.h> @@ -19,6 +20,7 @@ #include <linux/cpu.h> #include <linux/smp.h> #include <asm/text-patching.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/facility.h> #include <asm/elf.h> @@ -72,7 +74,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) this_cpu = smp_processor_id(); if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { __this_cpu_write(cpu_relax_retry, 0); - cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + cpu = cpumask_next_wrap(this_cpu, cpumask); if (cpu >= nr_cpu_ids) return; if (arch_vcpu_is_preempted(cpu)) @@ -209,14 +211,14 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_DFP; /* huge page support */ - if (MACHINE_HAS_EDAT1) + if (cpu_has_edat1()) elf_hwcap |= HWCAP_HPAGE; /* 64-bit register support for 31-bit processes */ elf_hwcap |= HWCAP_HIGH_GPRS; /* transactional execution */ - if (MACHINE_HAS_TE) + if (machine_has_tx()) elf_hwcap |= HWCAP_TE; /* vector */ @@ -244,10 +246,10 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_NNPA; /* guarded storage */ - if (MACHINE_HAS_GS) + if (cpu_has_gs()) elf_hwcap |= HWCAP_GS; - if (MACHINE_HAS_PCI_MIO) + if (test_machine_feature(MFEATURE_PCI_MIO)) elf_hwcap |= HWCAP_PCI_MIO; /* virtualization support */ @@ -266,31 +268,35 @@ static int __init setup_elf_platform(void) add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { default: /* Use "z10" as default. */ - strcpy(elf_platform, "z10"); + strscpy(elf_platform, "z10"); break; case 0x2817: case 0x2818: - strcpy(elf_platform, "z196"); + strscpy(elf_platform, "z196"); break; case 0x2827: case 0x2828: - strcpy(elf_platform, "zEC12"); + strscpy(elf_platform, "zEC12"); break; case 0x2964: case 0x2965: - strcpy(elf_platform, "z13"); + strscpy(elf_platform, "z13"); break; case 0x3906: case 0x3907: - strcpy(elf_platform, "z14"); + strscpy(elf_platform, "z14"); break; case 0x8561: case 0x8562: - strcpy(elf_platform, "z15"); + strscpy(elf_platform, "z15"); break; case 0x3931: case 0x3932: - strcpy(elf_platform, "z16"); + strscpy(elf_platform, "z16"); + break; + case 0x9175: + case 0x9176: + strscpy(elf_platform, "z17"); break; } return 0; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1cfed8b710b8..494216c4b4f3 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -7,10 +7,10 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#include "asm/ptrace.h" #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> @@ -31,6 +31,9 @@ #include <asm/unistd.h> #include <asm/runtime_instr.h> #include <asm/facility.h> +#include <asm/machine.h> +#include <asm/ptrace.h> +#include <asm/rwonce.h> #include <asm/fpu.h> #include "entry.h" @@ -60,7 +63,7 @@ void update_cr_regs(struct task_struct *task) cr0_new = cr0_old; cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { + if (machine_has_tx()) { /* Set or clear transaction execution TXC bit 8. */ cr0_new.tcx = 1; if (task->thread.per_flags & PER_FLAG_NO_TE) @@ -75,7 +78,7 @@ void update_cr_regs(struct task_struct *task) } } /* Take care of enable/disable of guarded storage. */ - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { cr2_new.gse = 0; if (task->thread.gs_cb) cr2_new.gse = 1; @@ -470,18 +473,18 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_GET_LAST_BREAK: return put_user(child->thread.last_break, (unsigned long __user *)data); case PTRACE_ENABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags &= ~PER_FLAG_NO_TE; return 0; case PTRACE_DISABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags |= PER_FLAG_NO_TE; child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; return 0; case PTRACE_TE_ABORT_RAND: - if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE)) return -EIO; switch (data) { case 0UL: @@ -1033,7 +1036,7 @@ static int s390_gs_cb_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1050,7 +1053,7 @@ static int s390_gs_cb_set(struct task_struct *target, struct gs_cb gs_cb = { }, *data = NULL; int rc; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!target->thread.gs_cb) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1087,7 +1090,7 @@ static int s390_gs_bc_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1101,7 +1104,7 @@ static int s390_gs_bc_set(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1206,7 +1209,7 @@ static int s390_runtime_instr_set(struct task_struct *target, static const struct user_regset s390_regsets[] = { { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(s390_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), @@ -1214,7 +1217,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_regs_set, }, { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(s390_fp_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), @@ -1222,7 +1225,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_fpregs_set, }, { - .core_note_type = NT_S390_SYSTEM_CALL, + USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL), .n = 1, .size = sizeof(unsigned int), .align = sizeof(unsigned int), @@ -1230,7 +1233,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_system_call_set, }, { - .core_note_type = NT_S390_LAST_BREAK, + USER_REGSET_NOTE_TYPE(S390_LAST_BREAK), .n = 1, .size = sizeof(long), .align = sizeof(long), @@ -1238,7 +1241,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_last_break_set, }, { - .core_note_type = NT_S390_TDB, + USER_REGSET_NOTE_TYPE(S390_TDB), .n = 1, .size = 256, .align = 1, @@ -1246,7 +1249,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_tdb_set, }, { - .core_note_type = NT_S390_VXRS_LOW, + USER_REGSET_NOTE_TYPE(S390_VXRS_LOW), .n = __NUM_VXRS_LOW, .size = sizeof(__u64), .align = sizeof(__u64), @@ -1254,7 +1257,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_vxrs_low_set, }, { - .core_note_type = NT_S390_VXRS_HIGH, + USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH), .n = __NUM_VXRS_HIGH, .size = sizeof(__vector128), .align = sizeof(__vector128), @@ -1262,7 +1265,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_vxrs_high_set, }, { - .core_note_type = NT_S390_GS_CB, + USER_REGSET_NOTE_TYPE(S390_GS_CB), .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), @@ -1270,7 +1273,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_gs_cb_set, }, { - .core_note_type = NT_S390_GS_BC, + USER_REGSET_NOTE_TYPE(S390_GS_BC), .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), @@ -1278,7 +1281,7 @@ static const struct user_regset s390_regsets[] = { .set = s390_gs_bc_set, }, { - .core_note_type = NT_S390_RI_CB, + USER_REGSET_NOTE_TYPE(S390_RI_CB), .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), @@ -1410,7 +1413,7 @@ static int s390_compat_last_break_set(struct task_struct *target, static const struct user_regset s390_compat_regsets[] = { { - .core_note_type = NT_PRSTATUS, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), @@ -1418,7 +1421,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_compat_regs_set, }, { - .core_note_type = NT_PRFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), @@ -1426,7 +1429,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_fpregs_set, }, { - .core_note_type = NT_S390_SYSTEM_CALL, + USER_REGSET_NOTE_TYPE(S390_SYSTEM_CALL), .n = 1, .size = sizeof(compat_uint_t), .align = sizeof(compat_uint_t), @@ -1434,7 +1437,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_system_call_set, }, { - .core_note_type = NT_S390_LAST_BREAK, + USER_REGSET_NOTE_TYPE(S390_LAST_BREAK), .n = 1, .size = sizeof(long), .align = sizeof(long), @@ -1442,7 +1445,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_compat_last_break_set, }, { - .core_note_type = NT_S390_TDB, + USER_REGSET_NOTE_TYPE(S390_TDB), .n = 1, .size = 256, .align = 1, @@ -1450,7 +1453,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_tdb_set, }, { - .core_note_type = NT_S390_VXRS_LOW, + USER_REGSET_NOTE_TYPE(S390_VXRS_LOW), .n = __NUM_VXRS_LOW, .size = sizeof(__u64), .align = sizeof(__u64), @@ -1458,7 +1461,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_vxrs_low_set, }, { - .core_note_type = NT_S390_VXRS_HIGH, + USER_REGSET_NOTE_TYPE(S390_VXRS_HIGH), .n = __NUM_VXRS_HIGH, .size = sizeof(__vector128), .align = sizeof(__vector128), @@ -1466,7 +1469,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_vxrs_high_set, }, { - .core_note_type = NT_S390_HIGH_GPRS, + USER_REGSET_NOTE_TYPE(S390_HIGH_GPRS), .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), @@ -1474,7 +1477,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_compat_regs_high_set, }, { - .core_note_type = NT_S390_GS_CB, + USER_REGSET_NOTE_TYPE(S390_GS_CB), .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), @@ -1482,7 +1485,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_gs_cb_set, }, { - .core_note_type = NT_S390_GS_BC, + USER_REGSET_NOTE_TYPE(S390_GS_BC), .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), @@ -1490,7 +1493,7 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_gs_bc_set, }, { - .core_note_type = NT_S390_RI_CB, + USER_REGSET_NOTE_TYPE(S390_RI_CB), .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), @@ -1521,13 +1524,6 @@ static const char *gpr_names[NUM_GPRS] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", }; -unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) -{ - if (offset >= NUM_GPRS) - return 0; - return regs->gprs[offset]; -} - int regs_query_register_offset(const char *name) { unsigned long offset; @@ -1547,29 +1543,3 @@ const char *regs_query_register_name(unsigned int offset) return NULL; return gpr_names[offset]; } - -static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) -{ - unsigned long ksp = kernel_stack_pointer(regs); - - return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); -} - -/** - * regs_get_kernel_stack_nth() - get Nth entry of the stack - * @regs:pt_regs which contains kernel stack pointer. - * @n:stack entry number. - * - * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specifined by @regs. If the @n th entry is NOT in the kernel stack, - * this returns 0. - */ -unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) -{ - unsigned long addr; - - addr = kernel_stack_pointer(regs) + n * sizeof(long); - if (!regs_within_kernel_stack(regs, addr)) - return 0; - return *(unsigned long *)addr; -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a3fea683b227..7b529868789f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -54,6 +54,7 @@ #include <asm/archrandom.h> #include <asm/boot_data.h> +#include <asm/machine.h> #include <asm/ipl.h> #include <asm/facility.h> #include <asm/smp.h> @@ -157,25 +158,29 @@ u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); struct oldmem_data __bootdata_preserved(oldmem_data); -unsigned long VMALLOC_START; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); -unsigned long VMALLOC_END; +unsigned long __bootdata_preserved(VMALLOC_END); EXPORT_SYMBOL(VMALLOC_END); -struct page *vmemmap; +struct page *__bootdata_preserved(vmemmap); EXPORT_SYMBOL(vmemmap); -unsigned long vmemmap_size; +unsigned long __bootdata_preserved(vmemmap_size); -unsigned long MODULES_VADDR; -unsigned long MODULES_END; +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); /* An array with a pointer to the lowcore of every CPU. */ struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); -DEFINE_STATIC_KEY_FALSE(cpu_has_bear); - /* * The Write Back bit position in the physaddr is given by the SLPC PCI. * Leaving the mask zero always uses write through which is safe @@ -245,7 +250,7 @@ static void __init conmode_default(void) char query_buffer[1024]; char *ptr; - if (MACHINE_IS_VM) { + if (machine_is_vm()) { cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); @@ -283,7 +288,7 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } - } else if (MACHINE_IS_KVM) { + } else if (machine_is_kvm()) { if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) SET_CONSOLE_VT220; else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) @@ -359,36 +364,24 @@ void *restart_stack; unsigned long stack_alloc(void) { -#ifdef CONFIG_VMAP_STACK - void *ret; + void *stack; - ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, - NUMA_NO_NODE, __builtin_return_address(0)); - kmemleak_not_leak(ret); - return (unsigned long)ret; -#else - return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); -#endif + stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(stack); + return (unsigned long)stack; } void stack_free(unsigned long stack) { -#ifdef CONFIG_VMAP_STACK - vfree((void *) stack); -#else - free_pages(stack, THREAD_SIZE_ORDER); -#endif + vfree((void *)stack); } static unsigned long __init stack_alloc_early(void) { unsigned long stack; - stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); - if (!stack) { - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, THREAD_SIZE, THREAD_SIZE); - } + stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE); return stack; } @@ -421,7 +414,6 @@ static void __init setup_lowcore(void) lc->clock_comparator = clock_comparator_max; lc->current_task = (unsigned long)&init_task; lc->lpp = LPP_MAGIC; - lc->machine_flags = get_lowcore()->machine_flags; lc->preempt_count = get_lowcore()->preempt_count; nmi_alloc_mcesa_early(&lc->mcesad); lc->sys_enter_timer = get_lowcore()->sys_enter_timer; @@ -512,10 +504,7 @@ static void __init setup_resources(void) bss_resource.end = __pa_symbol(__bss_stop) - 1; for_each_mem_range(i, &start, &end) { - res = memblock_alloc(sizeof(*res), 8); - if (!res) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*res), 8); + res = memblock_alloc_or_panic(sizeof(*res), 8); res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; @@ -534,10 +523,7 @@ static void __init setup_resources(void) std_res->start > res->end) continue; if (std_res->end > res->end) { - sub_res = memblock_alloc(sizeof(*sub_res), 8); - if (!sub_res) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*sub_res), 8); + sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8); *sub_res = *std_res; sub_res->end = res->end; std_res->start = res->end + 1; @@ -619,7 +605,7 @@ static void __init reserve_crashkernel(void) int rc; rc = parse_crashkernel(boot_command_line, ident_map_size, - &crash_size, &crash_base, NULL, NULL); + &crash_size, &crash_base, NULL, NULL, NULL); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); @@ -664,7 +650,7 @@ static void __init reserve_crashkernel(void) return; } - if (!oldmem_data.start && MACHINE_IS_VM) + if (!oldmem_data.start && machine_is_vm()) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; @@ -704,7 +690,7 @@ static void __init reserve_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_reserve(addr, size); } @@ -712,7 +698,7 @@ static void __init free_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_phys_free(addr, size); } @@ -733,6 +719,11 @@ static void __init memblock_add_physmem_info(void) memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); } +static void __init setup_high_memory(void) +{ + high_memory = __va(ident_map_size); +} + /* * Reserve memory used for lowcore. */ @@ -742,7 +733,7 @@ static void __init reserve_lowcore(void) void *lowcore_end = lowcore_start + sizeof(struct lowcore); void *start, *end; - if ((void *)__identity_base < lowcore_end) { + if (absolute_pointer(__identity_base) < lowcore_end) { start = max(lowcore_start, (void *)__identity_base); end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); memblock_reserve(__pa(start), __pa(end)); @@ -824,9 +815,7 @@ static void __init setup_randomness(void) { struct sysinfo_3_2_2 *vmms; - vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!vmms) - panic("Failed to allocate memory for sysinfo structure\n"); + vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free(vmms, PAGE_SIZE); @@ -886,6 +875,23 @@ static void __init log_component_list(void) } /* + * Print avoiding interpretation of % in buf and taking bootdebug option + * into consideration. + */ +static void __init print_rb_entry(const char *buf) +{ + char fmt[] = KERN_SOH "0boot: %s"; + int level = printk_get_level(buf); + + buf = skip_timestamp(printk_skip_level(buf)); + if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf))) + return; + + fmt[1] = level; + printk(fmt, buf); +} + +/* * Setup function called from init/main.c just after the banner * was printed. */ @@ -895,17 +901,20 @@ void __init setup_arch(char **cmdline_p) /* * print what head.S has found out about the machine */ - if (MACHINE_IS_VM) + if (machine_is_vm()) pr_info("Linux is running as a z/VM " "guest operating system in 64-bit mode\n"); - else if (MACHINE_IS_KVM) + else if (machine_is_kvm()) pr_info("Linux is running under KVM in 64-bit mode\n"); - else if (MACHINE_IS_LPAR) + else if (machine_is_lpar()) pr_info("Linux is running natively in 64-bit mode\n"); else pr_info("Linux is running as a guest in 64-bit mode\n"); + /* Print decompressor messages if not already printed */ + if (!boot_earlyprintk) + boot_rb_foreach(print_rb_entry); - if (have_relocated_lowcore()) + if (machine_has_relocated_lowcore()) pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); log_component_list(); @@ -947,6 +956,7 @@ void __init setup_arch(char **cmdline_p) free_physmem_info(); setup_memory_end(); + setup_high_memory(); memblock_dump_all(); setup_memory(); @@ -955,7 +965,7 @@ void __init setup_arch(char **cmdline_p) setup_uv(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); - if (MACHINE_HAS_EDAT2) + if (cpu_has_edat2()) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); reserve_crashkernel(); @@ -975,10 +985,7 @@ void __init setup_arch(char **cmdline_p) numa_setup(); smp_detect_cpus(); topology_init_early(); - - if (test_facility(193)) - static_branch_enable(&cpu_has_bear); - + setup_protection_map(); /* * Create kernel page tables. */ @@ -1006,3 +1013,8 @@ void __init setup_arch(char **cmdline_p) /* Add system specific data to the random pool */ setup_randomness(); } + +void __init arch_cpu_finalize_init(void) +{ + sclp_init(); +} diff --git a/arch/s390/kernel/skey.c b/arch/s390/kernel/skey.c new file mode 100644 index 000000000000..ba049fd103c2 --- /dev/null +++ b/arch/s390/kernel/skey.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <asm/rwonce.h> +#include <asm/page.h> +#include <asm/skey.h> + +int skey_regions_initialized; + +static inline unsigned long load_real_address(unsigned long address) +{ + unsigned long real; + + asm volatile( + " lra %[real],0(%[address])\n" + : [real] "=d" (real) + : [address] "a" (address) + : "cc"); + return real; +} + +/* + * Initialize storage keys of registered memory regions with the + * default key. This is useful for code which is executed with a + * non-default access key. + */ +void __skey_regions_initialize(void) +{ + unsigned long address, real; + struct skey_region *r, *end; + + r = __skey_region_start; + end = __skey_region_end; + while (r < end) { + address = r->start & PAGE_MASK; + do { + real = load_real_address(address); + page_set_storage_key(real, PAGE_DEFAULT_KEY, 1); + address += PAGE_SIZE; + } while (address < r->end); + r++; + } + /* + * Make sure storage keys are initialized before + * skey_regions_initialized is changed. + */ + barrier(); + WRITE_ONCE(skey_regions_initialized, 1); +} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 822d8e6f8717..e88ebe5339fc 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -18,6 +18,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/export.h> @@ -38,6 +39,7 @@ #include <linux/kprobes.h> #include <asm/access-regs.h> #include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/ctlreg.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -97,13 +99,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; static unsigned int smp_max_threads __initdata = -1U; cpumask_t cpu_setup_mask; -static int __init early_nosmt(char *s) -{ - smp_max_threads = 1; - return 0; -} -early_param("nosmt", early_nosmt); - static int __init early_smt(char *s) { get_option(&s, &smp_max_threads); @@ -180,13 +175,10 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) { - int order; - if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) return; - order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; pcpu->ec_clk = get_tod_clock_fast(); - pcpu_sigp_retry(pcpu, order, 0); + pcpu_sigp_retry(pcpu, SIGP_EXTERNAL_CALL, 0); } static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) @@ -263,13 +255,12 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = get_lowcore()->kernel_asce; lc->user_asce = s390_invalid_asce; - lc->machine_flags = get_lowcore()->machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; abs_lc = get_abs_lowcore(); memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area)); put_abs_lowcore(abs_lc); - lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[1] = lc->user_asce; lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); arch_spin_lock_setup(cpu); @@ -416,7 +407,7 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void notrace smp_yield_cpu(int cpu) { - if (!MACHINE_HAS_DIAG9C) + if (!machine_has_diag9c()) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" @@ -439,16 +430,16 @@ void notrace smp_emergency_stop(void) cpumask_copy(&cpumask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &cpumask); - end = get_tod_clock() + (1000000UL << 12); + end = get_tod_clock_monotonic() + (1000000UL << 12); for_each_cpu(cpu, &cpumask) { struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); set_bit(ec_stop_cpu, &pcpu->ec_mask); while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, 0, NULL) == SIGP_CC_BUSY && - get_tod_clock() < end) + get_tod_clock_monotonic() < end) cpu_relax(); } - while (get_tod_clock() < end) { + while (get_tod_clock_monotonic() < end) { for_each_cpu(cpu, &cpumask) if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu))) cpumask_clear_cpu(cpu, &cpumask); @@ -561,10 +552,10 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!cpu_has_vx() && !MACHINE_HAS_GS) + if (!cpu_has_vx() && !cpu_has_gs()) return 0; pa = lc->mcesad & MCESA_ORIGIN_MASK; - if (MACHINE_HAS_GS) + if (cpu_has_gs()) pa |= lc->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) @@ -611,9 +602,7 @@ void __init smp_save_dump_ipl_cpu(void) if (!dump_available()) return; sa = save_area_alloc(true); - regs = memblock_alloc(512, 8); - if (!sa || !regs) - panic("could not allocate memory for boot CPU save area\n"); + regs = memblock_alloc_or_panic(512, 8); copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); save_area_add_regs(sa, regs); memblock_free(regs, 512); @@ -646,8 +635,6 @@ void __init smp_save_dump_secondary_cpus(void) SIGP_CC_NOT_OPERATIONAL) continue; sa = save_area_alloc(false); - if (!sa) - panic("could not allocate memory for save area\n"); __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page)); save_area_add_regs(sa, page); if (cpu_has_vx()) { @@ -792,10 +779,7 @@ void __init smp_detect_cpus(void) u16 address; /* Get CPU information */ - info = memblock_alloc(sizeof(*info), 8); - if (!info) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*info), 8); + info = memblock_alloc_or_panic(sizeof(*info), 8); smp_get_core_info(info, 1); /* Find boot CPU type */ if (sclp.has_core_type) { @@ -814,6 +798,7 @@ void __init smp_detect_cpus(void) mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; pcpu_set_smt(mtid); + cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1); /* Print number of CPUs */ c_cpus = s_cpus = 0; diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 40edfde25f5b..b153a395f46d 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -9,6 +9,7 @@ #include <linux/stacktrace.h> #include <linux/uaccess.h> #include <linux/compat.h> +#include <asm/asm-offsets.h> #include <asm/stacktrace.h> #include <asm/unwind.h> #include <asm/kprobes.h> diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index d40f0b983e74..f4ccdbed4b89 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -5,6 +5,8 @@ * Copyright IBM Corp. 2016 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> */ + +#include <linux/export.h> #include <linux/errno.h> #include <linux/pagemap.h> #include <linux/vmalloc.h> diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 5ec28028315b..4fee74553ca2 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -12,6 +12,7 @@ * platform. */ +#include <linux/cpufeature.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> @@ -81,25 +82,35 @@ SYSCALL_DEFINE0(ni_syscall) return -ENOSYS; } -static void do_syscall(struct pt_regs *regs) +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) { unsigned long nr; + add_random_kstack_offset(); + enter_from_user_mode(regs); + regs->psw = get_lowcore()->svc_old_psw; + regs->int_code = get_lowcore()->svc_int_code; + update_timer_sys(); + if (cpu_has_bear()) + current->thread.last_break = regs->last_break; + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + if (unlikely(per_trap)) + set_thread_flag(TIF_PER_TRAP); + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); nr = regs->int_code & 0xffff; - if (!nr) { + if (likely(!nr)) { nr = regs->gprs[1] & 0xffff; regs->int_code &= ~0xffffUL; regs->int_code |= nr; } - regs->gprs[2] = nr; - if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { regs->psw.addr = current->restart_block.arch_data; current->restart_block.arch_data = 1; } nr = syscall_enter_from_user_mode_work(regs, nr); - /* * In the s390 ptrace ABI, both the syscall number and the return value * use gpr2. However, userspace puts the syscall number either in the @@ -107,37 +118,11 @@ static void do_syscall(struct pt_regs *regs) * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here * and if set, the syscall will be skipped. */ - if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) goto out; regs->gprs[2] = -ENOSYS; - if (likely(nr >= NR_syscalls)) - goto out; - do { + if (likely(nr < NR_syscalls)) regs->gprs[2] = current->thread.sys_call_table[nr](regs); - } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART)); out: - syscall_exit_to_user_mode_work(regs); -} - -void noinstr __do_syscall(struct pt_regs *regs, int per_trap) -{ - add_random_kstack_offset(); - enter_from_user_mode(regs); - regs->psw = get_lowcore()->svc_old_psw; - regs->int_code = get_lowcore()->svc_int_code; - update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) - current->thread.last_break = regs->last_break; - - local_irq_enable(); - regs->orig_gpr2 = regs->gprs[2]; - - if (per_trap) - set_thread_flag(TIF_PER_TRAP); - - regs->flags = 0; - set_pt_regs_flag(regs, PIF_SYSCALL); - do_syscall(regs); - exit_to_user_mode(); + syscall_exit_to_user_mode(regs); } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e9115b4d8b63..8a6744d658db 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -469,3 +469,6 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr sys_file_setattr diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 88055f58fbda..1ea84e942bd4 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -5,6 +5,7 @@ * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/cpufeature.h> #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -15,54 +16,17 @@ #include <linux/export.h> #include <linux/slab.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/debug.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> #include <asm/fpu.h> +#include <asm/asm.h> int topology_max_mnest; -static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) -{ - int r0 = (fc << 28) | sel1; - int rc = 0; - - asm volatile( - " lr 0,%[r0]\n" - " lr 1,%[r1]\n" - " stsi 0(%[sysinfo])\n" - "0: jz 2f\n" - "1: lhi %[rc],%[retval]\n" - "2: lr %[r0],0\n" - EX_TABLE(0b, 1b) - : [r0] "+d" (r0), [rc] "+d" (rc) - : [r1] "d" (sel2), - [sysinfo] "a" (sysinfo), - [retval] "K" (-EOPNOTSUPP) - : "cc", "0", "1", "memory"); - *lvl = ((unsigned int) r0) >> 28; - return rc; -} - -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. - */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) -{ - int lvl, rc; - - rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); - if (rc) - return rc; - return fc ? 0 : lvl; -} -EXPORT_SYMBOL(stsi); - #ifdef CONFIG_PROC_FS static bool convert_ext_name(unsigned char encoding, char *name, size_t len) @@ -154,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) int i; seq_putc(m, '\n'); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; if (stsi(info, 15, 1, topology_max_mnest)) return; @@ -415,7 +379,7 @@ static struct service_level service_level_vm = { static __init int create_proc_service_level(void) { proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); - if (MACHINE_IS_VM) + if (machine_is_vm()) register_service_level(&service_level_vm); return 0; } @@ -559,7 +523,7 @@ static __init int stsi_init_debugfs(void) sf = &stsi_file[i]; debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); } - if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) { char link_to[10]; sprintf(link_to, "15_1_%d", topology_mnest_limit()); diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index c0a70efa2426..26f2981aa09e 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -18,8 +18,7 @@ * affects a few functions that are not performance-relevant. */ .macro BR_EX_AMODE31_r14 - larl %r1,0f - ex 0,0(%r1) + exrl 0,0f j . 0: br %r14 .endm diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 34a65c141ea0..63517b85f4c9 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -54,10 +54,10 @@ #include <asm/cio.h> #include "entry.h" -union tod_clock tod_clock_base __section(".data"); +union tod_clock __bootdata_preserved(tod_clock_base); EXPORT_SYMBOL_GPL(tod_clock_base); -u64 clock_comparator_max = -1ULL; +u64 __bootdata_preserved(clock_comparator_max); EXPORT_SYMBOL_GPL(clock_comparator_max); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -69,8 +69,6 @@ unsigned char ptff_function_mask[16]; static unsigned long lpar_offset; static unsigned long initial_leap_seconds; -static unsigned long tod_steering_end; -static long tod_steering_delta; /* * Get time offsets with PTFF @@ -79,12 +77,8 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; - int cs; - /* Initialize TOD steering parameters */ - tod_steering_end = tod_clock_base.tod; - for (cs = 0; cs < CS_BASES; cs++) - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod; if (!test_facility(28)) return; @@ -228,21 +222,7 @@ void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, static u64 read_tod_clock(struct clocksource *cs) { - unsigned long now, adj; - - preempt_disable(); /* protect from changes to steering parameters */ - now = get_tod_clock(); - adj = tod_steering_end - now; - if (unlikely((s64) adj > 0)) - /* - * manually steer by 1 cycle every 2^16 cycles. This - * corresponds to shifting the tod delta by 15. 1s is - * therefore steered in ~9h. The adjust will decrease - * over time, until it finally reaches 0. - */ - now += (tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15); - preempt_enable(); - return now; + return get_tod_clock_monotonic(); } static struct clocksource clocksource_tod = { @@ -371,29 +351,11 @@ static inline int check_sync_clock(void) */ static void clock_sync_global(long delta) { - unsigned long now, adj; struct ptff_qto qto; - int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; - /* Adjust TOD steering parameters. */ - now = get_tod_clock(); - adj = tod_steering_end - now; - if (unlikely((s64) adj >= 0)) - /* Calculate how much of the old adjustment is left. */ - tod_steering_delta = (tod_steering_delta < 0) ? - -(adj >> 15) : (adj >> 15); - tod_steering_delta += delta; - if ((abs(tod_steering_delta) >> 48) != 0) - panic("TOD clock sync offset %li is too large to drift\n", - tod_steering_delta); - tod_steering_end = now + (abs(tod_steering_delta) << 15); - for (cs = 0; cs < CS_BASES; cs++) { - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; - } - + vdso_k_time_data->arch_data.tod_delta = tod_clock_base.tod; /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) lpar_offset = qto.tod_epoch_difference; @@ -435,7 +397,7 @@ struct clock_sync_data { /* * Server Time Protocol (STP) code. */ -static bool stp_online; +static bool stp_online = true; static struct stp_sstpi stp_info; static void *stp_page; @@ -461,7 +423,6 @@ static void __init stp_reset(void) if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { - pr_warn("The real or virtual hardware system does not provide an STP interface\n"); free_page((unsigned long) stp_page); stp_page = NULL; stp_online = false; @@ -585,7 +546,7 @@ static int stp_sync_clock(void *data) atomic_dec(&sync->cpus); /* Wait for in_sync to be set. */ while (READ_ONCE(sync->in_sync) == 0) - __udelay(1); + ; } if (sync->in_sync != 1) /* Didn't work. Clear per-cpu in sync bit again. */ @@ -596,81 +557,6 @@ static int stp_sync_clock(void *data) return 0; } -static int stp_clear_leap(void) -{ - struct __kernel_timex txc; - int ret; - - memset(&txc, 0, sizeof(txc)); - - ret = do_adjtimex(&txc); - if (ret < 0) - return ret; - - txc.modes = ADJ_STATUS; - txc.status &= ~(STA_INS|STA_DEL); - return do_adjtimex(&txc); -} - -static void stp_check_leap(void) -{ - struct stp_stzi stzi; - struct stp_lsoib *lsoib = &stzi.lsoib; - struct __kernel_timex txc; - int64_t timediff; - int leapdiff, ret; - - if (!stp_info.lu || !check_sync_clock()) { - /* - * Either a scheduled leap second was removed by the operator, - * or STP is out of sync. In both cases, clear the leap second - * kernel flags. - */ - if (stp_clear_leap() < 0) - pr_err("failed to clear leap second flags\n"); - return; - } - - if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) { - pr_err("stzi failed\n"); - return; - } - - timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC; - leapdiff = lsoib->nlso - lsoib->also; - - if (leapdiff != 1 && leapdiff != -1) { - pr_err("Cannot schedule %d leap seconds\n", leapdiff); - return; - } - - if (timediff < 0) { - if (stp_clear_leap() < 0) - pr_err("failed to clear leap second flags\n"); - } else if (timediff < 7200) { - memset(&txc, 0, sizeof(txc)); - ret = do_adjtimex(&txc); - if (ret < 0) - return; - - txc.modes = ADJ_STATUS; - if (leapdiff > 0) - txc.status |= STA_INS; - else - txc.status |= STA_DEL; - ret = do_adjtimex(&txc); - if (ret < 0) - pr_err("failed to set leap second flags\n"); - /* arm Timer to clear leap second flags */ - mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC)); - } else { - /* The day the leap second is scheduled for hasn't been reached. Retry - * in one hour. - */ - mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC)); - } -} - /* * STP work. Check for the STP state and take over the clock * synchronization if the STP clock source is usable. @@ -685,7 +571,7 @@ static void stp_work_fn(struct work_struct *work) if (!stp_online) { chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); - del_timer_sync(&stp_timer); + timer_delete_sync(&stp_timer); goto out_unlock; } @@ -712,8 +598,6 @@ static void stp_work_fn(struct work_struct *work) * Retry after a second. */ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); - else if (stp_info.lu) - stp_check_leap(); out_unlock: mutex_unlock(&stp_mutex); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4f9c301a705b..46569b8e47dd 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -6,6 +6,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/uaccess.h> @@ -240,7 +241,7 @@ int topology_set_cpu_management(int fc) { int cpu, rc; - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return -EOPNOTSUPP; if (fc) rc = ptf(PTF_VERTICAL); @@ -315,13 +316,13 @@ static int __arch_update_cpu_topology(void) hd_status = 0; rc = 0; mutex_lock(&smp_cpu_state_mutex); - if (MACHINE_HAS_TOPOLOGY) { + if (cpu_has_topology()) { rc = 1; store_topology(info); tl_to_masks(info); } update_cpu_masks(); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) topology_update_polarization_simple(); if (cpu_management == 1) hd_status = hd_enable_hiperdispatch(); @@ -371,12 +372,12 @@ static void set_topology_timer(void) if (atomic_add_unless(&topology_poll, -1, 0)) mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); else - mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); + mod_timer(&topology_timer, jiffies + secs_to_jiffies(60)); } void topology_expect_change(void) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; /* This is racy, but it doesn't matter since it is just a heuristic. * Worst case is that we poll in a higher frequency for a bit longer. @@ -500,7 +501,7 @@ int topology_cpu_init(struct cpu *cpu) int rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); - if (rc || !MACHINE_HAS_TOPOLOGY) + if (rc || !cpu_has_topology()) return rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); if (rc) @@ -530,11 +531,11 @@ static const struct cpumask *cpu_drawer_mask(int cpu) } static struct sched_domain_topology_level s390_topology[] = { - { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, - { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, - { cpu_book_mask, SD_INIT_NAME(BOOK) }, - { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, - { cpu_cpu_mask, SD_INIT_NAME(PKG) }, + SDTL_INIT(cpu_thread_mask, cpu_smt_flags, SMT), + SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC), + SDTL_INIT(cpu_book_mask, NULL, BOOK), + SDTL_INIT(cpu_drawer_mask, NULL, DRAWER), + SDTL_INIT(cpu_cpu_mask, NULL, PKG), { NULL, }, }; @@ -548,33 +549,38 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info, nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; nr_masks = max(nr_masks, 1); for (i = 0; i < nr_masks; i++) { - mask->next = memblock_alloc(sizeof(*mask->next), 8); - if (!mask->next) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*mask->next), 8); + mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8); mask = mask->next; } } +static int __init detect_polarization(union topology_entry *tle) +{ + struct topology_core *tl_core; + + while (tle->nl) + tle = next_tle(tle); + tl_core = (struct topology_core *)tle; + return tl_core->pp != POLARIZATION_HRZ; +} + void __init topology_init_early(void) { struct sysinfo_15_1_x *info; set_sched_topology(s390_topology); if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) topology_mode = TOPOLOGY_MODE_HW; else topology_mode = TOPOLOGY_MODE_SINGLE; } - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) goto out; - tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!tl_info) - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); + tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); info = tl_info; store_topology(info); + cpu_management = detect_polarization(info->tle); pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", info->mag[0], info->mag[1], info->mag[2], info->mag[3], info->mag[4], info->mag[5], info->mnest); @@ -591,7 +597,7 @@ static inline int topology_get_mode(int enabled) { if (!enabled) return TOPOLOGY_MODE_SINGLE; - return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; + return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; } static inline int topology_is_enabled(void) @@ -662,7 +668,7 @@ static int polarization_ctl_handler(const struct ctl_table *ctl, int write, return set_polarization(polarization); } -static struct ctl_table topology_ctl_table[] = { +static const struct ctl_table topology_ctl_table[] = { { .procname = "topology", .mode = 0644, @@ -681,7 +687,7 @@ static int __init topology_init(void) int rc = 0; timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) set_topology_timer(); else topology_update_polarization_simple(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 24fee11b030d..19687dab32f7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -3,18 +3,13 @@ * S390 version * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds */ -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. - */ -#include "asm/irqflags.h" -#include "asm/ptrace.h" +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/randomize_kstack.h> @@ -29,6 +24,8 @@ #include <linux/entry-common.h> #include <linux/kmsan.h> #include <asm/asm-extable.h> +#include <asm/irqflags.h> +#include <asm/ptrace.h> #include <asm/vtime.h> #include <asm/fpu.h> #include <asm/fault.h> @@ -42,7 +39,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) address = current->thread.trap_tdb.data[3]; else address = regs->psw.addr; - return (void __user *) (address - (regs->int_code >> 16)); + return (void __user *)(address - (regs->int_code >> 16)); } #ifdef CONFIG_GENERIC_BUG @@ -57,16 +54,15 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) if (user_mode(regs)) { force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); - } else { + } else { if (!fixup_exception(regs)) die(regs, str); - } + } } static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) + if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP) return; do_report_trap(regs, si_signo, si_code, str); } @@ -78,8 +74,7 @@ void do_per_trap(struct pt_regs *regs) return; if (!current->ptrace) return; - force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __force __user *) current->thread.per_event.address); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address); } NOKPROBE_SYMBOL(do_per_trap); @@ -98,36 +93,25 @@ static void name(struct pt_regs *regs) \ do_trap(regs, signr, sicode, str); \ } -DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, - "addressing exception") -DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, - "execute exception") -DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, - "fixpoint divide exception") -DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, - "fixpoint overflow exception") -DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, - "HFP overflow exception") -DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, - "HFP underflow exception") -DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, - "HFP significance exception") -DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, - "HFP divide exception") -DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, - "HFP square root exception") -DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, - "operand exception") -DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, - "privileged operation") -DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, - "special operation exception") -DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, - "transaction constraint exception") +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception") +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception") static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) { int si_code = 0; + /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ @@ -153,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs) static void illegal_op(struct pt_regs *regs) { - __u8 opcode[6]; - __u16 __user *location; int is_uprobe_insn = 0; + u16 __user *location; int signal = 0; + u16 opcode; location = get_trap_ip(regs); - if (user_mode(regs)) { - if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + if (get_user(opcode, location)) return; - if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { + if (opcode == S390_BREAKPOINT_U16) { if (current->ptrace) force_sig_fault(SIGTRAP, TRAP_BRKPT, location); else signal = SIGILL; #ifdef CONFIG_UPROBES - } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + } else if (opcode == UPROBE_SWBP_INSN) { is_uprobe_insn = 1; #endif - } else + } else { signal = SIGILL; + } } /* - * We got either an illegal op in kernel mode, or user space trapped + * This is either an illegal op in kernel mode, or user space trapped * on a uprobes illegal instruction. See if kprobes or uprobes picks * it up. If not, SIGILL. */ if (is_uprobe_insn || !user_mode(regs)) { - if (notify_die(DIE_BPT, "bpt", regs, 0, - 3, SIGTRAP) != NOTIFY_STOP) + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } if (signal) @@ -190,18 +173,10 @@ static void illegal_op(struct pt_regs *regs) } NOKPROBE_SYMBOL(illegal_op); -DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, - "specification exception"); - static void vector_exception(struct pt_regs *regs) { int si_code, vic; - if (!cpu_has_vx()) { - do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); - return; - } - /* get vector interrupt code from fpc */ save_user_fpu_regs(); vic = (current->thread.ufpu.fpc & 0xf00) >> 8; @@ -249,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs) { if (user_mode(regs)) return; - switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { case BUG_TRAP_TYPE_NONE: fixup_exception(regs); @@ -262,7 +236,7 @@ static void monitor_event_exception(struct pt_regs *regs) } } -void kernel_stack_overflow(struct pt_regs *regs) +void kernel_stack_invalid(struct pt_regs *regs) { /* * Normally regs are unpoisoned by the generic entry code, but @@ -270,12 +244,12 @@ void kernel_stack_overflow(struct pt_regs *regs) */ kmsan_unpoison_entry_regs(regs); bust_spinlocks(1); - printk("Kernel stack overflow.\n"); + pr_emerg("Kernel stack pointer invalid\n"); show_regs(regs); bust_spinlocks(0); - panic("Corrupt kernel stack, can't continue."); + panic("Invalid kernel stack pointer, cannot continue"); } -NOKPROBE_SYMBOL(kernel_stack_overflow); +NOKPROBE_SYMBOL(kernel_stack_invalid); static void __init test_monitor_call(void) { @@ -283,12 +257,12 @@ static void __init test_monitor_call(void) if (!IS_ENABLED(CONFIG_BUG)) return; - asm volatile( + asm_inline volatile( " mc 0,0\n" - "0: xgr %0,%0\n" + "0: lhi %[val],0\n" "1:\n" - EX_TABLE(0b,1b) - : "+d" (val)); + EX_TABLE(0b, 1b) + : [val] "+d" (val)); if (!val) panic("Monitor call doesn't work!\n"); } @@ -323,7 +297,6 @@ void noinstr __do_pgm_check(struct pt_regs *regs) teid.val = lc->trans_exc_code; regs->int_code = lc->pgm_int_code; regs->int_parm_long = teid.val; - /* * In case of a guest fault, short-circuit the fault handler and return. * This way the sie64a() function will return 0; fault address and @@ -336,23 +309,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs) current->thread.gmap_int_code = regs->int_code & 0xffff; return; } - state = irqentry_enter(regs); - if (user_mode(regs)) { update_timer_sys(); - if (!static_branch_likely(&cpu_has_bear)) { + if (!cpu_has_bear()) { if (regs->last_break < 4096) regs->last_break = 1; } current->thread.last_break = regs->last_break; } - if (lc->pgm_code & 0x0200) { /* transaction abort */ current->thread.trap_tdb = lc->pgm_tdb; } - if (lc->pgm_code & PGM_INT_CODE_PER) { if (user_mode(regs)) { struct per_event *ev = ¤t->thread.per_event; @@ -368,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs) goto out; } } - if (!irqs_disabled_flags(regs->psw.mask)) trace_hardirqs_on(); __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); - trapnr = regs->int_code & PGM_INT_CODE_MASK; if (trapnr) pgm_check_table[trapnr](regs); diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index cd44be2b6ce8..0f88caca4eaf 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/export.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/sched/task_stack.h> diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 6f9654a191ad..47f574cd1728 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -7,6 +7,7 @@ #define KMSG_COMPONENT "prot_virt" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/export.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/sizes.h> @@ -15,23 +16,11 @@ #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/pagewalk.h> +#include <linux/backing-dev.h> #include <asm/facility.h> #include <asm/sections.h> #include <asm/uv.h> -#if !IS_ENABLED(CONFIG_KVM) -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - return 0; -} - -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) -{ - return 0; -} -#endif - /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); @@ -148,7 +137,7 @@ int uv_destroy_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -159,6 +148,7 @@ int uv_destroy_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL(uv_destroy_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -175,7 +165,7 @@ int uv_destroy_pte(pte_t pte) * * @paddr: Absolute host address of page to be exported */ -static int uv_convert_from_secure(unsigned long paddr) +int uv_convert_from_secure(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, @@ -187,15 +177,16 @@ static int uv_convert_from_secure(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure); /* * The caller must already hold a reference to the folio. */ -static int uv_convert_from_secure_folio(struct folio *folio) +int uv_convert_from_secure_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -206,6 +197,7 @@ static int uv_convert_from_secure_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -216,6 +208,39 @@ int uv_convert_from_secure_pte(pte_t pte) return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -237,13 +262,31 @@ static int expected_folio_refs(struct folio *folio) return res; } -static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +/** + * __make_folio_secure() - make a folio secure + * @folio: the folio to make secure + * @uvcb: the uvcb that describes the UVC to be used + * + * The folio @folio will be made secure if possible, @uvcb will be passed + * as-is to the UVC. + * + * Return: 0 on success; + * -EBUSY if the folio is in writeback or has too many references; + * -EAGAIN if the UVC needs to be attempted again; + * -ENXIO if the address is not mapped; + * -EINVAL if the UVC failed for other reasons. + * + * Context: The caller must hold exactly one extra reference on the folio + * (it's the same logic as split_folio()), and the folio must be + * locked. + */ +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; if (folio_test_writeback(folio)) - return -EAGAIN; - expected = expected_folio_refs(folio); + return -EBUSY; + expected = expected_folio_refs(folio) + 1; if (!folio_ref_freeze(folio, expected)) return -EBUSY; set_bit(PG_arch_1, &folio->flags); @@ -268,262 +311,167 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) { - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} + int rc; -/* - * Drain LRU caches: the local one on first invocation and the ones of all - * CPUs on successive invocations. Returns "true" on the first invocation. - */ -static bool drain_lru(bool *drain_lru_called) -{ - /* - * If we have tried a local drain and the folio refcount - * still does not match our expected safe value, try with a - * system wide drain. This is needed if the pagevecs holding - * the page are on a different CPU. - */ - if (*drain_lru_called) { - lru_add_drain_all(); - /* We give up here, don't retry immediately. */ - return false; - } - /* - * We are here if the folio refcount does not match the - * expected safe value. The main culprits are usually - * pagevecs. With lru_add_drain() we drain the pagevecs - * on the local CPU so that hopefully the refcount will - * reach the expected safe value. - */ - lru_add_drain(); - *drain_lru_called = true; - /* The caller should try again immediately */ - return true; + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; } -/* - * Requests the Ultravisor to make a page accessible to a guest. - * If it's brought in the first time, it will be cleared. If - * it has been exported before, it will be decrypted and integrity - * checked. +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and + * split the folio if it is large. + * @mm: the mm containing the folio to work on + * @folio: the folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the operation was successful; + * -EAGAIN if splitting the large folio was not successful, + * but another attempt can be made; + * -EINVAL in case of other folio splitting errors. See split_folio(). */ -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) { - struct vm_area_struct *vma; - bool drain_lru_called = false; - spinlock_t *ptelock; - unsigned long uaddr; - struct folio *folio; - pte_t *ptep; - int rc; + int rc, tried_splits; -again: - rc = -EFAULT; - mmap_read_lock(gmap->mm); + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. If - * userspace is playing dirty tricky with mapping huge pages later - * on this will result in a segmentation fault. - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = -ENXIO; - ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); - if (!ptep) - goto out; - if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { - folio = page_folio(pte_page(*ptep)); - rc = -EAGAIN; - if (folio_test_large(folio)) { - rc = -E2BIG; - } else if (folio_trylock(folio)) { - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(PFN_PHYS(folio_pfn(folio))); - rc = make_folio_secure(folio, uvcb); + if (!folio_test_large(folio)) + return 0; + + for (tried_splits = 0; tried_splits < 2; tried_splits++) { + struct address_space *mapping; + loff_t lstart, lend; + struct inode *inode; + + folio_lock(folio); + rc = split_folio(folio); + if (rc != -EBUSY) { folio_unlock(folio); + return rc; } /* - * Once we drop the PTL, the folio may get unmapped and - * freed immediately. We need a temporary reference. + * Splitting with -EBUSY can fail for various reasons, but we + * have to handle one case explicitly for now: some mappings + * don't allow for splitting dirty folios; writeback will + * mark them clean again, including marking all page table + * entries mapping the folio read-only, to catch future write + * attempts. + * + * While the system should be writing back dirty folios in the + * background, we obtained this folio by looking up a writable + * page table entry. On these problematic mappings, writable + * page table entries imply dirty folios, preventing the + * split in the first place. + * + * To prevent a livelock when trigger writeback manually and + * letting the caller look up the folio again in the page + * table (turning it dirty), immediately try to split again. + * + * This is only a problem for some mappings (e.g., XFS); + * mappings that do not support writeback (e.g., shmem) do not + * apply. */ - if (rc == -EAGAIN || rc == -E2BIG) - folio_get(folio); - } - pte_unmap_unlock(ptep, ptelock); -out: - mmap_read_unlock(gmap->mm); - - switch (rc) { - case -E2BIG: - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - folio_put(folio); - - switch (rc) { - case 0: - /* Splitting succeeded, try again immediately. */ - goto again; - case -EAGAIN: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -EBUSY: - /* Unexpected race. */ - return -EAGAIN; + if (!folio_test_dirty(folio) || folio_test_anon(folio) || + !folio->mapping || !mapping_can_writeback(folio->mapping)) { + folio_unlock(folio); + break; } - WARN_ON_ONCE(1); - return -ENXIO; - case -EAGAIN: + /* - * If we are here because the UVC returned busy or partial - * completion, this is just a useless check, but it is safe. + * Ideally, we'd only trigger writeback on this exact folio. But + * there is no easy way to do that, so we'll stabilize the + * mapping while we still hold the folio lock, so we can drop + * the folio lock to trigger writeback on the range currently + * covered by the folio instead. */ - folio_wait_writeback(folio); - folio_put(folio); - return -EAGAIN; - case -EBUSY: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -ENXIO: - if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) - return -EFAULT; - return -EAGAIN; - } - return rc; -} -EXPORT_SYMBOL_GPL(gmap_make_secure); + mapping = folio->mapping; + lstart = folio_pos(folio); + lend = lstart + folio_size(folio) - 1; + inode = igrab(mapping->host); + folio_unlock(folio); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) -{ - struct uv_cb_cts uvcb = { - .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, - .header.len = sizeof(uvcb), - .guest_handle = gmap->guest_handle, - .gaddr = gaddr, - }; + if (unlikely(!inode)) + break; - return gmap_make_secure(gmap, gaddr, &uvcb); + filemap_write_and_wait_range(mapping, lstart, lend); + iput(mapping->host); + } + return -EAGAIN; } -EXPORT_SYMBOL_GPL(gmap_convert_to_secure); -/** - * gmap_destroy_page - Destroy a guest page. - * @gmap: the gmap of the guest - * @gaddr: the guest address to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - */ -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) { struct vm_area_struct *vma; struct folio_walk fw; - unsigned long uaddr; struct folio *folio; int rc; - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Huge pages should not be able to become secure - */ - if (is_vm_hugetlb_page(vma)) - goto out; + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } - rc = 0; - folio = folio_walk_start(&fw, vma, uaddr, 0); - if (!folio) - goto out; - /* - * See gmap_make_secure(): large folios cannot be secure. Small - * folio implies FW_LEVEL_PTE. - */ - if (folio_test_large(folio) || !pte_write(fw.pte)) - goto out_walk_end; - rc = uv_destroy_folio(folio); + folio_get(folio); /* - * Fault handlers can race; it is possible that two CPUs will fault - * on the same secure page. One CPU can destroy the page, reboot, - * re-enter secure mode and import it, while the second CPU was - * stuck at the beginning of the handler. At some point the second - * CPU will be able to progress, and it will not be able to destroy - * the page. In that case we do not want to terminate the process, - * we instead try to export the page. + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. */ - if (rc) - rc = uv_convert_from_secure_folio(folio); -out_walk_end: + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); folio_walk_end(&fw, vma); -out: - mmap_read_unlock(gmap->mm); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) { + rc = s390_wiggle_split_folio(mm, folio); + if (!rc) + rc = -EAGAIN; + } + folio_put(folio); + return rc; } -EXPORT_SYMBOL_GPL(gmap_destroy_page); +EXPORT_SYMBOL_GPL(make_hva_secure); /* * To be called with the folio locked or with an extra reference! This will - * prevent gmap_make_secure from touching the folio concurrently. Having 2 - * parallel arch_make_folio_accessible is fine, as the UV calls will become a - * no-op if the folio is already exported. + * prevent kvm_s390_pv_make_secure() from touching the folio concurrently. + * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will + * become a no-op if the folio is already exported. */ int arch_make_folio_accessible(struct folio *folio) { int rc = 0; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -894,7 +842,12 @@ out_kobj: device_initcall(uv_sysfs_init); /* - * Find the secret with the secret_id in the provided list. + * Locate a secret in the list by its id. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. + * + * Search for a secret with the given secret_id in the Ultravisor secret store. * * Context: might sleep. */ @@ -915,12 +868,15 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN], /* * Do the actual search for `uv_get_secret_metadata`. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. * * Context: might sleep. */ -static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list *list, - struct uv_secret_list_item_hdr *secret) +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret) { u16 start_idx = 0; u16 list_rc; @@ -942,36 +898,7 @@ static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], return -ENOENT; } - -/** - * uv_get_secret_metadata() - get secret metadata for a given secret id. - * @secret_id: search pattern. - * @secret: output data, containing the secret's metadata. - * - * Search for a secret with the given secret_id in the Ultravisor secret store. - * - * Context: might sleep. - * - * Return: - * * %0: - Found entry; secret->idx and secret->type are valid. - * * %ENOENT - No entry found. - * * %ENODEV: - Not supported: UV not available or command not available. - * * %EIO: - Other unexpected UV error. - */ -int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list_item_hdr *secret) -{ - struct uv_secret_list *buf; - int rc; - - buf = kzalloc(sizeof(*buf), GFP_KERNEL); - if (!buf) - return -ENOMEM; - rc = find_secret(secret_id, buf, secret); - kfree(buf); - return rc; -} -EXPORT_SYMBOL_GPL(uv_get_secret_metadata); +EXPORT_SYMBOL_GPL(uv_find_secret); /** * uv_retrieve_secret() - get the secret value for the secret index. diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 598b512cde01..430feb1a5013 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -16,8 +16,8 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/time_namespace.h> #include <linux/random.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/vdso/vsyscall.h> #include <asm/alternative.h> @@ -26,85 +26,6 @@ extern char vdso64_start[], vdso64_end[]; extern char vdso32_start[], vdso32_end[]; -static struct vm_special_mapping vvar_mapping; - -static union vdso_data_store vdso_data_store __page_aligned_data; - -struct vdso_data *vdso_data = vdso_data_store.data; - -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The VVAR page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (!vma_is_special_mapping(vma, &vvar_mapping)) - continue; - zap_vma_pages(vma); - break; - } - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - if (timens_page) { - /* - * Fault in VVAR page too, since it will be accessed - * to get clock data anyway. - */ - addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - pfn = page_to_pfn(timens_page); - } - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - return vmf_insert_pfn(vma, vmf->address, pfn); -} - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -112,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping vdso64_mapping = { .name = "[vdso]", .mremap = vdso_mremap, @@ -142,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) struct vm_area_struct *vma; int rc; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); if (mmap_write_lock_killable(mm)) return -EINTR; @@ -157,17 +73,14 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, vvar_start); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|VM_SEALED_SYSMAP| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_mapping); if (IS_ERR(vma)) { @@ -220,7 +133,7 @@ unsigned long vdso_text_size(void) unsigned long vdso_size(void) { - return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 2c5afb88d298..1e4ddd1a683f 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = vdso_user_wrapper-32.o note-32.o # Build rules diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index c916c4f73f76..9630d58c2080 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -6,16 +6,15 @@ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") OUTPUT_ARCH(s390:31-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 37bb4b761229..d8f0df742809 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -2,10 +2,10 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o -VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index ec42b7d9cb53..e4f6551ae898 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -7,17 +7,15 @@ #include <asm/vdso/vsyscall.h> #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c index 23f7d7619a99..cc8933e04ff7 100644 --- a/arch/s390/kernel/vmcore_info.c +++ b/arch/s390/kernel/vmcore_info.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/vmcore_info.h> -#include <asm/abs_lowcore.h> #include <linux/mm.h> +#include <asm/abs_lowcore.h> +#include <asm/sections.h> #include <asm/setup.h> void arch_crash_save_vmcoreinfo(void) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 377b9aaf8c92..1c606dfa595d 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -52,7 +52,6 @@ SECTIONS SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) - *(.fixup) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */ @@ -72,6 +71,13 @@ SECTIONS . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; + . = ALIGN(8); + .skey_region_table : { + __skey_region_start = .; + KEEP(*(.skey_region)) + __skey_region_end = .; + } + .data.rel.ro : { *(.data.rel.ro .data.rel.ro.*) } diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 02217fb4ae10..9a723c48b05a 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o +kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 74f73141f9b9..53233dec8cad 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -11,12 +11,30 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/gmap.h> +#include <asm/gmap_helpers.h> #include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" #include "gaccess.h" +static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end) +{ + struct kvm_memslot_iter iter; + struct kvm_memory_slot *slot; + struct kvm_memslots *slots; + unsigned long start, end; + + slots = kvm_vcpu_memslots(vcpu); + + kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) { + slot = iter.slot; + start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn)); + end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages)); + gmap_helper_discard(vcpu->kvm->mm, start, end); + } +} + static int diag_release_pages(struct kvm_vcpu *vcpu) { unsigned long start, end; @@ -32,12 +50,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + mmap_read_lock(vcpu->kvm->mm); /* * We checked for start >= end above, so lets check for the * fast path (no prefix swap page involved) */ if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) { - gmap_discard(vcpu->arch.gmap, start, end); + do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end)); } else { /* * This is slow path. gmap_discard will check for start @@ -45,13 +64,14 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) * prefix and let gmap_discard make some of these calls * NOPs. */ - gmap_discard(vcpu->arch.gmap, start, prefix); + do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix)); if (start <= prefix) - gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE); + do_discard_gfn_range(vcpu, 0, 1); if (end > prefix + PAGE_SIZE) - gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE); - gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end); + do_discard_gfn_range(vcpu, 1, 2); + do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end)); } + mmap_read_unlock(vcpu->kvm->mm); return 0; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9816b0060fbe..21c2e61fece4 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -18,6 +18,8 @@ #include "kvm-s390.h" #include "gaccess.h" +#define GMAP_SHADOW_FAKE_TABLE 1ULL + /* * vaddress union in order to easily decode a virtual address into its * region first index, region second index etc. parts. @@ -317,7 +319,7 @@ enum prot_type { PROT_TYPE_DAT = 3, PROT_TYPE_IEP = 4, /* Dummy value for passing an initialized value when code != PGM_PROTECTION */ - PROT_NONE, + PROT_TYPE_DUMMY, }; static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, @@ -333,7 +335,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, switch (code) { case PGM_PROTECTION: switch (prot) { - case PROT_NONE: + case PROT_TYPE_DUMMY: /* We should never get here, acts like termination */ WARN_ON_ONCE(1); break; @@ -803,7 +805,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, gpa = kvm_s390_real_to_abs(vcpu, ga); if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) { rc = PGM_ADDRESSING; - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; } } if (rc) @@ -961,7 +963,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, if (rc == PGM_PROTECTION) prot = PROT_TYPE_KEYC; else - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); } out_unlock: @@ -1393,6 +1395,44 @@ shadow_pgt: } /** + * shadow_pgt_lookup() - find a shadow page table + * @sg: pointer to the shadow guest address space structure + * @saddr: the address in the shadow aguest address space + * @pgt: parent gmap address of the page table to get shadowed + * @dat_protection: if the pgtable is marked as protected by dat + * @fake: pgt references contiguous guest memory block, not a pgtable + * + * Returns 0 if the shadow page table was found and -EAGAIN if the page + * table was not found. + * + * Called with sg->mm->mmap_lock in read. + */ +static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt, + int *dat_protection, int *fake) +{ + unsigned long pt_index; + unsigned long *table; + struct page *page; + int rc; + + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ + if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { + /* Shadow page tables are full pages (pte+pgste) */ + page = pfn_to_page(*table >> PAGE_SHIFT); + pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page)); + *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE; + *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); + *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE); + rc = 0; + } else { + rc = -EAGAIN; + } + spin_unlock(&sg->guest_table_lock); + return rc; +} + +/** * kvm_s390_shadow_fault - handle fault on a shadow page table * @vcpu: virtual cpu * @sg: pointer to the shadow guest address space structure @@ -1415,6 +1455,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, int dat_protection, fake; int rc; + if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm)) + return -EFAULT; + mmap_read_lock(sg->mm); /* * We don't want any guest-2 tables to change - so the parent @@ -1423,7 +1466,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, */ ipte_lock(vcpu->kvm); - rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); + rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); if (rc) rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection, &fake); diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c new file mode 100644 index 000000000000..56ef153eb8fe --- /dev/null +++ b/arch/s390/kvm/gmap-vsie.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest memory management for KVM/s390 nested VMs. + * + * Copyright IBM Corp. 2008, 2020, 2024 + * + * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * David Hildenbrand <david@redhat.com> + * Janosch Frank <frankja@linux.vnet.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/pgtable.h> +#include <linux/pagemap.h> +#include <linux/mman.h> + +#include <asm/lowcore.h> +#include <asm/gmap.h> +#include <asm/uv.h> + +#include "kvm-s390.h" + +/** + * gmap_find_shadow - find a specific asce in the list of shadow tables + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * Returns the pointer to a gmap if a shadow table with the given asce is + * already available, ERR_PTR(-EAGAIN) if another one is just being created, + * otherwise NULL + * + * Context: Called with parent->shadow_lock held + */ +static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg; + + lockdep_assert_held(&parent->shadow_lock); + list_for_each_entry(sg, &parent->children, list) { + if (!gmap_shadow_valid(sg, asce, edat_level)) + continue; + if (!sg->initialized) + return ERR_PTR(-EAGAIN); + refcount_inc(&sg->ref_count); + return sg; + } + return NULL; +} + +/** + * gmap_shadow - create/find a shadow guest address space + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * The pages of the top level page table referred by the asce parameter + * will be set to read-only and marked in the PGSTEs of the kvm process. + * The shadow table will be removed automatically on any change to the + * PTE mapping for the source table. + * + * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, + * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the + * parent gmap table could not be protected. + */ +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg, *new; + unsigned long limit; + int rc; + + if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) || + KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private)) + return ERR_PTR(-EFAULT); + spin_lock(&parent->shadow_lock); + sg = gmap_find_shadow(parent, asce, edat_level); + spin_unlock(&parent->shadow_lock); + if (sg) + return sg; + /* Create a new shadow gmap */ + limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); + if (asce & _ASCE_REAL_SPACE) + limit = -1UL; + new = gmap_alloc(limit); + if (!new) + return ERR_PTR(-ENOMEM); + new->mm = parent->mm; + new->parent = gmap_get(parent); + new->private = parent->private; + new->orig_asce = asce; + new->edat_level = edat_level; + new->initialized = false; + spin_lock(&parent->shadow_lock); + /* Recheck if another CPU created the same shadow */ + sg = gmap_find_shadow(parent, asce, edat_level); + if (sg) { + spin_unlock(&parent->shadow_lock); + gmap_free(new); + return sg; + } + if (asce & _ASCE_REAL_SPACE) { + /* only allow one real-space gmap shadow */ + list_for_each_entry(sg, &parent->children, list) { + if (sg->orig_asce & _ASCE_REAL_SPACE) { + spin_lock(&sg->guest_table_lock); + gmap_unshadow(sg); + spin_unlock(&sg->guest_table_lock); + list_del(&sg->list); + gmap_put(sg); + break; + } + } + } + refcount_set(&new->ref_count, 2); + list_add(&new->list, &parent->children); + if (asce & _ASCE_REAL_SPACE) { + /* nothing to protect, return right away */ + new->initialized = true; + spin_unlock(&parent->shadow_lock); + return new; + } + spin_unlock(&parent->shadow_lock); + /* protect after insertion, so it will get properly invalidated */ + mmap_read_lock(parent->mm); + rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN, + ((asce & _ASCE_TABLE_LENGTH) + 1), + PROT_READ, GMAP_NOTIFY_SHADOW); + mmap_read_unlock(parent->mm); + spin_lock(&parent->shadow_lock); + new->initialized = true; + if (rc) { + list_del(&new->list); + gmap_free(new); + new = ERR_PTR(rc); + } + spin_unlock(&parent->shadow_lock); + return new; +} diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 5bbaadf75dc6..c7908950c1f4 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -94,7 +94,7 @@ static int handle_validity(struct kvm_vcpu *vcpu) vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy, + KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy, current->pid, vcpu->kvm); /* do not warn on invalid runtime instrumentation mode */ @@ -367,7 +367,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg2, &srcaddr, GACC_FETCH, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0); + rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0); if (rc != 0) return rc; @@ -376,7 +376,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg1, &dstaddr, GACC_STORE, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE); + rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE); if (rc != 0) return rc; @@ -544,12 +544,12 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu) guest_uvcb->header.cmd); return 0; } - rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb); + rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb); /* * If the unpin did not succeed, the guest will exit again for the UVC * and we will retry the unpin. */ - if (rc == -EINVAL) + if (rc == -EINVAL || rc == -ENXIO) return 0; /* * If we got -EAGAIN here, we simply return it. It will eventually @@ -652,10 +652,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) break; case ICPT_PV_PREF: rc = 0; - gmap_convert_to_secure(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu)); - gmap_convert_to_secure(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu) + PAGE_SIZE); + kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu)); + kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE); break; default: return -EOPNOTSUPP; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index ea8dce299954..2a92a8b9e4c2 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -10,9 +10,11 @@ #define KMSG_COMPONENT "kvm-s390" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/kvm_host.h> #include <linux/hrtimer.h> +#include <linux/export.h> #include <linux/mmu_context.h> #include <linux/nospec.h> #include <linux/signal.h> @@ -577,7 +579,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, /* take care of lazy register loading */ kvm_s390_fpu_store(vcpu->run); save_access_regs(vcpu->run->s.regs.acrs); - if (MACHINE_HAS_GS && vcpu->arch.gs_enabled) + if (cpu_has_gs() && vcpu->arch.gs_enabled) save_gs_cb(current->thread.gs_cb); /* Extended save area */ @@ -948,8 +950,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, (u64 *) __LC_PGM_LAST_BREAK); - rc |= put_guest_lc(vcpu, pgm_info.code, - (u16 *)__LC_PGM_INT_CODE); + rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW, @@ -2678,9 +2679,13 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) kvm_s390_clear_float_irqs(dev->kvm); break; case KVM_DEV_FLIC_APF_ENABLE: + if (kvm_is_ucontrol(dev->kvm)) + return -EINVAL; dev->kvm->arch.gmap->pfault_enabled = 1; break; case KVM_DEV_FLIC_APF_DISABLE_WAIT: + if (kvm_is_ucontrol(dev->kvm)) + return -EINVAL; dev->kvm->arch.gmap->pfault_enabled = 0; /* * Make sure no async faults are in transition when @@ -2889,20 +2894,25 @@ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { - u64 uaddr; + u64 uaddr_s, uaddr_i; + int idx; switch (ue->type) { /* we store the userspace addresses instead of the guest addresses */ case KVM_IRQ_ROUTING_S390_ADAPTER: + if (kvm_is_ucontrol(kvm)) + return -EINVAL; e->set = set_adapter_int; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr); - if (uaddr == -EFAULT) - return -EFAULT; - e->adapter.summary_addr = uaddr; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr); - if (uaddr == -EFAULT) + + idx = srcu_read_lock(&kvm->srcu); + uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr); + uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr); + srcu_read_unlock(&kvm->srcu, idx); + + if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i)) return -EFAULT; - e->adapter.ind_addr = uaddr; + e->adapter.summary_addr = uaddr_s; + e->adapter.ind_addr = uaddr_i; e->adapter.summary_offset = ue->u.adapter.summary_offset; e->adapter.ind_offset = ue->u.adapter.ind_offset; e->adapter.adapter_id = ue->u.adapter.adapter_id; @@ -3152,7 +3162,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm) if (!gi->origin) return; gisa_clear_ipm(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin); } void kvm_s390_gisa_init(struct kvm *kvm) @@ -3165,11 +3175,10 @@ void kvm_s390_gisa_init(struct kvm *kvm) gi->alert.mask = 0; spin_lock_init(&gi->alert.ref_lock); gi->expires = 50 * 1000; /* 50 usec */ - hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - gi->timer.function = gisa_vcpu_kicker; + hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL); memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); gi->origin->next_alert = (u32)virt_to_phys(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin); } void kvm_s390_gisa_enable(struct kvm *kvm) @@ -3210,7 +3219,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) process_gib_alert_list(); hrtimer_cancel(&gi->timer); gi->origin = NULL; - VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); + VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa); } void kvm_s390_gisa_disable(struct kvm *kvm) @@ -3459,7 +3468,7 @@ int __init kvm_s390_gib_init(u8 nisc) } } - KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc); + KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc); goto out; out_unreg_gal: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d8080c27d45b..bf6fa8b9ca73 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/compiler.h> +#include <linux/export.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/hrtimer.h> @@ -23,6 +24,7 @@ #include <linux/mman.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/cpufeature.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/timer.h> @@ -36,8 +38,10 @@ #include <asm/access-regs.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/stp.h> #include <asm/gmap.h> +#include <asm/gmap_helpers.h> #include <asm/nmi.h> #include <asm/isc.h> #include <asm/sclp.h> @@ -442,13 +446,13 @@ static void __init kvm_s390_cpu_feat_init(void) if (test_facility(201)) /* PFCR */ pfcr_query(&kvm_s390_available_subfunc.pfcr); - if (MACHINE_HAS_ESOP) + if (machine_has_esop()) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); /* * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). */ - if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || + if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao || !test_facility(3) || !nested) return; allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); @@ -637,7 +641,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = min_t(unsigned int, num_online_cpus(), r); break; case KVM_CAP_S390_COW: - r = MACHINE_HAS_ESOP; + r = machine_has_esop(); break; case KVM_CAP_S390_VECTOR_REGISTERS: r = test_facility(129); @@ -1019,7 +1023,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); - VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + VM_EVENT(kvm, 3, "New guest asce: 0x%p", (void *) kvm->arch.gmap->asce); break; } @@ -2671,7 +2675,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) if (r) break; - r = s390_disable_cow_sharing(); + mmap_write_lock(kvm->mm); + r = gmap_helper_disable_cow_sharing(); + mmap_write_unlock(kvm->mm); if (r) break; @@ -3395,7 +3401,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* we emulate STHYI in kvm */ set_kvm_facility(kvm->arch.model.fac_mask, 74); set_kvm_facility(kvm->arch.model.fac_list, 74); - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { set_kvm_facility(kvm->arch.model.fac_mask, 147); set_kvm_facility(kvm->arch.model.fac_list, 147); } @@ -3428,8 +3434,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) VM_EVENT(kvm, 3, "vm created with type %lu", type); if (type & KVM_VM_S390_UCONTROL) { + struct kvm_userspace_memory_region2 fake_memslot = { + .slot = KVM_S390_UCONTROL_MEMSLOT, + .guest_phys_addr = 0, + .userspace_addr = 0, + .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE), + .flags = 0, + }; + kvm->arch.gmap = NULL; kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; + /* one flat fake memslot covering the whole address-space */ + mutex_lock(&kvm->slots_lock); + KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm); + mutex_unlock(&kvm->slots_lock); } else { if (sclp.hamax == U64_MAX) kvm->arch.mem_limit = TASK_SIZE_MAX; @@ -3451,7 +3469,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_gisa_init(kvm); INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); kvm->arch.pv.set_aside = NULL; - KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); return 0; out_err: @@ -3514,7 +3532,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); - KVM_EVENT(3, "vm 0x%pK destroyed", kvm); + KVM_EVENT(3, "vm 0x%p destroyed", kvm); } /* Section: vcpu related */ @@ -3635,7 +3653,7 @@ static int sca_switch_to_extended(struct kvm *kvm) free_page((unsigned long)old_sca); - VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)", old_sca, kvm->arch.sca); return 0; } @@ -3879,8 +3897,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) kvm_s390_vcpu_setup_model(vcpu); - /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ - if (MACHINE_HAS_ESOP) + /* pgste_set_pte has special handling for !machine_has_esop() */ + if (machine_has_esop()) vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; if (test_kvm_facility(vcpu->kvm, 9)) vcpu->arch.sie_block->ecb |= ECB_SRSI; @@ -3930,8 +3948,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) if (rc) return rc; } - hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; + hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); vcpu->arch.sie_block->hpid = HPID_KVM; @@ -4012,7 +4030,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto out_free_sie_block; } - VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", + VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p", vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); @@ -4498,6 +4516,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu) return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); } +static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags) +{ + struct kvm *kvm = gmap->private; + gfn_t gfn = gpa_to_gfn(gaddr); + bool unlocked; + hva_t vmaddr; + gpa_t tmp; + int rc; + + if (kvm_is_ucontrol(kvm)) { + tmp = __gmap_translate(gmap, gaddr); + gfn = gpa_to_gfn(tmp); + } + + vmaddr = gfn_to_hva(kvm, gfn); + rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!rc) + rc = __gmap_link(gmap, gaddr, vmaddr); + return rc; +} + +/** + * __kvm_s390_mprotect_many() - Apply specified protection to guest pages + * @gmap: the gmap of the guest + * @gpa: the starting guest address + * @npages: how many pages to protect + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: pgste notification bits to set + * + * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one() + * + * Context: kvm->srcu and gmap->mm need to be held in read mode + */ +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits) +{ + unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0; + gpa_t end = gpa + npages * PAGE_SIZE; + int rc; + + for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) { + rc = gmap_protect_one(gmap, gpa, prot, bits); + if (rc == -EAGAIN) { + __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag); + rc = gmap_protect_one(gmap, gpa, prot, bits); + } + if (rc < 0) + return rc; + } + + return 0; +} + +static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu) +{ + gpa_t gaddr = kvm_s390_get_prefix(vcpu); + int idx, rc; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + mmap_read_lock(vcpu->arch.gmap->mm); + + rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT); + + mmap_read_unlock(vcpu->arch.gmap->mm); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return rc; +} + static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: @@ -4513,9 +4600,8 @@ retry: */ if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { int rc; - rc = gmap_mprotect_notify(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu), - PAGE_SIZE * 2, PROT_WRITE); + + rc = kvm_s390_mprotect_notify_prefix(vcpu); if (rc) { kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); return rc; @@ -4766,11 +4852,112 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } +static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) +{ + KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, + "Unexpected program interrupt 0x%x, TEID 0x%016lx", + current->thread.gmap_int_code, current->thread.gmap_teid.val); +} + +/* + * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu + * @vcpu: the vCPU whose gmap is to be fixed up + * @gfn: the guest frame number used for memslots (including fake memslots) + * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps + * @flags: FOLL_* flags + * + * Return: 0 on success, < 0 in case of error. + * Context: The mm lock must not be held before calling. May sleep. + */ +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags) +{ + struct kvm_memory_slot *slot; + unsigned int fault_flags; + bool writable, unlocked; + unsigned long vmaddr; + struct page *page; + kvm_pfn_t pfn; + int rc; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return vcpu_post_run_addressing_exception(vcpu); + + fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; + if (vcpu->arch.gmap->pfault_enabled) + flags |= FOLL_NOWAIT; + vmaddr = __gfn_to_hva_memslot(slot, gfn); + +try_again: + pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page); + + /* Access outside memory, inject addressing exception */ + if (is_noslot_pfn(pfn)) + return vcpu_post_run_addressing_exception(vcpu); + /* Signal pending: try again */ + if (pfn == KVM_PFN_ERR_SIGPENDING) + return -EAGAIN; + + /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */ + if (pfn == KVM_PFN_ERR_NEEDS_IO) { + trace_kvm_s390_major_guest_pfault(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + vcpu->stat.pfault_sync++; + /* Could not setup async pfault, try again synchronously */ + flags &= ~FOLL_NOWAIT; + goto try_again; + } + /* Any other error */ + if (is_error_pfn(pfn)) + return -EFAULT; + + /* Success */ + mmap_read_lock(vcpu->arch.gmap->mm); + /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */ + rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked); + if (!rc) + rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr); + scoped_guard(spinlock, &vcpu->kvm->mmu_lock) { + kvm_release_faultin_page(vcpu->kvm, page, false, writable); + } + mmap_read_unlock(vcpu->arch.gmap->mm); + return rc; +} + +static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags) +{ + unsigned long gaddr_tmp; + gfn_t gfn; + + gfn = gpa_to_gfn(gaddr); + if (kvm_is_ucontrol(vcpu->kvm)) { + /* + * This translates the per-vCPU guest address into a + * fake guest address, which can then be used with the + * fake memslots that are identity mapping userspace. + * This allows ucontrol VMs to use the normal fault + * resolution path, like normal VMs. + */ + mmap_read_lock(vcpu->arch.gmap->mm); + gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr); + mmap_read_unlock(vcpu->arch.gmap->mm); + if (gaddr_tmp == -EFAULT) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = gaddr; + vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION; + return -EREMOTE; + } + gfn = gpa_to_gfn(gaddr_tmp); + } + return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags); +} + static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { unsigned int flags = 0; unsigned long gaddr; - int rc = 0; + int rc; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) @@ -4780,30 +4967,16 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case 0: vcpu->stat.exit_null++; break; - case PGM_NON_SECURE_STORAGE_ACCESS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); - /* - * This is normal operation; a page belonging to a protected - * guest has not been imported yet. Try to import the page into - * the protected guest. - */ - if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL) - send_sig(SIGSEGV, current, 0); - break; case PGM_SECURE_STORAGE_ACCESS: case PGM_SECURE_STORAGE_VIOLATION: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); + kvm_s390_assert_primary_as(vcpu); /* * This can happen after a reboot with asynchronous teardown; * the new guest (normal or protected) will run on top of the * previous protected guest. The old pages need to be destroyed * so the new guest can use them. */ - if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) { + if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) { /* * Either KVM messed up the secure guest mapping or the * same page is mapped into multiple secure guests. @@ -4818,6 +4991,20 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) send_sig(SIGSEGV, current, 0); } break; + case PGM_NON_SECURE_STORAGE_ACCESS: + kvm_s390_assert_primary_as(vcpu); + /* + * This is normal operation; a page belonging to a protected + * guest has not been imported yet. Try to import the page into + * the protected guest. + */ + rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr); + if (rc == -EINVAL) + send_sig(SIGSEGV, current, 0); + if (rc != -ENXIO) + break; + flags = FAULT_FLAG_WRITE; + fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: case PGM_PAGE_TRANSLATION: @@ -4825,40 +5012,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_FIRST_TRANS: case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); - if (vcpu->arch.gmap->pfault_enabled) { - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT); - if (rc == -EFAULT) - return vcpu_post_run_addressing_exception(vcpu); - if (rc == -EAGAIN) { - trace_kvm_s390_major_guest_pfault(vcpu); - if (kvm_arch_setup_async_pf(vcpu)) - return 0; - vcpu->stat.pfault_sync++; - } else { - return rc; - } - } - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags); - if (rc == -EFAULT) { - if (kvm_is_ucontrol(vcpu->kvm)) { - vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; - vcpu->run->s390_ucontrol.trans_exc_code = gaddr; - vcpu->run->s390_ucontrol.pgm_code = 0x10; - return -EREMOTE; - } - return vcpu_post_run_addressing_exception(vcpu); - } - break; + kvm_s390_assert_primary_as(vcpu); + return vcpu_dat_fault_handler(vcpu, gaddr, flags); default: KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", current->thread.gmap_int_code, current->thread.gmap_teid.val); send_sig(SIGSEGV, current, 0); break; } - return rc; + return 0; } static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) @@ -4901,6 +5063,30 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) return vcpu_post_run_handle_fault(vcpu); } +int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb, + u64 *gprs, unsigned long gasce) +{ + int ret; + + guest_state_enter_irqoff(); + + /* + * The guest_state_{enter,exit}_irqoff() functions inform lockdep and + * tracing that entry to the guest will enable host IRQs, and exit from + * the guest will disable host IRQs. + * + * We must not use lockdep/tracing/RCU in this critical section, so we + * use the low-level arch_local_irq_*() helpers to enable/disable IRQs. + */ + arch_local_irq_enable(); + ret = sie64a(scb, gprs, gasce); + arch_local_irq_disable(); + + guest_state_exit_irqoff(); + + return ret; +} + #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) static int __vcpu_run(struct kvm_vcpu *vcpu) { @@ -4921,20 +5107,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) kvm_vcpu_srcu_read_unlock(vcpu); /* * As PF_VCPU will be used in fault handler, between - * guest_enter and guest_exit should be no uaccess. + * guest_timing_enter_irqoff and guest_timing_exit_irqoff + * should be no uaccess. */ - local_irq_disable(); - guest_enter_irqoff(); - __disable_cpu_timer_accounting(vcpu); - local_irq_enable(); if (kvm_s390_pv_cpu_is_protected(vcpu)) { memcpy(sie_page->pv_grregs, vcpu->run->s.regs.gprs, sizeof(sie_page->pv_grregs)); } - exit_reason = sie64a(vcpu->arch.sie_block, - vcpu->run->s.regs.gprs, - vcpu->arch.gmap->asce); + + local_irq_disable(); + guest_timing_enter_irqoff(); + __disable_cpu_timer_accounting(vcpu); + + exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block, + vcpu->run->s.regs.gprs, + vcpu->arch.gmap->asce); + + __enable_cpu_timer_accounting(vcpu); + guest_timing_exit_irqoff(); + local_irq_enable(); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { memcpy(vcpu->run->s.regs.gprs, sie_page->pv_grregs, @@ -4950,10 +5143,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; } } - local_irq_disable(); - __enable_cpu_timer_accounting(vcpu); - guest_exit_irqoff(); - local_irq_enable(); kvm_vcpu_srcu_read_lock(vcpu); rc = vcpu_post_run(vcpu, exit_reason); @@ -5019,7 +5208,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; } - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { preempt_disable(); local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (current->thread.gs_cb) { @@ -5085,7 +5274,7 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu) kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { preempt_disable(); local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (vcpu->arch.gs_enabled) @@ -5737,7 +5926,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(vcpu->arch.gmap, arg, 0); + idx = srcu_read_lock(&vcpu->kvm->srcu); + r = vcpu_dat_fault_handler(vcpu, arg, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_ENABLE_CAP: @@ -5853,7 +6044,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { gpa_t size; - if (kvm_is_ucontrol(kvm)) + if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS) return -EINVAL; /* When we are protected, we should not change the memory slots */ @@ -5905,6 +6096,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, { int rc = 0; + if (kvm_is_ucontrol(kvm)) + return; + switch (change) { case KVM_MR_DELETE: rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 597d7a71deeb..c44fe0c3a097 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -20,6 +20,8 @@ #include <asm/processor.h> #include <asm/sclp.h> +#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0) + static inline void kvm_s390_fpu_store(struct kvm_run *run) { fpu_stfpc(&run->s.regs.fpc); @@ -279,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) return gd; } +static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa) +{ + hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + + if (!kvm_is_error_hva(hva)) + hva |= offset_in_page(gpa); + return hva; +} + /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); @@ -297,6 +308,9 @@ int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user, u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc); int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user, u16 *rc, u16 *rrc); +int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr); +int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr); +int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb); static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm) { @@ -308,6 +322,41 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu) return vcpu->arch.pv.handle; } +/** + * __kvm_s390_pv_destroy_page() - Destroy a guest page. + * @page: the page to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: must be called holding the mm lock for gmap->mm + */ +static inline int __kvm_s390_pv_destroy_page(struct page *page) +{ + struct folio *folio = page_folio(page); + int rc; + + /* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. */ + if (folio_test_large(folio)) + return -EFAULT; + + rc = uv_destroy_folio(folio); + /* + * Fault handlers can race; it is possible that two CPUs will fault + * on the same secure page. One CPU can destroy the page, reboot, + * re-enter secure mode and import it, while the second CPU was + * stuck at the beginning of the handler. At some point the second + * CPU will be able to progress, and it will not be able to destroy + * the page. In that case we do not want to terminate the process, + * we instead try to export the page. + */ + if (rc) + rc = uv_convert_from_secure_folio(folio); + + return rc; +} + /* implemented in interrupt.c */ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); @@ -387,6 +436,10 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); void kvm_s390_vsie_init(struct kvm *kvm); void kvm_s390_vsie_destroy(struct kvm *kvm); +int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level); + +/* implemented in gmap-vsie.c */ +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); @@ -408,6 +461,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc); +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags); +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits); + +static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags) +{ + return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags); +} /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 9b9e7fdd5380..8c40154ff50f 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -433,7 +433,6 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { struct zpci_dev *zdev = opaque; - u8 status; int rc; if (!zdev) @@ -480,13 +479,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) */ zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); - rc = zpci_enable_device(zdev); - if (rc) - goto clear_gisa; - - /* Re-register the IOMMU that was already created */ - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); + rc = zpci_reenable_device(zdev); if (rc) goto clear_gisa; @@ -516,7 +509,6 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) { struct zpci_dev *zdev = opaque; struct kvm *kvm; - u8 status; if (!zdev) return; @@ -550,12 +542,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) goto out; } - if (zpci_enable_device(zdev)) - goto out; - - /* Re-register the IOMMU that was already created */ - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); + zpci_reenable_device(zdev); out: spin_lock(&kvm->arch.kzdev_list_lock); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 1a49b89706f8..9253c70897a8 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1248,6 +1248,8 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc) static int handle_essa(struct kvm_vcpu *vcpu) { + lockdep_assert_held(&vcpu->kvm->srcu); + /* entries expected to be 1FF */ int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; unsigned long *cbrlo; @@ -1297,12 +1299,8 @@ static int handle_essa(struct kvm_vcpu *vcpu) /* Retry the ESSA instruction */ kvm_s390_retry_instr(vcpu); } else { - int srcu_idx; - mmap_read_lock(vcpu->kvm->mm); - srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); i = __do_essa(vcpu, orc); - srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); mmap_read_unlock(vcpu->kvm->mm); if (i < 0) return i; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 75e81ba26d04..25ede8354514 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -5,6 +5,8 @@ * Copyright IBM Corp. 2019, 2020 * Author(s): Janosch Frank <frankja@linux.ibm.com> */ + +#include <linux/export.h> #include <linux/kvm.h> #include <linux/kvm_host.h> #include <linux/minmax.h> @@ -33,6 +35,64 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected); /** + * kvm_s390_pv_make_secure() - make one guest page secure + * @kvm: the guest + * @gaddr: the guest address that needs to be made secure + * @uvcb: the UVCB specifying which operation needs to be performed + * + * Context: needs to be called with kvm->srcu held. + * Return: 0 on success, < 0 in case of error. + */ +int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb) +{ + unsigned long vmaddr; + + lockdep_assert_held(&kvm->srcu); + + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) + return -EFAULT; + return make_hva_secure(kvm->mm, vmaddr, uvcb); +} + +int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr) +{ + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, + .header.len = sizeof(uvcb), + .guest_handle = kvm_s390_pv_get_handle(kvm), + .gaddr = gaddr, + }; + + return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb); +} + +/** + * kvm_s390_pv_destroy_page() - Destroy a guest page. + * @kvm: the guest + * @gaddr: the guest address to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: may sleep. + */ +int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr) +{ + struct page *page; + int rc = 0; + + mmap_read_lock(kvm->mm); + page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); + if (page) + rc = __kvm_s390_pv_destroy_page(page); + kvm_release_page_clean(page); + mmap_read_unlock(kvm->mm); + return rc; +} + +/** * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to * be destroyed * @@ -637,11 +697,29 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak, .tweak[0] = tweak, .tweak[1] = offset, }; - int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb); + int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb); + unsigned long vmaddr; + bool unlocked; *rc = uvcb.header.rc; *rrc = uvcb.header.rrc; + if (ret == -ENXIO) { + mmap_read_lock(kvm->mm); + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(vmaddr)) { + ret = -EFAULT; + } else { + ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!ret) + ret = __gmap_link(kvm->arch.gmap, addr, vmaddr); + } + mmap_read_unlock(kvm->mm); + if (!ret) + return -EAGAIN; + return ret; + } + if (ret && ret != -EAGAIN) KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x", uvcb.gaddr, *rc, *rrc); @@ -660,6 +738,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size, KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx", addr, size); + guard(srcu)(&kvm->srcu); + while (offset < size) { ret = unpack_one(kvm, addr, tweak, offset, rc, rrc); if (ret == -EAGAIN) { diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 9ac92dbf680d..9e28f165c114 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + TP_printk("create cpu %d at 0x%p, sie block at 0x%p", __entry->id, __entry->vcpu, __entry->sie_block) ); @@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %pK)\n", + TP_printk("enabling channel I/O support (kvm @ %p)\n", __entry->kvm) ); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 150b9387860a..347268f89f2f 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -13,6 +13,7 @@ #include <linux/bitmap.h> #include <linux/sched/signal.h> #include <linux/io.h> +#include <linux/mman.h> #include <asm/gmap.h> #include <asm/mmu_context.h> @@ -23,6 +24,10 @@ #include "kvm-s390.h" #include "gaccess.h" +enum vsie_page_flags { + VSIE_PAGE_IN_USE = 0, +}; + struct vsie_page { struct kvm_s390_sie_block scb_s; /* 0x0000 */ /* @@ -46,11 +51,40 @@ struct vsie_page { gpa_t gvrd_gpa; /* 0x0240 */ gpa_t riccbd_gpa; /* 0x0248 */ gpa_t sdnx_gpa; /* 0x0250 */ - __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */ + /* + * guest address of the original SCB. Remains set for free vsie + * pages, so we can properly look them up in our addr_to_page + * radix tree. + */ + gpa_t scb_gpa; /* 0x0258 */ + /* + * Flags: must be set/cleared atomically after the vsie page can be + * looked up by other CPUs. + */ + unsigned long flags; /* 0x0260 */ + __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */ struct kvm_s390_crypto_cb crycb; /* 0x0700 */ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ }; +/** + * gmap_shadow_valid() - check if a shadow guest address space matches the + * given properties and is still valid + * @sg: pointer to the shadow guest address space structure + * @asce: ASCE for which the shadow table is requested + * @edat_level: edat level to be used for the shadow translation + * + * Returns 1 if the gmap shadow is still valid and matches the given + * properties, the caller can continue using it. Returns 0 otherwise; the + * caller has to request a new shadow gmap in this case. + */ +int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) +{ + if (sg->removed) + return 0; + return sg->orig_asce == asce && sg->edat_level == edat_level; +} + /* trigger a validity icpt for the given scb */ static int set_validity_icpt(struct kvm_s390_sie_block *scb, __u16 reason_code) @@ -584,7 +618,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, struct kvm *kvm = gmap->private; struct vsie_page *cur; unsigned long prefix; - struct page *page; int i; if (!gmap_is_shadow(gmap)) @@ -594,10 +627,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, * therefore we can safely reference them all the time. */ for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = READ_ONCE(kvm->arch.vsie.pages[i]); - if (!page) + cur = READ_ONCE(kvm->arch.vsie.pages[i]); + if (!cur) continue; - cur = page_to_virt(page); if (READ_ONCE(cur->gmap) != gmap) continue; prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT; @@ -854,7 +886,7 @@ unpin: static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, gpa_t gpa) { - hpa_t hpa = (hpa_t) vsie_page->scb_o; + hpa_t hpa = virt_to_phys(vsie_page->scb_o); if (hpa) unpin_guest_page(vcpu->kvm, gpa, hpa); @@ -1138,10 +1170,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) vcpu->arch.sie_block->fpf & FPF_BPBC) set_thread_flag(TIF_ISOLATE_BP_GUEST); - local_irq_disable(); - guest_enter_irqoff(); - local_irq_enable(); - /* * Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking * and VCPU requests also hinder the vSIE from running and lead @@ -1151,15 +1179,16 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) vcpu->arch.sie_block->prog0c |= PROG_IN_SIE; current->thread.gmap_int_code = 0; barrier(); - if (!kvm_s390_vcpu_sie_inhibited(vcpu)) - rc = sie64a(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce); + if (!kvm_s390_vcpu_sie_inhibited(vcpu)) { + local_irq_disable(); + guest_timing_enter_irqoff(); + rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce); + guest_timing_exit_irqoff(); + local_irq_enable(); + } barrier(); vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE; - local_irq_disable(); - guest_exit_irqoff(); - local_irq_enable(); - /* restore guest state for bp isolation override */ if (!guest_bp_isolation) clear_thread_flag(TIF_ISOLATE_BP_GUEST); @@ -1345,6 +1374,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) return rc; } +/* Try getting a given vsie page, returning "true" on success. */ +static inline bool try_get_vsie_page(struct vsie_page *vsie_page) +{ + if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags)) + return false; + return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + +/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */ +static void put_vsie_page(struct vsie_page *vsie_page) +{ + clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + /* * Get or create a vsie page for a scb address. * @@ -1355,16 +1398,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) { struct vsie_page *vsie_page; - struct page *page; int nr_vcpus; rcu_read_lock(); - page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); + vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); rcu_read_unlock(); - if (page) { - if (page_ref_inc_return(page) == 2) - return page_to_virt(page); - page_ref_dec(page); + if (vsie_page) { + if (try_get_vsie_page(vsie_page)) { + if (vsie_page->scb_gpa == addr) + return vsie_page; + /* + * We raced with someone reusing + putting this vsie + * page before we grabbed it. + */ + put_vsie_page(vsie_page); + } } /* @@ -1375,36 +1423,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_lock(&kvm->arch.vsie.mutex); if (kvm->arch.vsie.page_count < nr_vcpus) { - page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); - if (!page) { + vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); + if (!vsie_page) { mutex_unlock(&kvm->arch.vsie.mutex); return ERR_PTR(-ENOMEM); } - page_ref_inc(page); - kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page; + __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); + kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page; kvm->arch.vsie.page_count++; } else { /* reuse an existing entry that belongs to nobody */ while (true) { - page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; - if (page_ref_inc_return(page) == 2) + vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; + if (try_get_vsie_page(vsie_page)) break; - page_ref_dec(page); kvm->arch.vsie.next++; kvm->arch.vsie.next %= nr_vcpus; } - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + if (vsie_page->scb_gpa != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + vsie_page->scb_gpa >> 9); } - page->index = addr; - /* double use of the same address */ - if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { - page_ref_dec(page); + /* Mark it as invalid until it resides in the tree. */ + vsie_page->scb_gpa = ULONG_MAX; + + /* Double use of the same address or allocation failure. */ + if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, + vsie_page)) { + put_vsie_page(vsie_page); mutex_unlock(&kvm->arch.vsie.mutex); return NULL; } + vsie_page->scb_gpa = addr; mutex_unlock(&kvm->arch.vsie.mutex); - vsie_page = page_to_virt(page); memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block)); release_gmap_shadow(vsie_page); vsie_page->fault_addr = 0; @@ -1412,14 +1464,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) return vsie_page; } -/* put a vsie page acquired via get_vsie_page */ -static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page) -{ - struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT); - - page_ref_dec(page); -} - int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) { struct vsie_page *vsie_page; @@ -1470,7 +1514,7 @@ out_unshadow: out_unpin_scb: unpin_scb(vcpu, vsie_page, scb_addr); out_put: - put_vsie_page(vcpu->kvm, vsie_page); + put_vsie_page(vsie_page); return rc < 0 ? rc : 0; } @@ -1486,18 +1530,18 @@ void kvm_s390_vsie_init(struct kvm *kvm) void kvm_s390_vsie_destroy(struct kvm *kvm) { struct vsie_page *vsie_page; - struct page *page; int i; mutex_lock(&kvm->arch.vsie.mutex); for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = kvm->arch.vsie.pages[i]; + vsie_page = kvm->arch.vsie.pages[i]; kvm->arch.vsie.pages[i] = NULL; - vsie_page = page_to_virt(page); release_gmap_shadow(vsie_page); /* free the radix tree entry */ - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); - __free_page(page); + if (vsie_page->scb_gpa != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + vsie_page->scb_gpa >> 9); + free_page((unsigned long)vsie_page); } kvm->arch.vsie.page_count = 0; mutex_unlock(&kvm->arch.vsie.mutex); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index be14c58cb989..c1ea14e3c927 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -7,6 +7,7 @@ */ #include <linux/processor.h> +#include <linux/export.h> #include <linux/delay.h> #include <asm/div64.h> #include <asm/timex.h> diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 08f60a42b9a6..d026debf250c 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -34,8 +34,7 @@ SYM_FUNC_START(__memmove) la %r3,256(%r3) brctg %r0,.Lmemmove_forward_loop .Lmemmove_forward_remainder: - larl %r5,.Lmemmove_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemmove_mvc .Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: @@ -83,8 +82,7 @@ SYM_FUNC_START(__memset) la %r1,256(%r1) brctg %r3,.Lmemset_clear_loop .Lmemset_clear_remainder: - larl %r3,.Lmemset_xc - ex %r4,0(%r3) + exrl %r4,.Lmemset_xc .Lmemset_exit: BR_EX %r14 .Lmemset_fill: @@ -102,8 +100,7 @@ SYM_FUNC_START(__memset) brctg %r5,.Lmemset_fill_loop .Lmemset_fill_remainder: stc %r3,0(%r1) - larl %r5,.Lmemset_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemset_mvc BR_EX %r14 .Lmemset_fill_exit: stc %r3,0(%r1) @@ -132,8 +129,7 @@ SYM_FUNC_START(__memcpy) lgr %r1,%r2 jnz .Lmemcpy_loop .Lmemcpy_remainder: - larl %r5,.Lmemcpy_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemcpy_mvc .Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: @@ -175,8 +171,7 @@ SYM_FUNC_START(__memset\bits) brctg %r5,.L__memset_loop\bits .L__memset_remainder\bits: \insn %r3,0(%r1) - larl %r5,.L__memset_mvc\bits - ex %r4,0(%r5) + exrl %r4,.L__memset_mvc\bits BR_EX %r14 .L__memset_store\bits: \insn %r3,0(%r2) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index a81a01c44927..ad9da4038511 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -10,11 +10,13 @@ #include <linux/export.h> #include <linux/spinlock.h> #include <linux/jiffies.h> +#include <linux/sysctl.h> #include <linux/init.h> #include <linux/smp.h> #include <linux/percpu.h> #include <linux/io.h> #include <asm/alternative.h> +#include <asm/machine.h> #include <asm/asm.h> int spin_retry = -1; @@ -37,6 +39,23 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); +static const struct ctl_table s390_spin_sysctl_table[] = { + { + .procname = "spin_retry", + .data = &spin_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int __init init_s390_spin_sysctls(void) +{ + register_sysctl_init("kernel", s390_spin_sysctl_table); + return 0; +} +arch_initcall(init_s390_spin_sysctls); + struct spin_wait { struct spin_wait *next, *prev; int node_id; @@ -141,7 +160,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) ix = get_lowcore()->spinlock_index++; barrier(); - lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ + lockval = spinlock_lockval(); /* cpu + 1 */ node = this_cpu_ptr(&spin_wait[ix]); node->prev = node->next = NULL; node_id = node->node_id; @@ -212,7 +231,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) if (count-- >= 0) continue; count = spin_retry; - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1)) smp_yield_cpu(owner - 1); } @@ -232,7 +251,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) { int lockval, old, new, owner, count; - lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ + lockval = spinlock_lockval(); /* cpu + 1 */ /* Pass the virtual CPU to the lock holder if it is not running */ owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL); @@ -255,7 +274,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) if (count-- >= 0) continue; count = spin_retry; - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1)) smp_yield_cpu(owner - 1); } } @@ -271,7 +290,7 @@ EXPORT_SYMBOL(arch_spin_lock_wait); int arch_spin_trylock_retry(arch_spinlock_t *lp) { - int cpu = SPINLOCK_LOCKVAL; + int cpu = spinlock_lockval(); int owner, count; for (count = spin_retry; count > 0; count--) { @@ -337,7 +356,7 @@ void arch_spin_relax(arch_spinlock_t *lp) cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK; if (!cpu) return; - if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1)) + if (machine_is_lpar() && !arch_vcpu_is_preempted(cpu - 1)) return; smp_yield_cpu(cpu - 1); } diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 373fa1f01937..099de76e8b1a 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -78,50 +78,6 @@ EXPORT_SYMBOL(strnlen); #endif /** - * strcpy - Copy a %NUL terminated string - * @dest: Where to copy the string to - * @src: Where to copy the string from - * - * returns a pointer to @dest - */ -#ifdef __HAVE_ARCH_STRCPY -char *strcpy(char *dest, const char *src) -{ - char *ret = dest; - - asm volatile( - " lghi 0,0\n" - "0: mvst %[dest],%[src]\n" - " jo 0b\n" - : [dest] "+&a" (dest), [src] "+&a" (src) - : - : "cc", "memory", "0"); - return ret; -} -EXPORT_SYMBOL(strcpy); -#endif - -/** - * strncpy - Copy a length-limited, %NUL-terminated string - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @n: The maximum number of bytes to copy - * - * The result is not %NUL-terminated if the source exceeds - * @n bytes. - */ -#ifdef __HAVE_ARCH_STRNCPY -char *strncpy(char *dest, const char *src, size_t n) -{ - size_t len = __strnend(src, n) - src; - memset(dest + len, 0, n - len); - memcpy(dest, src, len); - return dest; -} -EXPORT_SYMBOL(strncpy); -#endif - -/** * strcat - Append one %NUL-terminated string to another * @dest: The string to be appended to * @src: The string to append to it @@ -181,9 +137,6 @@ EXPORT_SYMBOL(strlcat); * @n: The maximum numbers of bytes to copy * * returns a pointer to @dest - * - * Note that in contrast to strncpy, strncat ensures the result is - * terminated. */ #ifdef __HAVE_ARCH_STRNCAT char *strncat(char *dest, const char *src, size_t n) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index c7c269d5c491..1a6ba105e071 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -8,73 +8,85 @@ * Gerald Schaefer (gerald.schaefer@de.ibm.com) */ +#include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/export.h> #include <linux/mm.h> #include <asm/asm-extable.h> #include <asm/ctlreg.h> +#include <asm/skey.h> #ifdef CONFIG_DEBUG_ENTRY void debug_user_asce(int exit) { + struct lowcore *lc = get_lowcore(); struct ctlreg cr1, cr7; local_ctl_store(1, &cr1); local_ctl_store(7, &cr7); - if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val) + if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val) return; panic("incorrect ASCE on kernel %s\n" "cr1: %016lx cr7: %016lx\n" "kernel: %016lx user: %016lx\n", exit ? "exit" : "entry", cr1.val, cr7.val, - get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val); + lc->kernel_asce.val, lc->user_asce.val); } #endif /*CONFIG_DEBUG_ENTRY */ -static unsigned long raw_copy_from_user_key(void *to, const void __user *from, - unsigned long size, unsigned long key) +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; + +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) { - unsigned long rem; + unsigned long osize; union oac spec = { .oac2.key = key, .oac2.as = PSW_BITS_AS_SECONDARY, .oac2.k = 1, .oac2.a = 1, }; + int cc; - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[from],%[val]\n" - " slgr %[to],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ - " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ - " slgr %[rem],%[from]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - return raw_copy_from_user_key(to, from, n, 0); + while (1) { + osize = size; + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_FROM(0b, 0b) + EX_TABLE_UA_MVCOS_FROM(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to) + : [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } } -EXPORT_SYMBOL(raw_copy_from_user); unsigned long _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) @@ -93,50 +105,37 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, } EXPORT_SYMBOL(_copy_from_user_key); -static unsigned long raw_copy_to_user_key(void __user *to, const void *from, - unsigned long size, unsigned long key) +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) { - unsigned long rem; + unsigned long osize; union oac spec = { .oac1.key = key, .oac1.as = PSW_BITS_AS_SECONDARY, .oac1.k = 1, .oac1.a = 1, }; + int cc; - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[to],%[val]\n" - " slgr %[from],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ - " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ - " slgr %[rem],%[to]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - return raw_copy_to_user_key(to, from, n, 0); + while (1) { + osize = size; + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "d" (spec.val), [from] "Q" (*(const char *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } } -EXPORT_SYMBOL(raw_copy_to_user); unsigned long _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) @@ -149,38 +148,188 @@ unsigned long _copy_to_user_key(void __user *to, const void *from, } EXPORT_SYMBOL(_copy_to_user_key); -unsigned long __clear_user(void __user *to, unsigned long size) +#define CMPXCHG_USER_KEY_MAX_LOOPS 128 + +static nokprobe_inline int __cmpxchg_user_key_small(unsigned long address, unsigned int *uval, + unsigned int old, unsigned int new, + unsigned int mask, unsigned long key) { - unsigned long rem; - union oac spec = { - .oac1.as = PSW_BITS_AS_SECONDARY, - .oac1.a = 1, - }; + unsigned long count; + unsigned int prev; + bool sacf_flag; + int rc = 0; + + skey_regions_initialize(); + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( + "20: spka 0(%[key])\n" + " sacf 256\n" + " llill %[count],%[max_loops]\n" + "0: l %[prev],%[address]\n" + "1: nr %[prev],%[mask]\n" + " xilf %[mask],0xffffffff\n" + " or %[new],%[prev]\n" + " or %[prev],%[tmp]\n" + "2: lr %[tmp],%[prev]\n" + "3: cs %[prev],%[new],%[address]\n" + "4: jnl 5f\n" + " xr %[tmp],%[prev]\n" + " xr %[new],%[tmp]\n" + " nr %[tmp],%[mask]\n" + " jnz 5f\n" + " brct %[count],2b\n" + "5: sacf 768\n" + " spka %[default_key]\n" + "21:\n" + EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev]) + SKEY_REGION(20b, 21b) + : [rc] "+&d" (rc), + [prev] "=&d" (prev), + [address] "+Q" (*(int *)address), + [tmp] "+&d" (old), + [new] "+&d" (new), + [mask] "+&d" (mask), + [count] "=a" (count) + : [key] "%[count]" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY), + [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS) + : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); + *uval = prev; + if (!count) + rc = -EAGAIN; + return rc; +} + +int __kprobes __cmpxchg_user_key1(unsigned long address, unsigned char *uval, + unsigned char old, unsigned char new, unsigned long key) +{ + unsigned int prev, shift, mask, _old, _new; + int rc; + + shift = (3 ^ (address & 3)) << 3; + address ^= address & 3; + _old = (unsigned int)old << shift; + _new = (unsigned int)new << shift; + mask = ~(0xff << shift); + rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key); + *uval = prev >> shift; + return rc; +} +EXPORT_SYMBOL(__cmpxchg_user_key1); + +int __kprobes __cmpxchg_user_key2(unsigned long address, unsigned short *uval, + unsigned short old, unsigned short new, unsigned long key) +{ + unsigned int prev, shift, mask, _old, _new; + int rc; + + shift = (2 ^ (address & 2)) << 3; + address ^= address & 2; + _old = (unsigned int)old << shift; + _new = (unsigned int)new << shift; + mask = ~(0xffff << shift); + rc = __cmpxchg_user_key_small(address, &prev, _old, _new, mask, key); + *uval = prev >> shift; + return rc; +} +EXPORT_SYMBOL(__cmpxchg_user_key2); + +int __kprobes __cmpxchg_user_key4(unsigned long address, unsigned int *uval, + unsigned int old, unsigned int new, unsigned long key) +{ + unsigned int prev = old; + bool sacf_flag; + int rc = 0; + + skey_regions_initialize(); + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( + "20: spka 0(%[key])\n" + " sacf 256\n" + "0: cs %[prev],%[new],%[address]\n" + "1: sacf 768\n" + " spka %[default_key]\n" + "21:\n" + EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev]) + SKEY_REGION(20b, 21b) + : [rc] "+&d" (rc), + [prev] "+&d" (prev), + [address] "+Q" (*(int *)address) + : [new] "d" (new), + [key] "a" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); + *uval = prev; + return rc; +} +EXPORT_SYMBOL(__cmpxchg_user_key4); + +int __kprobes __cmpxchg_user_key8(unsigned long address, unsigned long *uval, + unsigned long old, unsigned long new, unsigned long key) +{ + unsigned long prev = old; + bool sacf_flag; + int rc = 0; + + skey_regions_initialize(); + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( + "20: spka 0(%[key])\n" + " sacf 256\n" + "0: csg %[prev],%[new],%[address]\n" + "1: sacf 768\n" + " spka %[default_key]\n" + "21:\n" + EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev]) + SKEY_REGION(20b, 21b) + : [rc] "+&d" (rc), + [prev] "+&d" (prev), + [address] "+QS" (*(long *)address) + : [new] "d" (new), + [key] "a" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); + *uval = prev; + return rc; +} +EXPORT_SYMBOL(__cmpxchg_user_key8); + +int __kprobes __cmpxchg_user_key16(unsigned long address, __uint128_t *uval, + __uint128_t old, __uint128_t new, unsigned long key) +{ + __uint128_t prev = old; + bool sacf_flag; + int rc = 0; - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[to],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ - " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ - " slgr %[rem],%[to]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[zeropg]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; + skey_regions_initialize(); + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( + "20: spka 0(%[key])\n" + " sacf 256\n" + "0: cdsg %[prev],%[new],%[address]\n" + "1: sacf 768\n" + " spka %[default_key]\n" + "21:\n" + EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev]) + SKEY_REGION(20b, 21b) + : [rc] "+&d" (rc), + [prev] "+&d" (prev), + [address] "+QS" (*(__int128_t *)address) + : [new] "d" (new), + [key] "a" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); + *uval = prev; + return rc; } -EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__cmpxchg_user_key16); diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index fb924a8041dc..ce7bcf7c0032 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -15,7 +15,6 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" @@ -25,12 +24,12 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, " la %1,256(%1)\n" " la %2,256(%2)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" + "1: exrl %0,2f\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:\n" : : "d" (bytes), "a" (p1), "a" (p2) - : "0", "1", "cc", "memory"); + : "0", "cc", "memory"); } static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, @@ -38,9 +37,8 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p3) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 4f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -50,14 +48,14 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, " la %2,256(%2)\n" " la %3,256(%3)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " j 4f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, @@ -66,9 +64,8 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p4) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 5f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -80,16 +77,16 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, " la %3,256(%3)\n" " la %4,256(%4)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " j 5f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, @@ -101,7 +98,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, asm volatile( " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 6f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -115,19 +112,19 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, " la %4,256(%4)\n" " la %5,256(%5)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " ex %0,18(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " exrl %0,5f\n" + " j 6f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - " xc 0(1,%1),0(%5)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5: xc 0(1,%1),0(%5)\n" + "6:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), "+a" (p5) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } struct xor_block_template xor_block_xc = { diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f6c2db7a8669..bd0401cc7ca5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -9,6 +9,8 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o obj-$(CONFIG_PFAULT) += pfault.o + +obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index d01724a715d0..e2a6eb92420f 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -201,10 +201,10 @@ static void cmm_set_timer(void) { if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { if (timer_pending(&cmm_timer)) - del_timer(&cmm_timer); + timer_delete(&cmm_timer); return; } - mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC)); + mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds)); } static void cmm_timer_fn(struct timer_list *unused) @@ -332,7 +332,7 @@ static int cmm_timeout_handler(const struct ctl_table *ctl, int write, return 0; } -static struct ctl_table cmm_table[] = { +static const struct ctl_table cmm_table[] = { { .procname = "cmm_pages", .mode = 0644, @@ -424,7 +424,7 @@ out_smsg: #endif unregister_sysctl_table(cmm_sysctl_header); out_sysctl: - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); return rc; } module_init(cmm_init); @@ -437,7 +437,7 @@ static void __exit cmm_exit(void) #endif unregister_oom_notifier(&cmm_oom_nb); kthread_stop(cmm_thread_ptr); - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index fa54f3bc0c8d..9af2aae0a515 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 + +#include <linux/cpufeature.h> #include <linux/set_memory.h> #include <linux/ptdump.h> #include <linux/seq_file.h> @@ -82,7 +84,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) * in which case we have two lpswe instructions in lowcore that need * to be executable. */ - if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear))) + if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !cpu_has_bear())) return; WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX), "s390/mm: Found insecure W+X mapping at address %pS\n", @@ -145,11 +147,48 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } +static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte) +{ + note_page(pt_st, addr, 4, pte_val(pte)); +} + +static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd) +{ + note_page(pt_st, addr, 3, pmd_val(pmd)); +} + +static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud) +{ + note_page(pt_st, addr, 2, pud_val(pud)); +} + +static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d) +{ + note_page(pt_st, addr, 1, p4d_val(p4d)); +} + +static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd) +{ + note_page(pt_st, addr, 0, pgd_val(pgd)); +} + +static void note_page_flush(struct ptdump_state *pt_st) +{ + pte_t pte_zero = {0}; + + note_page(pt_st, 0, -1, pte_val(pte_zero)); +} + bool ptdump_check_wx(void) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 0, .end = max_addr}, {.start = 0, .end = 0}, @@ -167,7 +206,7 @@ bool ptdump_check_wx(void) }, }; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) return true; ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); if (st.wx_pages) { @@ -176,7 +215,7 @@ bool ptdump_check_wx(void) return false; } else { pr_info("Checked W+X mappings: passed, no %sW+X pages found\n", - (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ? + (nospec_uses_trampoline() || !cpu_has_bear()) ? "unexpected " : ""); return true; @@ -188,7 +227,12 @@ static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 0, .end = max_addr}, {.start = 0, .end = 0}, @@ -203,11 +247,9 @@ static int ptdump_show(struct seq_file *m, void *v) .marker = markers, }; - get_online_mems(); mutex_lock(&cpa_mutex); ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); mutex_unlock(&cpa_mutex); - put_online_mems(); return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 0a0738a473af..7498e858c401 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -7,6 +7,7 @@ #include <linux/panic.h> #include <asm/asm-extable.h> #include <asm/extable.h> +#include <asm/fpu.h> const struct exception_table_entry *s390_search_extables(unsigned long addr) { @@ -26,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r return true; } -static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs) +static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs) { unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); @@ -35,18 +36,6 @@ static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct p return true; } -static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs) -{ - unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); - unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); - size_t len = FIELD_GET(EX_DATA_LEN, ex->data); - - regs->gprs[reg_err] = -EFAULT; - memset((void *)regs->gprs[reg_addr], 0, len); - regs->psw.addr = extable_fixup(ex); - return true; -} - static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, bool pair, struct pt_regs *regs) { @@ -77,6 +66,56 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt return true; } +static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + fpu_sfpc(0); + regs->psw.addr = extable_fixup(ex); + return true; +} + +struct insn_ssf { + u64 opc1 : 8; + u64 r3 : 4; + u64 opc2 : 4; + u64 b1 : 4; + u64 d1 : 12; + u64 b2 : 4; + u64 d2 : 12; +} __packed; + +static bool ex_handler_ua_mvcos(const struct exception_table_entry *ex, + bool from, struct pt_regs *regs) +{ + unsigned long uaddr, remainder; + struct insn_ssf *insn; + + /* + * If the faulting user space access crossed a page boundary retry by + * limiting the access to the first page (adjust length accordingly). + * Then the mvcos instruction will either complete with condition code + * zero, or generate another fault where the user space access did not + * cross a page boundary. + * If the faulting user space access did not cross a page boundary set + * length to zero and retry. In this case no user space access will + * happen, and the mvcos instruction will complete with condition code + * zero. + * In both cases the instruction will complete with condition code + * zero (copying finished), and the register which contains the + * length, indicates the number of bytes copied. + */ + regs->psw.addr = extable_fixup(ex); + insn = (struct insn_ssf *)regs->psw.addr; + if (from) + uaddr = regs->gprs[insn->b2] + insn->d2; + else + uaddr = regs->gprs[insn->b1] + insn->d1; + remainder = PAGE_SIZE - (uaddr & (PAGE_SIZE - 1)); + if (regs->gprs[insn->r3] <= remainder) + remainder = 0; + regs->gprs[insn->r3] = remainder; + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -89,16 +128,20 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); - case EX_TYPE_UA_STORE: - return ex_handler_ua_store(ex, regs); - case EX_TYPE_UA_LOAD_MEM: - return ex_handler_ua_load_mem(ex, regs); + case EX_TYPE_UA_FAULT: + return ex_handler_ua_fault(ex, regs); case EX_TYPE_UA_LOAD_REG: return ex_handler_ua_load_reg(ex, false, regs); case EX_TYPE_UA_LOAD_REGPAIR: return ex_handler_ua_load_reg(ex, true, regs); case EX_TYPE_ZEROPAD: return ex_handler_zeropad(ex, regs); + case EX_TYPE_FPC: + return ex_handler_fpc(ex, regs); + case EX_TYPE_UA_MVCOS_TO: + return ex_handler_ua_mvcos(ex, false, regs); + case EX_TYPE_UA_MVCOS_FROM: + return ex_handler_ua_mvcos(ex, true, regs); } panic("invalid exception table entry"); } diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 4692136c0af1..f7da53e212f5 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -21,6 +21,7 @@ #include <linux/ioport.h> #include <linux/refcount.h> #include <linux/pgtable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/page.h> #include <asm/ebcdic.h> @@ -255,7 +256,7 @@ segment_type (char* name) int rc; struct dcss_segment seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; dcss_mkname(name, seg.dcss_name); @@ -418,7 +419,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, struct dcss_segment *seg; int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; mutex_lock(&dcss_lock); @@ -529,6 +530,14 @@ segment_modify_shared (char *name, int do_nonshared) return rc; } +static void __dcss_diag_purge_on_cpu_0(void *data) +{ + struct dcss_segment *seg = (struct dcss_segment *)data; + unsigned long dummy; + + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); +} + /* * Decrease the use count of a DCSS segment and remove * it from the address space if nobody is using it @@ -537,10 +546,9 @@ segment_modify_shared (char *name, int do_nonshared) void segment_unload(char *name) { - unsigned long dummy; struct dcss_segment *seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); @@ -555,7 +563,14 @@ segment_unload(char *name) kfree(seg->res); vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + /* + * Workaround for z/VM issue, where calling the DCSS unload diag on + * a non-IPL CPU would cause bogus sclp maximum memory detection on + * next IPL. + * IPL CPU 0 cannot be set offline, so the dcss_diag() call can + * directly be scheduled to that CPU. + */ + smp_call_function_single(0, __dcss_diag_purge_on_cpu_0, seg, 1); kfree(seg); out_unlock: mutex_unlock(&dcss_lock); @@ -572,7 +587,7 @@ segment_save(char *name) char cmd2[80]; int i, response; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9b681f74dccc..e1ad05bfd28a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -11,11 +11,11 @@ #include <linux/kernel_stat.h> #include <linux/mmu_context.h> +#include <linux/cpufeature.h> #include <linux/perf_event.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/debug.h> -#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> @@ -40,22 +40,11 @@ #include <asm/ptrace.h> #include <asm/fault.h> #include <asm/diag.h> -#include <asm/gmap.h> #include <asm/irq.h> #include <asm/facility.h> #include <asm/uv.h> #include "../kernel/entry.h" -static DEFINE_STATIC_KEY_FALSE(have_store_indication); - -static int __init fault_init(void) -{ - if (test_facility(75)) - static_branch_enable(&have_store_indication); - return 0; -} -early_initcall(fault_init); - /* * Find out which address space caused the exception. */ @@ -81,7 +70,7 @@ static __always_inline bool fault_is_write(struct pt_regs *regs) { union teid teid = { .val = regs->int_parm_long }; - if (static_branch_likely(&have_store_indication)) + if (test_facility(75)) return teid.fsi == TEID_FSI_STORE; return false; } @@ -175,6 +164,23 @@ static void dump_fault_info(struct pt_regs *regs) int show_unhandled_signals = 1; +static const struct ctl_table s390_fault_sysctl_table[] = { + { + .procname = "userprocess_debug", + .data = &show_unhandled_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int __init init_s390_fault_sysctls(void) +{ + register_sysctl_init("kernel", s390_fault_sysctl_table); + return 0; +} +arch_initcall(init_s390_fault_sysctls); + void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -369,6 +375,7 @@ void do_protection_exception(struct pt_regs *regs) if (unlikely(!teid.b61)) { if (user_mode(regs)) { /* Low-address protection in user mode: cannot happen */ + dump_fault_info(regs); die(regs, "Low-address protection"); } /* @@ -377,7 +384,7 @@ void do_protection_exception(struct pt_regs *regs) */ return handle_fault_error_nolock(regs, 0); } - if (unlikely(MACHINE_HAS_NX && teid.b56)) { + if (unlikely(cpu_has_nx() && teid.b56)) { regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK); return handle_fault_error_nolock(regs, SEGV_ACCERR); } @@ -434,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs) if (rc) BUG(); } else { + if (faulthandler_disabled()) + return handle_fault_error_nolock(regs, 0); mm = current->mm; mmap_read_lock(mm); vma = find_vma(mm, addr); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 16b8a36c56de..c7defe4ed1f6 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -8,6 +8,8 @@ * Janosch Frank <frankja@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/pagewalk.h> #include <linux/swap.h> @@ -20,9 +22,20 @@ #include <linux/pgtable.h> #include <asm/page-states.h> #include <asm/pgalloc.h> +#include <asm/machine.h> +#include <asm/gmap_helpers.h> #include <asm/gmap.h> #include <asm/page.h> -#include <asm/tlb.h> + +/* + * The address is saved in a radix tree directly; NULL would be ambiguous, + * since 0 is a valid address, and NULL is returned when nothing was found. + * The lower bits are ignored by all users of the macro, so it can be used + * to distinguish a valid address 0 from a NULL. + */ +#define VALID_GADDR_FLAG 1 +#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG) +#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG) #define GMAP_SHADOW_FAKE_TABLE 1ULL @@ -43,7 +56,7 @@ static struct page *gmap_alloc_crst(void) * * Returns a guest address space structure. */ -static struct gmap *gmap_alloc(unsigned long limit) +struct gmap *gmap_alloc(unsigned long limit) { struct gmap *gmap; struct page *page; @@ -70,9 +83,7 @@ static struct gmap *gmap_alloc(unsigned long limit) gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); if (!gmap) goto out; - INIT_LIST_HEAD(&gmap->crst_list); INIT_LIST_HEAD(&gmap->children); - INIT_LIST_HEAD(&gmap->pt_list); INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); @@ -82,8 +93,6 @@ static struct gmap *gmap_alloc(unsigned long limit) page = gmap_alloc_crst(); if (!page) goto out_free; - page->index = 0; - list_add(&page->lru, &gmap->crst_list); table = page_to_virt(page); crst_table_init(table, etype); gmap->table = table; @@ -97,6 +106,7 @@ out_free: out: return NULL; } +EXPORT_SYMBOL_GPL(gmap_alloc); /** * gmap_create - create a guest address space @@ -128,7 +138,7 @@ EXPORT_SYMBOL_GPL(gmap_create); static void gmap_flush_tlb(struct gmap *gmap) { - if (MACHINE_HAS_IDTE) + if (cpu_has_idte()) __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); @@ -185,32 +195,46 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) } while (nr > 0); } +static void gmap_free_crst(unsigned long *table, bool free_ptes) +{ + bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0; + int i; + + if (is_segment) { + if (!free_ptes) + goto out; + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _SEGMENT_ENTRY_INVALID)) + page_table_free_pgste(page_ptdesc(phys_to_page(table[i]))); + } else { + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _REGION_ENTRY_INVALID)) + gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes); + } + +out: + free_pages((unsigned long)table, CRST_ALLOC_ORDER); +} + /** * gmap_free - free a guest address space * @gmap: pointer to the guest address space structure * * No locks required. There are no references to this gmap anymore. */ -static void gmap_free(struct gmap *gmap) +void gmap_free(struct gmap *gmap) { - struct page *page, *next; - /* Flush tlb of all gmaps (if not already done for shadows) */ if (!(gmap_is_shadow(gmap) && gmap->removed)) gmap_flush_tlb(gmap); /* Free all segment & region tables. */ - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, CRST_ALLOC_ORDER); + gmap_free_crst(gmap->table, gmap_is_shadow(gmap)); + gmap_radix_tree_free(&gmap->guest_to_host); gmap_radix_tree_free(&gmap->host_to_guest); /* Free additional data for a shadow gmap */ if (gmap_is_shadow(gmap)) { - struct ptdesc *ptdesc, *n; - - /* Free all page tables. */ - list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list) - page_table_free_pgste(ptdesc); gmap_rmap_radix_tree_free(&gmap->host_to_rmap); /* Release reference to the parent */ gmap_put(gmap->parent); @@ -218,6 +242,7 @@ static void gmap_free(struct gmap *gmap) kfree(gmap); } +EXPORT_SYMBOL_GPL(gmap_free); /** * gmap_get - increase reference counter for guest address space @@ -298,10 +323,8 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, crst_table_init(new, init); spin_lock(&gmap->guest_table_lock); if (*table & _REGION_ENTRY_INVALID) { - list_add(&page->lru, &gmap->crst_list); *table = __pa(new) | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); - page->index = gaddr; page = NULL; } spin_unlock(&gmap->guest_table_lock); @@ -310,21 +333,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, return 0; } -/** - * __gmap_segment_gaddr - find virtual address from segment pointer - * @entry: pointer to a segment table entry in the guest address space - * - * Returns the virtual address in the guest address space for the segment - */ -static unsigned long __gmap_segment_gaddr(unsigned long *entry) +static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr) { - struct page *page; - unsigned long offset; + return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} + +static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr) +{ + return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} - offset = (unsigned long) entry / sizeof(unsigned long); - offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - page = pmd_pgtable_page((pmd_t *) entry); - return page->index + offset; +static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr, + unsigned long *gaddr) +{ + *gaddr = host_to_guest_delete(gmap, vmaddr); + if (IS_GADDR_VALID(*gaddr)) + return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1); + return NULL; } /** @@ -336,16 +361,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry) */ static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) { - unsigned long *entry; + unsigned long gaddr; int flush = 0; + pmd_t *pmdp; BUG_ON(gmap_is_shadow(gmap)); spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (entry) { - flush = (*entry != _SEGMENT_ENTRY_EMPTY); - *entry = _SEGMENT_ENTRY_EMPTY; + + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { + flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY); + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } + spin_unlock(&gmap->guest_table_lock); return flush; } @@ -464,26 +492,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) EXPORT_SYMBOL_GPL(__gmap_translate); /** - * gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - */ -unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_translate(gmap, gaddr); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_translate); - -/** * gmap_unlink - disconnect a page table from the gmap shadow tables * @mm: pointer to the parent mm_struct * @table: pointer to the host page table @@ -582,7 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) spin_lock(&gmap->guest_table_lock); if (*table == _SEGMENT_ENTRY_EMPTY) { rc = radix_tree_insert(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT, table); + vmaddr >> PMD_SHIFT, + (void *)MAKE_VALID_GADDR(gaddr)); if (!rc) { if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & @@ -605,193 +614,27 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) radix_tree_preload_end(); return rc; } - -/** - * fixup_user_fault_nowait - manually resolve a user page fault without waiting - * @mm: mm_struct of target mm - * @address: user address - * @fault_flags:flags to pass down to handle_mm_fault() - * @unlocked: did we unlock the mmap_lock while retrying - * - * This function behaves similarly to fixup_user_fault(), but it guarantees - * that the fault will be resolved without waiting. The function might drop - * and re-acquire the mm lock, in which case @unlocked will be set to true. - * - * The guarantee is that the fault is handled without waiting, but the - * function itself might sleep, due to the lock. - * - * Context: Needs to be called with mm->mmap_lock held in read mode, and will - * return with the lock held in read mode; @unlocked will indicate whether - * the lock has been dropped and re-acquired. This is the same behaviour as - * fixup_user_fault(). - * - * Return: 0 on success, -EAGAIN if the fault cannot be resolved without - * waiting, -EFAULT if the fault cannot be resolved, -ENOMEM if out of - * memory. - */ -static int fixup_user_fault_nowait(struct mm_struct *mm, unsigned long address, - unsigned int fault_flags, bool *unlocked) -{ - struct vm_area_struct *vma; - unsigned int test_flags; - vm_fault_t fault; - int rc; - - fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; - test_flags = fault_flags & FAULT_FLAG_WRITE ? VM_WRITE : VM_READ; - - vma = find_vma(mm, address); - if (unlikely(!vma || address < vma->vm_start)) - return -EFAULT; - if (unlikely(!(vma->vm_flags & test_flags))) - return -EFAULT; - - fault = handle_mm_fault(vma, address, fault_flags, NULL); - /* the mm lock has been dropped, take it again */ - if (fault & VM_FAULT_COMPLETED) { - *unlocked = true; - mmap_read_lock(mm); - return 0; - } - /* the mm lock has not been dropped */ - if (fault & VM_FAULT_ERROR) { - rc = vm_fault_to_errno(fault, 0); - BUG_ON(!rc); - return rc; - } - /* the mm lock has not been dropped because of FAULT_FLAG_RETRY_NOWAIT */ - if (fault & VM_FAULT_RETRY) - return -EAGAIN; - /* nothing needed to be done and the mm lock has not been dropped */ - return 0; -} - -/** - * __gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Context: Needs to be called with mm->mmap_lock held in read mode. Might - * drop and re-acquire the lock. Will always return with the lock held. - */ -static int __gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - unsigned long vmaddr; - bool unlocked; - int rc = 0; - -retry: - unlocked = false; - - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - - if (fault_flags & FAULT_FLAG_RETRY_NOWAIT) - rc = fixup_user_fault_nowait(gmap->mm, vmaddr, fault_flags, &unlocked); - else - rc = fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked); - if (rc) - return rc; - /* - * In the case that fixup_user_fault unlocked the mmap_lock during - * fault-in, redo __gmap_translate() to avoid racing with a - * map/unmap_segment. - * In particular, __gmap_translate(), fixup_user_fault{,_nowait}(), - * and __gmap_link() must all be called atomically in one go; if the - * lock had been dropped in between, a retry is needed. - */ - if (unlocked) - goto retry; - - return __gmap_link(gmap, gaddr, vmaddr); -} - -/** - * gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the - * vm address is already mapped to a different guest segment, and -EAGAIN if - * FAULT_FLAG_RETRY_NOWAIT was specified and the fault could not be processed - * immediately. - */ -int gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - int rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_fault(gmap, gaddr, fault_flags); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_fault); +EXPORT_SYMBOL(__gmap_link); /* * this function is assumed to be called with mmap_lock held */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { - struct vm_area_struct *vma; unsigned long vmaddr; - spinlock_t *ptl; - pte_t *ptep; + + mmap_assert_locked(gmap->mm); /* Find the vm address for the guest address */ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); if (vmaddr) { vmaddr |= gaddr & ~PMD_MASK; - - vma = vma_lookup(gmap->mm, vmaddr); - if (!vma || is_vm_hugetlb_page(vma)) - return; - - /* Get pointer to the page table entry */ - ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) { - ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); - } + gmap_helper_zap_one_page(gmap->mm, vmaddr); } } EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) -{ - unsigned long gaddr, vmaddr, size; - struct vm_area_struct *vma; - - mmap_read_lock(gmap->mm); - for (gaddr = from; gaddr < to; - gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - continue; - vmaddr |= gaddr & ~PMD_MASK; - /* Find vma in the parent mm */ - vma = find_vma(gmap->mm, vmaddr); - if (!vma) - continue; - /* - * We do not discard pages that are backed by - * hugetlbfs, so we don't have to refault them. - */ - if (is_vm_hugetlb_page(vma)) - continue; - size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range_single(vma, vmaddr, size, NULL); - } - mmap_read_unlock(gmap->mm); -} -EXPORT_SYMBOL_GPL(gmap_discard); - static LIST_HEAD(gmap_notifier_list); static DEFINE_SPINLOCK(gmap_notifier_lock); @@ -853,8 +696,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start, * * Note: Can also be called for shadow gmaps. */ -static inline unsigned long *gmap_table_walk(struct gmap *gmap, - unsigned long gaddr, int level) +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { const int asce_type = gmap->asce & _ASCE_TYPE_MASK; unsigned long *table = gmap->table; @@ -905,6 +747,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, } return table; } +EXPORT_SYMBOL(gmap_table_walk); /** * gmap_pte_op_walk - walk the gmap page table, get the page table lock @@ -1101,86 +944,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE * @bits: pgste notification bits to set * - * Returns 0 if successfully protected, -ENOMEM if out of memory and - * -EFAULT if gaddr is invalid (or mapping for shadows is missing). + * Returns: + * PAGE_SIZE if a small page was successfully protected; + * HPAGE_SIZE if a large page was successfully protected; + * -ENOMEM if out of memory; + * -EFAULT if gaddr is invalid (or mapping for shadows is missing); + * -EAGAIN if the guest mapping is missing and should be fixed by the caller. * - * Called with sg->mm->mmap_lock in read. + * Context: Called with sg->mm->mmap_lock in read. */ -static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot, unsigned long bits) +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits) { - unsigned long vmaddr, dist; pmd_t *pmdp; - int rc; + int rc = 0; BUG_ON(gmap_is_shadow(gmap)); - while (len) { - rc = -EAGAIN; - pmdp = gmap_pmd_op_walk(gmap, gaddr); - if (pmdp) { - if (!pmd_leaf(*pmdp)) { - rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - len -= PAGE_SIZE; - gaddr += PAGE_SIZE; - } - } else { - rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); - len = len < dist ? 0 : len - dist; - gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; - } - } - gmap_pmd_op_end(gmap, pmdp); - } - if (rc) { - if (rc == -EINVAL) - return rc; - /* -EAGAIN, fixup of userspace mm and gmap */ - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); - if (rc) - return rc; - } - } - return 0; -} + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (!pmdp) + return -EAGAIN; -/** - * gmap_mprotect_notify - change access rights for a range of ptes and - * call the notifier if any pte changes again - * @gmap: pointer to guest mapping meta data structure - * @gaddr: virtual address in the guest address space - * @len: size of area - * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE - * - * Returns 0 if for each page in the given range a gmap mapping exists, - * the new access rights could be set and the notifier could be armed. - * If the gmap mapping is missing for one or more pages -EFAULT is - * returned. If no memory could be allocated -ENOMEM is returned. - * This function establishes missing page table entries. - */ -int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot) -{ - int rc; + if (!pmd_leaf(*pmdp)) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = PAGE_SIZE; + } else { + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = HPAGE_SIZE; + } + gmap_pmd_op_end(gmap, pmdp); - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap)) - return -EINVAL; - if (!MACHINE_HAS_ESOP && prot == PROT_READ) - return -EINVAL; - mmap_read_lock(gmap->mm); - rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); - mmap_read_unlock(gmap->mm); return rc; } -EXPORT_SYMBOL_GPL(gmap_mprotect_notify); +EXPORT_SYMBOL_GPL(gmap_protect_one); /** * gmap_read_table - get an unsigned long value from a guest page table using @@ -1414,7 +1211,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } @@ -1442,7 +1238,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } } @@ -1472,7 +1267,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1500,7 +1294,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1530,7 +1323,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1558,7 +1350,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1588,7 +1379,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1620,7 +1410,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, r1t[i] = _REGION1_ENTRY_EMPTY; /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1631,7 +1420,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, * * Called with sg->guest_table_lock */ -static void gmap_unshadow(struct gmap *sg) +void gmap_unshadow(struct gmap *sg) { unsigned long *table; @@ -1657,143 +1446,7 @@ static void gmap_unshadow(struct gmap *sg) break; } } - -/** - * gmap_find_shadow - find a specific asce in the list of shadow tables - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * Returns the pointer to a gmap if a shadow table with the given asce is - * already available, ERR_PTR(-EAGAIN) if another one is just being created, - * otherwise NULL - */ -static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg; - - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce != asce || sg->edat_level != edat_level || - sg->removed) - continue; - if (!sg->initialized) - return ERR_PTR(-EAGAIN); - refcount_inc(&sg->ref_count); - return sg; - } - return NULL; -} - -/** - * gmap_shadow_valid - check if a shadow guest address space matches the - * given properties and is still valid - * @sg: pointer to the shadow guest address space structure - * @asce: ASCE for which the shadow table is requested - * @edat_level: edat level to be used for the shadow translation - * - * Returns 1 if the gmap shadow is still valid and matches the given - * properties, the caller can continue using it. Returns 0 otherwise, the - * caller has to request a new shadow gmap in this case. - * - */ -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) -{ - if (sg->removed) - return 0; - return sg->orig_asce == asce && sg->edat_level == edat_level; -} -EXPORT_SYMBOL_GPL(gmap_shadow_valid); - -/** - * gmap_shadow - create/find a shadow guest address space - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * The pages of the top level page table referred by the asce parameter - * will be set to read-only and marked in the PGSTEs of the kvm process. - * The shadow table will be removed automatically on any change to the - * PTE mapping for the source table. - * - * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, - * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the - * parent gmap table could not be protected. - */ -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg, *new; - unsigned long limit; - int rc; - - BUG_ON(parent->mm->context.allow_gmap_hpage_1m); - BUG_ON(gmap_is_shadow(parent)); - spin_lock(&parent->shadow_lock); - sg = gmap_find_shadow(parent, asce, edat_level); - spin_unlock(&parent->shadow_lock); - if (sg) - return sg; - /* Create a new shadow gmap */ - limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); - if (asce & _ASCE_REAL_SPACE) - limit = -1UL; - new = gmap_alloc(limit); - if (!new) - return ERR_PTR(-ENOMEM); - new->mm = parent->mm; - new->parent = gmap_get(parent); - new->private = parent->private; - new->orig_asce = asce; - new->edat_level = edat_level; - new->initialized = false; - spin_lock(&parent->shadow_lock); - /* Recheck if another CPU created the same shadow */ - sg = gmap_find_shadow(parent, asce, edat_level); - if (sg) { - spin_unlock(&parent->shadow_lock); - gmap_free(new); - return sg; - } - if (asce & _ASCE_REAL_SPACE) { - /* only allow one real-space gmap shadow */ - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce & _ASCE_REAL_SPACE) { - spin_lock(&sg->guest_table_lock); - gmap_unshadow(sg); - spin_unlock(&sg->guest_table_lock); - list_del(&sg->list); - gmap_put(sg); - break; - } - } - } - refcount_set(&new->ref_count, 2); - list_add(&new->list, &parent->children); - if (asce & _ASCE_REAL_SPACE) { - /* nothing to protect, return right away */ - new->initialized = true; - spin_unlock(&parent->shadow_lock); - return new; - } - spin_unlock(&parent->shadow_lock); - /* protect after insertion, so it will get properly invalidated */ - mmap_read_lock(parent->mm); - rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, - ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, - PROT_READ, GMAP_NOTIFY_SHADOW); - mmap_read_unlock(parent->mm); - spin_lock(&parent->shadow_lock); - new->initialized = true; - if (rc) { - list_del(&new->list); - gmap_free(new); - new = ERR_PTR(rc); - } - spin_unlock(&parent->shadow_lock); - return new; -} -EXPORT_SYMBOL_GPL(gmap_shadow); +EXPORT_SYMBOL(gmap_unshadow); /** * gmap_shadow_r2t - create an empty shadow region 2 table @@ -1827,9 +1480,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r2t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r2t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1851,7 +1501,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r2t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1911,9 +1560,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r3t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r3t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1935,7 +1581,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r3t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1995,9 +1640,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = sgt & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_sgt = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -2019,7 +1661,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= sgt & _REGION_ENTRY_PROTECT; - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -2052,45 +1693,22 @@ out_free: } EXPORT_SYMBOL_GPL(gmap_shadow_sgt); -/** - * gmap_shadow_pgt_lookup - find a shadow page table - * @sg: pointer to the shadow guest address space structure - * @saddr: the address in the shadow aguest address space - * @pgt: parent gmap address of the page table to get shadowed - * @dat_protection: if the pgtable is marked as protected by dat - * @fake: pgt references contiguous guest memory block, not a pgtable - * - * Returns 0 if the shadow page table was found and -EAGAIN if the page - * table was not found. - * - * Called with sg->mm->mmap_lock in read. - */ -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, - int *fake) +static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr) { - unsigned long *table; - struct page *page; - int rc; + unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc)); - BUG_ON(!gmap_is_shadow(sg)); - spin_lock(&sg->guest_table_lock); - table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ - if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { - /* Shadow page tables are full pages (pte+pgste) */ - page = pfn_to_page(*table >> PAGE_SHIFT); - *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE; - *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); - *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE); - rc = 0; - } else { - rc = -EAGAIN; - } - spin_unlock(&sg->guest_table_lock); - return rc; + pgstes += _PAGE_ENTRIES; + + pgstes[0] &= ~PGSTE_ST2_MASK; + pgstes[1] &= ~PGSTE_ST2_MASK; + pgstes[2] &= ~PGSTE_ST2_MASK; + pgstes[3] &= ~PGSTE_ST2_MASK; + pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK; + pgstes[1] |= pgt_addr & PGSTE_ST2_MASK; + pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK; + pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK; } -EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup); /** * gmap_shadow_pgt - instantiate a shadow page table @@ -2119,9 +1737,10 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, ptdesc = page_table_alloc_pgste(sg->mm); if (!ptdesc) return -ENOMEM; - ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN; + origin = pgt & _SEGMENT_ENTRY_ORIGIN; if (fake) - ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE; + origin |= GMAP_SHADOW_FAKE_TABLE; + gmap_pgste_set_pgt_addr(ptdesc, origin); s_pgt = page_to_phys(ptdesc_page(ptdesc)); /* Install shadow page table */ spin_lock(&sg->guest_table_lock); @@ -2140,7 +1759,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, /* mark as invalid as long as the parent table is not protected */ *table = (unsigned long) s_pgt | _SEGMENT_ENTRY | (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID; - list_add(&ptdesc->pt_list, &sg->pt_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_SEGMENT_ENTRY_INVALID; @@ -2318,7 +1936,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte, unsigned long bits) { unsigned long offset, gaddr = 0; - unsigned long *table; struct gmap *gmap, *sg, *next; offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); @@ -2326,12 +1943,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - table = radix_tree_lookup(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (table) - gaddr = __gmap_segment_gaddr(table) + offset; + gaddr = host_to_guest_lookup(gmap, vmaddr) + offset; spin_unlock(&gmap->guest_table_lock); - if (!table) + if (!IS_GADDR_VALID(gaddr)) continue; if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { @@ -2371,10 +1985,10 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, gaddr &= HPAGE_MASK; pmdp_notify_gmap(gmap, pmdp, gaddr); new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); @@ -2391,10 +2005,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); if (pmdp) { - gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | @@ -2438,28 +2050,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_csp); */ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2474,30 +2083,27 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local); */ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2612,9 +2218,6 @@ int s390_enable_sie(void) /* Do we have pgstes? if yes, we are done */ if (mm_has_pgste(mm)) return 0; - /* Fail if the page tables are 2K */ - if (!mm_alloc_pgste(mm)) - return -EINVAL; mmap_write_lock(mm); mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ @@ -2624,138 +2227,6 @@ int s390_enable_sie(void) } EXPORT_SYMBOL_GPL(s390_enable_sie); -static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, - unsigned long end, struct mm_walk *walk) -{ - unsigned long *found_addr = walk->private; - - /* Return 1 of the page is a zeropage. */ - if (is_zero_pfn(pte_pfn(*pte))) { - /* - * Shared zeropage in e.g., a FS DAX mapping? We cannot do the - * right thing and likely don't care: FAULT_FLAG_UNSHARE - * currently only works in COW mappings, which is also where - * mm_forbids_zeropage() is checked. - */ - if (!is_cow_mapping(walk->vma->vm_flags)) - return -EFAULT; - - *found_addr = addr; - return 1; - } - return 0; -} - -static const struct mm_walk_ops find_zeropage_ops = { - .pte_entry = find_zeropage_pte_entry, - .walk_lock = PGWALK_WRLOCK, -}; - -/* - * Unshare all shared zeropages, replacing them by anonymous pages. Note that - * we cannot simply zap all shared zeropages, because this could later - * trigger unexpected userfaultfd missing events. - * - * This must be called after mm->context.allow_cow_sharing was - * set to 0, to avoid future mappings of shared zeropages. - * - * mm contracts with s390, that even if mm were to remove a page table, - * and racing with walk_page_range_vma() calling pte_offset_map_lock() - * would fail, it will never insert a page table containing empty zero - * pages once mm_forbids_zeropage(mm) i.e. - * mm->context.allow_cow_sharing is set to 0. - */ -static int __s390_unshare_zeropages(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - unsigned long addr; - vm_fault_t fault; - int rc; - - for_each_vma(vmi, vma) { - /* - * We could only look at COW mappings, but it's more future - * proof to catch unexpected zeropages in other mappings and - * fail. - */ - if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) - continue; - addr = vma->vm_start; - -retry: - rc = walk_page_range_vma(vma, addr, vma->vm_end, - &find_zeropage_ops, &addr); - if (rc < 0) - return rc; - else if (!rc) - continue; - - /* addr was updated by find_zeropage_pte_entry() */ - fault = handle_mm_fault(vma, addr, - FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, - NULL); - if (fault & VM_FAULT_OOM) - return -ENOMEM; - /* - * See break_ksm(): even after handle_mm_fault() returned 0, we - * must start the lookup from the current address, because - * handle_mm_fault() may back out if there's any difficulty. - * - * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but - * maybe they could trigger in the future on concurrent - * truncation. In that case, the shared zeropage would be gone - * and we can simply retry and make progress. - */ - cond_resched(); - goto retry; - } - - return 0; -} - -static int __s390_disable_cow_sharing(struct mm_struct *mm) -{ - int rc; - - if (!mm->context.allow_cow_sharing) - return 0; - - mm->context.allow_cow_sharing = 0; - - /* Replace all shared zeropages by anonymous pages. */ - rc = __s390_unshare_zeropages(mm); - /* - * Make sure to disable KSM (if enabled for the whole process or - * individual VMAs). Note that nothing currently hinders user space - * from re-enabling it. - */ - if (!rc) - rc = ksm_disable(mm); - if (rc) - mm->context.allow_cow_sharing = 1; - return rc; -} - -/* - * Disable most COW-sharing of memory pages for the whole process: - * (1) Disable KSM and unmerge/unshare any KSM pages. - * (2) Disallow shared zeropages and unshare any zerpages that are mapped. - * - * Not that we currently don't bother with COW-shared pages that are shared - * with parent/child processes due to fork(). - */ -int s390_disable_cow_sharing(void) -{ - int rc; - - mmap_write_lock(current->mm); - rc = __s390_disable_cow_sharing(current->mm); - mmap_write_unlock(current->mm); - return rc; -} -EXPORT_SYMBOL_GPL(s390_disable_cow_sharing); - /* * Enable storage key handling from now on and initialize the storage * keys with the default key. @@ -2823,7 +2294,7 @@ int s390_enable_skey(void) goto out_up; mm->context.uses_skeys = 1; - rc = __s390_disable_cow_sharing(mm); + rc = gmap_helper_disable_cow_sharing(); if (rc) { mm->context.uses_skeys = 0; goto out_up; @@ -2943,49 +2414,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, EXPORT_SYMBOL_GPL(__s390_uv_destroy_range); /** - * s390_unlist_old_asce - Remove the topmost level of page tables from the - * list of page tables of the gmap. - * @gmap: the gmap whose table is to be removed - * - * On s390x, KVM keeps a list of all pages containing the page tables of the - * gmap (the CRST list). This list is used at tear down time to free all - * pages that are now not needed anymore. - * - * This function removes the topmost page of the tree (the one pointed to by - * the ASCE) from the CRST list. - * - * This means that it will not be freed when the VM is torn down, and needs - * to be handled separately by the caller, unless a leak is actually - * intended. Notice that this function will only remove the page from the - * list, the page will still be used as a top level page table (and ASCE). - */ -void s390_unlist_old_asce(struct gmap *gmap) -{ - struct page *old; - - old = virt_to_page(gmap->table); - spin_lock(&gmap->guest_table_lock); - list_del(&old->lru); - /* - * Sometimes the topmost page might need to be "removed" multiple - * times, for example if the VM is rebooted into secure mode several - * times concurrently, or if s390_replace_asce fails after calling - * s390_remove_old_asce and is attempted again later. In that case - * the old asce has been removed from the list, and therefore it - * will not be freed when the VM terminates, but the ASCE is still - * in use and still pointed to. - * A subsequent call to replace_asce will follow the pointer and try - * to remove the same page from the list again. - * Therefore it's necessary that the page of the ASCE has valid - * pointers, so list_del can work (and do nothing) without - * dereferencing stale or invalid pointers. - */ - INIT_LIST_HEAD(&old->lru); - spin_unlock(&gmap->guest_table_lock); -} -EXPORT_SYMBOL_GPL(s390_unlist_old_asce); - -/** * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy * @gmap: the gmap whose ASCE needs to be replaced * @@ -3004,8 +2432,6 @@ int s390_replace_asce(struct gmap *gmap) struct page *page; void *table; - s390_unlist_old_asce(gmap); - /* Replacing segment type ASCEs would cause serious issues */ if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) return -EINVAL; @@ -3013,19 +2439,9 @@ int s390_replace_asce(struct gmap *gmap) page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = 0; table = page_to_virt(page); memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT)); - /* - * The caller has to deal with the old ASCE, but here we make sure - * the new one is properly added to the CRST list, so that - * it will be freed when the VM is torn down. - */ - spin_lock(&gmap->guest_table_lock); - list_add(&page->lru, &gmap->crst_list); - spin_unlock(&gmap->guest_table_lock); - /* Set new table origin while preserving existing ASCE control bits */ asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table); WRITE_ONCE(gmap->asce, asce); diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c new file mode 100644 index 000000000000..b63f427e7289 --- /dev/null +++ b/arch/s390/mm/gmap_helpers.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helper functions for KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2025 + */ + +#include <linux/export.h> +#include <linux/mm_types.h> +#include <linux/mmap_lock.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/pagewalk.h> +#include <linux/ksm.h> +#include <asm/gmap_helpers.h> + +/** + * ptep_zap_swap_entry() - discard a swap entry. + * @mm: the mm + * @entry: the swap entry that needs to be zapped + * + * Discards the given swap entry. If the swap entry was an actual swap + * entry (and not a migration entry, for example), the actual swapped + * page is also discarded from swap. + */ +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +{ + if (!non_swap_entry(entry)) + dec_mm_counter(mm, MM_SWAPENTS); + else if (is_migration_entry(entry)) + dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry))); + free_swap_and_cache(entry); +} + +/** + * gmap_helper_zap_one_page() - discard a page if it was swapped. + * @mm: the mm + * @vmaddr: the userspace virtual address that needs to be discarded + * + * If the given address maps to a swap entry, discard it. + * + * Context: needs to be called while holding the mmap lock. + */ +void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) +{ + struct vm_area_struct *vma; + spinlock_t *ptl; + pte_t *ptep; + + mmap_assert_locked(mm); + + /* Find the vm address for the guest address */ + vma = vma_lookup(mm, vmaddr); + if (!vma || is_vm_hugetlb_page(vma)) + return; + + /* Get pointer to the page table entry */ + ptep = get_locked_pte(mm, vmaddr, &ptl); + if (unlikely(!ptep)) + return; + if (pte_swap(*ptep)) + ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); + pte_unmap_unlock(ptep, ptl); +} +EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page); + +/** + * gmap_helper_discard() - discard user pages in the given range + * @mm: the mm + * @vmaddr: starting userspace address + * @end: end address (first address outside the range) + * + * All userpace pages in the range [@vamddr, @end) are discarded and unmapped. + * + * Context: needs to be called while holding the mmap lock. + */ +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end) +{ + struct vm_area_struct *vma; + + mmap_assert_locked(mm); + + while (vmaddr < end) { + vma = find_vma_intersection(mm, vmaddr, end); + if (!vma) + return; + if (!is_vm_hugetlb_page(vma)) + zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL); + vmaddr = vma->vm_end; + } +} +EXPORT_SYMBOL_GPL(gmap_helper_discard); + +static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + unsigned long *found_addr = walk->private; + + /* Return 1 of the page is a zeropage. */ + if (is_zero_pfn(pte_pfn(*pte))) { + /* + * Shared zeropage in e.g., a FS DAX mapping? We cannot do the + * right thing and likely don't care: FAULT_FLAG_UNSHARE + * currently only works in COW mappings, which is also where + * mm_forbids_zeropage() is checked. + */ + if (!is_cow_mapping(walk->vma->vm_flags)) + return -EFAULT; + + *found_addr = addr; + return 1; + } + return 0; +} + +static const struct mm_walk_ops find_zeropage_ops = { + .pte_entry = find_zeropage_pte_entry, + .walk_lock = PGWALK_WRLOCK, +}; + +/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages + * @mm: the mm whose zero pages are to be unshared + * + * Unshare all shared zeropages, replacing them by anonymous pages. Note that + * we cannot simply zap all shared zeropages, because this could later + * trigger unexpected userfaultfd missing events. + * + * This must be called after mm->context.allow_cow_sharing was + * set to 0, to avoid future mappings of shared zeropages. + * + * mm contracts with s390, that even if mm were to remove a page table, + * and racing with walk_page_range_vma() calling pte_offset_map_lock() + * would fail, it will never insert a page table containing empty zero + * pages once mm_forbids_zeropage(mm) i.e. + * mm->context.allow_cow_sharing is set to 0. + */ +static int __gmap_helper_unshare_zeropages(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + unsigned long addr; + vm_fault_t fault; + int rc; + + for_each_vma(vmi, vma) { + /* + * We could only look at COW mappings, but it's more future + * proof to catch unexpected zeropages in other mappings and + * fail. + */ + if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) + continue; + addr = vma->vm_start; + +retry: + rc = walk_page_range_vma(vma, addr, vma->vm_end, + &find_zeropage_ops, &addr); + if (rc < 0) + return rc; + else if (!rc) + continue; + + /* addr was updated by find_zeropage_pte_entry() */ + fault = handle_mm_fault(vma, addr, + FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, + NULL); + if (fault & VM_FAULT_OOM) + return -ENOMEM; + /* + * See break_ksm(): even after handle_mm_fault() returned 0, we + * must start the lookup from the current address, because + * handle_mm_fault() may back out if there's any difficulty. + * + * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but + * maybe they could trigger in the future on concurrent + * truncation. In that case, the shared zeropage would be gone + * and we can simply retry and make progress. + */ + cond_resched(); + goto retry; + } + + return 0; +} + +/** + * gmap_helper_disable_cow_sharing() - disable all COW sharing + * + * Disable most COW-sharing of memory pages for the whole process: + * (1) Disable KSM and unmerge/unshare any KSM pages. + * (2) Disallow shared zeropages and unshare any zerpages that are mapped. + * + * Not that we currently don't bother with COW-shared pages that are shared + * with parent/child processes due to fork(). + */ +int gmap_helper_disable_cow_sharing(void) +{ + struct mm_struct *mm = current->mm; + int rc; + + mmap_assert_write_locked(mm); + + if (!mm->context.allow_cow_sharing) + return 0; + + mm->context.allow_cow_sharing = 0; + + /* Replace all shared zeropages by anonymous pages. */ + rc = __gmap_helper_unshare_zeropages(mm); + /* + * Make sure to disable KSM (if enabled for the whole process or + * individual VMAs). Note that nothing currently hinders user space + * from re-enabling it. + */ + if (!rc) + rc = ksm_disable(mm); + if (rc) + mm->context.allow_cow_sharing = 1; + return rc; +} +EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index d9ce199953de..e88c02c9e642 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -9,12 +9,13 @@ #define KMSG_COMPONENT "hugetlb" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <asm/pgalloc.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/sched/mm.h> #include <linux/security.h> +#include <asm/pgalloc.h> /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -188,8 +189,8 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) return __rste_to_pte(pte_val(*ptep)); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get(mm, addr, ptep); pmd_t *pmdp = (pmd_t *) ptep; @@ -248,9 +249,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, bool __init arch_hugetlb_valid_size(unsigned long size) { - if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) + if (cpu_has_edat1() && size == PMD_SIZE) return true; - else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) + else if (cpu_has_edat2() && size == PUD_SIZE) return true; else return false; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7a96623a9d2e..e4953453d254 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -8,6 +8,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include <linux/cpufeature.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -39,7 +40,6 @@ #include <asm/kfence.h> #include <asm/dma.h> #include <asm/abs_lowcore.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/sclp.h> @@ -56,6 +56,15 @@ pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); struct ctlreg __bootdata_preserved(s390_invalid_asce); +unsigned long __bootdata_preserved(page_noexec_mask); +EXPORT_SYMBOL(page_noexec_mask); + +unsigned long __bootdata_preserved(segment_noexec_mask); +EXPORT_SYMBOL(segment_noexec_mask); + +unsigned long __bootdata_preserved(region_noexec_mask); +EXPORT_SYMBOL(region_noexec_mask); + unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); @@ -64,8 +73,6 @@ static void __init setup_zero_pages(void) { unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; - struct page *page; - int i; /* Latest machines require a mapping granularity of 512KB */ order = 7; @@ -74,16 +81,7 @@ static void __init setup_zero_pages(void) while (order > 2 && (total_pages >> 10) < (1UL << order)) order--; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Out of memory in setup_zero_pages"); - - page = virt_to_page((void *) empty_zero_page); - split_page(page, order); - for (i = 1 << order; i > 0; i--) { - mark_page_reserved(page); - page++; - } + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -108,7 +106,7 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; - if (MACHINE_HAS_NX) + if (cpu_has_nx()) system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); @@ -144,7 +142,7 @@ bool force_dma_unencrypted(struct device *dev) } /* protected virtualization */ -static void pv_init(void) +static void __init pv_init(void) { if (!is_prot_virt_guest()) return; @@ -156,19 +154,13 @@ static void pv_init(void) swiotlb_update_mem_attributes(); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - set_max_mapnr(max_low_pfn); - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - pv_init(); - kfence_split_mapping(); - /* this will put all low memory onto the freelists */ - memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. */ } @@ -230,16 +222,13 @@ struct s390_cma_mem_data { static int s390_cma_check_range(struct cma *cma, void *data) { struct s390_cma_mem_data *mem_data; - unsigned long start, end; mem_data = data; - start = cma_get_base(cma); - end = start + cma_get_size(cma); - if (end < mem_data->start) - return 0; - if (start >= mem_data->end) - return 0; - return -EBUSY; + + if (cma_intersects(cma, mem_data->start, mem_data->end)) + return -EBUSY; + + return 0; } static int s390_cma_mem_notifier(struct notifier_block *nb, @@ -276,7 +265,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long size_pages = PFN_DOWN(size); int rc; - if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + if (WARN_ON_ONCE(pgprot_val(params->pgprot) != pgprot_val(PAGE_KERNEL))) return -EINVAL; VM_BUG_ON(!mhp_range_allowed(start, size, true)); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 28a18c42ba99..44426e0f2944 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -17,6 +17,7 @@ #include <asm/asm-extable.h> #include <asm/abs_lowcore.h> #include <asm/stacktrace.h> +#include <asm/sections.h> #include <asm/maccess.h> #include <asm/ctlreg.h> diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 33f3504be90b..40a526d28184 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -51,7 +51,6 @@ static inline unsigned long mmap_base(unsigned long rnd, { unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size() + stack_guard_gap; - unsigned long gap_min, gap_max; /* Values close to RLIM_INFINITY can overflow. */ if (gap + pad > gap) @@ -61,13 +60,7 @@ static inline unsigned long mmap_base(unsigned long rnd, * Top of mmap area (just below the process stack). * Leave at least a ~128 MB hole. */ - gap_min = SZ_128M; - gap_max = (STACK_TOP / 6) * 5; - - if (gap < gap_min) - gap = gap_min; - else if (gap > gap_max) - gap = gap_max; + gap = clamp(gap, SZ_128M, (STACK_TOP / 6) * 5); return PAGE_ALIGN(STACK_TOP - gap - rnd); } @@ -196,22 +189,28 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) } } -static const pgprot_t protection_map[16] = { - [VM_NONE] = PAGE_NONE, - [VM_READ] = PAGE_RO, - [VM_WRITE] = PAGE_RO, - [VM_WRITE | VM_READ] = PAGE_RO, - [VM_EXEC] = PAGE_RX, - [VM_EXEC | VM_READ] = PAGE_RX, - [VM_EXEC | VM_WRITE] = PAGE_RX, - [VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX, - [VM_SHARED] = PAGE_NONE, - [VM_SHARED | VM_READ] = PAGE_RO, - [VM_SHARED | VM_WRITE] = PAGE_RW, - [VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW, - [VM_SHARED | VM_EXEC] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX, - [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX -}; +static pgprot_t protection_map[16] __ro_after_init; + +void __init setup_protection_map(void) +{ + pgprot_t *pm = protection_map; + + pm[VM_NONE] = PAGE_NONE; + pm[VM_READ] = PAGE_RO; + pm[VM_WRITE] = PAGE_RO; + pm[VM_WRITE | VM_READ] = PAGE_RO; + pm[VM_EXEC] = PAGE_RX; + pm[VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_EXEC | VM_WRITE] = PAGE_RX; + pm[VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX; + pm[VM_SHARED] = PAGE_NONE; + pm[VM_SHARED | VM_READ] = PAGE_RO; + pm[VM_SHARED | VM_WRITE] = PAGE_RW; + pm[VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW; + pm[VM_SHARED | VM_EXEC] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX; + pm[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX; +} + DECLARE_VM_GET_PAGE_PROT diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 8f56a21a077f..348e759840e7 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -3,6 +3,7 @@ * Copyright IBM Corp. 2011 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/hugetlb.h> #include <linux/proc_fs.h> #include <linux/vmalloc.h> @@ -27,7 +28,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end) unsigned long boundary, size; while (start < end) { - if (MACHINE_HAS_EDAT1) { + if (cpu_has_edat1()) { /* set storage keys for a 1MB frame */ size = 1UL << 20; boundary = (start + size) & ~(size - 1); @@ -63,7 +64,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, unsigned long *table, mask; mask = 0; - if (MACHINE_HAS_EDAT2) { + if (cpu_has_edat2()) { switch (dtt) { case CRDTE_DTT_REGION3: mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); @@ -77,7 +78,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, } table = (unsigned long *)((unsigned long)old & mask); crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { cspg(old, *old, new); } else { csp((unsigned int *)old + 1, *old, new); @@ -109,8 +110,6 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, } else if (flags & SET_MEMORY_DEF) { new = __pte(pte_val(new) & PAGE_MASK); new = set_pte_bit(new, PAGE_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); } pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); ptep++; @@ -167,8 +166,6 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pmd(pmd_val(new) & PMD_MASK); new = set_pmd_bit(new, SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); } @@ -256,8 +253,6 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pud(pud_val(new) & PUD_MASK); new = set_pud_bit(new, REGION3_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } @@ -379,7 +374,7 @@ int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags unsigned long end; int rc; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); if (!flags) return 0; diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 1aac13bb8f53..e6175d75e4b0 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -9,6 +9,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <asm/asm-extable.h> +#include <asm/asm-offsets.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -56,7 +57,7 @@ int __pfault_init(void) if (pfault_disable) return rc; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " diag %[refbk],%[rc],0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) @@ -78,7 +79,7 @@ void __pfault_fini(void) if (pfault_disable) return; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " diag %[refbk],0,0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 58696a0c4e4a..d2f6f1f6d2fc 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -12,35 +12,8 @@ #include <asm/mmu_context.h> #include <asm/page-states.h> #include <asm/pgalloc.h> -#include <asm/gmap.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> -#ifdef CONFIG_PGSTE - -int page_table_allocate_pgste = 0; -EXPORT_SYMBOL(page_table_allocate_pgste); - -static struct ctl_table page_table_sysctl[] = { - { - .procname = "allocate_pgste", - .data = &page_table_allocate_pgste, - .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -}; - -static int __init page_table_register_sysctl(void) -{ - return register_sysctl("vm", page_table_sysctl) ? 0 : -ENOMEM; -} -__initcall(page_table_register_sysctl); - -#endif /* CONFIG_PGSTE */ - unsigned long *crst_table_alloc(struct mm_struct *mm) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); @@ -63,11 +36,15 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; + struct ctlreg asce; /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { - get_lowcore()->user_asce.val = mm->context.asce; - local_ctl_load(7, &get_lowcore()->user_asce); + asce.val = mm->context.asce; + get_lowcore()->user_asce = asce; + local_ctl_load(7, &asce); + if (!test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &asce); } __tlb_flush_local(); } @@ -77,6 +54,8 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) unsigned long *pgd = NULL, *p4d = NULL, *__pgd; unsigned long asce_limit = mm->context.asce_limit; + mmap_assert_write_locked(mm); + /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ VM_BUG_ON(asce_limit < _REGION2_SIZE); @@ -88,23 +67,18 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) if (unlikely(!p4d)) goto err_p4d; crst_table_init(p4d, _REGION2_ENTRY_EMPTY); + pagetable_p4d_ctor(virt_to_ptdesc(p4d)); } if (end > _REGION1_SIZE) { pgd = crst_table_alloc(mm); if (unlikely(!pgd)) goto err_pgd; crst_table_init(pgd, _REGION1_ENTRY_EMPTY); + pagetable_pgd_ctor(virt_to_ptdesc(pgd)); } spin_lock_bh(&mm->page_table_lock); - /* - * This routine gets called with mmap_lock lock held and there is - * no reason to optimize for the case of otherwise. However, if - * that would ever change, the below check will let us know. - */ - VM_BUG_ON(asce_limit != mm->context.asce_limit); - if (p4d) { __pgd = (unsigned long *) mm->pgd; p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd); @@ -130,6 +104,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) return 0; err_pgd: + pagetable_dtor(virt_to_ptdesc(p4d)); crst_table_free(mm, p4d); err_p4d: return -ENOMEM; @@ -167,43 +142,22 @@ unsigned long *page_table_alloc(struct mm_struct *mm) ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; - if (!pagetable_pte_ctor(ptdesc)) { + if (!pagetable_pte_ctor(mm, ptdesc)) { pagetable_free(ptdesc); return NULL; } table = ptdesc_to_virt(ptdesc); __arch_set_page_dat(table, 1); - /* pt_list is used by gmap only */ - INIT_LIST_HEAD(&ptdesc->pt_list); memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); return table; } -static void pagetable_pte_dtor_free(struct ptdesc *ptdesc) -{ - pagetable_pte_dtor(ptdesc); - pagetable_free(ptdesc); -} - void page_table_free(struct mm_struct *mm, unsigned long *table) { struct ptdesc *ptdesc = virt_to_ptdesc(table); - pagetable_pte_dtor_free(ptdesc); -} - -void __tlb_remove_table(void *table) -{ - struct ptdesc *ptdesc = virt_to_ptdesc(table); - struct page *page = ptdesc_page(ptdesc); - - if (compound_order(page) == CRST_ALLOC_ORDER) { - /* pmd, pud, or p4d */ - pagetable_free(ptdesc); - return; - } - pagetable_pte_dtor_free(ptdesc); + pagetable_dtor_free(ptdesc); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -211,7 +165,7 @@ static void pte_free_now(struct rcu_head *head) { struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head); - pagetable_pte_dtor_free(ptdesc); + pagetable_dtor_free(ptdesc); } void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) @@ -219,11 +173,6 @@ void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) struct ptdesc *ptdesc = virt_to_ptdesc(pgtable); call_rcu(&ptdesc->pt_rcu_head, pte_free_now); - /* - * THPs are not allowed for KVM guests. Warn if pgste ever reaches here. - * Turn to the generic pte_free_defer() version once gmap is removed. - */ - WARN_ON_ONCE(mm_has_pgste(mm)); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index cea5dba80468..60688be4e876 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -4,6 +4,8 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -19,10 +21,10 @@ #include <linux/ksm.h> #include <linux/mman.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/page-states.h> +#include <asm/machine.h> pgprot_t pgprot_writecombine(pgprot_t prot) { @@ -34,22 +36,12 @@ pgprot_t pgprot_writecombine(pgprot_t prot) } EXPORT_SYMBOL_GPL(pgprot_writecombine); -pgprot_t pgprot_writethrough(pgprot_t prot) -{ - /* - * mio_wb_bit_mask may be set on a different CPU, but it is only set - * once at init and only read afterwards. - */ - return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask); -} -EXPORT_SYMBOL_GPL(pgprot_writethrough); - static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int nodat) { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -69,7 +61,7 @@ static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -94,7 +86,7 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) ptep_ipte_local(mm, addr, ptep, nodat); else @@ -173,10 +165,10 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */ /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); #endif return pgste; @@ -210,7 +202,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) if ((pte_val(entry) & _PAGE_PRESENT) && (pte_val(entry) & _PAGE_WRITE) && !(pte_val(entry) & _PAGE_INVALID)) { - if (!MACHINE_HAS_ESOP) { + if (!machine_has_esop()) { /* * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. @@ -220,7 +212,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) } if (!(pte_val(entry) & _PAGE_PROTECT)) /* This pte allows write access, set user-dirty */ - pgste_val(pgste) |= PGSTE_UC_BIT; + pgste = set_pgste_bit(pgste, PGSTE_UC_BIT); } #endif set_pte(ptep, entry); @@ -236,7 +228,7 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm, bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); if (bits) { - pgste_val(pgste) ^= bits; + pgste = __pgste(pgste_val(pgste) ^ bits); ptep_notify(mm, addr, ptep, bits); } #endif @@ -360,8 +352,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pgste_t pgste; struct mm_struct *mm = vma->vm_mm; - if (!MACHINE_HAS_NX) - pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC)); if (mm_has_pgste(mm)) { pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte, mm); @@ -376,7 +366,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, static inline void pmdp_idte_local(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -388,12 +378,12 @@ static inline void pmdp_idte_local(struct mm_struct *mm, static inline void pmdp_idte_global(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); @@ -413,7 +403,7 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pmdp_idte_local(mm, addr, pmdp); else @@ -507,7 +497,7 @@ EXPORT_SYMBOL(pmdp_xchg_lazy); static inline void pudp_idte_local(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -517,10 +507,10 @@ static inline void pudp_idte_local(struct mm_struct *mm, static inline void pudp_idte_global(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); else /* @@ -539,7 +529,7 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm, if (pud_val(old) & _REGION_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pudp_idte_local(mm, addr, pudp); else @@ -611,7 +601,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, /* the mm_has_pgste() check is done in set_pte_at() */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO); pgste_set_key(ptep, pgste, entry, mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); @@ -624,7 +614,7 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= PGSTE_IN_BIT; + pgste = set_pgste_bit(pgste, PGSTE_IN_BIT); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -669,7 +659,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID)); entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } - pgste_val(pgste) |= bit; + pgste = set_pgste_bit(pgste, bit); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); return 0; @@ -689,7 +679,7 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, if (!(pte_val(spte) & _PAGE_INVALID) && !((pte_val(spte) & _PAGE_PROTECT) && !(pte_val(pte) & _PAGE_PROTECT))) { - pgste_val(spgste) |= PGSTE_VSIE_BIT; + spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT); tpgste = pgste_get_lock(tptep); tpte = __pte((pte_val(spte) & PAGE_MASK) | (pte_val(pte) & _PAGE_PROTECT)); @@ -747,7 +737,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -760,8 +750,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* Clear storage key ACC and F, but set R/C */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT); ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); @@ -782,13 +772,13 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, pgste = pgste_get_lock(ptep); dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT); pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); ptep_ipte_global(mm, addr, ptep, nodat); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE)) pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); else pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID)); @@ -844,11 +834,11 @@ again: if (!ptep) goto again; new = old = pgste_get_lock(ptep); - pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | - PGSTE_ACC_BITS | PGSTE_FP_BIT); + new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); keyul = (unsigned long) key; - pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; - pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48); + new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); if (!(pte_val(*ptep) & _PAGE_INVALID)) { unsigned long bits, skey; @@ -859,12 +849,12 @@ again: /* Set storage key ACC and FP */ page_set_storage_key(paddr, skey, !nq); /* Merge host changed & referenced into pgste */ - pgste_val(new) |= bits << 52; + new = set_pgste_bit(new, bits << 52); } /* changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -952,19 +942,19 @@ again: goto again; new = old = pgste_get_lock(ptep); /* Reset guest reference bit only */ - pgste_val(new) &= ~PGSTE_GR_BIT; + new = clear_pgste_bit(new, PGSTE_GR_BIT); if (!(pte_val(*ptep) & _PAGE_INVALID)) { paddr = pte_val(*ptep) & PAGE_MASK; cc = page_reset_referenced(paddr); /* Merge real referenced bit into host-set */ - pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; + new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT); } /* Reflect guest's logical view, not physical */ cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; /* Changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -1128,7 +1118,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, if (res) pgstev |= _PGSTE_GPS_ZERO; - pgste_val(pgste) = pgstev; + pgste = __pgste(pgstev); pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); return res; @@ -1161,8 +1151,8 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long hva, return -EFAULT; new = pgste_get_lock(ptep); - pgste_val(new) &= ~bits; - pgste_val(new) |= value & bits; + new = clear_pgste_bit(new, bits); + new = set_pgste_bit(new, value & bits); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 665b8228afeb..f48ef361bc83 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,6 +4,7 @@ */ #include <linux/memory_hotplug.h> +#include <linux/cpufeature.h> #include <linux/memblock.h> #include <linux/pfn.h> #include <linux/mm.h> @@ -63,13 +64,12 @@ void *vmem_crst_alloc(unsigned long val) pte_t __ref *vmem_pte_alloc(void) { - unsigned long size = PTRS_PER_PTE * sizeof(pte_t); pte_t *pte; if (slab_is_available()) - pte = (pte_t *) page_table_alloc(&init_mm); + pte = (pte_t *)page_table_alloc(&init_mm); else - pte = (pte_t *) memblock_alloc(size, size); + pte = (pte_t *)memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pte) return NULL; memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); @@ -171,9 +171,6 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, pte_t *pte; prot = pgprot_val(PAGE_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_PAGE_NOEXEC; - pte = pte_offset_kernel(pmd, addr); for (; addr < end; addr += PAGE_SIZE, pte++) { if (!add) { @@ -230,9 +227,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, pte_t *pte; prot = pgprot_val(SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_SEGMENT_ENTRY_NOEXEC; - pmd = pmd_offset(pud, addr); for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); @@ -255,12 +249,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, } else if (pmd_none(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE) && - MACHINE_HAS_EDAT1 && direct && + cpu_has_edat1() && direct && !debug_pagealloc_enabled()) { set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; continue; - } else if (!direct && MACHINE_HAS_EDAT1) { + } else if (!direct && cpu_has_edat1()) { void *new_page; /* @@ -324,8 +318,6 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, pmd_t *pmd; prot = pgprot_val(REGION3_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_REGION_ENTRY_NOEXEC; pud = pud_offset(p4d, addr); for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); @@ -343,7 +335,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE) && - MACHINE_HAS_EDAT2 && direct && + cpu_has_edat2() && direct && !debug_pagealloc_enabled()) { set_pud(pud, __pud(__pa(addr) | prot)); pages++; @@ -667,10 +659,10 @@ void __init vmem_map_init(void) * prefix page is used to return to the previous context with * an LPSWE instruction and therefore must be executable. */ - if (!static_key_enabled(&cpu_has_bear)) + if (!cpu_has_bear()) set_memory_x(0, 1); if (debug_pagealloc_enabled()) - __set_memory_4k(__va(0), __va(0) + ident_map_size); + __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h deleted file mode 100644 index 7822ea92e54a..000000000000 --- a/arch/s390/net/bpf_jit.h +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * BPF Jit compiler defines - * - * Copyright IBM Corp. 2012,2015 - * - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - * Michael Holzheu <holzheu@linux.vnet.ibm.com> - */ - -#ifndef __ARCH_S390_NET_BPF_JIT_H -#define __ARCH_S390_NET_BPF_JIT_H - -#ifndef __ASSEMBLY__ - -#include <linux/filter.h> -#include <linux/types.h> - -#endif /* __ASSEMBLY__ */ - -/* - * Stackframe layout (packed stack): - * - * ^ high - * +---------------+ | - * | old backchain | | - * +---------------+ | - * | r15 - r6 | | - * +---------------+ | - * | 4 byte align | | - * | tail_call_cnt | | - * BFP -> +===============+ | - * | | | - * | BPF stack | | - * | | | - * R15+160 -> +---------------+ | - * | new backchain | | - * R15+152 -> +---------------+ | - * | + 152 byte SA | | - * R15 -> +---------------+ + low - * - * We get 160 bytes stack space from calling function, but only use - * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt. - * - * The stack size used by the BPF program ("BPF stack" above) is passed - * via "aux->stack_depth". - */ -#define STK_SPACE_ADD (160) -#define STK_160_UNUSED (160 - 12 * 8) -#define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED) - -#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */ -#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */ - -#endif /* __ARCH_S390_NET_BPF_JIT_H */ diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9d440a0b729e..bb17efe29d65 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -32,7 +32,6 @@ #include <asm/set_memory.h> #include <asm/text-patching.h> #include <asm/unwind.h> -#include "bpf_jit.h" struct bpf_jit { u32 seen; /* Flags to remember seen eBPF instructions */ @@ -48,14 +47,13 @@ struct bpf_jit { int lit64; /* Current position in 64-bit literal pool */ int base_ip; /* Base address for literal pool */ int exit_ip; /* Address of exit */ - int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ - int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ int excnt; /* Number of exception table entries */ int prologue_plt_ret; /* Return address for prologue hotpatch PLT */ int prologue_plt; /* Start of prologue hotpatch PLT */ int kern_arena; /* Pool offset of kernel arena address */ u64 user_arena; /* User arena address */ + u32 frame_off; /* Offset of struct bpf_prog from %r15 */ }; #define SEEN_MEM BIT(0) /* use mem[] for temporary storage */ @@ -127,6 +125,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) jit->seen_regs |= (1 << r1); } +static s32 off_to_pcrel(struct bpf_jit *jit, u32 off) +{ + return off - jit->prg; +} + +static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr) +{ + if (jit->prg_buf) + return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg); + return 0; +} + #define REG_SET_SEEN(b1) \ ({ \ reg_set_seen(jit, b1); \ @@ -201,7 +211,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT4_PCREL_RIC(op, mask, target) \ ({ \ - int __rel = ((target) - jit->prg) / 2; \ + int __rel = off_to_pcrel(jit, target) / 2; \ _EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \ }) @@ -239,7 +249,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \ ({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ + unsigned int rel = off_to_pcrel(jit, target) / 2; \ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \ (op2) | (mask) << 12); \ REG_SET_SEEN(b1); \ @@ -248,7 +258,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \ ({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ + unsigned int rel = off_to_pcrel(jit, target) / 2; \ _EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \ (rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \ REG_SET_SEEN(b1); \ @@ -257,29 +267,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \ ({ \ - int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \ + int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) +static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel) +{ + u32 pc32dbl = (s32)(pcrel / 2); + + _EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff); +} + +static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel) +{ + emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel); + REG_SET_SEEN(b); +} + #define EMIT6_PCREL_RILB(op, b, target) \ -({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ - _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\ - REG_SET_SEEN(b); \ -}) + emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target)) -#define EMIT6_PCREL_RIL(op, target) \ -({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ - _EMIT6((op) | rel >> 16, rel & 0xffff); \ -}) +#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr) \ + emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr)) + +static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel) +{ + emit6_pcrel_ril(jit, op | mask << 20, pcrel); +} #define EMIT6_PCREL_RILC(op, mask, target) \ -({ \ - EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \ -}) + emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target)) + +#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr) \ + emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr)) #define _EMIT6_IMM(op, imm) \ ({ \ @@ -404,11 +426,25 @@ static void jit_fill_hole(void *area, unsigned int size) } /* + * Caller-allocated part of the frame. + * Thanks to packed stack, its otherwise unused initial part can be used for + * the BPF stack and for the next frame. + */ +struct prog_frame { + u64 unused[8]; + /* BPF stack starts here and grows towards 0 */ + u32 tail_call_cnt; + u32 pad; + u64 r6[10]; /* r6 - r15 */ + u64 backchain; +} __packed; + +/* * Save registers from "rs" (register start) to "re" (register end) on stack */ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re) { - u32 off = STK_OFF_R6 + (rs - 6) * 8; + u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8; if (rs == re) /* stg %rs,off(%r15) */ @@ -421,12 +457,9 @@ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re) /* * Restore registers from "rs" (register start) to "re" (register end) on stack */ -static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth) +static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re) { - u32 off = STK_OFF_R6 + (rs - 6) * 8; - - if (jit->seen & SEEN_STACK) - off += STK_OFF + stack_depth; + u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8; if (rs == re) /* lg %rs,off(%r15) */ @@ -470,8 +503,7 @@ static int get_end(u16 seen_regs, int start) * Save and restore clobbered registers (6-15) on stack. * We save/restore registers in chunks with gap >= 2 registers. */ -static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth, - u16 extra_regs) +static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs) { u16 seen_regs = jit->seen_regs | extra_regs; const int last = 15, save_restore_size = 6; @@ -494,7 +526,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth, if (op == REGS_SAVE) save_regs(jit, rs, re); else - restore_regs(jit, rs, re, stack_depth); + restore_regs(jit, rs, re); re++; } while (re <= last); } @@ -503,7 +535,7 @@ static void bpf_skip(struct bpf_jit *jit, int size) { if (size >= 6 && !is_valid_rel(size)) { /* brcl 0xf,size */ - EMIT6_PCREL_RIL(0xc0f4000000, size); + EMIT6_PCREL_RILC(0xc0040000, 0xf, size); size -= 6; } else if (size >= 4 && is_valid_rel(size)) { /* brc 0xf,size */ @@ -544,18 +576,27 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { memcpy(plt, &bpf_plt, sizeof(*plt)); plt->ret = ret; - plt->target = target; + /* + * (target == NULL) implies that the branch to this PLT entry was + * patched and became a no-op. However, some CPU could have jumped + * to this PLT entry before patching and may be still executing it. + * + * Since the intention in this case is to make the PLT entry a no-op, + * make the target point to the return label instead of NULL. + */ + plt->target = target ?: ret; } /* * Emit function prologue * * Save registers and create stack frame if necessary. - * See stack frame layout description in "bpf_jit.h"! + * Stack frame layout is described by struct prog_frame. */ -static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, - u32 stack_depth) +static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp) { + BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD); + /* No-op for hotpatching */ /* brcl 0,prologue_plt */ EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt); @@ -563,8 +604,9 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, if (!bpf_is_subprog(fp)) { /* Initialize the tail call counter in the main program. */ - /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ - _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT); + /* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */ + _EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt), + 0xf000 | offsetof(struct prog_frame, tail_call_cnt)); } else { /* * Skip the tail call counter initialization in subprograms. @@ -587,7 +629,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, jit->seen_regs |= NVREGS; } else { /* Save registers */ - save_restore_regs(jit, REGS_SAVE, stack_depth, + save_restore_regs(jit, REGS_SAVE, fp->aux->exception_boundary ? NVREGS : 0); } /* Setup literal pool */ @@ -605,43 +647,32 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, } /* Setup stack and backchain */ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) { - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - /* lgr %w1,%r15 (backchain) */ - EMIT4(0xb9040000, REG_W1, REG_15); - /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ - EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); - /* aghi %r15,-STK_OFF */ - EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth)); - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - /* stg %w1,152(%r15) (backchain) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, - REG_15, 152); + /* lgr %w1,%r15 (backchain) */ + EMIT4(0xb9040000, REG_W1, REG_15); + /* la %bfp,unused_end(%r15) (BPF frame pointer) */ + EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, + offsetofend(struct prog_frame, unused)); + /* aghi %r15,-frame_off */ + EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off); + /* stg %w1,backchain(%r15) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, + REG_15, + offsetof(struct prog_frame, backchain)); } } /* - * Emit an expoline for a jump that follows - */ -static void emit_expoline(struct bpf_jit *jit) -{ - /* exrl %r0,.+10 */ - EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); - /* j . */ - EMIT4_PCREL(0xa7f40000, 0); -} - -/* - * Emit __s390_indirect_jump_r1 thunk if necessary + * Jump using a register either directly or via an expoline thunk */ -static void emit_r1_thunk(struct bpf_jit *jit) -{ - if (nospec_uses_trampoline()) { - jit->r1_thunk_ip = jit->prg; - emit_expoline(jit); - /* br %r1 */ - _EMIT2(0x07f1); - } -} +#define EMIT_JUMP_REG(reg) do { \ + if (nospec_uses_trampoline()) \ + /* brcl 0xf,__s390_indirect_jump_rN */ \ + EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f, \ + __s390_indirect_jump_r ## reg); \ + else \ + /* br %rN */ \ + _EMIT2(0x07f0 | reg); \ +} while (0) /* * Call r1 either directly or via __s390_indirect_jump_r1 thunk @@ -650,7 +681,8 @@ static void call_r1(struct bpf_jit *jit) { if (nospec_uses_trampoline()) /* brasl %r14,__s390_indirect_jump_r1 */ - EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); + EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, + __s390_indirect_jump_r1); else /* basr %r14,%r1 */ EMIT2(0x0d00, REG_14, REG_1); @@ -659,23 +691,14 @@ static void call_r1(struct bpf_jit *jit) /* * Function epilogue */ -static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) +static void bpf_jit_epilogue(struct bpf_jit *jit) { jit->exit_ip = jit->prg; /* Load exit code: lgr %r2,%b0 */ EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ - save_restore_regs(jit, REGS_RESTORE, stack_depth, 0); - if (nospec_uses_trampoline()) { - jit->r14_thunk_ip = jit->prg; - /* Generate __s390_indirect_jump_r14 thunk */ - emit_expoline(jit); - } - /* br %r14 */ - _EMIT2(0x07fe); - - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - emit_r1_thunk(jit); + save_restore_regs(jit, REGS_RESTORE, 0); + EMIT_JUMP_REG(14); jit->prg = ALIGN(jit->prg, 8); jit->prologue_plt = jit->prg; @@ -856,7 +879,7 @@ static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags) * stack space for the large switch statement. */ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, - int i, bool extra_pass, u32 stack_depth) + int i, bool extra_pass) { struct bpf_insn *insn = &fp->insnsi[i]; s32 branch_oc_off = insn->off; @@ -1777,9 +1800,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, * Note 2: We assume that the verifier does not let us call the * main program, which clears the tail call counter on entry. */ - /* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */ - _EMIT6(0xd203f000 | STK_OFF_TCCNT, - 0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth)); + /* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */ + _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt), + 0xf000 | (jit->frame_off + + offsetof(struct prog_frame, tail_call_cnt))); /* Sign-extend the kfunc arguments. */ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { @@ -1830,10 +1854,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, * goto out; */ - if (jit->seen & SEEN_STACK) - off = STK_OFF_TCCNT + STK_OFF + stack_depth; - else - off = STK_OFF_TCCNT; + off = jit->frame_off + + offsetof(struct prog_frame, tail_call_cnt); /* lhi %w0,1 */ EMIT4_IMM(0xa7080000, REG_W0, 1); /* laal %w1,%w0,off(%r15) */ @@ -1863,7 +1885,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* * Restore registers before calling function */ - save_restore_regs(jit, REGS_RESTORE, stack_depth, 0); + save_restore_regs(jit, REGS_RESTORE, 0); /* * goto *(prog->bpf_func + tail_call_start); @@ -1877,7 +1899,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* aghi %r1,tail_call_start */ EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start); /* brcl 0xf,__s390_indirect_jump_r1 */ - EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip); + EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf, + __s390_indirect_jump_r1); } else { /* bc 0xf,tail_call_start(%r1) */ _EMIT4(0x47f01000 + jit->tail_call_start); @@ -2155,7 +2178,7 @@ static int bpf_set_addr(struct bpf_jit *jit, int i) * Compile eBPF program into s390x code */ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, - bool extra_pass, u32 stack_depth) + bool extra_pass) { int i, insn_count, lit32_size, lit64_size; u64 kern_arena; @@ -2164,24 +2187,30 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, jit->lit64 = jit->lit64_start; jit->prg = 0; jit->excnt = 0; + if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) + jit->frame_off = sizeof(struct prog_frame) - + offsetofend(struct prog_frame, unused) + + round_up(fp->aux->stack_depth, 8); + else + jit->frame_off = 0; kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena); if (kern_arena) jit->kern_arena = _EMIT_CONST_U64(kern_arena); jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena); - bpf_jit_prologue(jit, fp, stack_depth); + bpf_jit_prologue(jit, fp); if (bpf_set_addr(jit, 0) < 0) return -1; for (i = 0; i < fp->len; i += insn_count) { - insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth); + insn_count = bpf_jit_insn(jit, fp, i, extra_pass); if (insn_count < 0) return -1; /* Next instruction address */ if (bpf_set_addr(jit, i + insn_count) < 0) return -1; } - bpf_jit_epilogue(jit, stack_depth); + bpf_jit_epilogue(jit); lit32_size = jit->lit32 - jit->lit32_start; lit64_size = jit->lit64 - jit->lit64_start; @@ -2257,7 +2286,6 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit, */ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { - u32 stack_depth = round_up(fp->aux->stack_depth, 8); struct bpf_prog *tmp, *orig_fp = fp; struct bpf_binary_header *header; struct s390_jit_data *jit_data; @@ -2310,7 +2338,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * - 3: Calculate program size and addrs array */ for (pass = 1; pass <= 3; pass++) { - if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { + if (bpf_jit_prog(&jit, fp, extra_pass)) { fp = orig_fp; goto free_addrs; } @@ -2324,7 +2352,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto free_addrs; } skip_init_ctx: - if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { + if (bpf_jit_prog(&jit, fp, extra_pass)) { bpf_jit_binary_free(header); fp = orig_fp; goto free_addrs; @@ -2585,9 +2613,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (nr_stack_args > MAX_NR_STACK_ARGS) return -ENOTSUPP; - /* Return to %r14, since func_addr and %r0 are not available. */ - if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) || - (flags & BPF_TRAMP_F_INDIRECT)) + /* Return to %r14 in the struct_ops case. */ + if (flags & BPF_TRAMP_F_INDIRECT) flags |= BPF_TRAMP_F_SKIP_FRAME; /* @@ -2645,9 +2672,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, /* stg %r1,backchain_off(%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15, tjit->backchain_off); - /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */ + /* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */ _EMIT6(0xd203f000 | tjit->tccnt_off, - 0xf000 | (tjit->stack_size + STK_OFF_TCCNT)); + 0xf000 | (tjit->stack_size + + offsetof(struct prog_frame, tail_call_cnt))); /* stmg %r2,%rN,fwd_reg_args_off(%r15) */ if (nr_reg_args) EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2, @@ -2784,8 +2812,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, (nr_stack_args * sizeof(u64) - 1) << 16 | tjit->stack_args_off, 0xf000 | tjit->orig_stack_args_off); - /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */ - _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off); + /* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */ + _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt), + 0xf000 | tjit->tccnt_off); /* lgr %r1,%r8 */ EMIT4(0xb9040000, REG_1, REG_8); /* %r1() */ @@ -2842,22 +2871,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET)) EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15, tjit->retval_off); - /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */ - _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT), + /* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */ + _EMIT6(0xd203f000 | (tjit->stack_size + + offsetof(struct prog_frame, tail_call_cnt)), 0xf000 | tjit->tccnt_off); /* aghi %r15,stack_size */ EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size); - /* Emit an expoline for the following indirect jump. */ - if (nospec_uses_trampoline()) - emit_expoline(jit); if (flags & BPF_TRAMP_F_SKIP_FRAME) - /* br %r14 */ - _EMIT2(0x07fe); + EMIT_JUMP_REG(14); else - /* br %r1 */ - _EMIT2(0x07f1); - - emit_r1_thunk(jit); + EMIT_JUMP_REG(1); return 0; } @@ -2919,10 +2942,16 @@ bool bpf_jit_supports_arena(void) bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) { - /* - * Currently the verifier uses this function only to check which - * atomic stores to arena are supported, and they all are. - */ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(insn)) + return false; + } return true; } diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c index 79211bec0fc8..03089ef479b2 100644 --- a/arch/s390/net/pnet.c +++ b/arch/s390/net/pnet.c @@ -6,6 +6,7 @@ */ #include <linux/device.h> +#include <linux/export.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/types.h> diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 2c21f0394c9a..1810e0944a4e 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o pci_kvm_hook.o + pci_bus.o pci_kvm_hook.o pci_report.o pci_fixup.o obj-$(CONFIG_PCI_IOV) += pci_iov.o obj-$(CONFIG_SYSFS) += pci_sysfs.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 88f72745fa59..cd6676c2d602 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -31,6 +31,7 @@ #include <linux/lockdep.h> #include <linux/list_sort.h> +#include <asm/machine.h> #include <asm/isc.h> #include <asm/airq.h> #include <asm/facility.h> @@ -44,6 +45,7 @@ /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); +static DEFINE_MUTEX(zpci_add_remove_lock); static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); @@ -69,6 +71,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb); struct airq_iv *zpci_aif_sbv; EXPORT_SYMBOL_GPL(zpci_aif_sbv); +void zpci_zdev_put(struct zpci_dev *zdev) +{ + if (!zdev) + return; + mutex_lock(&zpci_add_remove_lock); + kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); +} + struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; @@ -124,14 +135,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, struct zpci_fib fib = {0}; u8 cc; - WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; /* Work around off by one in ISM virt device */ if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) fib.pal = limit + (1 << 12); else fib.pal = limit; - fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; + fib.iota = iota; fib.gd = zdev->gisa; cc = zpci_mod_fc(req, &fib, status); if (cc) @@ -255,7 +265,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { /* * When PCI MIO instructions are unavailable the "physical" address @@ -265,7 +275,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, if (!static_branch_unlikely(&have_mio)) return (void __iomem *)phys_addr; - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot); @@ -690,6 +700,23 @@ int zpci_enable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_enable_device); +int zpci_reenable_device(struct zpci_dev *zdev) +{ + u8 status; + int rc; + + rc = zpci_enable_device(zdev); + if (rc) + return rc; + + rc = zpci_iommu_register_ioat(zdev, &status); + if (rc) + zpci_disable_device(zdev); + + return rc; +} +EXPORT_SYMBOL_GPL(zpci_reenable_device); + int zpci_disable_device(struct zpci_dev *zdev) { u32 fh = zdev->fh; @@ -739,7 +766,6 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); */ int zpci_hot_reset_device(struct zpci_dev *zdev) { - u8 status; int rc; lockdep_assert_held(&zdev->state_lock); @@ -758,19 +784,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) return rc; } - rc = zpci_enable_device(zdev); - if (rc) - return rc; + rc = zpci_reenable_device(zdev); - if (zdev->dma_table) - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); - if (rc) { - zpci_disable_device(zdev); - return rc; - } - - return 0; + return rc; } /** @@ -831,6 +847,7 @@ int zpci_add_device(struct zpci_dev *zdev) { int rc; + mutex_lock(&zpci_add_remove_lock); zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state); rc = zpci_init_iommu(zdev); if (rc) @@ -844,12 +861,14 @@ int zpci_add_device(struct zpci_dev *zdev) spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); spin_unlock(&zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); return 0; error_destroy_iommu: zpci_destroy_iommu(zdev); error: zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc); + mutex_unlock(&zpci_add_remove_lock); return rc; } @@ -919,21 +938,20 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) * @zdev: the zpci_dev that was reserved * * Handle the case that a given zPCI function was reserved by another system. - * After a call to this function the zpci_dev can not be found via - * get_zdev_by_fid() anymore but may still be accessible via existing - * references though it will not be functional anymore. */ void zpci_device_reserved(struct zpci_dev *zdev) { - /* - * Remove device from zpci_list as it is going away. This also - * makes sure we ignore subsequent zPCI events for this device. - */ - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); + lockdep_assert_held(&zdev->state_lock); + /* We may declare the device reserved multiple times */ + if (zdev->state == ZPCI_FN_STATE_RESERVED) + return; zdev->state = ZPCI_FN_STATE_RESERVED; zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + /* + * The underlying device is gone. Allow the zdev to be freed + * as soon as all other references are gone by accounting for + * the removal as a dropped reference. + */ zpci_zdev_put(zdev); } @@ -941,13 +959,14 @@ void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + lockdep_assert_held(&zpci_add_remove_lock); WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); - - if (zdev->zbus->bus) - zpci_bus_remove_device(zdev, false); - - if (zdev_enabled(zdev)) - zpci_disable_device(zdev); + /* + * We already hold zpci_list_lock thanks to kref_put_lock(). + * This makes sure no new reference can be taken from the list. + */ + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); if (zdev->has_hp_slot) zpci_exit_slot(zdev); @@ -1073,7 +1092,7 @@ char * __init pcibios_setup(char *str) return NULL; } if (!strcmp(str, "nomio")) { - get_lowcore()->machine_flags &= ~MACHINE_FLAG_PCI_MIO; + clear_machine_feature(MFEATURE_PCI_MIO); return NULL; } if (!strcmp(str, "force_floating")) { @@ -1148,7 +1167,7 @@ static int __init pci_base_init(void) return 0; } - if (MACHINE_HAS_PCI_MIO) { + if (test_machine_feature(MFEATURE_PCI_MIO)) { static_branch_enable(&have_mio); system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT); } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index d5ace00d10f0..45a1c36c5a54 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -13,12 +13,12 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/err.h> -#include <linux/export.h> #include <linux/delay.h> #include <linux/seq_file.h> #include <linux/jump_label.h> #include <linux/pci.h> #include <linux/printk.h> +#include <linux/dma-direct.h> #include <asm/pci_clp.h> #include <asm/pci_dma.h> @@ -171,7 +171,6 @@ void zpci_bus_scan_busses(void) static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev) { return !s390_pci_no_rid && zdev->rid_available && - zpci_is_device_configured(zdev) && !zdev->vfn; } @@ -284,10 +283,32 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) return zbus; } +static void pci_dma_range_setup(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + u64 aligned_end, size; + dma_addr_t dma_start; + int ret; + + dma_start = PAGE_ALIGN(zdev->start_dma); + aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1); + if (aligned_end >= dma_start) + size = aligned_end - dma_start; + else + size = 0; + WARN_ON_ONCE(size == 0); + + ret = dma_direct_set_offset(&pdev->dev, 0, dma_start, size); + if (ret) + pr_err("Failed to allocate DMA range map for %s\n", pci_name(pdev)); +} + void pcibios_bus_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); + pci_dma_range_setup(pdev); + /* * With pdev->no_vf_scan the common PCI probing code does not * perform PF/VF linking. @@ -332,6 +353,20 @@ error: return rc; } +static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + struct pci_dev *pdev; + + if (!zdev->vfn) + return false; + + pdev = zpci_iov_find_parent_pf(zbus, zdev); + if (!pdev) + return true; + pci_dev_put(pdev); + return false; +} + int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) { bool topo_is_tid = zdev->tid_avail; @@ -346,6 +381,15 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) topo = topo_is_tid ? zdev->tid : zdev->pchid; zbus = zpci_bus_get(topo, topo_is_tid); + /* + * An isolated VF gets its own domain/bus even if there exists + * a matching domain/bus already + */ + if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) { + zpci_bus_put(zbus); + zbus = NULL; + } + if (!zbus) { zbus = zpci_bus_alloc(topo, topo_is_tid); if (!zbus) diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index e86a9419d233..ae3d7a9159bd 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -21,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev); void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); void zpci_release_device(struct kref *kref); -static inline void zpci_zdev_put(struct zpci_dev *zdev) -{ - if (zdev) - kref_put(&zdev->kref, zpci_release_device); -} + +void zpci_zdev_put(struct zpci_dev *zdev); static inline void zpci_zdev_get(struct zpci_dev *zdev) { diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 14bf7e8d06b7..241f7251c873 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -56,7 +56,7 @@ static inline int clp_get_ilp(unsigned long *ilp) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n" "0: lhi %[exc],0\n" "1:\n" @@ -79,7 +79,7 @@ static __always_inline int clp_req(void *data, unsigned int lps) u64 ignored; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n" "0: lhi %[exc],0\n" "1:\n" @@ -112,6 +112,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, zdev->version = response->version; zdev->maxstbl = response->maxstbl; zdev->dtsm = response->dtsm; + zdev->rtr_avail = response->rtr; switch (response->version) { case 1: @@ -427,6 +428,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 7f7b732b3f3e..d930416d4c90 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -16,6 +16,7 @@ #include <asm/sclp.h> #include "pci_bus.h" +#include "pci_report.h" /* Content Code Description for PCI Function Error */ struct zpci_ccdf_err { @@ -53,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) case PCI_ERS_RESULT_CAN_RECOVER: case PCI_ERS_RESULT_RECOVERED: case PCI_ERS_RESULT_NEED_RESET: + case PCI_ERS_RESULT_NONE: return false; default: return true; @@ -77,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver) return false; if (!driver->err_handler->error_detected) return false; - if (!driver->err_handler->slot_reset) - return false; - if (!driver->err_handler->resume) - return false; return true; } @@ -105,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, struct zpci_dev *zdev = to_zpci(pdev); int rc; + /* The underlying device may have been disabled by the event */ + if (!zdev_enabled(zdev)) + return PCI_ERS_RESULT_NEED_RESET; + pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); rc = zpci_reset_load_store_blocked(zdev); if (rc) { @@ -113,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } - if (driver->err_handler->mmio_enabled) { + if (driver->err_handler->mmio_enabled) ers_res = driver->err_handler->mmio_enabled(pdev); - if (ers_result_indicates_abort(ers_res)) { - pr_info("%s: Automatic recovery failed after MMIO re-enable\n", - pci_name(pdev)); - return ers_res; - } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { - pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); - return ers_res; - } + else + ers_res = PCI_ERS_RESULT_NONE; + + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after MMIO re-enable\n", + pci_name(pdev)); + return ers_res; + } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { + pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); + return ers_res; } pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); @@ -149,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, return ers_res; } pdev->error_state = pci_channel_io_normal; - ers_res = driver->err_handler->slot_reset(pdev); + + if (driver->err_handler->slot_reset) + ers_res = driver->err_handler->slot_reset(pdev); + else + ers_res = PCI_ERS_RESULT_NONE; + if (ers_result_indicates_abort(ers_res)) { pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); return ers_res; @@ -169,6 +178,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) { pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; + struct zpci_dev *zdev = to_zpci(pdev); + char *status_str = "success"; struct pci_driver *driver; /* @@ -186,37 +197,56 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (is_passed_through(pdev)) { pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", pci_name(pdev)); + status_str = "failed (pass-through)"; goto out_unlock; } driver = to_pci_driver(pdev->dev.driver); if (!is_driver_supported(driver)) { - if (!driver) + if (!driver) { pr_info("%s: Cannot be recovered because no driver is bound to the device\n", pci_name(pdev)); - else + status_str = "failed (no driver)"; + } else { pr_info("%s: The %s driver bound to the device does not support error recovery\n", pci_name(pdev), driver->name); + status_str = "failed (no driver support)"; + } goto out_unlock; } ers_res = zpci_event_notify_error_detected(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on detection)"; goto out_unlock; + } - if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { + if (ers_res != PCI_ERS_RESULT_NEED_RESET) { ers_res = zpci_event_do_error_state_clear(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on MMIO enable)"; goto out_unlock; + } } if (ers_res == PCI_ERS_RESULT_NEED_RESET) ers_res = zpci_event_do_reset(pdev, driver); + /* + * ers_res can be PCI_ERS_RESULT_NONE either because the driver + * decided to return it, indicating that it abstains from voting + * on how to recover, or because it didn't implement the callback. + * Both cases assume, that if there is nothing else causing a + * disconnect, we recovered successfully. + */ + if (ers_res == PCI_ERS_RESULT_NONE) + ers_res = PCI_ERS_RESULT_RECOVERED; + if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); + status_str = "failed (driver can't recover)"; goto out_unlock; } @@ -225,6 +255,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) driver->err_handler->resume(pdev); out_unlock: pci_dev_unlock(pdev); + zpci_report_status(zdev, "recovery", status_str); return ers_res; } @@ -260,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; pci_ers_result_t ers_res; + u32 fh = 0; + int rc; zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); @@ -268,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) if (zdev) { mutex_lock(&zdev->state_lock); + rc = clp_refresh_fh(zdev->fid, &fh); + if (rc) + goto no_pdev; + if (!fh || ccdf->fh != fh) { + /* Ignore events with stale handles */ + zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n", + ccdf->fid, fh, ccdf->fh); + goto no_pdev; + } zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); @@ -322,6 +364,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) zdev->state = ZPCI_FN_STATE_STANDBY; } +static void zpci_event_reappear(struct zpci_dev *zdev) +{ + lockdep_assert_held(&zdev->state_lock); + /* + * The zdev is in the reserved state. This means that it was presumed to + * go away but there are still undropped references. Now, the platform + * announced its availability again. Bring back the lingering zdev + * to standby. This is safe because we hold a temporary reference + * now so that it won't go away. Account for the re-appearance of the + * underlying device by incrementing the reference count. + */ + zdev->state = ZPCI_FN_STATE_STANDBY; + zpci_zdev_get(zdev); + zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh); +} + static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); @@ -345,8 +403,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); /* the configuration request may be stale */ - if (zdev->state != ZPCI_FN_STATE_STANDBY) + else if (zdev->state != ZPCI_FN_STATE_STANDBY) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; } @@ -362,6 +422,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); zpci_update_fh(zdev, ccdf->fh); } break; diff --git a/arch/s390/pci/pci_fixup.c b/arch/s390/pci/pci_fixup.c new file mode 100644 index 000000000000..35688b645098 --- /dev/null +++ b/arch/s390/pci/pci_fixup.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Exceptions for specific devices, + * + * Copyright IBM Corp. 2025 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + */ +#include <linux/pci.h> + +static void zpci_ism_bar_no_mmap(struct pci_dev *pdev) +{ + /* + * ISM's BAR is special. Drivers written for ISM know + * how to handle this but others need to be aware of their + * special nature e.g. to prevent attempts to mmap() it. + */ + pdev->non_mappable_bars = 1; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, + PCI_DEVICE_ID_IBM_ISM, + zpci_ism_bar_no_mmap); diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index f5a75ea7629a..eb978c8012be 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -160,7 +160,7 @@ static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status) u64 __data; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d20000,%[data],%[req_off]\n" "0: lhi %[exc],0\n" "1:\n" @@ -229,7 +229,7 @@ static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status) u64 __data; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d60000,%[data],%[ioaddr_len]\n" "0: lhi %[exc],0\n" "1:\n" @@ -267,7 +267,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d00000,%[data],%[req_off]\n" "0: lhi %[exc],0\n" "1:\n" @@ -321,7 +321,7 @@ static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d40000,%[data],%[ioaddr_len]\n" "0: lhi %[exc],0\n" "1:\n" @@ -356,7 +356,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" "0: lhi %[exc],0\n" "1:\n" @@ -410,7 +410,7 @@ static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n" "0: lhi %[exc],0\n" "1:\n" diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c index ead062bf2b41..191e56a623f6 100644 --- a/arch/s390/pci/pci_iov.c +++ b/arch/s390/pci/pci_iov.c @@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in return 0; } -int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +/** + * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function + * @zbus: The bus that the PCI function is on, or would be added on + * @zdev: The PCI function + * + * Finds the parent PF, if it exists and is configured, of the given PCI function + * and increments its refcount. Th PF is searched for on the provided bus so the + * caller has to ensure that this is the correct bus to search. This function may + * be used before adding the PCI function to a zbus. + * + * Return: Pointer to the struct pci_dev of the parent PF or NULL if it not + * found. If the function is not a VF or has no RequesterID information, + * NULL is returned as well. + */ +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) { - int i, cand_devfn; - struct zpci_dev *zdev; + int i, vfid, devfn, cand_devfn; struct pci_dev *pdev; - int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ - int rc = 0; if (!zbus->multifunction) - return 0; - - /* If the parent PF for the given VF is also configured in the + return NULL; + /* Non-VFs and VFs without RID available don't have a parent */ + if (!zdev->vfn || !zdev->rid_available) + return NULL; + /* Linux vfid starts at 0 vfn at 1 */ + vfid = zdev->vfn - 1; + devfn = zdev->rid & ZPCI_RID_MASK_DEVFN; + /* + * If the parent PF for the given VF is also configured in the * instance, it must be on the same zbus. * We can then identify the parent PF by checking what * devfn the VF would have if it belonged to that PF using the PF's @@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn if (!pdev) continue; cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); - if (cand_devfn == virtfn->devfn) { - rc = zpci_iov_link_virtfn(pdev, virtfn, vfid); - /* balance pci_get_slot() */ - pci_dev_put(pdev); - break; - } + if (cand_devfn == devfn) + return pdev; /* balance pci_get_slot() */ pci_dev_put(pdev); } } + return NULL; +} + +int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +{ + struct zpci_dev *zdev = to_zpci(virtfn); + struct pci_dev *pdev_pf; + int rc = 0; + + pdev_pf = zpci_iov_find_parent_pf(zbus, zdev); + if (pdev_pf) { + /* Linux' vfids start at 0 while zdev->vfn starts at 1 */ + rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1); + pci_dev_put(pdev_pf); + } return rc; } diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h index e3fa4e77fc86..d2c2793eb0f3 100644 --- a/arch/s390/pci/pci_iov.h +++ b/arch/s390/pci/pci_iov.h @@ -19,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev); int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn); +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev); + #else /* CONFIG_PCI_IOV */ static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {} @@ -28,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v { return 0; } + +static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + return NULL; +} #endif /* CONFIG_PCI_IOV */ #endif /* __S390_PCI_IOV_h */ diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c index ff34baf50a3e..df5b25dbe9ca 100644 --- a/arch/s390/pci/pci_kvm_hook.c +++ b/arch/s390/pci/pci_kvm_hook.c @@ -5,7 +5,9 @@ * Copyright (C) IBM Corp. 2022. All rights reserved. * Author(s): Pierre Morel <pmorel@linux.ibm.com> */ + #include <linux/kvm_host.h> +#include <linux/export.h> struct zpci_kvm_hook zpci_kvm_hook; EXPORT_SYMBOL_GPL(zpci_kvm_hook); diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 46f99dc164ad..51e7a28af899 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -32,9 +32,11 @@ static inline int __pcistb_mio_inuser( u64 len, u8 *status) { int cc, exception; + bool sacf_flag; exception = 1; - asm volatile ( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile ( " sacf 256\n" "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n" "1: lhi %[exc],0\n" @@ -44,6 +46,7 @@ static inline int __pcistb_mio_inuser( : CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception) : [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src)) : CC_CLOBBER_LIST("memory")); + disable_sacf_uaccess(sacf_flag); *status = len >> 24 & 0xff; return exception ? -ENXIO : CC_TRANSFORM(cc); } @@ -54,6 +57,7 @@ static inline int __pcistg_mio_inuser( { union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen}; int cc, exception; + bool sacf_flag; u64 val = 0; u64 cnt = ulen; u8 tmp; @@ -64,7 +68,8 @@ static inline int __pcistg_mio_inuser( * address space. pcistg then uses the user mappings. */ exception = 1; - asm volatile ( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile ( " sacf 256\n" "0: llgc %[tmp],0(%[src])\n" "4: sllg %[val],%[val],8\n" @@ -81,6 +86,7 @@ static inline int __pcistg_mio_inuser( CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair) : : CC_CLOBBER_LIST("memory")); + disable_sacf_uaccess(sacf_flag); *status = ioaddr_len.odd >> 24 & 0xff; cc = exception ? -ENXIO : CC_TRANSFORM(cc); @@ -175,8 +181,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, args.address = mmio_addr; args.vma = vma; ret = follow_pfnmap_start(&args); - if (ret) - goto out_unlock_mmap; + if (ret) { + fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL); + ret = follow_pfnmap_start(&args); + if (ret) + goto out_unlock_mmap; + } io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); @@ -200,6 +210,7 @@ static inline int __pcilg_mio_inuser( u64 ulen, u8 *status) { union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen}; + bool sacf_flag; u64 cnt = ulen; int shift = ulen * 8; int cc, exception; @@ -211,7 +222,8 @@ static inline int __pcilg_mio_inuser( * user address @dst */ exception = 1; - asm volatile ( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile ( " sacf 256\n" "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n" "1: lhi %[exc],0\n" @@ -232,10 +244,10 @@ static inline int __pcilg_mio_inuser( : [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception), CC_OUT(cc, cc), [val] "=d" (val), [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), - [shift] "+d" (shift) + [shift] "+a" (shift) : : CC_CLOBBER_LIST("memory")); - + disable_sacf_uaccess(sacf_flag); cc = exception ? -ENXIO : CC_TRANSFORM(cc); /* did we write everything to the user space buffer? */ if (!cc && cnt != 0) @@ -315,14 +327,18 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) goto out_unlock_mmap; ret = -EACCES; - if (!(vma->vm_flags & VM_WRITE)) + if (!(vma->vm_flags & VM_READ)) goto out_unlock_mmap; args.vma = vma; args.address = mmio_addr; ret = follow_pfnmap_start(&args); - if (ret) - goto out_unlock_mmap; + if (ret) { + fixup_user_fault(current->mm, mmio_addr, 0, NULL); + ret = follow_pfnmap_start(&args); + if (ret) + goto out_unlock_mmap; + } io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c new file mode 100644 index 000000000000..1b494e5ecc4d --- /dev/null +++ b/arch/s390/pci/pci_report.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2024 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/sprintf.h> +#include <linux/pci.h> + +#include <asm/sclp.h> +#include <asm/debug.h> +#include <asm/pci_debug.h> + +#include "pci_report.h" + +#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714 + +struct zpci_report_error_data { + u64 timestamp; + u64 err_log_id; + char log_data[]; +} __packed; + +#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb)) +#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data)) + +struct zpci_report_error { + struct zpci_report_error_header header; + struct zpci_report_error_data data; +} __packed; + +static const char *zpci_state_str(pci_channel_state_t state) +{ + switch (state) { + case pci_channel_io_normal: + return "normal"; + case pci_channel_io_frozen: + return "frozen"; + case pci_channel_io_perm_failure: + return "permanent-failure"; + default: + return "invalid"; + }; +} + +static int debug_log_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) +{ + unsigned long sec, usec; + unsigned int level; + char *except_str; + int rc = 0; + + level = entry->level; + sec = entry->clock; + usec = do_div(sec, USEC_PER_SEC); + + if (entry->exception) + except_str = "*"; + else + except_str = "-"; + rc += scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ", + sec, usec, level, except_str, + entry->cpu); + return rc; +} + +static int debug_prolog_header(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size) +{ + return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n"); +} + +static struct debug_view debug_log_view = { + "pci_msg_log", + &debug_prolog_header, + &debug_log_header_fn, + &debug_sprintf_format_fn, + NULL, + NULL +}; + +/** + * zpci_report_status - Report the status of operations on a PCI device + * @zdev: The PCI device for which to report status + * @operation: A string representing the operation reported + * @status: A string representing the status of the operation + * + * This function creates a human readable report about an operation such as + * PCI device recovery and forwards this to the platform using the SCLP Write + * Event Data mechanism. Besides the operation and status strings the report + * also contains additional information about the device deemed useful for + * debug such as the currently bound device driver, if any, and error state. + * Additionally a string representation of pci_debug_msg_id, or as much as fits, + * is also included. + * + * Return: 0 on success an error code < 0 otherwise. + */ +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status) +{ + struct zpci_report_error *report; + struct pci_driver *driver = NULL; + struct pci_dev *pdev = NULL; + char *buf, *end; + int ret; + + if (!zdev || !zdev->zbus) + return -ENODEV; + + /* Protected virtualization hosts get nothing from us */ + if (prot_virt_guest) + return -ENODATA; + + report = (void *)get_zeroed_page(GFP_KERNEL); + if (!report) + return -ENOMEM; + if (zdev->zbus->bus) + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev) + driver = to_pci_driver(pdev->dev.driver); + + buf = report->data.log_data; + end = report->data.log_data + ZPCI_REPORT_DATA_SIZE; + buf += scnprintf(buf, end - buf, "report: %s\n", operation); + buf += scnprintf(buf, end - buf, "status: %s\n", status); + buf += scnprintf(buf, end - buf, "state: %s\n", + (pdev) ? zpci_state_str(pdev->error_state) : "n/a"); + buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a"); + ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true); + if (ret < 0) + pr_err("Reading PCI debug messages failed with code %d\n", ret); + else + buf += ret; + + report->header.version = 1; + report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG; + report->header.length = buf - (char *)&report->data; + report->data.timestamp = ktime_get_clocktai_seconds(); + report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT; + + ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid); + if (ret) + pr_err("Reporting PCI status failed with code %d\n", ret); + else + pr_info("Reported PCI device status\n"); + + free_page((unsigned long)report); + + return ret; +} diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h new file mode 100644 index 000000000000..e08003d51a97 --- /dev/null +++ b/arch/s390/pci/pci_report.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2024 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + * + */ +#ifndef __S390_PCI_REPORT_H +#define __S390_PCI_REPORT_H + +struct zpci_dev; + +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status); + +#endif /* __S390_PCI_REPORT_H */ diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 5f46ad58dcd1..0ee0924cfab7 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -52,7 +52,6 @@ static DEVICE_ATTR_RO(mio_enabled); static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) { - u8 status; int ret; pci_stop_and_remove_bus_device(pdev); @@ -70,16 +69,8 @@ static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) return ret; } - ret = zpci_enable_device(zdev); - if (ret) - return ret; + ret = zpci_reenable_device(zdev); - if (zdev->dma_table) { - ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); - if (ret) - zpci_disable_device(zdev); - } return ret; } @@ -135,7 +126,7 @@ out: static DEVICE_ATTR_WO(recover); static ssize_t util_string_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -145,10 +136,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, zdev->util_str, sizeof(zdev->util_str)); } -static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); +static const BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); static ssize_t report_error_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct zpci_report_error_header *report = (void *) buf; @@ -164,7 +155,7 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj, return ret ? ret : count; } -static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE); +static const BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE); static ssize_t uid_is_unique_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -203,7 +194,7 @@ const struct attribute_group zpci_ident_attr_group = { .is_visible = zpci_index_is_visible, }; -static struct bin_attribute *zpci_bin_attrs[] = { +static const struct bin_attribute *const zpci_bin_attrs[] = { &bin_attr_util_string, &bin_attr_report_error, NULL, diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 24eccaa29337..bd39b36e7bd6 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -8,20 +8,22 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) -CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY +CFLAGS_sha256.o := -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes +KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += -D__DISABLE_EXPORTS KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS += -D__DISABLE_EXPORTS # Since we link purgatory with -r unresolved symbols are not checked, so we # also link a purgatory.chk binary without -r to check for unresolved symbols. diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 030efda05dbe..ecb38102187c 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -16,7 +16,7 @@ int verify_sha256_digest(void) { struct kexec_sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; - struct sha256_state sctx; + struct sha256_ctx sctx; sha256_init(&sctx); end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions); diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 855f818deb98..d5c68ade71ab 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -54,6 +54,9 @@ static struct facility_def facility_defs[] = { #ifdef CONFIG_HAVE_MARCH_Z15_FEATURES 61, /* miscellaneous-instruction-extension 3 */ #endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES + 84, /* miscellaneous-instruction-extension 4 */ +#endif -1 /* END */ } }, diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index a1bc02b29c81..7d76c417f83f 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -201,6 +201,17 @@ static int cmp_long_insn(const void *a, const void *b) return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name); } +static void print_insn_name(const char *name) +{ + size_t i, len; + + len = strlen(name); + printf("{"); + for (i = 0; i < len; i++) + printf(" \'%c\',", name[i]); + printf(" }"); +} + static void print_long_insn(struct gen_opcode *desc) { struct insn *insn; @@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc) insn = &desc->insn[i]; if (insn->name_len < 6) continue; - printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name); + printf("\t[LONG_INSN_%s] = ", insn->upper); + print_insn_name(insn->name); + printf(", \\\n"); } printf("}\n\n"); } @@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr) if (insn->type->byte != 0) opcode += 2; printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format); - if (insn->name_len < 6) - printf(".name = \"%s\" ", insn->name); - else - printf(".offset = LONG_INSN_%s ", insn->upper); - printf("}, \\\n"); + if (insn->name_len < 6) { + printf(".name = "); + print_insn_name(insn->name); + } else { + printf(".offset = LONG_INSN_%s", insn->upper); + } + printf(" }, \\\n"); } static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) |