diff options
Diffstat (limited to 'arch/s390')
240 files changed, 8486 insertions, 6120 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0077969170e8..0c16dc443e2f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -41,9 +41,6 @@ config AUDIT_ARCH config NO_IOPORT_MAP def_bool y -config PCI_QUIRKS - def_bool n - config ARCH_SUPPORTS_UPROBES def_bool y @@ -52,13 +49,19 @@ config KASAN_SHADOW_OFFSET depends on KASAN default 0x1C000000000000 -config GCC_ASM_FLAG_OUTPUT_BROKEN +config CC_ASM_FLAG_OUTPUT_BROKEN def_bool CC_IS_GCC && GCC_VERSION < 140200 help GCC versions before 14.2.0 may die with an internal compiler error in some configurations if flag output operands are used within inline assemblies. +config CC_HAS_ASM_AOR_FORMAT_FLAGS + def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100) + help + Clang versions before 19.1.0 do not support A, + O, and R inline assembly format flags. + config S390 def_bool y # @@ -67,11 +70,12 @@ config S390 imply IMA_SECURE_AND_OR_TRUSTED_BOOT select ALTERNATE_USER_ADDRESS_SPACE select ARCH_32BIT_USTAT_F_TINODE - select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE @@ -88,6 +92,7 @@ config S390 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PREEMPT_LAZY + select ARCH_HAS_PTDUMP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_DIRECT_MAP @@ -132,6 +137,7 @@ config S390 select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG + select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP @@ -140,6 +146,7 @@ config S390 select ARCH_WANTS_NO_INSTR select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_KERNEL_PMD_MKWRITE select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP @@ -155,9 +162,9 @@ config S390 select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY - select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select GENERIC_IOREMAP if PCI select HAVE_ALIGNED_STRUCT_PAGE @@ -177,22 +184,25 @@ config S390 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK select HAVE_ASM_MODVERSIONS + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGS + select HAVE_FTRACE_REGS_HAVING_PT_REGS select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_GUP_FAST select HAVE_FENTRY + select HAVE_FTRACE_GRAPH_FUNC select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION - select HAVE_FUNCTION_GRAPH_RETVAL + select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS @@ -231,8 +241,10 @@ config S390 select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_CPU_ACCOUNTING_IDLE + select HOTPLUG_SMT select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MMU_GATHER_MERGE_VMAS select MMU_GATHER_NO_GATHER @@ -240,12 +252,14 @@ config S390 select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI select NEED_PER_CPU_EMBED_FIRST_CHUNK + select NEED_PROC_VMCORE_DEVICE_RAM if PROC_VMCORE select NEED_SG_DMA_LENGTH if PCI select OLD_SIGACTION select OLD_SIGSUSPEND3 select PCI_DOMAINS if PCI select PCI_MSI if PCI select PCI_MSI_ARCH_FALLBACKS if PCI_MSI + select PCI_QUIRKS if PCI select SPARSE_IRQ select SWIOTLB select SYSCTL_EXCEPTION_TRACE @@ -255,6 +269,7 @@ config S390 select USER_STACKTRACE_SUPPORT select VDSO_GETRANDOM select VIRT_CPU_ACCOUNTING + select VMAP_STACK select ZONE_DMA # Note: keep the above list sorted alphabetically @@ -318,6 +333,10 @@ config HAVE_MARCH_Z16_FEATURES def_bool n select HAVE_MARCH_Z15_FEATURES +config HAVE_MARCH_Z17_FEATURES + def_bool n + select HAVE_MARCH_Z16_FEATURES + choice prompt "Processor type" default MARCH_Z196 @@ -383,6 +402,14 @@ config MARCH_Z16 Select this to enable optimizations for IBM z16 (3931 and 3932 series). +config MARCH_Z17 + bool "IBM z17" + select HAVE_MARCH_Z17_FEATURES + depends on $(cc-option,-march=z17) + help + Select this to enable optimizations for IBM z17 (9175 and + 9176 series). + endchoice config MARCH_Z10_TUNE @@ -406,6 +433,9 @@ config MARCH_Z15_TUNE config MARCH_Z16_TUNE def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT +config MARCH_Z17_TUNE + def_bool TUNE_Z17 || MARCH_Z17 && TUNE_DEFAULT + choice prompt "Tune code generation" default TUNE_DEFAULT @@ -450,6 +480,10 @@ config TUNE_Z16 bool "IBM z16" depends on $(cc-option,-mtune=z16) +config TUNE_Z17 + bool "IBM z17" + depends on $(cc-option,-mtune=z17) + endchoice config 64BIT @@ -617,6 +651,7 @@ endchoice config RELOCATABLE def_bool y + select ARCH_VMLINUX_NEEDS_RELOCS help This builds a kernel image that retains relocation information so it can be loaded at an arbitrary address. @@ -688,32 +723,6 @@ config MAX_PHYSMEM_BITS Increasing the number of bits also increases the kernel image size. By default 46 bits (64TB) are supported. -config CHECK_STACK - def_bool y - depends on !VMAP_STACK - prompt "Detect kernel stack overflow" - help - This option enables the compiler option -mstack-guard and - -mstack-size if they are available. If the compiler supports them - it will emit additional code to each function prolog to trigger - an illegal operation if the kernel stack is about to overflow. - - Say N if you are unsure. - -config STACK_GUARD - int "Size of the guard area (128-1024)" - range 128 1024 - depends on CHECK_STACK - default "256" - help - This allows you to specify the size of the guard area at the lower - end of the kernel stack. If the kernel stack points into the guard - area on function entry an illegal operation is triggered. The size - needs to be a power of 2. Please keep in mind that the size of an - interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit. - The minimum size for the stack guard should be 256 for 31 bit and - 512 for 64 bit. - endmenu menu "I/O subsystem" diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index c4300ea4abf8..7955d7eee7d8 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -13,6 +13,16 @@ config DEBUG_ENTRY If unsure, say N. +config STRICT_MM_TYPECHECKS + bool "Strict Memory Management Type Checks" + depends on DEBUG_KERNEL + help + Enable strict type checking for memory management types like pte_t + and pmd_t. This generates slightly worse code and should be used + for debug builds. + + If unsure, say N. + config CIO_INJECT bool "CIO Inject interfaces" depends on DEBUG_KERNEL && DEBUG_FS diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 7fd57398221e..7679bc16b692 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -15,14 +15,14 @@ KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 KBUILD_CFLAGS += -fPIC -LDFLAGS_vmlinux := -no-pie --emit-relocs --discard-none +LDFLAGS_vmlinux := $(call ld-option,-no-pie) extra_tools := relocs aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) endif -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain @@ -48,6 +48,7 @@ mflags-$(CONFIG_MARCH_Z13) := -march=z13 mflags-$(CONFIG_MARCH_Z14) := -march=z14 mflags-$(CONFIG_MARCH_Z15) := -march=z15 mflags-$(CONFIG_MARCH_Z16) := -march=z16 +mflags-$(CONFIG_MARCH_Z17) := -march=z17 export CC_FLAGS_MARCH := $(mflags-y) @@ -61,6 +62,7 @@ cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14 cflags-$(CONFIG_MARCH_Z15_TUNE) += -mtune=z15 cflags-$(CONFIG_MARCH_Z16_TUNE) += -mtune=z16 +cflags-$(CONFIG_MARCH_Z17_TUNE) += -mtune=z17 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include @@ -72,15 +74,6 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) -ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) - CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE) - ifeq ($(call cc-option,-mstack-size=8192),) - CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD) - endif - export CC_FLAGS_CHECK_STACK - cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK) -endif - ifdef CONFIG_EXPOLINE ifdef CONFIG_EXPOLINE_EXTERN CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink index df82f5410769..c2b737500a91 100644 --- a/arch/s390/Makefile.postlink +++ b/arch/s390/Makefile.postlink @@ -19,14 +19,8 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S mkdir -p $(OUT_RELOCS); \ $(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S -quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = \ - $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ - --remove-section='.rela.*' --remove-section='.rela__*' $@ - -vmlinux: FORCE +vmlinux.unstripped: FORCE $(call cmd,relocs) - $(call cmd,strip_relocs) clean: @rm -f $(OUT_RELOCS)/relocs.S diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 91a30e017d65..dd7ba7587dd5 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -52,7 +52,7 @@ static int appldata_interval_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table_header *appldata_sysctl_header; -static struct ctl_table appldata_table[] = { +static const struct ctl_table appldata_table[] = { { .procname = "timer", .mode = S_IRUGO | S_IWUSR, diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore index f5ef099e2fd3..af2a6a7bc028 100644 --- a/arch/s390/boot/.gitignore +++ b/arch/s390/boot/.gitignore @@ -5,4 +5,5 @@ relocs.S section_cmp.* vmlinux vmlinux.lds +vmlinux.map vmlinux.syms diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 8bc1308ac892..bee49626be4b 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -26,7 +26,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o +obj-y += version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o obj-y += uv.o printk.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index 11e0c3d5dbc8..79afb5fa7f1f 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -46,7 +46,7 @@ void print_missing_facilities(void) * z/VM adds a four character prefix. */ if (strlen(als_str) > 70) { - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); *als_str = '\0'; } u16_to_decimal(val_str, i * BITS_PER_LONG + j); @@ -54,7 +54,7 @@ void print_missing_facilities(void) first = 0; } } - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); } static void facility_mismatch(void) @@ -62,10 +62,10 @@ static void facility_mismatch(void) struct cpuid id; get_cpu_id(&id); - boot_printk("The Linux kernel requires more recent processor hardware\n"); - boot_printk("Detected machine-type number: %4x\n", id.machine); + boot_emerg("The Linux kernel requires more recent processor hardware\n"); + boot_emerg("Detected machine-type number: %4x\n", id.machine); print_missing_facilities(); - boot_printk("See Principles of Operations for facility bits\n"); + boot_emerg("See Principles of Operations for facility bits\n"); disabled_wait(); } diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c index abc08d2c873d..19ea7934b918 100644 --- a/arch/s390/boot/alternative.c +++ b/arch/s390/boot/alternative.c @@ -1,3 +1,138 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "alt: " fmt +#include "boot.h" + +#define a_debug boot_debug #include "../kernel/alternative.c" + +static void alt_debug_all(int type) +{ + int i; + + switch (type) { + case ALT_TYPE_FACILITY: + for (i = 0; i < ARRAY_SIZE(alt_debug.facilities); i++) + alt_debug.facilities[i] = -1UL; + break; + case ALT_TYPE_FEATURE: + for (i = 0; i < ARRAY_SIZE(alt_debug.mfeatures); i++) + alt_debug.mfeatures[i] = -1UL; + break; + case ALT_TYPE_SPEC: + alt_debug.spec = 1; + break; + } +} + +static void alt_debug_modify(int type, unsigned int nr, bool clear) +{ + switch (type) { + case ALT_TYPE_FACILITY: + if (clear) + __clear_facility(nr, alt_debug.facilities); + else + __set_facility(nr, alt_debug.facilities); + break; + case ALT_TYPE_FEATURE: + if (clear) + __clear_machine_feature(nr, alt_debug.mfeatures); + else + __set_machine_feature(nr, alt_debug.mfeatures); + break; + } +} + +static char *alt_debug_parse(int type, char *str) +{ + unsigned long val, endval; + char *endp; + bool clear; + int i; + + if (*str == ':') { + str++; + } else { + alt_debug_all(type); + return str; + } + clear = false; + if (*str == '!') { + alt_debug_all(type); + clear = true; + str++; + } + while (*str) { + val = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + if (*str == '-') { + str++; + endval = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + while (val <= endval) { + alt_debug_modify(type, val, clear); + val++; + } + } else { + alt_debug_modify(type, val, clear); + } + if (*str != ',') + break; + str++; + } + return str; +} + +/* + * Use debug-alternative command line parameter for debugging: + * "debug-alternative" + * -> print debug message for every single alternative + * + * "debug-alternative=0;2" + * -> print debug message for all alternatives with type 0 and 2 + * + * "debug-alternative=0:0-7" + * -> print debug message for all alternatives with type 0 and with + * facility numbers within the range of 0-7 + * (if type 0 is ALT_TYPE_FACILITY) + * + * "debug-alternative=0:!8;1" + * -> print debug message for all alternatives with type 0, for all + * facility number, except facility 8, and in addition print all + * alternatives with type 1 + */ +void alt_debug_setup(char *str) +{ + unsigned long type; + char *endp; + int i; + + if (!str) { + alt_debug_all(ALT_TYPE_FACILITY); + alt_debug_all(ALT_TYPE_FEATURE); + alt_debug_all(ALT_TYPE_SPEC); + return; + } + while (*str) { + type = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + switch (type) { + case ALT_TYPE_FACILITY: + case ALT_TYPE_FEATURE: + str = alt_debug_parse(type, str); + break; + case ALT_TYPE_SPEC: + alt_debug_all(ALT_TYPE_SPEC); + break; + } + if (*str != ';') + break; + str++; + } +} diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 7521a9d75fa2..e045cae6e80a 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,14 +8,9 @@ #ifndef __ASSEMBLY__ +#include <linux/printk.h> #include <asm/physmem_info.h> -struct machine_info { - unsigned char has_edat1 : 1; - unsigned char has_edat2 : 1; - unsigned char has_nx : 1; -}; - struct vmlinux_info { unsigned long entry; unsigned long image_size; /* does not include .bss */ @@ -48,13 +43,16 @@ void physmem_set_usable_limit(unsigned long limit); void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); void physmem_free(enum reserved_range_type type); /* for continuous/multiple allocations per type */ -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align); +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align); +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom); /* for single allocations, 1 per type */ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, unsigned long align, unsigned long min, unsigned long max, bool die_on_oom); unsigned long get_physmem_alloc_pos(void); +void dump_physmem_reserved(void); bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, unsigned long *intersection_start); bool is_ipl_block_dump(void); @@ -66,16 +64,33 @@ void parse_boot_command_line(void); void verify_facilities(void); void print_missing_facilities(void); void sclp_early_setup_buffer(void); -void print_pgm_check_info(void); +void alt_debug_setup(char *str); +void do_pgm_check(struct pt_regs *regs); unsigned long randomize_within_range(unsigned long size, unsigned long align, unsigned long min, unsigned long max); void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit); -void __printf(1, 2) boot_printk(const char *fmt, ...); +int __printf(1, 2) boot_printk(const char *fmt, ...); void print_stacktrace(unsigned long sp); void error(char *m); int get_random(unsigned long limit, unsigned long *value); +void boot_rb_dump(void); + +#ifndef boot_fmt +#define boot_fmt(fmt) fmt +#endif + +#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__) +#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__) +#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__) +#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__) +#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__) +#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__) extern struct machine_info machine; +extern int boot_console_loglevel; +extern bool boot_ignore_loglevel; /* Symbols defined by linker scripts */ extern const char kernel_version[]; diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index f478e8e9cbda..03500b9d9fb9 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/string.h> +#include <asm/boot_data.h> #include <asm/page.h> #include "decompressor.h" #include "boot.h" @@ -63,6 +64,15 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #include "../../../../lib/decompress_unzstd.c" #endif +static void decompress_error(char *m) +{ + if (bootdebug) + boot_rb_dump(); + boot_emerg("Decompression error: %s\n", m); + boot_emerg(" -- System halted\n"); + disabled_wait(); +} + unsigned long mem_safe_offset(void) { return ALIGN(free_mem_end_ptr, PAGE_SIZE); @@ -71,5 +81,5 @@ unsigned long mem_safe_offset(void) void deploy_kernel(void *output) { __decompress(_compressed_start, _compressed_end - _compressed_start, - NULL, NULL, output, vmlinux.image_size, NULL, error); + NULL, NULL, output, vmlinux.image_size, NULL, decompress_error); } diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 0a47b16f6412..0b511d5c030b 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -254,8 +254,9 @@ SYM_CODE_START_LOCAL(startup_normal) xc 0xf00(256),0xf00 larl %r13,.Lctl lctlg %c0,%c15,0(%r13) # load control registers - stcke __LC_BOOT_CLOCK - mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 + larl %r13,tod_clock_base + stcke 0(%r13) + mvc __LC_LAST_UPDATE_CLOCK(8),1(%r13) larl %r13,6f spt 0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),0(%r13) @@ -292,12 +293,6 @@ SYM_CODE_END(startup_normal) #include "head_kdump.S" -# -# This program check is active immediately after kernel start -# and until early_pgm_check_handler is set in kernel/early.c -# It simply saves general/control registers and psw in -# the save area and does disabled wait with a faulty address. -# SYM_CODE_START_LOCAL(startup_pgm_check_handler) stmg %r8,%r15,__LC_SAVE_AREA la %r8,4095 @@ -311,8 +306,18 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler) oi __LC_RETURN_PSW+1,0x2 # set wait state bit larl %r9,.Lold_psw_disabled_wait stg %r9,__LC_PGM_NEW_PSW+8 - larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD - brasl %r14,print_pgm_check_info + larl %r15,_dump_info_stack_end-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r2,STACK_FRAME_OVERHEAD(%r15) + mvc __PT_PSW(16,%r2),__LC_PSW_SAVE_AREA-4095(%r8) + mvc __PT_R0(128,%r2),__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __PT_LAST_BREAK(8,%r2),__LC_PGM_LAST_BREAK + mvc __PT_INT_CODE(4,%r2),__LC_PGM_INT_CODE + brasl %r14,do_pgm_check + larl %r9,startup_pgm_check_handler + stg %r9,__LC_PGM_NEW_PSW+8 + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + lpswe __LC_RETURN_PSW .Lold_psw_disabled_wait: la %r8,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 557462e62cd7..f584d7da29cb 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -5,6 +5,7 @@ #include <linux/pgtable.h> #include <asm/abs_lowcore.h> #include <asm/page-states.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> @@ -34,29 +35,14 @@ int vmalloc_size_set; static inline int __diag308(unsigned long subcode, void *addr) { - unsigned long reg1, reg2; - union register_pair r1; - psw_t old; - - r1.even = (unsigned long) addr; - r1.odd = 0; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + union register_pair r1 = { .even = (unsigned long)addr, .odd = 0 }; + + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [r1] "+&d" (r1.pair), - [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - "+Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [subcode] "d" (subcode), - [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw) + "0:\n" + EX_TABLE(0b, 0b) + : [r1] "+d" (r1.pair) + : [subcode] "d" (subcode) : "cc", "memory"); return r1.odd; } @@ -193,7 +179,7 @@ void setup_boot_command_line(void) if (has_ebcdic_char(parmarea.command_line)) EBCASC(parmarea.command_line, COMMAND_LINE_SIZE); /* copy arch command line */ - strcpy(early_command_line, strim(parmarea.command_line)); + strscpy(early_command_line, strim(parmarea.command_line)); /* append IPL PARM data to the boot command line */ if (!is_prot_virt_guest() && ipl_block_valid) @@ -215,7 +201,7 @@ static void check_cleared_facilities(void) for (i = 0; i < ARRAY_SIZE(als); i++) { if ((stfle_fac_list[i] & als[i]) != als[i]) { - boot_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); + boot_emerg("The Linux kernel requires facilities cleared via command line option\n"); print_missing_facilities(); break; } @@ -267,7 +253,8 @@ void parse_boot_command_line(void) int rc; __kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); - args = strcpy(command_line_buf, early_command_line); + strscpy(command_line_buf, early_command_line); + args = command_line_buf; while (*args) { args = next_arg(args, ¶m, &val); @@ -295,6 +282,9 @@ void parse_boot_command_line(void) if (!strcmp(param, "facilities") && val) modify_fac_list(val); + if (!strcmp(param, "debug-alternative")) + alt_debug_setup(val); + if (!strcmp(param, "nokaslr")) __kaslr_enabled = 0; @@ -312,6 +302,24 @@ void parse_boot_command_line(void) } #endif if (!strcmp(param, "relocate_lowcore") && test_facility(193)) - relocate_lowcore = 1; + set_machine_feature(MFEATURE_LOWCORE); + if (!strcmp(param, "earlyprintk")) + boot_earlyprintk = true; + if (!strcmp(param, "debug")) + boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; + if (!strcmp(param, "bootdebug")) { + bootdebug = true; + if (val) + strscpy(bootdebug_filter, val); + } + if (!strcmp(param, "quiet")) + boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; + if (!strcmp(param, "ignore_loglevel")) + boot_ignore_loglevel = true; + if (!strcmp(param, "loglevel")) { + boot_console_loglevel = simple_strtoull(val, NULL, 10); + if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN) + boot_console_loglevel = CONSOLE_LOGLEVEL_MIN; + } } } diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index d00898852a88..f73cd757a5f7 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; - size_t size; /* * Find the length for the IPL report boot data @@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void) return; size = get_cert_comp_list_size(); - early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int)); ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; copy_components_bootdata(); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index f864d2bff775..941f4c9e27cc 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -32,7 +32,7 @@ struct prng_parm { static int check_prng(void) { if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) { - boot_printk("KASLR disabled: CPU has no PRNG\n"); + boot_warn("KASLR disabled: CPU has no PRNG\n"); return 0; } if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) @@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a * cannot have chains. * * On the other hand, "dynamic" or "repetitive" allocations are done via - * physmem_alloc_top_down(). These allocations are tightly packed together + * physmem_alloc_or_die(). These allocations are tightly packed together * top down from the end of online memory. physmem_alloc_pos represents * current position where those allocations start. * diff --git a/arch/s390/boot/pgm_check.c b/arch/s390/boot/pgm_check.c new file mode 100644 index 000000000000..fa621fa5bc02 --- /dev/null +++ b/arch/s390/boot/pgm_check.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/stdarg.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/lowcore.h> +#include <asm/setup.h> +#include <asm/sclp.h> +#include <asm/uv.h> +#include "boot.h" + +void print_stacktrace(unsigned long sp) +{ + struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, + (unsigned long)_stack_end }; + bool first = true; + + boot_emerg("Call Trace:\n"); + while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { + struct stack_frame *sf = (struct stack_frame *)sp; + + if (first) + boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + else + boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + if (sf->back_chain <= sp) + break; + sp = sf->back_chain; + first = false; + } +} + +extern struct exception_table_entry __start___ex_table[]; +extern struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long extable_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static bool ex_handler(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; + + for (ex = __start___ex_table; ex < __stop___ex_table; ex++) { + if (extable_insn(ex) != regs->psw.addr) + continue; + if (ex->type != EX_TYPE_FIXUP) + return false; + regs->psw.addr = extable_fixup(ex); + return true; + } + return false; +} + +void do_pgm_check(struct pt_regs *regs) +{ + struct psw_bits *psw = &psw_bits(regs->psw); + unsigned long *gpregs = regs->gprs; + + if (ex_handler(regs)) + return; + if (bootdebug) + boot_rb_dump(); + boot_emerg("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Kernel fault: interruption code %04x ilc:%d\n", + regs->int_code & 0xffff, regs->int_code >> 17); + if (kaslr_enabled()) { + boot_emerg("Kernel random base: %lx\n", __kaslr_offset); + boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys); + } + boot_emerg("PSW : %016lx %016lx (%pS)\n", + regs->psw.mask, regs->psw.addr, (void *)regs->psw.addr); + boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba); + boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + print_stacktrace(gpregs[15]); + boot_emerg("Last Breaking-Event-Address:\n"); + boot_emerg(" [<%016lx>] %pS\n", regs->last_break, (void *)regs->last_break); + /* Convert to disabled wait PSW */ + psw->io = 0; + psw->ext = 0; + psw->wait = 1; +} diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c deleted file mode 100644 index 5abe59fb3bc0..000000000000 --- a/arch/s390/boot/pgm_check_info.c +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/stdarg.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <asm/stacktrace.h> -#include <asm/boot_data.h> -#include <asm/lowcore.h> -#include <asm/setup.h> -#include <asm/sclp.h> -#include <asm/uv.h> -#include "boot.h" - -void print_stacktrace(unsigned long sp) -{ - struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, - (unsigned long)_stack_end }; - bool first = true; - - boot_printk("Call Trace:\n"); - while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { - struct stack_frame *sf = (struct stack_frame *)sp; - - boot_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" : - " sp:%016lx [<%016lx>] %pS\n", - sp, sf->gprs[8], (void *)sf->gprs[8]); - if (sf->back_chain <= sp) - break; - sp = sf->back_chain; - first = false; - } -} - -void print_pgm_check_info(void) -{ - unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area; - struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area); - - boot_printk("Linux version %s\n", kernel_version); - if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Kernel fault: interruption code %04x ilc:%x\n", - get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); - if (kaslr_enabled()) { - boot_printk("Kernel random base: %lx\n", __kaslr_offset); - boot_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys); - } - boot_printk("PSW : %016lx %016lx (%pS)\n", - get_lowcore()->psw_save_area.mask, - get_lowcore()->psw_save_area.addr, - (void *)get_lowcore()->psw_save_area.addr); - boot_printk( - " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", - psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, - psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, - psw->eaba); - boot_printk("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); - print_stacktrace(get_lowcore()->gpregs_save_area[15]); - boot_printk("Last Breaking-Event-Address:\n"); - boot_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, - (void *)get_lowcore()->pgm_last_break); -} diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 7617aa2d2f7e..45e3d057cfaa 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "physmem: " fmt #include <linux/processor.h> #include <linux/errno.h> #include <linux/init.h> @@ -28,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n) return &physmem_info.online[n]; if (unlikely(!physmem_info.online_extended)) { physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( - RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0, physmem_alloc_pos, true); } return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; @@ -58,36 +59,22 @@ void add_physmem_online_range(u64 start, u64 end) static int __diag260(unsigned long rx1, unsigned long rx2) { - unsigned long reg1, reg2, ry; union register_pair rx; int cc, exception; - psw_t old; + unsigned long ry; rx.even = rx1; rx.odd = rx2; ry = 0x10; /* storage configuration */ exception = 1; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " diag %[rx],%[ry],0x260\n" - " lhi %[exc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + "0: lhi %[exc],0\n" + "1:\n" CC_IPM(cc) - : CC_OUT(cc, cc), - [exc] "+d" (exception), - [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [ry] "+&d" (ry), - "+Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [rx] "d" (rx.pair), - [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw) + EX_TABLE(0b, 1b) + : CC_OUT(cc, cc), [exc] "+d" (exception), [ry] "+d" (ry) + : [rx] "d" (rx.pair) : CC_CLOBBER_LIST("memory")); cc = exception ? -1 : CC_TRANSFORM(cc); return cc == 0 ? ry : -1; @@ -117,29 +104,15 @@ static int diag260(void) static int diag500_storage_limit(unsigned long *max_physmem_end) { unsigned long storage_limit; - unsigned long reg1, reg2; - psw_t old; - - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" - " lghi 1,%[subcode]\n" - " lghi 2,0\n" - " diag 2,4,0x500\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - " lgr %[slimit],2\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [slimit] "=d" (storage_limit), - "=Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw), - [subcode] "i" (DIAG500_SC_STOR_LIMIT) + + asm_inline volatile( + " lghi %%r1,%[subcode]\n" + " lghi %%r2,0\n" + " diag %%r2,%%r4,0x500\n" + "0: lgr %[slimit],%%r2\n" + EX_TABLE(0b, 0b) + : [slimit] "=d" (storage_limit) + : [subcode] "i" (DIAG500_SC_STOR_LIMIT) : "memory", "1", "2"); if (!storage_limit) return -EINVAL; @@ -150,31 +123,17 @@ static int diag500_storage_limit(unsigned long *max_physmem_end) static int tprot(unsigned long addr) { - unsigned long reg1, reg2; int cc, exception; - psw_t old; exception = 1; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " tprot 0(%[addr]),0\n" - " lhi %[exc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + "0: lhi %[exc],0\n" + "1:\n" CC_IPM(cc) - : CC_OUT(cc, cc), - [exc] "+d" (exception), - [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - "=Q" (get_lowcore()->program_new_psw.addr), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw), - [addr] "a" (addr) + EX_TABLE(0b, 1b) + : CC_OUT(cc, cc), [exc] "+d" (exception) + : [addr] "a" (addr) : CC_CLOBBER_LIST("memory")); cc = exception ? -EFAULT : CC_TRANSFORM(cc); return cc; @@ -207,11 +166,16 @@ unsigned long detect_max_physmem_end(void) max_physmem_end = search_mem_end(); physmem_info.info_source = MEM_DETECT_BIN_SEARCH; } + boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end, + get_physmem_info_source()); return max_physmem_end; } void detect_physmem_online_ranges(unsigned long max_physmem_end) { + unsigned long start, end; + int i; + if (!sclp_early_read_storage_info()) { physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; } else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) { @@ -226,12 +190,16 @@ void detect_physmem_online_ranges(unsigned long max_physmem_end) } else if (max_physmem_end) { add_physmem_online_range(0, max_physmem_end); } + boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source()); + for_each_physmem_online_range(i, &start, &end) + boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end); } void physmem_set_usable_limit(unsigned long limit) { physmem_info.usable = limit; physmem_alloc_pos = limit; + boot_debug("Usable memory limit: 0x%016lx\n", limit); } static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) @@ -241,38 +209,47 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min, enum reserved_range_type t; int i; - boot_printk("Linux version %s\n", kernel_version); + boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", - size, align, min, max); - boot_printk("Reserved memory ranges:\n"); + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n", + size, align, min, max); + boot_emerg("Reserved memory ranges:\n"); for_each_physmem_reserved_range(t, range, &start, &end) { - boot_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); total_reserved_mem += end - start; } - boot_printk("Usable online memory ranges (info source: %s [%x]):\n", - get_physmem_info_source(), physmem_info.info_source); + boot_emerg("Usable online memory ranges (info source: %s [%d]):\n", + get_physmem_info_source(), physmem_info.info_source); for_each_physmem_usable_range(i, &start, &end) { - boot_printk("%016lx %016lx\n", start, end); + boot_emerg("%016lx %016lx\n", start, end); total_mem += end - start; } - boot_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", - total_mem, total_reserved_mem, - total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); + boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); print_stacktrace(current_frame_address()); - boot_printk("\n\n -- System halted\n"); + boot_emerg(" -- System halted\n"); disabled_wait(); } -void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) { physmem_info.reserved[type].start = addr; physmem_info.reserved[type].end = addr + size; } +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size, + get_rr_type_name(type)); +} + void physmem_free(enum reserved_range_type type) { + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start, + physmem_info.reserved[type].end, get_rr_type_name(type)); physmem_info.reserved[type].start = 0; physmem_info.reserved[type].end = 0; } @@ -339,41 +316,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s max = min(max, physmem_alloc_pos); addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); if (addr) - physmem_reserve(type, addr, size); + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size, + get_rr_type_name(type)); return addr; } -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align) +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom) { struct reserved_range *range = &physmem_info.reserved[type]; - struct reserved_range *new_range; + struct reserved_range *new_range = NULL; unsigned int ranges_left; unsigned long addr; addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, - &ranges_left, true); + &ranges_left, die_on_oom); + if (!addr) + return 0; /* if not a consecutive allocation of the same type or first allocation */ if (range->start != addr + size) { if (range->end) { - physmem_alloc_pos = __physmem_alloc_range( - sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, - physmem_alloc_ranges, &ranges_left, true); - new_range = (struct reserved_range *)physmem_alloc_pos; + addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0, + physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + new_range = (struct reserved_range *)addr; + addr = __physmem_alloc_range(size, align, 0, addr, ranges_left, + &ranges_left, die_on_oom); + if (!addr) + return 0; *new_range = *range; range->chain = new_range; - addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, - ranges_left, &ranges_left, true); } range->end = addr + size; } + if (type != RR_VMEM) { + boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:", + addr, addr + size, get_rr_type_name(type), align, !!new_range); + } range->start = addr; physmem_alloc_pos = addr; physmem_alloc_ranges = ranges_left; return addr; } +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + return physmem_alloc(type, size, align, true); +} + unsigned long get_physmem_alloc_pos(void) { return physmem_alloc_pos; } + +void dump_physmem_reserved(void) +{ + struct reserved_range *range; + enum reserved_range_type t; + unsigned long start, end; + + boot_debug("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + if (end) { + boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n", + get_rr_type_name(t), start, end, (unsigned long)range, + (unsigned long)range->chain); + } + } +} diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 35f18f2b936e..4bb6bc95704e 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -5,21 +5,113 @@ #include <linux/ctype.h> #include <asm/stacktrace.h> #include <asm/boot_data.h> +#include <asm/sections.h> #include <asm/lowcore.h> #include <asm/setup.h> +#include <asm/timex.h> #include <asm/sclp.h> #include <asm/uv.h> #include "boot.h" +int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT; +bool boot_ignore_loglevel; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +static void boot_rb_add(const char *str, size_t len) +{ + /* leave double '\0' in the end */ + size_t avail = sizeof(boot_rb) - boot_rb_off - 1; + + /* store strings separated by '\0' */ + if (len + 1 > avail) + boot_rb_off = 0; + avail = sizeof(boot_rb) - boot_rb_off - 1; + strscpy(boot_rb + boot_rb_off, str, avail); + boot_rb_off += len + 1; +} + +static void print_rb_entry(const char *str) +{ + sclp_early_printk(printk_skip_level(str)); +} + +static bool debug_messages_printed(void) +{ + return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG); +} + +void boot_rb_dump(void) +{ + if (debug_messages_printed()) + return; + sclp_early_printk("Boot messages ring buffer:\n"); + boot_rb_foreach(print_rb_entry); +} + const char hex_asc[] = "0123456789abcdef"; static char *as_hex(char *dst, unsigned long val, int pad) { - char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); - for (*p-- = 0; p >= dst; val >>= 4) + for (*p-- = '\0'; p >= dst; val >>= 4) *p-- = hex_asc[val & 0x0f]; - return end; + return dst; +} + +#define MAX_NUMLEN 21 +static char *as_dec(char *buf, unsigned long val, bool is_signed) +{ + bool negative = false; + char *p = buf + MAX_NUMLEN; + + if (is_signed && (long)val < 0) { + val = (val == LONG_MIN ? LONG_MIN : -(long)val); + negative = true; + } + + *--p = '\0'; + do { + *--p = '0' + (val % 10); + val /= 10; + } while (val); + + if (negative) + *--p = '-'; + return p; +} + +static ssize_t strpad(char *dst, size_t dst_size, const char *src, + int _pad, bool zero_pad, bool decimal) +{ + ssize_t len = strlen(src), pad = _pad; + char *p = dst; + + if (max(len, abs(pad)) >= dst_size) + return -E2BIG; + + if (pad > len) { + if (decimal && zero_pad && *src == '-') { + *p++ = '-'; + src++; + len--; + pad--; + } + memset(p, zero_pad ? '0' : ' ', pad - len); + p += pad - len; + } + memcpy(p, src, len); + p += len; + if (pad < 0 && -pad > len) { + memset(p, ' ', -pad - len); + p += -pad - len; + } + *p = '\0'; + return p - dst; } static char *symstart(char *p) @@ -58,35 +150,93 @@ static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned sh return NULL; } -static noinline char *strsym(void *ip) +#define MAX_SYMLEN 64 +static noinline char *strsym(char *buf, void *ip) { - static char buf[64]; unsigned short off; unsigned short len; char *p; p = findsym((unsigned long)ip, &off, &len); if (p) { - strncpy(buf, p, sizeof(buf)); + strscpy(buf, p, MAX_SYMLEN); /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ - p = buf + strnlen(buf, sizeof(buf) - 15); - strcpy(p, "+0x"); - p = as_hex(p + 3, off, 0); - strcpy(p, "/0x"); - as_hex(p + 3, len, 0); + p = buf + strnlen(buf, MAX_SYMLEN - 15); + strscpy(p, "+0x", MAX_SYMLEN - (p - buf)); + as_hex(p + 3, off, 0); + strcat(p, "/0x"); + as_hex(p + strlen(p), len, 0); } else { as_hex(buf, (unsigned long)ip, 16); } return buf; } -void boot_printk(const char *fmt, ...) +static inline int printk_loglevel(const char *buf) +{ + if (buf[0] == KERN_SOH_ASCII && buf[1]) { + switch (buf[1]) { + case '0' ... '7': + return buf[1] - '0'; + } + } + return MESSAGE_LOGLEVEL_DEFAULT; +} + +static void boot_console_earlyprintk(const char *buf) +{ + int level = printk_loglevel(buf); + + /* always print emergency messages */ + if (level > LOGLEVEL_EMERG && !boot_earlyprintk) + return; + buf = printk_skip_level(buf); + /* print debug messages only when bootdebug is enabled */ + if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf)))) + return; + if (boot_ignore_loglevel || level < boot_console_loglevel) + sclp_early_printk(buf); +} + +static char *add_timestamp(char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + unsigned long ns = tod_to_ns(__get_tod_clock_monotonic()); + char ts[MAX_NUMLEN]; + + *buf++ = '['; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0); + *buf++ = '.'; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0); + *buf++ = ']'; + *buf++ = ' '; +#endif + return buf; +} + +#define va_arg_len_type(args, lenmod, typemod) \ + ((lenmod == 'l') ? va_arg(args, typemod long) : \ + (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \ + (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \ + (lenmod == 'z') ? va_arg(args, typemod long) : \ + va_arg(args, typemod int)) + +int boot_printk(const char *fmt, ...) { char buf[1024] = { 0 }; char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ - unsigned long pad; - char *p = buf; + bool zero_pad, decimal; + char *strval, *p = buf; + char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)]; va_list args; + char lenmod; + ssize_t len; + int pad; + + *p++ = KERN_SOH_ASCII; + *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT; + p = add_timestamp(p); + fmt = printk_skip_level(fmt); va_start(args, fmt); for (; p < end && *fmt; fmt++) { @@ -94,31 +244,56 @@ void boot_printk(const char *fmt, ...) *p++ = *fmt; continue; } - pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + if (*++fmt == '%') { + *p++ = '%'; + continue; + } + zero_pad = (*fmt == '0'); + pad = simple_strtol(fmt, (char **)&fmt, 10); + lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0; + if (lenmod == 'h' && *fmt == 'h') { + lenmod = 'H'; + fmt++; + } + decimal = false; switch (*fmt) { case 's': - p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); + if (lenmod) + goto out; + strval = va_arg(args, char *); + zero_pad = false; break; case 'p': - if (*++fmt != 'S') + if (*++fmt != 'S' || lenmod) goto out; - p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); + strval = strsym(valbuf, va_arg(args, void *)); + zero_pad = false; break; - case 'l': - if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned long), pad); + case 'd': + case 'i': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1); + decimal = true; + break; + case 'u': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; case 'x': - if (end - p <= max(sizeof(int) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned int), pad); + strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; default: goto out; } + len = strpad(p, end - p, strval, pad, zero_pad, decimal); + if (len == -E2BIG) + break; + p += len; } out: va_end(args); - sclp_early_printk(buf); + len = strlen(buf); + if (len) { + boot_rb_add(buf, len); + boot_console_earlyprintk(buf); + } + return len; } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 6087d38c7235..da8337e63a3e 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,13 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "startup: " fmt #include <linux/string.h> #include <linux/elf.h> #include <asm/page-states.h> #include <asm/boot_data.h> #include <asm/extmem.h> #include <asm/sections.h> +#include <asm/diag288.h> #include <asm/maccess.h> +#include <asm/machine.h> +#include <asm/sysinfo.h> #include <asm/cpu_mf.h> #include <asm/setup.h> +#include <asm/timex.h> #include <asm/kasan.h> #include <asm/kexec.h> #include <asm/sclp.h> @@ -30,57 +35,131 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); -int __bootdata_preserved(relocate_lowcore); +unsigned long __bootdata_preserved(page_noexec_mask); +unsigned long __bootdata_preserved(segment_noexec_mask); +unsigned long __bootdata_preserved(region_noexec_mask); +union tod_clock __bootdata_preserved(tod_clock_base); +u64 __bootdata_preserved(clock_comparator_max) = -1UL; u64 __bootdata_preserved(stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); -struct machine_info machine; - void error(char *x) { - boot_printk("\n\n%s\n\n -- System halted", x); + boot_emerg("%s\n", x); + boot_emerg(" -- System halted\n"); disabled_wait(); } +static char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); + +static void detect_machine_type(void) +{ + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + + /* Check current-configuration-level */ + if (stsi(NULL, 0, 0, 0) <= 2) { + set_machine_feature(MFEATURE_LPAR); + return; + } + /* Get virtual-machine cpu information. */ + if (stsi(vmms, 3, 2, 2) || !vmms->count) + return; + /* Detect known hypervisors */ + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) + set_machine_feature(MFEATURE_KVM); + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) + set_machine_feature(MFEATURE_VM); +} + +static void detect_diag288(void) +{ + /* "BEGIN" in EBCDIC character set */ + static const char cmd[] = "\xc2\xc5\xc7\xc9\xd5"; + unsigned long action, len; + + action = machine_is_vm() ? (unsigned long)cmd : LPARWDT_RESTART; + len = machine_is_vm() ? sizeof(cmd) : 0; + if (__diag288(WDT_FUNC_INIT, MIN_INTERVAL, action, len)) + return; + __diag288(WDT_FUNC_CANCEL, 0, 0, 0); + set_machine_feature(MFEATURE_DIAG288); +} + +static void detect_diag9c(void) +{ + unsigned int cpu; + int rc = 1; + + cpu = stap(); + asm_inline volatile( + " diag %[cpu],%%r0,0x9c\n" + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc) + : [cpu] "d" (cpu) + : "cc", "memory"); + if (!rc) + set_machine_feature(MFEATURE_DIAG9C); +} + +static void reset_tod_clock(void) +{ + union tod_clock clk; + + if (store_tod_clock_ext_cc(&clk) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) + disabled_wait(); + memset(&tod_clock_base, 0, sizeof(tod_clock_base)); + tod_clock_base.tod = TOD_UNIX_EPOCH; + get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; +} + static void detect_facilities(void) { - if (test_facility(8)) { - machine.has_edat1 = 1; + if (cpu_has_edat1()) local_ctl_set_bit(0, CR0_EDAT_BIT); + page_noexec_mask = -1UL; + segment_noexec_mask = -1UL; + region_noexec_mask = -1UL; + if (!cpu_has_nx()) { + page_noexec_mask &= ~_PAGE_NOEXEC; + segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC; + region_noexec_mask &= ~_REGION_ENTRY_NOEXEC; + } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) + set_machine_feature(MFEATURE_PCI_MIO); + reset_tod_clock(); + if (test_facility(139) && (tod_clock_base.tod >> 63)) { + /* Enable signed clock comparator comparisons */ + set_machine_feature(MFEATURE_SCC); + clock_comparator_max = -1UL >> 1; + local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); } - if (test_facility(78)) - machine.has_edat2 = 1; - if (test_facility(130)) - machine.has_nx = 1; + if (test_facility(50) && test_facility(73)) { + set_machine_feature(MFEATURE_TX); + local_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); + } + if (cpu_has_vx()) + local_ctl_set_bit(0, CR0_VECTOR_BIT); } static int cmma_test_essa(void) { - unsigned long reg1, reg2, tmp = 0; + unsigned long tmp = 0; int rc = 1; - psw_t old; /* Test ESSA_GET_STATE */ - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" - " la %[rc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - [tmp] "=&d" (tmp), - "+Q" (get_lowcore()->program_new_psw), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&get_lowcore()->program_new_psw), - [cmd] "i" (ESSA_GET_STATE) + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc), [tmp] "+d" (tmp) + : [cmd] "i" (ESSA_GET_STATE) : "cc", "memory"); return rc; } @@ -134,7 +213,7 @@ static void rescue_initrd(unsigned long min, unsigned long max) return; old_addr = addr; physmem_free(RR_INITRD); - addr = physmem_alloc_top_down(RR_INITRD, size, 0); + addr = physmem_alloc_or_die(RR_INITRD, size, 0); memmove((void *)addr, (void *)old_addr, size); } @@ -213,12 +292,16 @@ static void setup_ident_map_size(unsigned long max_physmem_end) if (oldmem_data.start) { __kaslr_enabled = 0; ident_map_size = min(ident_map_size, oldmem_data.size); + boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size); } else if (ipl_block_valid && is_ipl_block_dump()) { __kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) { ident_map_size = min(ident_map_size, hsa_size); + boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size); + } } #endif + boot_debug("Identity map size: 0x%016lx\n", ident_map_size); } #define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)) @@ -258,6 +341,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); + boot_debug("vmem size estimated: 0x%016lx\n", vsize); if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE || (vsize > _REGION2_SIZE && kaslr_enabled())) { asce_limit = _REGION1_SIZE; @@ -281,8 +365,10 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) * otherwise asce_limit and rte_size would have been adjusted. */ vmax = adjust_to_uv_max(asce_limit); + boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax); #ifdef CONFIG_KASAN BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START); + boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END); /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif @@ -296,19 +382,27 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) pos = 0; kernel_end = vmax - pos * THREAD_SIZE; kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE); + boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax); + boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start, + kernel_size + kernel_size); } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) { kernel_start = round_down(vmax - kernel_size, THREAD_SIZE); - boot_printk("The kernel base address is forced to %lx\n", kernel_start); + boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start, + kernel_start + kernel_size); } else { kernel_start = __NO_KASLR_START_KERNEL; + boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start, + kernel_start + kernel_size); } __kaslr_offset = kernel_start; + boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset); MODULES_END = round_down(kernel_start, _SEGMENT_SIZE); MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; if (IS_ENABLED(CONFIG_KMSAN)) VMALLOC_END -= MODULES_LEN * 2; + boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END); /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ vsize = (VMALLOC_END - FIXMAP_SIZE) / 2; @@ -320,10 +414,15 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) VMALLOC_END -= vmalloc_size * 2; } VMALLOC_START = VMALLOC_END - vmalloc_size; + boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END); __memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE); + boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area, + __memcpy_real_area + MEMCPY_REAL_SIZE); __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); + boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore, + __abs_lowcore + ABS_LOWCORE_MAP_SIZE); /* split remaining virtual space between 1:1 mapping & vmemmap array */ pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page)); @@ -343,8 +442,11 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); max_mappable = max(ident_map_size, MAX_DCSS_ADDR); max_mappable = min(max_mappable, vmemmap_start); - if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE)) - __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#endif + boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base, + __identity_base + ident_map_size); return asce_limit; } @@ -403,6 +505,10 @@ void startup_kernel(void) psw_t psw; setup_lpp(); + store_ipl_parmblock(); + uv_query_info(); + setup_boot_command_line(); + parse_boot_command_line(); /* * Non-randomized kernel physical start address must be _SEGMENT_SIZE @@ -422,13 +528,14 @@ void startup_kernel(void) oldmem_data.start = parmarea.oldmem_base; oldmem_data.size = parmarea.oldmem_size; - store_ipl_parmblock(); read_ipl_report(); - uv_query_info(); sclp_early_read_info(); - setup_boot_command_line(); - parse_boot_command_line(); + sclp_early_detect_machine_features(); detect_facilities(); + detect_diag9c(); + detect_machine_type(); + /* detect_diag288() needs machine type */ + detect_diag288(); cmma_init(); sanitize_prot_virt_host(); max_physmem_end = detect_max_physmem_end(); @@ -517,6 +624,7 @@ void startup_kernel(void) __kaslr_offset, __kaslr_offset_phys); kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); + dump_physmem_reserved(); copy_bootdata(); __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, (struct alt_instr *)_vmlinux_info.alt_instructions_end, @@ -533,5 +641,6 @@ void startup_kernel(void) */ psw.addr = __kaslr_offset + vmlinux.entry; psw.mask = PSW_KERNEL_BITS; + boot_debug("Starting kernel at: 0x%016lx\n", psw.addr); __load_psw(psw); } diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c index f6b9b1df48a8..bd68161434a6 100644 --- a/arch/s390/boot/string.c +++ b/arch/s390/boot/string.c @@ -29,6 +29,18 @@ int strncmp(const char *cs, const char *ct, size_t count) return 0; } +ssize_t sized_strscpy(char *dst, const char *src, size_t count) +{ + size_t len; + + if (count == 0) + return -E2BIG; + len = strnlen(src, count - 1); + memcpy(dst, src, len); + dst[len] = '\0'; + return src[len] ? -E2BIG : len; +} + void *memset64(uint64_t *s, uint64_t v, size_t count) { uint64_t *xs = s; diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 3fa28db2fe59..1d073acd05a7 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "vmem: " fmt +#include <linux/cpufeature.h> #include <linux/sched/task.h> #include <linux/pgtable.h> #include <linux/kasan.h> @@ -9,10 +11,12 @@ #include <asm/ctlreg.h> #include <asm/physmem_info.h> #include <asm/maccess.h> +#include <asm/machine.h> #include <asm/abs_lowcore.h> #include "decompressor.h" #include "boot.h" +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) struct ctlreg __bootdata_preserved(s390_invalid_asce); #ifdef CONFIG_PROC_FS @@ -31,12 +35,42 @@ enum populate_mode { POPULATE_IDENTITY, POPULATE_KERNEL, #ifdef CONFIG_KASAN + /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */ POPULATE_KASAN_MAP_SHADOW, POPULATE_KASAN_ZERO_SHADOW, POPULATE_KASAN_SHALLOW #endif }; +#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t +static inline const char *get_populate_mode_name(enum populate_mode t) +{ + switch (t) { + POPULATE_MODE_NAME(NONE); + POPULATE_MODE_NAME(DIRECT); + POPULATE_MODE_NAME(LOWCORE); + POPULATE_MODE_NAME(ABS_LOWCORE); + POPULATE_MODE_NAME(IDENTITY); + POPULATE_MODE_NAME(KERNEL); +#ifdef CONFIG_KASAN + POPULATE_MODE_NAME(KASAN_MAP_SHADOW); + POPULATE_MODE_NAME(KASAN_ZERO_SHADOW); + POPULATE_MODE_NAME(KASAN_SHALLOW); +#endif + default: + return "UNKNOWN"; + } +} + +static bool is_kasan_populate_mode(enum populate_mode mode) +{ +#ifdef CONFIG_KASAN + return mode >= POPULATE_KASAN_MAP_SHADOW; +#else + return false; +#endif +} + static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); #ifdef CONFIG_KASAN @@ -52,9 +86,12 @@ static pte_t pte_z; static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) { - start = PAGE_ALIGN_DOWN(__sha(start)); - end = PAGE_ALIGN(__sha(end)); - pgtable_populate(start, end, mode); + unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start)); + unsigned long sha_end = PAGE_ALIGN(__sha(end)); + + boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode), + start, end, sha_start, sha_end); + pgtable_populate(sha_start, sha_end, mode); } static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end) @@ -63,13 +100,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); unsigned long memgap_start = 0; - unsigned long untracked_end; unsigned long start, end; int i; pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); - if (!machine.has_nx) - pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); @@ -93,15 +127,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW); kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW); kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_end = VMALLOC_START; - /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); - } else { - untracked_end = MODULES_VADDR; - } + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); /* populate kasan shadow for untracked memory */ - kasan_populate((unsigned long)__identity_va(ident_map_size), untracked_end, + kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START, POPULATE_KASAN_ZERO_SHADOW); kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); } @@ -208,7 +237,7 @@ static void *boot_crst_alloc(unsigned long val) unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; unsigned long *table; - table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size); crst_table_init(table, val); __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; @@ -224,7 +253,7 @@ static pte_t *boot_pte_alloc(void) * during POPULATE_KASAN_MAP_SHADOW when EDAT is off */ if (!pte_leftover) { - pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; __arch_set_page_dat(pte, 1); } else { @@ -236,11 +265,12 @@ static pte_t *boot_pte_alloc(void) return pte; } -static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) +static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size, + enum populate_mode mode) { switch (mode) { case POPULATE_NONE: - return -1; + return INVALID_PHYS_ADDR; case POPULATE_DIRECT: return addr; case POPULATE_LOWCORE: @@ -253,38 +283,64 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m return __identity_pa(addr); #ifdef CONFIG_KASAN case POPULATE_KASAN_MAP_SHADOW: - addr = physmem_alloc_top_down(RR_VMEM, size, size); - memset((void *)addr, 0, size); - return addr; + /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */ + addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE); + if (addr) { + memset((void *)addr, 0, size); + return addr; + } + return INVALID_PHYS_ADDR; #endif default: - return -1; + return INVALID_PHYS_ADDR; } } -static bool large_allowed(enum populate_mode mode) +static bool large_page_mapping_allowed(enum populate_mode mode) { - return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL); + switch (mode) { + case POPULATE_DIRECT: + case POPULATE_IDENTITY: + case POPULATE_KERNEL: +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: +#endif + return true; + default: + return false; + } } -static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; + + if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PUD_SIZE)) + return INVALID_PHYS_ADDR; - return machine.has_edat2 && large_allowed(mode) && - IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE); + return pa; } -static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; - return machine.has_edat1 && large_allowed(mode) && - IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE); + if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PMD_SIZE)) + return INVALID_PHYS_ADDR; + + return pa; } static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, @@ -298,10 +354,8 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e if (pte_none(*pte)) { if (kasan_pte_populate_zero_shadow(pte, mode)) continue; - entry = __pte(_pa(addr, PAGE_SIZE, mode)); + entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL); - if (!machine.has_nx) - entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); set_pte(pte, entry); pages++; } @@ -313,7 +367,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pmd_t *pmd, entry; pte_t *pte; @@ -323,11 +377,10 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e if (pmd_none(*pmd)) { if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) continue; - if (can_large_pmd(pmd, addr, next, mode)) { - entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); + pa = try_get_large_pmd_pa(pmd, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pmd(pa); entry = set_pmd_bit(entry, SEGMENT_KERNEL); - if (!machine.has_nx) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmd, entry); pages++; continue; @@ -346,7 +399,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pud_t *pud, entry; pmd_t *pmd; @@ -356,11 +409,10 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e if (pud_none(*pud)) { if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) continue; - if (can_large_pud(pud, addr, next, mode)) { - entry = __pud(_pa(addr, _REGION3_SIZE, mode)); + pa = try_get_large_pud_pa(pud, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pud(pa); entry = set_pud_bit(entry, REGION3_KERNEL); - if (!machine.has_nx) - entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); set_pud(pud, entry); pages++; continue; @@ -402,6 +454,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat pgd_t *pgd; p4d_t *p4d; + if (!is_kasan_populate_mode(mode)) { + boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n", + get_populate_mode_name(mode), addr, end, + resolve_pa_may_alloc(addr, 0, mode), + resolve_pa_may_alloc(end - 1, 0, mode) + 1); + } + pgd = pgd_offset(&init_mm, addr); for (; addr < end; addr = next, pgd++) { next = pgd_addr_end(addr, end); @@ -459,7 +518,7 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); - if (relocate_lowcore) + if (machine_has_relocated_lowcore()) lowcore_address = LOWCORE_ALT_ADDRESS; /* diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index 66670212a361..50988022f9ea 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -40,6 +40,7 @@ SECTIONS *(.rodata.*) _erodata = . ; } + EXCEPTION_TABLE(16) .got : { *(.got) } @@ -165,7 +166,6 @@ SECTIONS /DISCARD/ : { COMMON_DISCARDS *(.eh_frame) - *(__ex_table) *(*__ksymtab*) *(___kcrctab*) } diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index d8d227ab82de..8ecad727497e 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -38,7 +38,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -92,7 +91,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y @@ -395,6 +393,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -405,6 +406,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y @@ -469,6 +473,7 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set +CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -627,8 +632,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -653,7 +666,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=y CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -723,11 +735,10 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y @@ -740,13 +751,14 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" +CONFIG_FORTIFY_SOURCE=y +CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_SELFTESTS=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m @@ -756,7 +768,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -770,7 +781,6 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m @@ -782,7 +792,6 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m @@ -795,16 +804,13 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3_256_S390=m CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -815,13 +821,9 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC32_SELFTEST=y -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y @@ -879,6 +881,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_GRAPH_RETVAL=y +CONFIG_FUNCTION_GRAPH_RETADDR=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y @@ -891,12 +894,14 @@ CONFIG_USER_EVENTS=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set +CONFIG_FTRACE_SORT_STARTUP_TEST=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m CONFIG_SAMPLE_FTRACE_OPS=m CONFIG_DEBUG_ENTRY=y +CONFIG_STRICT_MM_TYPECHECKS=y CONFIG_CIO_INJECT=y CONFIG_KUNIT=m CONFIG_KUNIT_DEBUGFS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 6c2f2bb4fbf8..c13a77765162 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -36,7 +36,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -86,7 +85,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set @@ -385,6 +383,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -395,6 +396,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y @@ -459,6 +463,7 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set +CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -617,8 +622,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -640,7 +653,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=m CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -710,6 +722,7 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y @@ -725,14 +738,13 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_SELFTESTS=y CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_BENCHMARK=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECDSA=m @@ -742,7 +754,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -756,7 +767,6 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_HCTR2=m -CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m @@ -768,7 +778,6 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SM3_GENERIC=m -CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_CRC32=m @@ -782,16 +791,13 @@ CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA3_256_S390=m CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -802,13 +808,10 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 @@ -829,6 +832,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_GRAPH_RETVAL=y +CONFIG_FUNCTION_GRAPH_RETADDR=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config index 84c2b551e992..cefbe2ba1228 100644 --- a/arch/s390/configs/kasan.config +++ b/arch/s390/configs/kasan.config @@ -1,4 +1,4 @@ # Help: Enable KASan for debugging CONFIG_KASAN=y CONFIG_KASAN_INLINE=y -CONFIG_KASAN_VMALLOC=y +CONFIG_KERNEL_IMAGE_BASE=0x7FFFE0000000 diff --git a/arch/s390/configs/mmtypes.config b/arch/s390/configs/mmtypes.config new file mode 100644 index 000000000000..fe32b442d789 --- /dev/null +++ b/arch/s390/configs/mmtypes.config @@ -0,0 +1,2 @@ +# Help: Enable strict memory management typechecks +CONFIG_STRICT_MM_TYPECHECKS=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index bcbaa069de96..8163c1702720 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -62,7 +62,6 @@ CONFIG_ZFCP=y # CONFIG_INOTIFY_USER is not set # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_ZLIB_DFLTCC is not set CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y @@ -71,7 +70,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y -# CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig index d3eb3a233693..e2c27588b21a 100644 --- a/arch/s390/crypto/Kconfig +++ b/arch/s390/crypto/Kconfig @@ -2,21 +2,8 @@ menu "Accelerated Cryptographic Algorithms for CPU (s390)" -config CRYPTO_CRC32_S390 - tristate "CRC32c and CRC32" - depends on S390 - select CRYPTO_HASH - select CRC32 - help - CRC32c and CRC32 CRC algorithms - - Architecture: s390 - - It is available with IBM z13 or later. - config CRYPTO_SHA512_S390 tristate "Hash functions: SHA-384 and SHA-512" - depends on S390 select CRYPTO_HASH help SHA-384 and SHA-512 secure hash algorithms (FIPS 180) @@ -27,7 +14,6 @@ config CRYPTO_SHA512_S390 config CRYPTO_SHA1_S390 tristate "Hash functions: SHA-1" - depends on S390 select CRYPTO_HASH help SHA-1 secure hash algorithm (FIPS 180) @@ -36,20 +22,8 @@ config CRYPTO_SHA1_S390 It is available as of z990. -config CRYPTO_SHA256_S390 - tristate "Hash functions: SHA-224 and SHA-256" - depends on S390 - select CRYPTO_HASH - help - SHA-224 and SHA-256 secure hash algorithms (FIPS 180) - - Architecture: s390 - - It is available as of z9. - config CRYPTO_SHA3_256_S390 tristate "Hash functions: SHA3-224 and SHA3-256" - depends on S390 select CRYPTO_HASH help SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202) @@ -60,7 +34,6 @@ config CRYPTO_SHA3_256_S390 config CRYPTO_SHA3_512_S390 tristate "Hash functions: SHA3-384 and SHA3-512" - depends on S390 select CRYPTO_HASH help SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202) @@ -71,7 +44,6 @@ config CRYPTO_SHA3_512_S390 config CRYPTO_GHASH_S390 tristate "Hash functions: GHASH" - depends on S390 select CRYPTO_HASH help GCM GHASH hash function (NIST SP800-38D) @@ -82,7 +54,6 @@ config CRYPTO_GHASH_S390 config CRYPTO_AES_S390 tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM" - depends on S390 select CRYPTO_ALGAPI select CRYPTO_SKCIPHER help @@ -104,7 +75,6 @@ config CRYPTO_AES_S390 config CRYPTO_DES_S390 tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR" - depends on S390 select CRYPTO_ALGAPI select CRYPTO_SKCIPHER select CRYPTO_LIB_DES @@ -119,22 +89,8 @@ config CRYPTO_DES_S390 As of z990 the ECB and CBC mode are hardware accelerated. As of z196 the CTR mode is hardware accelerated. -config CRYPTO_CHACHA_S390 - tristate "Ciphers: ChaCha20" - depends on S390 - select CRYPTO_SKCIPHER - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA - help - Length-preserving cipher: ChaCha20 stream cipher (RFC 7539) - - Architecture: s390 - - It is available as of z13. - config CRYPTO_HMAC_S390 tristate "Keyed-hash message authentication code: HMAC" - depends on S390 select CRYPTO_HASH help s390 specific HMAC hardware support for SHA224, SHA256, SHA384 and diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index a0cb96937c3d..21757d86cd49 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -4,19 +4,13 @@ # obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o -obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o -obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o -obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o obj-y += arch_random.o - -crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o -chacha_s390-y := chacha-glue.o chacha-s390.o diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 9c46b1b630b1..5d36f4020dfa 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -66,7 +66,6 @@ struct s390_xts_ctx { struct gcm_sg_walk { struct scatter_walk walk; unsigned int walk_bytes; - u8 *walk_ptr; unsigned int walk_bytes_remain; u8 buf[AES_BLOCK_SIZE]; unsigned int buf_bytes; @@ -787,29 +786,20 @@ static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg, static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw) { - struct scatterlist *nextsg; - - gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain); - while (!gw->walk_bytes) { - nextsg = sg_next(gw->walk.sg); - if (!nextsg) - return 0; - scatterwalk_start(&gw->walk, nextsg); - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); - } - gw->walk_ptr = scatterwalk_map(&gw->walk); + if (gw->walk_bytes_remain == 0) + return 0; + gw->walk_bytes = scatterwalk_next(&gw->walk, gw->walk_bytes_remain); return gw->walk_bytes; } static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, - unsigned int nbytes) + unsigned int nbytes, bool out) { gw->walk_bytes_remain -= nbytes; - scatterwalk_unmap(gw->walk_ptr); - scatterwalk_advance(&gw->walk, nbytes); - scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); - gw->walk_ptr = NULL; + if (out) + scatterwalk_done_dst(&gw->walk, nbytes); + else + scatterwalk_done_src(&gw->walk, nbytes); } static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) @@ -835,16 +825,16 @@ static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) } if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) { - gw->ptr = gw->walk_ptr; + gw->ptr = gw->walk.addr; gw->nbytes = gw->walk_bytes; goto out; } while (1) { n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes); - memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n); + memcpy(gw->buf + gw->buf_bytes, gw->walk.addr, n); gw->buf_bytes += n; - _gcm_sg_unmap_and_advance(gw, n); + _gcm_sg_unmap_and_advance(gw, n, false); if (gw->buf_bytes >= minbytesneeded) { gw->ptr = gw->buf; gw->nbytes = gw->buf_bytes; @@ -876,13 +866,12 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) } if (gw->walk_bytes >= minbytesneeded) { - gw->ptr = gw->walk_ptr; + gw->ptr = gw->walk.addr; gw->nbytes = gw->walk_bytes; goto out; } - scatterwalk_unmap(gw->walk_ptr); - gw->walk_ptr = NULL; + scatterwalk_unmap(&gw->walk); gw->ptr = gw->buf; gw->nbytes = sizeof(gw->buf); @@ -904,7 +893,7 @@ static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) } else gw->buf_bytes = 0; } else - _gcm_sg_unmap_and_advance(gw, bytesdone); + _gcm_sg_unmap_and_advance(gw, bytesdone, false); return bytesdone; } @@ -921,11 +910,11 @@ static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) if (!_gcm_sg_clamp_and_map(gw)) return i; n = min(gw->walk_bytes, bytesdone - i); - memcpy(gw->walk_ptr, gw->buf + i, n); - _gcm_sg_unmap_and_advance(gw, n); + memcpy(gw->walk.addr, gw->buf + i, n); + _gcm_sg_unmap_and_advance(gw, n, true); } } else - _gcm_sg_unmap_and_advance(gw, bytesdone); + _gcm_sg_unmap_and_advance(gw, bytesdone, true); return bytesdone; } diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c deleted file mode 100644 index f8b0c52e77a4..000000000000 --- a/arch/s390/crypto/chacha-glue.c +++ /dev/null @@ -1,130 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * s390 ChaCha stream cipher. - * - * Copyright IBM Corp. 2021 - */ - -#define KMSG_COMPONENT "chacha_s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <crypto/internal/chacha.h> -#include <crypto/internal/skcipher.h> -#include <crypto/algapi.h> -#include <linux/cpufeature.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/sizes.h> -#include <asm/fpu.h> -#include "chacha-s390.h" - -static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src, - unsigned int nbytes, const u32 *key, - u32 *counter) -{ - DECLARE_KERNEL_FPU_ONSTACK32(vxstate); - - kernel_fpu_begin(&vxstate, KERNEL_VXR); - chacha20_vx(dst, src, nbytes, key, counter); - kernel_fpu_end(&vxstate, KERNEL_VXR); - - *counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; -} - -static int chacha20_s390(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 state[CHACHA_STATE_WORDS] __aligned(16); - struct skcipher_walk walk; - unsigned int nbytes; - int rc; - - rc = skcipher_walk_virt(&walk, req, false); - chacha_init_generic(state, ctx->key, req->iv); - - while (walk.nbytes > 0) { - nbytes = walk.nbytes; - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - if (nbytes <= CHACHA_BLOCK_SIZE) { - chacha_crypt_generic(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - ctx->nrounds); - } else { - chacha20_crypt_s390(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - &state[4], &state[12]); - } - rc = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - return rc; -} - -void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) -{ - /* TODO: implement hchacha_block_arch() in assembly */ - hchacha_block_generic(state, stream, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) -{ - chacha_init_generic(state, key, iv); -} -EXPORT_SYMBOL(chacha_init_arch); - -void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - /* s390 chacha20 implementation has 20 rounds hard-coded, - * it cannot handle a block of data or less, but otherwise - * it can handle data of arbitrary size - */ - if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) - chacha_crypt_generic(state, dst, src, bytes, nrounds); - else - chacha20_crypt_s390(state, dst, src, bytes, - &state[4], &state[12]); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static struct skcipher_alg chacha_algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-s390", - .base.cra_priority = 900, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .setkey = chacha20_setkey, - .encrypt = chacha20_s390, - .decrypt = chacha20_s390, - } -}; - -static int __init chacha_mod_init(void) -{ - return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? - crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0; -} - -static void __exit chacha_mod_fini(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) - crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)); -} - -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init); -module_exit(chacha_mod_fini); - -MODULE_DESCRIPTION("ChaCha20 stream cipher"); -MODULE_LICENSE("GPL v2"); - -MODULE_ALIAS_CRYPTO("chacha20"); diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c deleted file mode 100644 index 89a10337e6ea..000000000000 --- a/arch/s390/crypto/crc32-vx.c +++ /dev/null @@ -1,306 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Crypto-API module for CRC-32 algorithms implemented with the - * z/Architecture Vector Extension Facility. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - */ -#define KMSG_COMPONENT "crc32-vx" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/module.h> -#include <linux/cpufeature.h> -#include <linux/crc32.h> -#include <crypto/internal/hash.h> -#include <asm/fpu.h> -#include "crc32-vx.h" - -#define CRC32_BLOCK_SIZE 1 -#define CRC32_DIGEST_SIZE 4 - -#define VX_MIN_LEN 64 -#define VX_ALIGNMENT 16L -#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) - -struct crc_ctx { - u32 key; -}; - -struct crc_desc_ctx { - u32 crc; -}; - -/* - * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension - * - * Creates a function to perform a particular CRC-32 computation. Depending - * on the message buffer, the hardware-accelerated or software implementation - * is used. Note that the message buffer is aligned to improve fetch - * operations of VECTOR LOAD MULTIPLE instructions. - * - */ -#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ - static u32 __pure ___fname(u32 crc, \ - unsigned char const *data, size_t datalen) \ - { \ - unsigned long prealign, aligned, remaining; \ - DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ - \ - if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ - return ___crc32_sw(crc, data, datalen); \ - \ - if ((unsigned long)data & VX_ALIGN_MASK) { \ - prealign = VX_ALIGNMENT - \ - ((unsigned long)data & VX_ALIGN_MASK); \ - datalen -= prealign; \ - crc = ___crc32_sw(crc, data, prealign); \ - data = (void *)((unsigned long)data + prealign); \ - } \ - \ - aligned = datalen & ~VX_ALIGN_MASK; \ - remaining = datalen & VX_ALIGN_MASK; \ - \ - kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ - crc = ___crc32_vx(crc, data, aligned); \ - kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ - \ - if (remaining) \ - crc = ___crc32_sw(crc, data + aligned, remaining); \ - \ - return crc; \ - } - -DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) -DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) -DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) - - -static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = 0; - return 0; -} - -static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx = crypto_tfm_ctx(tfm); - - mctx->key = ~0; - return 0; -} - -static int crc32_vx_init(struct shash_desc *desc) -{ - struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm); - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - ctx->crc = mctx->key; - return 0; -} - -static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx = crypto_shash_ctx(tfm); - - if (newkeylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = le32_to_cpu(*(__le32 *)newkey); - return 0; -} - -static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx = crypto_shash_ctx(tfm); - - if (newkeylen != sizeof(mctx->key)) - return -EINVAL; - mctx->key = be32_to_cpu(*(__be32 *)newkey); - return 0; -} - -static int crc32le_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__le32 *)out = cpu_to_le32p(&ctx->crc); - return 0; -} - -static int crc32be_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - *(__be32 *)out = cpu_to_be32p(&ctx->crc); - return 0; -} - -static int crc32c_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); - - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out = ~cpu_to_le32p(&ctx->crc); - return 0; -} - -static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len)); - return 0; -} - -static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len)); - return 0; -} - -static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); - return 0; -} - - -#define CRC32_VX_FINUP(alg, func) \ - static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ - data, datalen, out); \ - } - -CRC32_VX_FINUP(crc32le, crc32_le_vx) -CRC32_VX_FINUP(crc32be, crc32_be_vx) -CRC32_VX_FINUP(crc32c, crc32c_le_vx) - -#define CRC32_VX_DIGEST(alg, func) \ - static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ - unsigned int len, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ - data, len, out); \ - } - -CRC32_VX_DIGEST(crc32le, crc32_le_vx) -CRC32_VX_DIGEST(crc32be, crc32_be_vx) -CRC32_VX_DIGEST(crc32c, crc32c_le_vx) - -#define CRC32_VX_UPDATE(alg, func) \ - static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen) \ - { \ - struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \ - ctx->crc = func(ctx->crc, data, datalen); \ - return 0; \ - } - -CRC32_VX_UPDATE(crc32le, crc32_le_vx) -CRC32_VX_UPDATE(crc32be, crc32_be_vx) -CRC32_VX_UPDATE(crc32c, crc32c_le_vx) - - -static struct shash_alg crc32_vx_algs[] = { - /* CRC-32 LE */ - { - .init = crc32_vx_init, - .setkey = crc32_vx_setkey, - .update = crc32le_vx_update, - .final = crc32le_vx_final, - .finup = crc32le_vx_finup, - .digest = crc32le_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32", - .cra_driver_name = "crc32-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_zero, - }, - }, - /* CRC-32 BE */ - { - .init = crc32_vx_init, - .setkey = crc32be_vx_setkey, - .update = crc32be_vx_update, - .final = crc32be_vx_final, - .finup = crc32be_vx_finup, - .digest = crc32be_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32be", - .cra_driver_name = "crc32be-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_zero, - }, - }, - /* CRC-32C LE */ - { - .init = crc32_vx_init, - .setkey = crc32_vx_setkey, - .update = crc32c_vx_update, - .final = crc32c_vx_final, - .finup = crc32c_vx_finup, - .digest = crc32c_vx_digest, - .descsize = sizeof(struct crc_desc_ctx), - .digestsize = CRC32_DIGEST_SIZE, - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-vx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CRC32_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crc_ctx), - .cra_module = THIS_MODULE, - .cra_init = crc32_vx_cra_init_invert, - }, - }, -}; - - -static int __init crc_vx_mod_init(void) -{ - return crypto_register_shashes(crc32_vx_algs, - ARRAY_SIZE(crc32_vx_algs)); -} - -static void __exit crc_vx_mod_exit(void) -{ - crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); -} - -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init); -module_exit(crc_vx_mod_exit); - -MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>"); -MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility"); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-vx"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 0800a2a5799f..dcbcee37cb63 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -8,29 +8,28 @@ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> */ +#include <asm/cpacf.h> +#include <crypto/ghash.h> #include <crypto/internal/hash.h> -#include <linux/module.h> #include <linux/cpufeature.h> -#include <asm/cpacf.h> - -#define GHASH_BLOCK_SIZE 16 -#define GHASH_DIGEST_SIZE 16 +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> -struct ghash_ctx { +struct s390_ghash_ctx { u8 key[GHASH_BLOCK_SIZE]; }; -struct ghash_desc_ctx { +struct s390_ghash_desc_ctx { u8 icv[GHASH_BLOCK_SIZE]; u8 key[GHASH_BLOCK_SIZE]; - u8 buffer[GHASH_BLOCK_SIZE]; - u32 bytes; }; static int ghash_init(struct shash_desc *desc) { - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); memset(dctx, 0, sizeof(*dctx)); memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); @@ -41,7 +40,7 @@ static int ghash_init(struct shash_desc *desc) static int ghash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { - struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + struct s390_ghash_ctx *ctx = crypto_shash_ctx(tfm); if (keylen != GHASH_BLOCK_SIZE) return -EINVAL; @@ -54,80 +53,71 @@ static int ghash_setkey(struct crypto_shash *tfm, static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int n; - u8 *buf = dctx->buffer; - - if (dctx->bytes) { - u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); - n = min(srclen, dctx->bytes); - dctx->bytes -= n; - srclen -= n; - - memcpy(pos, src, n); - src += n; + n = srclen & ~(GHASH_BLOCK_SIZE - 1); + cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n); + return srclen - n; +} - if (!dctx->bytes) { - cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, - GHASH_BLOCK_SIZE); - } - } +static void ghash_flush(struct s390_ghash_desc_ctx *dctx, const u8 *src, + unsigned int len) +{ + if (len) { + u8 buf[GHASH_BLOCK_SIZE] = {}; - n = srclen & ~(GHASH_BLOCK_SIZE - 1); - if (n) { - cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n); - src += n; - srclen -= n; + memcpy(buf, src, len); + cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); + memzero_explicit(buf, sizeof(buf)); } +} - if (srclen) { - dctx->bytes = GHASH_BLOCK_SIZE - srclen; - memcpy(buf, src, srclen); - } +static int ghash_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *dst) +{ + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + ghash_flush(dctx, src, len); + memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); return 0; } -static int ghash_flush(struct ghash_desc_ctx *dctx) +static int ghash_export(struct shash_desc *desc, void *out) { - u8 *buf = dctx->buffer; - - if (dctx->bytes) { - u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes); - - memset(pos, 0, dctx->bytes); - cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); - dctx->bytes = 0; - } + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + memcpy(out, dctx->icv, GHASH_DIGEST_SIZE); return 0; } -static int ghash_final(struct shash_desc *desc, u8 *dst) +static int ghash_import(struct shash_desc *desc, const void *in) { - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - int ret; + struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - ret = ghash_flush(dctx); - if (!ret) - memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE); - return ret; + memcpy(dctx->icv, in, GHASH_DIGEST_SIZE); + memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE); + return 0; } static struct shash_alg ghash_alg = { .digestsize = GHASH_DIGEST_SIZE, .init = ghash_init, .update = ghash_update, - .final = ghash_final, + .finup = ghash_finup, .setkey = ghash_setkey, - .descsize = sizeof(struct ghash_desc_ctx), + .export = ghash_export, + .import = ghash_import, + .statesize = sizeof(struct ghash_desc_ctx), + .descsize = sizeof(struct s390_ghash_desc_ctx), .base = { .cra_name = "ghash", .cra_driver_name = "ghash-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_ctxsize = sizeof(struct s390_ghash_ctx), .cra_module = THIS_MODULE, }, }; diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c index bba9a818dfdc..93a1098d9f8d 100644 --- a/arch/s390/crypto/hmac_s390.c +++ b/arch/s390/crypto/hmac_s390.c @@ -9,10 +9,14 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <asm/cpacf.h> -#include <crypto/sha2.h> #include <crypto/internal/hash.h> +#include <crypto/hmac.h> +#include <crypto/sha2.h> #include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kernel.h> #include <linux/module.h> +#include <linux/string.h> /* * KMAC param block layout for sha2 function codes: @@ -71,32 +75,31 @@ union s390_kmac_gr0 { struct s390_kmac_sha2_ctx { u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + MAX_BLOCK_SIZE]; union s390_kmac_gr0 gr0; - u8 buf[MAX_BLOCK_SIZE]; - unsigned int buflen; + u64 buflen[2]; }; /* * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize */ -static inline void kmac_sha2_set_imbl(u8 *param, unsigned int buflen, - unsigned int blocksize) +static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo, + u64 buflen_hi, unsigned int blocksize) { u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize); switch (blocksize) { case SHA256_BLOCK_SIZE: - *(u64 *)imbl = (u64)buflen * BITS_PER_BYTE; + *(u64 *)imbl = buflen_lo * BITS_PER_BYTE; break; case SHA512_BLOCK_SIZE: - *(u128 *)imbl = (u128)buflen * BITS_PER_BYTE; + *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3; break; default: break; } } -static int hash_key(const u8 *in, unsigned int inlen, - u8 *digest, unsigned int digestsize) +static int hash_data(const u8 *in, unsigned int inlen, + u8 *digest, unsigned int digestsize, bool final) { unsigned long func; union { @@ -123,19 +126,23 @@ static int hash_key(const u8 *in, unsigned int inlen, switch (digestsize) { case SHA224_DIGEST_SIZE: - func = CPACF_KLMD_SHA_256; + func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256; PARAM_INIT(256, 224, inlen * 8); + if (!final) + digestsize = SHA256_DIGEST_SIZE; break; case SHA256_DIGEST_SIZE: - func = CPACF_KLMD_SHA_256; + func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256; PARAM_INIT(256, 256, inlen * 8); break; case SHA384_DIGEST_SIZE: - func = CPACF_KLMD_SHA_512; + func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512; PARAM_INIT(512, 384, inlen * 8); + if (!final) + digestsize = SHA512_DIGEST_SIZE; break; case SHA512_DIGEST_SIZE: - func = CPACF_KLMD_SHA_512; + func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512; PARAM_INIT(512, 512, inlen * 8); break; default: @@ -151,6 +158,12 @@ static int hash_key(const u8 *in, unsigned int inlen, return 0; } +static int hash_key(const u8 *in, unsigned int inlen, + u8 *digest, unsigned int digestsize) +{ + return hash_data(in, inlen, digest, digestsize, true); +} + static int s390_hmac_sha2_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { @@ -176,7 +189,8 @@ static int s390_hmac_sha2_init(struct shash_desc *desc) memcpy(ctx->param + SHA2_KEY_OFFSET(bs), tfm_ctx->key, bs); - ctx->buflen = 0; + ctx->buflen[0] = 0; + ctx->buflen[1] = 0; ctx->gr0.reg = 0; switch (crypto_shash_digestsize(desc->tfm)) { case SHA224_DIGEST_SIZE: @@ -203,48 +217,31 @@ static int s390_hmac_sha2_update(struct shash_desc *desc, { struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); unsigned int bs = crypto_shash_blocksize(desc->tfm); - unsigned int offset, n; - - /* check current buffer */ - offset = ctx->buflen % bs; - ctx->buflen += len; - if (offset + len < bs) - goto store; - - /* process one stored block */ - if (offset) { - n = bs - offset; - memcpy(ctx->buf + offset, data, n); - ctx->gr0.iimp = 1; - _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs); - data += n; - len -= n; - offset = 0; - } - /* process as many blocks as possible */ - if (len >= bs) { - n = (len / bs) * bs; - ctx->gr0.iimp = 1; - _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n); - data += n; - len -= n; - } -store: - /* store incomplete block in buffer */ - if (len) - memcpy(ctx->buf + offset, data, len); + unsigned int n = round_down(len, bs); - return 0; + ctx->buflen[0] += n; + if (ctx->buflen[0] < n) + ctx->buflen[1]++; + + /* process as many blocks as possible */ + ctx->gr0.iimp = 1; + _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n); + return len - n; } -static int s390_hmac_sha2_final(struct shash_desc *desc, u8 *out) +static int s390_hmac_sha2_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); unsigned int bs = crypto_shash_blocksize(desc->tfm); + ctx->buflen[0] += len; + if (ctx->buflen[0] < len) + ctx->buflen[1]++; + ctx->gr0.iimp = 0; - kmac_sha2_set_imbl(ctx->param, ctx->buflen, bs); - _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, ctx->buflen % bs); + kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs); + _cpacf_kmac(&ctx->gr0.reg, ctx->param, src, len); memcpy(out, ctx->param, crypto_shash_digestsize(desc->tfm)); return 0; @@ -262,7 +259,7 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc, return rc; ctx->gr0.iimp = 0; - kmac_sha2_set_imbl(ctx->param, len, + kmac_sha2_set_imbl(ctx->param, len, 0, crypto_shash_blocksize(desc->tfm)); _cpacf_kmac(&ctx->gr0.reg, ctx->param, data, len); memcpy(out, ctx->param, ds); @@ -270,22 +267,89 @@ static int s390_hmac_sha2_digest(struct shash_desc *desc, return 0; } -#define S390_HMAC_SHA2_ALG(x) { \ +static int s390_hmac_export_zero(struct shash_desc *desc, void *out) +{ + struct crypto_shash *tfm = desc->tfm; + u8 ipad[SHA512_BLOCK_SIZE]; + struct s390_hmac_ctx *ctx; + unsigned int bs; + int err, i; + + ctx = crypto_shash_ctx(tfm); + bs = crypto_shash_blocksize(tfm); + for (i = 0; i < bs; i++) + ipad[i] = ctx->key[i] ^ HMAC_IPAD_VALUE; + + err = hash_data(ipad, bs, out, crypto_shash_digestsize(tfm), false); + memzero_explicit(ipad, sizeof(ipad)); + return err; +} + +static int s390_hmac_export(struct shash_desc *desc, void *out) +{ + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int bs = crypto_shash_blocksize(desc->tfm); + unsigned int ds = bs / 2; + union { + u8 *u8; + u64 *u64; + } p = { .u8 = out }; + int err = 0; + + if (!ctx->gr0.ikp) + err = s390_hmac_export_zero(desc, out); + else + memcpy(p.u8, ctx->param, ds); + p.u8 += ds; + put_unaligned(ctx->buflen[0], p.u64++); + if (ds == SHA512_DIGEST_SIZE) + put_unaligned(ctx->buflen[1], p.u64); + return err; +} + +static int s390_hmac_import(struct shash_desc *desc, const void *in) +{ + struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); + unsigned int bs = crypto_shash_blocksize(desc->tfm); + unsigned int ds = bs / 2; + union { + const u8 *u8; + const u64 *u64; + } p = { .u8 = in }; + int err; + + err = s390_hmac_sha2_init(desc); + memcpy(ctx->param, p.u8, ds); + p.u8 += ds; + ctx->buflen[0] = get_unaligned(p.u64++); + if (ds == SHA512_DIGEST_SIZE) + ctx->buflen[1] = get_unaligned(p.u64); + if (ctx->buflen[0] | ctx->buflen[1]) + ctx->gr0.ikp = 1; + return err; +} + +#define S390_HMAC_SHA2_ALG(x, ss) { \ .fc = CPACF_KMAC_HMAC_SHA_##x, \ .alg = { \ .init = s390_hmac_sha2_init, \ .update = s390_hmac_sha2_update, \ - .final = s390_hmac_sha2_final, \ + .finup = s390_hmac_sha2_finup, \ .digest = s390_hmac_sha2_digest, \ .setkey = s390_hmac_sha2_setkey, \ + .export = s390_hmac_export, \ + .import = s390_hmac_import, \ .descsize = sizeof(struct s390_kmac_sha2_ctx), \ .halg = { \ + .statesize = ss, \ .digestsize = SHA##x##_DIGEST_SIZE, \ .base = { \ .cra_name = "hmac(sha" #x ")", \ .cra_driver_name = "hmac_s390_sha" #x, \ .cra_blocksize = SHA##x##_BLOCK_SIZE, \ .cra_priority = 400, \ + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | \ + CRYPTO_AHASH_ALG_FINUP_MAX, \ .cra_ctxsize = sizeof(struct s390_hmac_ctx), \ .cra_module = THIS_MODULE, \ }, \ @@ -298,10 +362,10 @@ static struct s390_hmac_alg { unsigned int fc; struct shash_alg alg; } s390_hmac_algs[] = { - S390_HMAC_SHA2_ALG(224), - S390_HMAC_SHA2_ALG(256), - S390_HMAC_SHA2_ALG(384), - S390_HMAC_SHA2_ALG(512), + S390_HMAC_SHA2_ALG(224, sizeof(struct crypto_sha256_state)), + S390_HMAC_SHA2_ALG(256, sizeof(struct crypto_sha256_state)), + S390_HMAC_SHA2_ALG(384, SHA512_STATE_SIZE), + S390_HMAC_SHA2_ALG(512, SHA512_STATE_SIZE), }; static __always_inline void _s390_hmac_algs_unregister(void) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 511093713a6f..8a340c16acb4 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -5,7 +5,7 @@ * s390 implementation of the AES Cipher Algorithm with protected keys. * * s390 Version: - * Copyright IBM Corp. 2017, 2023 + * Copyright IBM Corp. 2017, 2025 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Harald Freudenberger <freude@de.ibm.com> */ @@ -13,16 +13,18 @@ #define KMSG_COMPONENT "paes_s390" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <crypto/aes.h> -#include <crypto/algapi.h> -#include <linux/bug.h> -#include <linux/err.h> -#include <linux/module.h> +#include <linux/atomic.h> #include <linux/cpufeature.h> +#include <linux/delay.h> +#include <linux/err.h> #include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/module.h> #include <linux/mutex.h> #include <linux/spinlock.h> -#include <linux/delay.h> +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/engine.h> #include <crypto/internal/skcipher.h> #include <crypto/xts.h> #include <asm/cpacf.h> @@ -44,23 +46,61 @@ static DEFINE_MUTEX(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; +static struct crypto_engine *paes_crypto_engine; +#define MAX_QLEN 10 + +/* + * protected key specific stuff + */ + struct paes_protkey { u32 type; u32 len; u8 protkey[PXTS_256_PROTKEY_SIZE]; }; -struct key_blob { - /* - * Small keys will be stored in the keybuf. Larger keys are - * stored in extra allocated memory. In both cases does - * key point to the memory where the key is stored. - * The code distinguishes by checking keylen against - * sizeof(keybuf). See the two following helper functions. - */ - u8 *key; - u8 keybuf[128]; +#define PK_STATE_NO_KEY 0 +#define PK_STATE_CONVERT_IN_PROGRESS 1 +#define PK_STATE_VALID 2 + +struct s390_paes_ctx { + /* source key material used to derive a protected key from */ + u8 keybuf[PAES_MAX_KEYSIZE]; + unsigned int keylen; + + /* cpacf function code to use with this protected key type */ + long fc; + + /* nr of requests enqueued via crypto engine which use this tfm ctx */ + atomic_t via_engine_ctr; + + /* spinlock to atomic read/update all the following fields */ + spinlock_t pk_lock; + + /* see PK_STATE* defines above, < 0 holds convert failure rc */ + int pk_state; + /* if state is valid, pk holds the protected key */ + struct paes_protkey pk; +}; + +struct s390_pxts_ctx { + /* source key material used to derive a protected key from */ + u8 keybuf[2 * PAES_MAX_KEYSIZE]; unsigned int keylen; + + /* cpacf function code to use with this protected key type */ + long fc; + + /* nr of requests enqueued via crypto engine which use this tfm ctx */ + atomic_t via_engine_ctr; + + /* spinlock to atomic read/update all the following fields */ + spinlock_t pk_lock; + + /* see PK_STATE* defines above, < 0 holds convert failure rc */ + int pk_state; + /* if state is valid, pk[] hold(s) the protected key(s) */ + struct paes_protkey pk[2]; }; /* @@ -89,214 +129,370 @@ static inline u32 make_clrkey_token(const u8 *ck, size_t cklen, u8 *dest) return sizeof(*token) + cklen; } -static inline int _key_to_kb(struct key_blob *kb, - const u8 *key, - unsigned int keylen) +/* + * paes_ctx_setkey() - Set key value into context, maybe construct + * a clear key token digestible by pkey from a clear key value. + */ +static inline int paes_ctx_setkey(struct s390_paes_ctx *ctx, + const u8 *key, unsigned int keylen) { + if (keylen > sizeof(ctx->keybuf)) + return -EINVAL; + switch (keylen) { case 16: case 24: case 32: /* clear key value, prepare pkey clear key token in keybuf */ - memset(kb->keybuf, 0, sizeof(kb->keybuf)); - kb->keylen = make_clrkey_token(key, keylen, kb->keybuf); - kb->key = kb->keybuf; + memset(ctx->keybuf, 0, sizeof(ctx->keybuf)); + ctx->keylen = make_clrkey_token(key, keylen, ctx->keybuf); break; default: /* other key material, let pkey handle this */ - if (keylen <= sizeof(kb->keybuf)) - kb->key = kb->keybuf; - else { - kb->key = kmalloc(keylen, GFP_KERNEL); - if (!kb->key) - return -ENOMEM; - } - memcpy(kb->key, key, keylen); - kb->keylen = keylen; + memcpy(ctx->keybuf, key, keylen); + ctx->keylen = keylen; break; } return 0; } -static inline int _xts_key_to_kb(struct key_blob *kb, - const u8 *key, - unsigned int keylen) +/* + * pxts_ctx_setkey() - Set key value into context, maybe construct + * a clear key token digestible by pkey from a clear key value. + */ +static inline int pxts_ctx_setkey(struct s390_pxts_ctx *ctx, + const u8 *key, unsigned int keylen) { size_t cklen = keylen / 2; - memset(kb->keybuf, 0, sizeof(kb->keybuf)); + if (keylen > sizeof(ctx->keybuf)) + return -EINVAL; switch (keylen) { case 32: case 64: /* clear key value, prepare pkey clear key tokens in keybuf */ - kb->key = kb->keybuf; - kb->keylen = make_clrkey_token(key, cklen, kb->key); - kb->keylen += make_clrkey_token(key + cklen, cklen, - kb->key + kb->keylen); + memset(ctx->keybuf, 0, sizeof(ctx->keybuf)); + ctx->keylen = make_clrkey_token(key, cklen, ctx->keybuf); + ctx->keylen += make_clrkey_token(key + cklen, cklen, + ctx->keybuf + ctx->keylen); break; default: /* other key material, let pkey handle this */ - if (keylen <= sizeof(kb->keybuf)) { - kb->key = kb->keybuf; - } else { - kb->key = kmalloc(keylen, GFP_KERNEL); - if (!kb->key) - return -ENOMEM; - } - memcpy(kb->key, key, keylen); - kb->keylen = keylen; + memcpy(ctx->keybuf, key, keylen); + ctx->keylen = keylen; break; } return 0; } -static inline void _free_kb_keybuf(struct key_blob *kb) +/* + * Convert the raw key material into a protected key via PKEY api. + * This function may sleep - don't call in non-sleeping context. + */ +static inline int convert_key(const u8 *key, unsigned int keylen, + struct paes_protkey *pk) { - if (kb->key && kb->key != kb->keybuf - && kb->keylen > sizeof(kb->keybuf)) { - kfree_sensitive(kb->key); - kb->key = NULL; + int rc, i; + + pk->len = sizeof(pk->protkey); + + /* + * In case of a busy card retry with increasing delay + * of 200, 400, 800 and 1600 ms - in total 3 s. + */ + for (rc = -EIO, i = 0; rc && i < 5; i++) { + if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) { + rc = -EINTR; + goto out; + } + rc = pkey_key2protkey(key, keylen, + pk->protkey, &pk->len, &pk->type, + PKEY_XFLAG_NOMEMALLOC); } - memzero_explicit(kb->keybuf, sizeof(kb->keybuf)); + +out: + pr_debug("rc=%d\n", rc); + return rc; } -struct s390_paes_ctx { - struct key_blob kb; +/* + * (Re-)Convert the raw key material from the ctx into a protected key + * via convert_key() function. Update the pk_state, pk_type, pk_len + * and the protected key in the tfm context. + * Please note this function may be invoked concurrently with the very + * same tfm context. The pk_lock spinlock in the context ensures an + * atomic update of the pk and the pk state but does not guarantee any + * order of update. So a fresh converted valid protected key may get + * updated with an 'old' expired key value. As the cpacf instructions + * detect this, refuse to operate with an invalid key and the calling + * code triggers a (re-)conversion this does no harm. This may lead to + * unnecessary additional conversion but never to invalid data on en- + * or decrypt operations. + */ +static int paes_convert_key(struct s390_paes_ctx *ctx) +{ struct paes_protkey pk; - spinlock_t pk_lock; - unsigned long fc; -}; + int rc; -struct s390_pxts_ctx { - struct key_blob kb; - struct paes_protkey pk[2]; - spinlock_t pk_lock; - unsigned long fc; -}; + spin_lock_bh(&ctx->pk_lock); + ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS; + spin_unlock_bh(&ctx->pk_lock); -static inline int __paes_keyblob2pkey(const u8 *key, unsigned int keylen, - struct paes_protkey *pk) -{ - int i, rc = -EIO; + rc = convert_key(ctx->keybuf, ctx->keylen, &pk); - /* try three times in case of busy card */ - for (i = 0; rc && i < 3; i++) { - if (rc == -EBUSY && in_task()) { - if (msleep_interruptible(1000)) - return -EINTR; - } - rc = pkey_key2protkey(key, keylen, pk->protkey, &pk->len, - &pk->type); + /* update context */ + spin_lock_bh(&ctx->pk_lock); + if (rc) { + ctx->pk_state = rc; + } else { + ctx->pk_state = PK_STATE_VALID; + ctx->pk = pk; } + spin_unlock_bh(&ctx->pk_lock); + memzero_explicit(&pk, sizeof(pk)); + pr_debug("rc=%d\n", rc); return rc; } -static inline int __paes_convert_key(struct s390_paes_ctx *ctx) +/* + * (Re-)Convert the raw xts key material from the ctx into a + * protected key via convert_key() function. Update the pk_state, + * pk_type, pk_len and the protected key in the tfm context. + * See also comments on function paes_convert_key. + */ +static int pxts_convert_key(struct s390_pxts_ctx *ctx) { - struct paes_protkey pk; + struct paes_protkey pk0, pk1; + size_t split_keylen; int rc; - pk.len = sizeof(pk.protkey); - rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk); + spin_lock_bh(&ctx->pk_lock); + ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS; + spin_unlock_bh(&ctx->pk_lock); + + rc = convert_key(ctx->keybuf, ctx->keylen, &pk0); if (rc) - return rc; + goto out; + + switch (pk0.type) { + case PKEY_KEYTYPE_AES_128: + case PKEY_KEYTYPE_AES_256: + /* second keytoken required */ + if (ctx->keylen % 2) { + rc = -EINVAL; + goto out; + } + split_keylen = ctx->keylen / 2; + rc = convert_key(ctx->keybuf + split_keylen, + split_keylen, &pk1); + if (rc) + goto out; + if (pk0.type != pk1.type) { + rc = -EINVAL; + goto out; + } + break; + case PKEY_KEYTYPE_AES_XTS_128: + case PKEY_KEYTYPE_AES_XTS_256: + /* single key */ + pk1.type = 0; + break; + default: + /* unsupported protected keytype */ + rc = -EINVAL; + goto out; + } +out: + /* update context */ spin_lock_bh(&ctx->pk_lock); - memcpy(&ctx->pk, &pk, sizeof(pk)); + if (rc) { + ctx->pk_state = rc; + } else { + ctx->pk_state = PK_STATE_VALID; + ctx->pk[0] = pk0; + ctx->pk[1] = pk1; + } spin_unlock_bh(&ctx->pk_lock); - return 0; + memzero_explicit(&pk0, sizeof(pk0)); + memzero_explicit(&pk1, sizeof(pk1)); + pr_debug("rc=%d\n", rc); + return rc; } -static int ecb_paes_init(struct crypto_skcipher *tfm) -{ - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); +/* + * PAES ECB implementation + */ - ctx->kb.key = NULL; - spin_lock_init(&ctx->pk_lock); +struct ecb_param { + u8 key[PAES_256_PROTKEY_SIZE]; +} __packed; - return 0; -} +struct s390_pecb_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + struct ecb_param param; +}; -static void ecb_paes_exit(struct crypto_skcipher *tfm) +static int ecb_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - - _free_kb_keybuf(&ctx->kb); -} - -static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx) -{ - unsigned long fc; + long fc; int rc; - rc = __paes_convert_key(ctx); + /* set raw key into context */ + rc = paes_ctx_setkey(ctx, in_key, key_len); if (rc) - return rc; + goto out; - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0; + /* convert key into protected key */ + rc = paes_convert_key(ctx); + if (rc) + goto out; - /* Check if the function code is available */ + /* Pick the correct function code based on the protected key type */ + switch (ctx->pk.type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KM_PAES_128; + break; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KM_PAES_192; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KM_PAES_256; + break; + default: + fc = 0; + break; + } ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; - return ctx->fc ? 0 : -EINVAL; + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int ecb_paes_do_crypt(struct s390_paes_ctx *ctx, + struct s390_pecb_req_ctx *req_ctx, + bool maysleep) { - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - int rc; - - _free_kb_keybuf(&ctx->kb); - rc = _key_to_kb(&ctx->kb, in_key, key_len); + struct ecb_param *param = &req_ctx->param; + struct skcipher_walk *walk = &req_ctx->walk; + unsigned int nbytes, n, k; + int pk_state, rc = 0; + + if (!req_ctx->param_init_done) { + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + req_ctx->param_init_done = true; + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + } if (rc) - return rc; + goto out; - return __ecb_paes_set_key(ctx); + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) != 0) { + /* only use complete blocks */ + n = nbytes & ~(AES_BLOCK_SIZE - 1); + k = cpacf_km(ctx->fc | req_ctx->modifier, param, + walk->dst.virt.addr, walk->src.virt.addr, n); + if (k) + rc = skcipher_walk_done(walk, nbytes - k); + if (k < n) { + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) + goto out; + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + spin_unlock_bh(&ctx->pk_lock); + } + } + +out: + pr_debug("rc=%d\n", rc); + return rc; } static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier) { + struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - } param; - struct skcipher_walk walk; - unsigned int nbytes, n, k; + struct skcipher_walk *walk = &req_ctx->walk; int rc; - rc = skcipher_walk_virt(&walk, req, false); + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ + + rc = skcipher_walk_virt(walk, req, false); if (rc) - return rc; + goto out; - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); + req_ctx->modifier = modifier; + req_ctx->param_init_done = false; - while ((nbytes = walk.nbytes) != 0) { - /* only use complete blocks */ - n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, ¶m, - walk.dst.virt.addr, walk.src.virt.addr, n); - if (k) - rc = skcipher_walk_done(&walk, nbytes - k); - if (k < n) { - if (__paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - } + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = ecb_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); return rc; } @@ -310,112 +506,256 @@ static int ecb_paes_decrypt(struct skcipher_request *req) return ecb_paes_crypt(req, CPACF_DECRYPT); } -static struct skcipher_alg ecb_paes_alg = { - .base.cra_name = "ecb(paes)", - .base.cra_driver_name = "ecb-paes-s390", - .base.cra_priority = 401, /* combo: aes + ecb + 1 */ - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct s390_paes_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list), - .init = ecb_paes_init, - .exit = ecb_paes_exit, - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .setkey = ecb_paes_set_key, - .encrypt = ecb_paes_encrypt, - .decrypt = ecb_paes_decrypt, -}; - -static int cbc_paes_init(struct crypto_skcipher *tfm) +static int ecb_paes_init(struct crypto_skcipher *tfm) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - ctx->kb.key = NULL; + memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->pk_lock); + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pecb_req_ctx)); + return 0; } -static void cbc_paes_exit(struct crypto_skcipher *tfm) +static void ecb_paes_exit(struct crypto_skcipher *tfm) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb); + memzero_explicit(ctx, sizeof(*ctx)); } -static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx) +static int ecb_paes_do_one_request(struct crypto_engine *engine, void *areq) { - unsigned long fc; + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; int rc; - rc = __paes_convert_key(ctx); - if (rc) - return rc; + /* walk has already been prepared */ + + rc = ecb_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); + } - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0; + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); + return rc; +} - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0; +static struct skcipher_engine_alg ecb_paes_alg = { + .base = { + .base.cra_name = "ecb(paes)", + .base.cra_driver_name = "ecb-paes-s390", + .base.cra_priority = 401, /* combo: aes + ecb + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.base.cra_list), + .init = ecb_paes_init, + .exit = ecb_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .setkey = ecb_paes_setkey, + .encrypt = ecb_paes_encrypt, + .decrypt = ecb_paes_decrypt, + }, + .op = { + .do_one_request = ecb_paes_do_one_request, + }, +}; - return ctx->fc ? 0 : -EINVAL; -} +/* + * PAES CBC implementation + */ + +struct cbc_param { + u8 iv[AES_BLOCK_SIZE]; + u8 key[PAES_256_PROTKEY_SIZE]; +} __packed; + +struct s390_pcbc_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + struct cbc_param param; +}; -static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int cbc_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) { struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + long fc; int rc; - _free_kb_keybuf(&ctx->kb); - rc = _key_to_kb(&ctx->kb, in_key, key_len); + /* set raw key into context */ + rc = paes_ctx_setkey(ctx, in_key, key_len); if (rc) - return rc; + goto out; - return __cbc_paes_set_key(ctx); + /* convert raw key into protected key */ + rc = paes_convert_key(ctx); + if (rc) + goto out; + + /* Pick the correct function code based on the protected key type */ + switch (ctx->pk.type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KMC_PAES_128; + break; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KMC_PAES_192; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KMC_PAES_256; + break; + default: + fc = 0; + break; + } + ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0; + + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) +static int cbc_paes_do_crypt(struct s390_paes_ctx *ctx, + struct s390_pcbc_req_ctx *req_ctx, + bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct { - u8 iv[AES_BLOCK_SIZE]; - u8 key[PAES_256_PROTKEY_SIZE]; - } param; - struct skcipher_walk walk; + struct cbc_param *param = &req_ctx->param; + struct skcipher_walk *walk = &req_ctx->walk; unsigned int nbytes, n, k; - int rc; - - rc = skcipher_walk_virt(&walk, req, false); + int pk_state, rc = 0; + + if (!req_ctx->param_init_done) { + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + req_ctx->param_init_done = true; + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + } if (rc) - return rc; + goto out; - memcpy(param.iv, walk.iv, AES_BLOCK_SIZE); - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); + memcpy(param->iv, walk->iv, AES_BLOCK_SIZE); - while ((nbytes = walk.nbytes) != 0) { + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_kmc(ctx->fc | modifier, ¶m, - walk.dst.virt.addr, walk.src.virt.addr, n); + k = cpacf_kmc(ctx->fc | req_ctx->modifier, param, + walk->dst.virt.addr, walk->src.virt.addr, n); if (k) { - memcpy(walk.iv, param.iv, AES_BLOCK_SIZE); - rc = skcipher_walk_done(&walk, nbytes - k); + memcpy(walk->iv, param->iv, AES_BLOCK_SIZE); + rc = skcipher_walk_done(walk, nbytes - k); } if (k < n) { - if (__paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) + goto out; spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); spin_unlock_bh(&ctx->pk_lock); } } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier) +{ + struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; + + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ + + rc = skcipher_walk_virt(walk, req, false); + if (rc) + goto out; + + req_ctx->modifier = modifier; + req_ctx->param_init_done = false; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = cbc_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); return rc; } @@ -429,496 +769,882 @@ static int cbc_paes_decrypt(struct skcipher_request *req) return cbc_paes_crypt(req, CPACF_DECRYPT); } -static struct skcipher_alg cbc_paes_alg = { - .base.cra_name = "cbc(paes)", - .base.cra_driver_name = "cbc-paes-s390", - .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct s390_paes_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list), - .init = cbc_paes_init, - .exit = cbc_paes_exit, - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = cbc_paes_set_key, - .encrypt = cbc_paes_encrypt, - .decrypt = cbc_paes_decrypt, -}; - -static int xts_paes_init(struct crypto_skcipher *tfm) +static int cbc_paes_init(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - ctx->kb.key = NULL; + memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->pk_lock); + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pcbc_req_ctx)); + return 0; } -static void xts_paes_exit(struct crypto_skcipher *tfm) +static void cbc_paes_exit(struct crypto_skcipher *tfm) { - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb); + memzero_explicit(ctx, sizeof(*ctx)); } -static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx) +static int cbc_paes_do_one_request(struct crypto_engine *engine, void *areq) { - struct paes_protkey pk0, pk1; - size_t split_keylen; + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; int rc; - pk0.len = sizeof(pk0.protkey); - pk1.len = sizeof(pk1.protkey); - - rc = __paes_keyblob2pkey(ctx->kb.key, ctx->kb.keylen, &pk0); - if (rc) - return rc; + /* walk has already been prepared */ + + rc = cbc_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); + } - switch (pk0.type) { - case PKEY_KEYTYPE_AES_128: - case PKEY_KEYTYPE_AES_256: - /* second keytoken required */ - if (ctx->kb.keylen % 2) - return -EINVAL; - split_keylen = ctx->kb.keylen / 2; + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); + return rc; +} - rc = __paes_keyblob2pkey(ctx->kb.key + split_keylen, - split_keylen, &pk1); - if (rc) - return rc; +static struct skcipher_engine_alg cbc_paes_alg = { + .base = { + .base.cra_name = "cbc(paes)", + .base.cra_driver_name = "cbc-paes-s390", + .base.cra_priority = 402, /* cbc-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.base.cra_list), + .init = cbc_paes_init, + .exit = cbc_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = cbc_paes_setkey, + .encrypt = cbc_paes_encrypt, + .decrypt = cbc_paes_decrypt, + }, + .op = { + .do_one_request = cbc_paes_do_one_request, + }, +}; - if (pk0.type != pk1.type) - return -EINVAL; - break; - case PKEY_KEYTYPE_AES_XTS_128: - case PKEY_KEYTYPE_AES_XTS_256: - /* single key */ - pk1.type = 0; - break; - default: - /* unsupported protected keytype */ - return -EINVAL; - } +/* + * PAES CTR implementation + */ - spin_lock_bh(&ctx->pk_lock); - ctx->pk[0] = pk0; - ctx->pk[1] = pk1; - spin_unlock_bh(&ctx->pk_lock); +struct ctr_param { + u8 key[PAES_256_PROTKEY_SIZE]; +} __packed; - return 0; -} +struct s390_pctr_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + struct ctr_param param; +}; -static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx) +static int ctr_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) { - unsigned long fc; + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + long fc; int rc; - rc = __xts_paes_convert_key(ctx); + /* set raw key into context */ + rc = paes_ctx_setkey(ctx, in_key, key_len); if (rc) - return rc; + goto out; + + /* convert raw key into protected key */ + rc = paes_convert_key(ctx); + if (rc) + goto out; /* Pick the correct function code based on the protected key type */ - switch (ctx->pk[0].type) { + switch (ctx->pk.type) { case PKEY_KEYTYPE_AES_128: - fc = CPACF_KM_PXTS_128; - break; - case PKEY_KEYTYPE_AES_256: - fc = CPACF_KM_PXTS_256; + fc = CPACF_KMCTR_PAES_128; break; - case PKEY_KEYTYPE_AES_XTS_128: - fc = CPACF_KM_PXTS_128_FULL; + case PKEY_KEYTYPE_AES_192: + fc = CPACF_KMCTR_PAES_192; break; - case PKEY_KEYTYPE_AES_XTS_256: - fc = CPACF_KM_PXTS_256_FULL; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KMCTR_PAES_256; break; default: fc = 0; break; } + ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0; - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static inline unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) +{ + unsigned int i, n; + + /* only use complete blocks, max. PAGE_SIZE */ + memcpy(ctrptr, iv, AES_BLOCK_SIZE); + n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); + for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) { + memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE); + crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE); + ctrptr += AES_BLOCK_SIZE; + } + return n; +} + +static int ctr_paes_do_crypt(struct s390_paes_ctx *ctx, + struct s390_pctr_req_ctx *req_ctx, + bool maysleep) +{ + struct ctr_param *param = &req_ctx->param; + struct skcipher_walk *walk = &req_ctx->walk; + u8 buf[AES_BLOCK_SIZE], *ctrptr; + unsigned int nbytes, n, k; + int pk_state, locked, rc = 0; + + if (!req_ctx->param_init_done) { + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + req_ctx->param_init_done = true; + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + } + if (rc) + goto out; + + locked = mutex_trylock(&ctrblk_lock); + + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { + n = AES_BLOCK_SIZE; + if (nbytes >= 2 * AES_BLOCK_SIZE && locked) + n = __ctrblk_init(ctrblk, walk->iv, nbytes); + ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv; + k = cpacf_kmctr(ctx->fc, param, walk->dst.virt.addr, + walk->src.virt.addr, n, ctrptr); + if (k) { + if (ctrptr == ctrblk) + memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + crypto_inc(walk->iv, AES_BLOCK_SIZE); + rc = skcipher_walk_done(walk, nbytes - k); + } + if (k < n) { + if (!maysleep) { + if (locked) + mutex_unlock(&ctrblk_lock); + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) { + if (locked) + mutex_unlock(&ctrblk_lock); + goto out; + } + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + spin_unlock_bh(&ctx->pk_lock); + } + } + if (locked) + mutex_unlock(&ctrblk_lock); + + /* final block may be < AES_BLOCK_SIZE, copy only nbytes */ + if (nbytes) { + memset(buf, 0, AES_BLOCK_SIZE); + memcpy(buf, walk->src.virt.addr, nbytes); + while (1) { + if (cpacf_kmctr(ctx->fc, param, buf, + buf, AES_BLOCK_SIZE, + walk->iv) == AES_BLOCK_SIZE) + break; + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = paes_convert_key(ctx); + if (rc) + goto out; + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key, ctx->pk.protkey, sizeof(param->key)); + spin_unlock_bh(&ctx->pk_lock); + } + memcpy(walk->dst.virt.addr, buf, nbytes); + crypto_inc(walk->iv, AES_BLOCK_SIZE); + rc = skcipher_walk_done(walk, 0); + } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int ctr_paes_crypt(struct skcipher_request *req) +{ + struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; + + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ + + rc = skcipher_walk_virt(walk, req, false); + if (rc) + goto out; + + req_ctx->param_init_done = false; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = ctr_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); + return rc; +} + +static int ctr_paes_init(struct crypto_skcipher *tfm) +{ + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->pk_lock); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pctr_req_ctx)); + + return 0; +} + +static void ctr_paes_exit(struct crypto_skcipher *tfm) +{ + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + + memzero_explicit(ctx, sizeof(*ctx)); +} + +static int ctr_paes_do_one_request(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; - return ctx->fc ? 0 : -EINVAL; + /* walk has already been prepared */ + + rc = ctr_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); + } + + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); + return rc; } -static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int in_keylen) +static struct skcipher_engine_alg ctr_paes_alg = { + .base = { + .base.cra_name = "ctr(paes)", + .base.cra_driver_name = "ctr-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct s390_paes_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.base.cra_list), + .init = ctr_paes_init, + .exit = ctr_paes_exit, + .min_keysize = PAES_MIN_KEYSIZE, + .max_keysize = PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ctr_paes_setkey, + .encrypt = ctr_paes_crypt, + .decrypt = ctr_paes_crypt, + .chunksize = AES_BLOCK_SIZE, + }, + .op = { + .do_one_request = ctr_paes_do_one_request, + }, +}; + +/* + * PAES XTS implementation + */ + +struct xts_full_km_param { + u8 key[64]; + u8 tweak[16]; + u8 nap[16]; + u8 wkvp[32]; +} __packed; + +struct xts_km_param { + u8 key[PAES_256_PROTKEY_SIZE]; + u8 init[16]; +} __packed; + +struct xts_pcc_param { + u8 key[PAES_256_PROTKEY_SIZE]; + u8 tweak[16]; + u8 block[16]; + u8 bit[16]; + u8 xts[16]; +} __packed; + +struct s390_pxts_req_ctx { + unsigned long modifier; + struct skcipher_walk walk; + bool param_init_done; + union { + struct xts_full_km_param full_km_param; + struct xts_km_param km_param; + } param; +}; + +static int xts_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int in_keylen) { struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); u8 ckey[2 * AES_MAX_KEY_SIZE]; unsigned int ckey_len; + long fc; int rc; if ((in_keylen == 32 || in_keylen == 64) && xts_verify_key(tfm, in_key, in_keylen)) return -EINVAL; - _free_kb_keybuf(&ctx->kb); - rc = _xts_key_to_kb(&ctx->kb, in_key, in_keylen); + /* set raw key into context */ + rc = pxts_ctx_setkey(ctx, in_key, in_keylen); if (rc) - return rc; + goto out; - rc = __xts_paes_set_key(ctx); + /* convert raw key(s) into protected key(s) */ + rc = pxts_convert_key(ctx); if (rc) - return rc; + goto out; /* - * It is not possible on a single protected key (e.g. full AES-XTS) to - * check, if k1 and k2 are the same. - */ - if (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128 || - ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_256) - return 0; - /* * xts_verify_key verifies the key length is not odd and makes * sure that the two keys are not the same. This can be done - * on the two protected keys as well + * on the two protected keys as well - but not for full xts keys. */ - ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? - AES_KEYSIZE_128 : AES_KEYSIZE_256; - memcpy(ckey, ctx->pk[0].protkey, ckey_len); - memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len); - return xts_verify_key(tfm, ckey, 2*ckey_len); + if (ctx->pk[0].type == PKEY_KEYTYPE_AES_128 || + ctx->pk[0].type == PKEY_KEYTYPE_AES_256) { + ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? + AES_KEYSIZE_128 : AES_KEYSIZE_256; + memcpy(ckey, ctx->pk[0].protkey, ckey_len); + memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len); + rc = xts_verify_key(tfm, ckey, 2 * ckey_len); + memzero_explicit(ckey, sizeof(ckey)); + if (rc) + goto out; + } + + /* Pick the correct function code based on the protected key type */ + switch (ctx->pk[0].type) { + case PKEY_KEYTYPE_AES_128: + fc = CPACF_KM_PXTS_128; + break; + case PKEY_KEYTYPE_AES_256: + fc = CPACF_KM_PXTS_256; + break; + case PKEY_KEYTYPE_AES_XTS_128: + fc = CPACF_KM_PXTS_128_FULL; + break; + case PKEY_KEYTYPE_AES_XTS_256: + fc = CPACF_KM_PXTS_256_FULL; + break; + default: + fc = 0; + break; + } + ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + + rc = fc ? 0 : -EINVAL; + +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int paes_xts_crypt_full(struct skcipher_request *req, - unsigned long modifier) +static int xts_paes_do_crypt_fullkey(struct s390_pxts_ctx *ctx, + struct s390_pxts_req_ctx *req_ctx, + bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct xts_full_km_param *param = &req_ctx->param.full_km_param; + struct skcipher_walk *walk = &req_ctx->walk; unsigned int keylen, offset, nbytes, n, k; - struct { - u8 key[64]; - u8 tweak[16]; - u8 nap[16]; - u8 wkvp[32]; - } fxts_param = { - .nap = {0}, - }; - struct skcipher_walk walk; - int rc; + int rc = 0; - rc = skcipher_walk_virt(&walk, req, false); - if (rc) - return rc; + /* + * The calling function xts_paes_do_crypt() ensures the + * protected key state is always PK_STATE_VALID when this + * function is invoked. + */ keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 0; - spin_lock_bh(&ctx->pk_lock); - memcpy(fxts_param.key + offset, ctx->pk[0].protkey, keylen); - memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen, - sizeof(fxts_param.wkvp)); - spin_unlock_bh(&ctx->pk_lock); - memcpy(fxts_param.tweak, walk.iv, sizeof(fxts_param.tweak)); - fxts_param.nap[0] = 0x01; /* initial alpha power (1, little-endian) */ + if (!req_ctx->param_init_done) { + memset(param, 0, sizeof(*param)); + spin_lock_bh(&ctx->pk_lock); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); + memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp)); + spin_unlock_bh(&ctx->pk_lock); + memcpy(param->tweak, walk->iv, sizeof(param->tweak)); + param->nap[0] = 0x01; /* initial alpha power (1, little-endian) */ + req_ctx->param_init_done = true; + } - while ((nbytes = walk.nbytes) != 0) { + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, fxts_param.key + offset, - walk.dst.virt.addr, walk.src.virt.addr, n); + k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset, + walk->dst.virt.addr, walk->src.virt.addr, n); if (k) - rc = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(walk, nbytes - k); if (k < n) { - if (__xts_paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = pxts_convert_key(ctx); + if (rc) + goto out; spin_lock_bh(&ctx->pk_lock); - memcpy(fxts_param.key + offset, ctx->pk[0].protkey, - keylen); - memcpy(fxts_param.wkvp, ctx->pk[0].protkey + keylen, - sizeof(fxts_param.wkvp)); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); + memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp)); spin_unlock_bh(&ctx->pk_lock); } } +out: + pr_debug("rc=%d\n", rc); return rc; } -static int paes_xts_crypt(struct skcipher_request *req, unsigned long modifier) +static inline int __xts_2keys_prep_param(struct s390_pxts_ctx *ctx, + struct xts_km_param *param, + struct skcipher_walk *walk, + unsigned int keylen, + unsigned int offset, bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct xts_pcc_param pcc_param; + unsigned long cc = 1; + int rc = 0; + + while (cc) { + memset(&pcc_param, 0, sizeof(pcc_param)); + memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + spin_lock_bh(&ctx->pk_lock); + memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); + spin_unlock_bh(&ctx->pk_lock); + cc = cpacf_pcc(ctx->fc, pcc_param.key + offset); + if (cc) { + if (!maysleep) { + rc = -EKEYEXPIRED; + break; + } + rc = pxts_convert_key(ctx); + if (rc) + break; + continue; + } + memcpy(param->init, pcc_param.xts, 16); + } + + memzero_explicit(pcc_param.key, sizeof(pcc_param.key)); + return rc; +} + +static int xts_paes_do_crypt_2keys(struct s390_pxts_ctx *ctx, + struct s390_pxts_req_ctx *req_ctx, + bool maysleep) +{ + struct xts_km_param *param = &req_ctx->param.km_param; + struct skcipher_walk *walk = &req_ctx->walk; unsigned int keylen, offset, nbytes, n, k; - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - u8 tweak[16]; - u8 block[16]; - u8 bit[16]; - u8 xts[16]; - } pcc_param; - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - u8 init[16]; - } xts_param; - struct skcipher_walk walk; - int rc; + int rc = 0; - rc = skcipher_walk_virt(&walk, req, false); - if (rc) - return rc; + /* + * The calling function xts_paes_do_crypt() ensures the + * protected key state is always PK_STATE_VALID when this + * function is invoked. + */ keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0; - memset(&pcc_param, 0, sizeof(pcc_param)); - memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak)); - spin_lock_bh(&ctx->pk_lock); - memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen); - memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen); - spin_unlock_bh(&ctx->pk_lock); - cpacf_pcc(ctx->fc, pcc_param.key + offset); - memcpy(xts_param.init, pcc_param.xts, 16); + if (!req_ctx->param_init_done) { + rc = __xts_2keys_prep_param(ctx, param, walk, + keylen, offset, maysleep); + if (rc) + goto out; + req_ctx->param_init_done = true; + } - while ((nbytes = walk.nbytes) != 0) { + /* + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. + */ + while ((nbytes = walk->nbytes) != 0) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, xts_param.key + offset, - walk.dst.virt.addr, walk.src.virt.addr, n); + k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset, + walk->dst.virt.addr, walk->src.virt.addr, n); if (k) - rc = skcipher_walk_done(&walk, nbytes - k); + rc = skcipher_walk_done(walk, nbytes - k); if (k < n) { - if (__xts_paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = pxts_convert_key(ctx); + if (rc) + goto out; spin_lock_bh(&ctx->pk_lock); - memcpy(xts_param.key + offset, - ctx->pk[0].protkey, keylen); + memcpy(param->key + offset, ctx->pk[0].protkey, keylen); spin_unlock_bh(&ctx->pk_lock); } } +out: + pr_debug("rc=%d\n", rc); return rc; } -static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) +static int xts_paes_do_crypt(struct s390_pxts_ctx *ctx, + struct s390_pxts_req_ctx *req_ctx, + bool maysleep) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + int pk_state, rc = 0; + + /* fetch and check protected key state */ + spin_lock_bh(&ctx->pk_lock); + pk_state = ctx->pk_state; + switch (pk_state) { + case PK_STATE_NO_KEY: + rc = -ENOKEY; + break; + case PK_STATE_CONVERT_IN_PROGRESS: + rc = -EKEYEXPIRED; + break; + case PK_STATE_VALID: + break; + default: + rc = pk_state < 0 ? pk_state : -EIO; + break; + } + spin_unlock_bh(&ctx->pk_lock); + if (rc) + goto out; + /* Call the 'real' crypt function based on the xts prot key type. */ switch (ctx->fc) { case CPACF_KM_PXTS_128: case CPACF_KM_PXTS_256: - return paes_xts_crypt(req, modifier); + rc = xts_paes_do_crypt_2keys(ctx, req_ctx, maysleep); + break; case CPACF_KM_PXTS_128_FULL: case CPACF_KM_PXTS_256_FULL: - return paes_xts_crypt_full(req, modifier); + rc = xts_paes_do_crypt_fullkey(ctx, req_ctx, maysleep); + break; default: - return -EINVAL; + rc = -EINVAL; } -} -static int xts_paes_encrypt(struct skcipher_request *req) -{ - return xts_paes_crypt(req, 0); +out: + pr_debug("rc=%d\n", rc); + return rc; } -static int xts_paes_decrypt(struct skcipher_request *req) +static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier) { - return xts_paes_crypt(req, CPACF_DECRYPT); -} + struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; -static struct skcipher_alg xts_paes_alg = { - .base.cra_name = "xts(paes)", - .base.cra_driver_name = "xts-paes-s390", - .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct s390_pxts_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list), - .init = xts_paes_init, - .exit = xts_paes_exit, - .min_keysize = 2 * PAES_MIN_KEYSIZE, - .max_keysize = 2 * PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_paes_set_key, - .encrypt = xts_paes_encrypt, - .decrypt = xts_paes_decrypt, -}; + /* + * Attempt synchronous encryption first. If it fails, schedule the request + * asynchronously via the crypto engine. To preserve execution order, + * once a request is queued to the engine, further requests using the same + * tfm will also be routed through the engine. + */ -static int ctr_paes_init(struct crypto_skcipher *tfm) -{ - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + rc = skcipher_walk_virt(walk, req, false); + if (rc) + goto out; - ctx->kb.key = NULL; - spin_lock_init(&ctx->pk_lock); + req_ctx->modifier = modifier; + req_ctx->param_init_done = false; - return 0; -} + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&ctx->via_engine_ctr)) { + rc = xts_paes_do_crypt(ctx, req_ctx, false); + if (rc == 0) + goto out; + } -static void ctr_paes_exit(struct crypto_skcipher *tfm) -{ - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&ctx->via_engine_ctr); + rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + skcipher_walk_done(walk, rc); - _free_kb_keybuf(&ctx->kb); +out: + if (rc != -EINPROGRESS) + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("rc=%d\n", rc); + return rc; } -static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx) +static int xts_paes_encrypt(struct skcipher_request *req) { - unsigned long fc; - int rc; - - rc = __paes_convert_key(ctx); - if (rc) - return rc; - - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? - CPACF_KMCTR_PAES_256 : 0; - - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0; + return xts_paes_crypt(req, 0); +} - return ctx->fc ? 0 : -EINVAL; +static int xts_paes_decrypt(struct skcipher_request *req) +{ + return xts_paes_crypt(req, CPACF_DECRYPT); } -static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int xts_paes_init(struct crypto_skcipher *tfm) { - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - int rc; + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); - _free_kb_keybuf(&ctx->kb); - rc = _key_to_kb(&ctx->kb, in_key, key_len); - if (rc) - return rc; + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->pk_lock); - return __ctr_paes_set_key(ctx); + crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pxts_req_ctx)); + + return 0; } -static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes) +static void xts_paes_exit(struct crypto_skcipher *tfm) { - unsigned int i, n; + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); - /* only use complete blocks, max. PAGE_SIZE */ - memcpy(ctrptr, iv, AES_BLOCK_SIZE); - n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1); - for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) { - memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE); - crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE); - ctrptr += AES_BLOCK_SIZE; - } - return n; + memzero_explicit(ctx, sizeof(*ctx)); } -static int ctr_paes_crypt(struct skcipher_request *req) +static int xts_paes_do_one_request(struct crypto_engine *engine, void *areq) { + struct skcipher_request *req = skcipher_request_cast(areq); + struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm); - u8 buf[AES_BLOCK_SIZE], *ctrptr; - struct { - u8 key[PAES_256_PROTKEY_SIZE]; - } param; - struct skcipher_walk walk; - unsigned int nbytes, n, k; - int rc, locked; - - rc = skcipher_walk_virt(&walk, req, false); - if (rc) - return rc; - - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - - locked = mutex_trylock(&ctrblk_lock); + struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk *walk = &req_ctx->walk; + int rc; - while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - n = AES_BLOCK_SIZE; - if (nbytes >= 2*AES_BLOCK_SIZE && locked) - n = __ctrblk_init(ctrblk, walk.iv, nbytes); - ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv; - k = cpacf_kmctr(ctx->fc, ¶m, walk.dst.virt.addr, - walk.src.virt.addr, n, ctrptr); - if (k) { - if (ctrptr == ctrblk) - memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE, - AES_BLOCK_SIZE); - crypto_inc(walk.iv, AES_BLOCK_SIZE); - rc = skcipher_walk_done(&walk, nbytes - k); - } - if (k < n) { - if (__paes_convert_key(ctx)) { - if (locked) - mutex_unlock(&ctrblk_lock); - return skcipher_walk_done(&walk, -EIO); - } - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - } - } - if (locked) - mutex_unlock(&ctrblk_lock); - /* - * final block may be < AES_BLOCK_SIZE, copy only nbytes - */ - if (nbytes) { - memset(buf, 0, AES_BLOCK_SIZE); - memcpy(buf, walk.src.virt.addr, nbytes); - while (1) { - if (cpacf_kmctr(ctx->fc, ¶m, buf, - buf, AES_BLOCK_SIZE, - walk.iv) == AES_BLOCK_SIZE) - break; - if (__paes_convert_key(ctx)) - return skcipher_walk_done(&walk, -EIO); - spin_lock_bh(&ctx->pk_lock); - memcpy(param.key, ctx->pk.protkey, PAES_256_PROTKEY_SIZE); - spin_unlock_bh(&ctx->pk_lock); - } - memcpy(walk.dst.virt.addr, buf, nbytes); - crypto_inc(walk.iv, AES_BLOCK_SIZE); - rc = skcipher_walk_done(&walk, nbytes); + /* walk has already been prepared */ + + rc = xts_paes_do_crypt(ctx, req_ctx, true); + if (rc == -EKEYEXPIRED) { + /* + * Protected key expired, conversion is in process. + * Trigger a re-schedule of this request by returning + * -ENOSPC ("hardware queue is full") to the crypto engine. + * To avoid immediately re-invocation of this callback, + * tell the scheduler to voluntarily give up the CPU here. + */ + cond_resched(); + pr_debug("rescheduling request\n"); + return -ENOSPC; + } else if (rc) { + skcipher_walk_done(walk, rc); } + memzero_explicit(&req_ctx->param, sizeof(req_ctx->param)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&ctx->via_engine_ctr); + crypto_finalize_skcipher_request(engine, req, rc); + local_bh_enable(); return rc; } -static struct skcipher_alg ctr_paes_alg = { - .base.cra_name = "ctr(paes)", - .base.cra_driver_name = "ctr-paes-s390", - .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct s390_paes_ctx), - .base.cra_module = THIS_MODULE, - .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list), - .init = ctr_paes_init, - .exit = ctr_paes_exit, - .min_keysize = PAES_MIN_KEYSIZE, - .max_keysize = PAES_MAX_KEYSIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ctr_paes_set_key, - .encrypt = ctr_paes_crypt, - .decrypt = ctr_paes_crypt, - .chunksize = AES_BLOCK_SIZE, +static struct skcipher_engine_alg xts_paes_alg = { + .base = { + .base.cra_name = "xts(paes)", + .base.cra_driver_name = "xts-paes-s390", + .base.cra_priority = 402, /* ecb-paes-s390 + 1 */ + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct s390_pxts_ctx), + .base.cra_module = THIS_MODULE, + .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.base.cra_list), + .init = xts_paes_init, + .exit = xts_paes_exit, + .min_keysize = 2 * PAES_MIN_KEYSIZE, + .max_keysize = 2 * PAES_MAX_KEYSIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_paes_setkey, + .encrypt = xts_paes_encrypt, + .decrypt = xts_paes_decrypt, + }, + .op = { + .do_one_request = xts_paes_do_one_request, + }, }; -static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg) +/* + * alg register, unregister, module init, exit + */ + +static struct miscdevice paes_dev = { + .name = "paes", + .minor = MISC_DYNAMIC_MINOR, +}; + +static inline void __crypto_unregister_skcipher(struct skcipher_engine_alg *alg) { - if (!list_empty(&alg->base.cra_list)) - crypto_unregister_skcipher(alg); + if (!list_empty(&alg->base.base.cra_list)) + crypto_engine_unregister_skcipher(alg); } static void paes_s390_fini(void) { + if (paes_crypto_engine) { + crypto_engine_stop(paes_crypto_engine); + crypto_engine_exit(paes_crypto_engine); + } __crypto_unregister_skcipher(&ctr_paes_alg); __crypto_unregister_skcipher(&xts_paes_alg); __crypto_unregister_skcipher(&cbc_paes_alg); __crypto_unregister_skcipher(&ecb_paes_alg); if (ctrblk) - free_page((unsigned long) ctrblk); + free_page((unsigned long)ctrblk); + misc_deregister(&paes_dev); } static int __init paes_s390_init(void) { int rc; + /* register a simple paes pseudo misc device */ + rc = misc_register(&paes_dev); + if (rc) + return rc; + + /* with this pseudo devie alloc and start a crypto engine */ + paes_crypto_engine = + crypto_engine_alloc_init_and_set(paes_dev.this_device, + true, NULL, false, MAX_QLEN); + if (!paes_crypto_engine) { + rc = -ENOMEM; + goto out_err; + } + rc = crypto_engine_start(paes_crypto_engine); + if (rc) { + crypto_engine_exit(paes_crypto_engine); + paes_crypto_engine = NULL; + goto out_err; + } + /* Query available functions for KM, KMC and KMCTR */ cpacf_query(CPACF_KM, &km_functions); cpacf_query(CPACF_KMC, &kmc_functions); @@ -927,40 +1653,45 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) || cpacf_test_func(&km_functions, CPACF_KM_PAES_192) || cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) { - rc = crypto_register_skcipher(&ecb_paes_alg); + rc = crypto_engine_register_skcipher(&ecb_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", ecb_paes_alg.base.base.cra_driver_name); } if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) || cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) { - rc = crypto_register_skcipher(&cbc_paes_alg); + rc = crypto_engine_register_skcipher(&cbc_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", cbc_paes_alg.base.base.cra_driver_name); } if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) || cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) { - rc = crypto_register_skcipher(&xts_paes_alg); + rc = crypto_engine_register_skcipher(&xts_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", xts_paes_alg.base.base.cra_driver_name); } if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) { - ctrblk = (u8 *) __get_free_page(GFP_KERNEL); + ctrblk = (u8 *)__get_free_page(GFP_KERNEL); if (!ctrblk) { rc = -ENOMEM; goto out_err; } - rc = crypto_register_skcipher(&ctr_paes_alg); + rc = crypto_engine_register_skcipher(&ctr_paes_alg); if (rc) goto out_err; + pr_debug("%s registered\n", ctr_paes_alg.base.base.cra_driver_name); } return 0; + out_err: paes_s390_fini(); return rc; diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 2bb22db54c31..d757ccbce2b4 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -10,27 +10,33 @@ #ifndef _CRYPTO_ARCH_S390_SHA_H #define _CRYPTO_ARCH_S390_SHA_H -#include <linux/crypto.h> -#include <crypto/sha1.h> #include <crypto/sha2.h> #include <crypto/sha3.h> +#include <linux/types.h> /* must be big enough for the largest SHA variant */ -#define SHA3_STATE_SIZE 200 #define CPACF_MAX_PARMBLOCK_SIZE SHA3_STATE_SIZE #define SHA_MAX_BLOCK_SIZE SHA3_224_BLOCK_SIZE +#define S390_SHA_CTX_SIZE sizeof(struct s390_sha_ctx) struct s390_sha_ctx { u64 count; /* message length in bytes */ - u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)]; - u8 buf[SHA_MAX_BLOCK_SIZE]; + union { + u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)]; + struct { + u64 state[SHA512_DIGEST_SIZE / sizeof(u64)]; + u64 count_hi; + } sha512; + }; int func; /* KIMD function to use */ - int first_message_part; + bool first_message_part; }; struct shash_desc; -int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len); -int s390_sha_final(struct shash_desc *desc, u8 *out); +int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, + unsigned int len); +int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, + u8 *out); #endif diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index bc3a22704e09..9b0d55be1239 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -18,12 +18,12 @@ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> #include <crypto/sha1.h> -#include <asm/cpacf.h> +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/module.h> #include "sha.h" @@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc) sctx->state[4] = SHA1_H4; sctx->count = 0; sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } @@ -49,7 +50,6 @@ static int s390_sha1_export(struct shash_desc *desc, void *out) octx->count = sctx->count; memcpy(octx->state, sctx->state, sizeof(octx->state)); - memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer)); return 0; } @@ -60,24 +60,26 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); - memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer)); sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = s390_sha1_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = s390_sha1_export, .import = s390_sha1_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha1_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA1_STATE_SIZE, .base = { .cra_name = "sha1", .cra_driver_name= "sha1-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c deleted file mode 100644 index 6f1ccdf93d3e..000000000000 --- a/arch/s390/crypto/sha256_s390.c +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Cryptographic API. - * - * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm. - * - * s390 Version: - * Copyright IBM Corp. 2005, 2011 - * Author(s): Jan Glauber (jang@de.ibm.com) - */ -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> -#include <crypto/sha2.h> -#include <asm/cpacf.h> - -#include "sha.h" - -static int s390_sha256_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA256_H0; - sctx->state[1] = SHA256_H1; - sctx->state[2] = SHA256_H2; - sctx->state[3] = SHA256_H3; - sctx->state[4] = SHA256_H4; - sctx->state[5] = SHA256_H5; - sctx->state[6] = SHA256_H6; - sctx->state[7] = SHA256_H7; - sctx->count = 0; - sctx->func = CPACF_KIMD_SHA_256; - - return 0; -} - -static int sha256_export(struct shash_desc *desc, void *out) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha256_state *octx = out; - - octx->count = sctx->count; - memcpy(octx->state, sctx->state, sizeof(octx->state)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - return 0; -} - -static int sha256_import(struct shash_desc *desc, const void *in) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha256_state *ictx = in; - - sctx->count = ictx->count; - memcpy(sctx->state, ictx->state, sizeof(ictx->state)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->func = CPACF_KIMD_SHA_256; - return 0; -} - -static struct shash_alg sha256_alg = { - .digestsize = SHA256_DIGEST_SIZE, - .init = s390_sha256_init, - .update = s390_sha_update, - .final = s390_sha_final, - .export = sha256_export, - .import = sha256_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha256_state), - .base = { - .cra_name = "sha256", - .cra_driver_name= "sha256-s390", - .cra_priority = 300, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int s390_sha224_init(struct shash_desc *desc) -{ - struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA224_H0; - sctx->state[1] = SHA224_H1; - sctx->state[2] = SHA224_H2; - sctx->state[3] = SHA224_H3; - sctx->state[4] = SHA224_H4; - sctx->state[5] = SHA224_H5; - sctx->state[6] = SHA224_H6; - sctx->state[7] = SHA224_H7; - sctx->count = 0; - sctx->func = CPACF_KIMD_SHA_256; - - return 0; -} - -static struct shash_alg sha224_alg = { - .digestsize = SHA224_DIGEST_SIZE, - .init = s390_sha224_init, - .update = s390_sha_update, - .final = s390_sha_final, - .export = sha256_export, - .import = sha256_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha256_state), - .base = { - .cra_name = "sha224", - .cra_driver_name= "sha224-s390", - .cra_priority = 300, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -static int __init sha256_s390_init(void) -{ - int ret; - - if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) - return -ENODEV; - ret = crypto_register_shash(&sha256_alg); - if (ret < 0) - goto out; - ret = crypto_register_shash(&sha224_alg); - if (ret < 0) - crypto_unregister_shash(&sha256_alg); -out: - return ret; -} - -static void __exit sha256_s390_fini(void) -{ - crypto_unregister_shash(&sha224_alg); - crypto_unregister_shash(&sha256_alg); -} - -module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init); -module_exit(sha256_s390_fini); - -MODULE_ALIAS_CRYPTO("sha256"); -MODULE_ALIAS_CRYPTO("sha224"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c index a84ef692f572..4a7731ac6bcd 100644 --- a/arch/s390/crypto/sha3_256_s390.c +++ b/arch/s390/crypto/sha3_256_s390.c @@ -8,12 +8,14 @@ * Copyright IBM Corp. 2019 * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com) */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> #include <crypto/sha3.h> -#include <asm/cpacf.h> +#include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> #include "sha.h" @@ -21,11 +23,11 @@ static int sha3_256_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ + sctx->first_message_part = test_facility(86); + if (!sctx->first_message_part) memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_256; - sctx->first_message_part = 1; return 0; } @@ -35,11 +37,11 @@ static int sha3_256_export(struct shash_desc *desc, void *out) struct s390_sha_ctx *sctx = shash_desc_ctx(desc); struct sha3_state *octx = out; - octx->rsiz = sctx->count; + if (sctx->first_message_part) { + memset(sctx->state, 0, sizeof(sctx->state)); + sctx->first_message_part = 0; + } memcpy(octx->st, sctx->state, sizeof(octx->st)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - octx->partial = sctx->first_message_part; - return 0; } @@ -48,10 +50,9 @@ static int sha3_256_import(struct shash_desc *desc, const void *in) struct s390_sha_ctx *sctx = shash_desc_ctx(desc); const struct sha3_state *ictx = in; - sctx->count = ictx->rsiz; + sctx->count = 0; memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + sctx->first_message_part = 0; sctx->func = CPACF_KIMD_SHA3_256; return 0; @@ -60,30 +61,26 @@ static int sha3_256_import(struct shash_desc *desc, const void *in) static int sha3_224_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; - sctx->count = ictx->rsiz; - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + sha3_256_import(desc, in); sctx->func = CPACF_KIMD_SHA3_224; - return 0; } static struct shash_alg sha3_256_alg = { .digestsize = SHA3_256_DIGEST_SIZE, /* = 32 */ .init = sha3_256_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_256_export, .import = sha3_256_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-256", .cra_driver_name = "sha3-256-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_256_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -93,28 +90,25 @@ static int sha3_224_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->count = 0; + sha3_256_init(desc); sctx->func = CPACF_KIMD_SHA3_224; - sctx->first_message_part = 1; - return 0; } static struct shash_alg sha3_224_alg = { .digestsize = SHA3_224_DIGEST_SIZE, .init = sha3_224_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_256_export, /* same as for 256 */ .import = sha3_224_import, /* function code different! */ - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-224", .cra_driver_name = "sha3-224-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_224_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c index 07528fc98ff7..018f02fff444 100644 --- a/arch/s390/crypto/sha3_512_s390.c +++ b/arch/s390/crypto/sha3_512_s390.c @@ -7,12 +7,14 @@ * Copyright IBM Corp. 2019 * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com) */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/cpufeature.h> #include <crypto/sha3.h> -#include <asm/cpacf.h> +#include <linux/cpufeature.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> #include "sha.h" @@ -20,11 +22,11 @@ static int sha3_512_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ + sctx->first_message_part = test_facility(86); + if (!sctx->first_message_part) memset(sctx->state, 0, sizeof(sctx->state)); sctx->count = 0; sctx->func = CPACF_KIMD_SHA3_512; - sctx->first_message_part = 1; return 0; } @@ -34,13 +36,12 @@ static int sha3_512_export(struct shash_desc *desc, void *out) struct s390_sha_ctx *sctx = shash_desc_ctx(desc); struct sha3_state *octx = out; - octx->rsiz = sctx->count; - octx->rsizw = sctx->count >> 32; + if (sctx->first_message_part) { + memset(sctx->state, 0, sizeof(sctx->state)); + sctx->first_message_part = 0; + } memcpy(octx->st, sctx->state, sizeof(octx->st)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); - octx->partial = sctx->first_message_part; - return 0; } @@ -49,13 +50,9 @@ static int sha3_512_import(struct shash_desc *desc, const void *in) struct s390_sha_ctx *sctx = shash_desc_ctx(desc); const struct sha3_state *ictx = in; - if (unlikely(ictx->rsizw)) - return -ERANGE; - sctx->count = ictx->rsiz; - + sctx->count = 0; memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + sctx->first_message_part = 0; sctx->func = CPACF_KIMD_SHA3_512; return 0; @@ -64,33 +61,26 @@ static int sha3_512_import(struct shash_desc *desc, const void *in) static int sha3_384_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; - if (unlikely(ictx->rsizw)) - return -ERANGE; - sctx->count = ictx->rsiz; - - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->first_message_part = ictx->partial; + sha3_512_import(desc, in); sctx->func = CPACF_KIMD_SHA3_384; - return 0; } static struct shash_alg sha3_512_alg = { .digestsize = SHA3_512_DIGEST_SIZE, .init = sha3_512_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_512_export, .import = sha3_512_import, - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-512", .cra_driver_name = "sha3-512-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -102,28 +92,25 @@ static int sha3_384_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - if (!test_facility(86)) /* msa 12 */ - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->count = 0; + sha3_512_init(desc); sctx->func = CPACF_KIMD_SHA3_384; - sctx->first_message_part = 1; - return 0; } static struct shash_alg sha3_384_alg = { .digestsize = SHA3_384_DIGEST_SIZE, .init = sha3_384_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha3_512_export, /* same as for 512 */ .import = sha3_384_import, /* function code different! */ - .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha3_state), + .descsize = S390_SHA_CTX_SIZE, + .statesize = SHA3_STATE_SIZE, .base = { .cra_name = "sha3-384", .cra_driver_name = "sha3-384-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = SHA3_384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_sha_ctx), .cra_module = THIS_MODULE, diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 04f11c407763..6cbbf5e8555f 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -7,14 +7,13 @@ * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) */ +#include <asm/cpacf.h> #include <crypto/internal/hash.h> #include <crypto/sha2.h> +#include <linux/cpufeature.h> #include <linux/errno.h> -#include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/cpufeature.h> -#include <asm/cpacf.h> #include "sha.h" @@ -22,16 +21,18 @@ static int sha512_init(struct shash_desc *desc) { struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - *(__u64 *)&ctx->state[0] = SHA512_H0; - *(__u64 *)&ctx->state[2] = SHA512_H1; - *(__u64 *)&ctx->state[4] = SHA512_H2; - *(__u64 *)&ctx->state[6] = SHA512_H3; - *(__u64 *)&ctx->state[8] = SHA512_H4; - *(__u64 *)&ctx->state[10] = SHA512_H5; - *(__u64 *)&ctx->state[12] = SHA512_H6; - *(__u64 *)&ctx->state[14] = SHA512_H7; + ctx->sha512.state[0] = SHA512_H0; + ctx->sha512.state[1] = SHA512_H1; + ctx->sha512.state[2] = SHA512_H2; + ctx->sha512.state[3] = SHA512_H3; + ctx->sha512.state[4] = SHA512_H4; + ctx->sha512.state[5] = SHA512_H5; + ctx->sha512.state[6] = SHA512_H6; + ctx->sha512.state[7] = SHA512_H7; ctx->count = 0; + ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } @@ -42,9 +43,8 @@ static int sha512_export(struct shash_desc *desc, void *out) struct sha512_state *octx = out; octx->count[0] = sctx->count; - octx->count[1] = 0; + octx->count[1] = sctx->sha512.count_hi; memcpy(octx->state, sctx->state, sizeof(octx->state)); - memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); return 0; } @@ -53,29 +53,30 @@ static int sha512_import(struct shash_desc *desc, const void *in) struct s390_sha_ctx *sctx = shash_desc_ctx(desc); const struct sha512_state *ictx = in; - if (unlikely(ictx->count[1])) - return -ERANGE; sctx->count = ictx->count[0]; + sctx->sha512.count_hi = ictx->count[1]; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); - memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); sctx->func = CPACF_KIMD_SHA_512; + sctx->first_message_part = 0; return 0; } static struct shash_alg sha512_alg = { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha512_export, .import = sha512_import, .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha512_state), + .statesize = SHA512_STATE_SIZE, .base = { .cra_name = "sha512", .cra_driver_name= "sha512-s390", .cra_priority = 300, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -87,16 +88,18 @@ static int sha384_init(struct shash_desc *desc) { struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - *(__u64 *)&ctx->state[0] = SHA384_H0; - *(__u64 *)&ctx->state[2] = SHA384_H1; - *(__u64 *)&ctx->state[4] = SHA384_H2; - *(__u64 *)&ctx->state[6] = SHA384_H3; - *(__u64 *)&ctx->state[8] = SHA384_H4; - *(__u64 *)&ctx->state[10] = SHA384_H5; - *(__u64 *)&ctx->state[12] = SHA384_H6; - *(__u64 *)&ctx->state[14] = SHA384_H7; + ctx->sha512.state[0] = SHA384_H0; + ctx->sha512.state[1] = SHA384_H1; + ctx->sha512.state[2] = SHA384_H2; + ctx->sha512.state[3] = SHA384_H3; + ctx->sha512.state[4] = SHA384_H4; + ctx->sha512.state[5] = SHA384_H5; + ctx->sha512.state[6] = SHA384_H6; + ctx->sha512.state[7] = SHA384_H7; ctx->count = 0; + ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } @@ -104,17 +107,19 @@ static int sha384_init(struct shash_desc *desc) static struct shash_alg sha384_alg = { .digestsize = SHA384_DIGEST_SIZE, .init = sha384_init, - .update = s390_sha_update, - .final = s390_sha_final, + .update = s390_sha_update_blocks, + .finup = s390_sha_finup, .export = sha512_export, .import = sha512_import, .descsize = sizeof(struct s390_sha_ctx), - .statesize = sizeof(struct sha512_state), + .statesize = SHA512_STATE_SIZE, .base = { .cra_name = "sha384", .cra_driver_name= "sha384-s390", .cra_priority = 300, .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | + CRYPTO_AHASH_ALG_FINUP_MAX, .cra_ctxsize = sizeof(struct s390_sha_ctx), .cra_module = THIS_MODULE, } diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index 961d7d522af1..b5e2c365ea05 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -13,50 +13,33 @@ #include <asm/cpacf.h> #include "sha.h" -int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) +int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct s390_sha_ctx *ctx = shash_desc_ctx(desc); unsigned int bsize = crypto_shash_blocksize(desc->tfm); - unsigned int index, n; + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); + unsigned int n; int fc; - /* how much is already in the buffer? */ - index = ctx->count % bsize; - ctx->count += len; - - if ((index + len) < bsize) - goto store; - fc = ctx->func; if (ctx->first_message_part) - fc |= test_facility(86) ? CPACF_KIMD_NIP : 0; - - /* process one stored block */ - if (index) { - memcpy(ctx->buf + index, data, bsize - index); - cpacf_kimd(fc, ctx->state, ctx->buf, bsize); - ctx->first_message_part = 0; - fc &= ~CPACF_KIMD_NIP; - data += bsize - index; - len -= bsize - index; - index = 0; - } + fc |= CPACF_KIMD_NIP; /* process as many blocks as possible */ - if (len >= bsize) { - n = (len / bsize) * bsize; - cpacf_kimd(fc, ctx->state, data, n); - ctx->first_message_part = 0; - data += n; - len -= n; + n = (len / bsize) * bsize; + ctx->count += n; + switch (ctx->func) { + case CPACF_KLMD_SHA_512: + case CPACF_KLMD_SHA3_384: + if (ctx->count < n) + ctx->sha512.count_hi++; + break; } -store: - if (len) - memcpy(ctx->buf + index , data, len); - - return 0; + cpacf_kimd(fc, ctx->state, data, n); + ctx->first_message_part = 0; + return len - n; } -EXPORT_SYMBOL_GPL(s390_sha_update); +EXPORT_SYMBOL_GPL(s390_sha_update_blocks); static int s390_crypto_shash_parmsize(int func) { @@ -77,15 +60,15 @@ static int s390_crypto_shash_parmsize(int func) } } -int s390_sha_final(struct shash_desc *desc, u8 *out) +int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len, + u8 *out) { struct s390_sha_ctx *ctx = shash_desc_ctx(desc); - unsigned int bsize = crypto_shash_blocksize(desc->tfm); - u64 bits; - unsigned int n; int mbl_offset, fc; + u64 bits; + + ctx->count += len; - n = ctx->count % bsize; bits = ctx->count * 8; mbl_offset = s390_crypto_shash_parmsize(ctx->func); if (mbl_offset < 0) @@ -95,17 +78,16 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) /* set total msg bit length (mbl) in CPACF parmblock */ switch (ctx->func) { - case CPACF_KLMD_SHA_1: - case CPACF_KLMD_SHA_256: - memcpy(ctx->state + mbl_offset, &bits, sizeof(bits)); - break; case CPACF_KLMD_SHA_512: - /* - * the SHA512 parmblock has a 128-bit mbl field, clear - * high-order u64 field, copy bits to low-order u64 field - */ - memset(ctx->state + mbl_offset, 0x00, sizeof(bits)); + /* The SHA512 parmblock has a 128-bit mbl field. */ + if (ctx->count < len) + ctx->sha512.count_hi++; + ctx->sha512.count_hi <<= 3; + ctx->sha512.count_hi |= ctx->count >> 61; mbl_offset += sizeof(u64) / sizeof(u32); + fallthrough; + case CPACF_KLMD_SHA_1: + case CPACF_KLMD_SHA_256: memcpy(ctx->state + mbl_offset, &bits, sizeof(bits)); break; case CPACF_KLMD_SHA3_224: @@ -121,16 +103,14 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) fc |= test_facility(86) ? CPACF_KLMD_DUFOP : 0; if (ctx->first_message_part) fc |= CPACF_KLMD_NIP; - cpacf_klmd(fc, ctx->state, ctx->buf, n); + cpacf_klmd(fc, ctx->state, src, len); /* copy digest to out */ memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm)); - /* wipe context */ - memset(ctx, 0, sizeof *ctx); return 0; } -EXPORT_SYMBOL_GPL(s390_sha_final); +EXPORT_SYMBOL_GPL(s390_sha_finup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("s390 SHA cipher common functions"); diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index 4131f0daa5ea..61220e717af0 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -9,6 +9,7 @@ #include <linux/slab.h> #include <linux/cpu.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/hypfs.h> #include "hypfs.h" @@ -107,7 +108,7 @@ static struct hypfs_dbfs_file dbfs_file_0c = { */ int __init hypfs_diag0c_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; hypfs_dbfs_create_file(&dbfs_file_0c); return 0; @@ -118,7 +119,7 @@ int __init hypfs_diag0c_init(void) */ void hypfs_diag0c_exit(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; hypfs_dbfs_remove_file(&dbfs_file_0c); } diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c index 00a6d370a280..ede951dc0085 100644 --- a/arch/s390/hypfs/hypfs_diag_fs.c +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include "hypfs_diag.h" @@ -208,6 +209,8 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(), cpu_info)); cpu_dir = hypfs_mkdir(cpus_dir, buffer); + if (IS_ERR(cpu_dir)) + return PTR_ERR(cpu_dir); rc = hypfs_create_u64(cpu_dir, "mgmtime", cpu_info__acc_time(diag204_get_info_type(), cpu_info) - cpu_info__lp_time(diag204_get_info_type(), cpu_info)); @@ -382,7 +385,7 @@ static void diag224_delete_name_table(void) int __init __hypfs_diag_fs_init(void) { - if (MACHINE_IS_LPAR) + if (machine_is_lpar()) return diag224_get_name_table(); return 0; } diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 3db40ad853e0..4db2895e4da3 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -11,6 +11,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <asm/extable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/timex.h> @@ -121,7 +122,7 @@ static struct hypfs_dbfs_file dbfs_file_2fc = { int hypfs_vm_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; if (diag2fc(0, all_guests, NULL) > 0) diag2fc_guest_query = all_guests; @@ -135,7 +136,7 @@ int hypfs_vm_init(void) void hypfs_vm_exit(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; hypfs_dbfs_remove_file(&dbfs_file_2fc); } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d428635abf08..96409573c75d 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -24,6 +24,7 @@ #include <linux/kobject.h> #include <linux/seq_file.h> #include <linux/uio.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include "hypfs.h" @@ -184,7 +185,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } hypfs_delete_tree(sb->s_root); - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = hypfs_vm_create_files(sb->s_root); else rc = hypfs_diag_create_files(sb->s_root); @@ -273,7 +274,7 @@ static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_root = root_dentry = d_make_root(root_inode); if (!root_dentry) return -ENOMEM; - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = hypfs_vm_create_files(root_dentry); else rc = hypfs_diag_create_files(root_dentry); @@ -341,7 +342,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, struct inode *inode; inode_lock(d_inode(parent)); - dentry = lookup_one_len(name, parent, strlen(name)); + dentry = lookup_noperm(&QSTR(name), parent); if (IS_ERR(dentry)) { dentry = ERR_PTR(-ENOMEM); goto fail; diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index d20df8c923fc..317c07c09ae4 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -2,7 +2,7 @@ #ifndef _ASM_S390_ABS_LOWCORE_H #define _ASM_S390_ABS_LOWCORE_H -#include <asm/sections.h> +#include <linux/smp.h> #include <asm/lowcore.h> #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) @@ -25,11 +25,4 @@ static inline void put_abs_lowcore(struct lowcore *lc) put_cpu(); } -extern int __bootdata_preserved(relocate_lowcore); - -static inline int have_relocated_lowcore(void) -{ - return relocate_lowcore; -} - #endif /* _ASM_S390_ABS_LOWCORE_H */ diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 73e781b56bfe..c7bf60a541e9 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -32,8 +32,8 @@ #define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE) #define ALT_TYPE_FACILITY 0 -#define ALT_TYPE_SPEC 1 -#define ALT_TYPE_LOWCORE 2 +#define ALT_TYPE_FEATURE 1 +#define ALT_TYPE_SPEC 2 #define ALT_DATA_SHIFT 0 #define ALT_TYPE_SHIFT 20 @@ -43,13 +43,14 @@ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) +#define ALT_FEATURE(feature) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_FEATURE << ALT_TYPE_SHIFT | \ + (feature) << ALT_DATA_SHIFT) + #define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) -#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ - ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT) - #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h index a92ebbc7aa7a..99b2902c10fd 100644 --- a/arch/s390/include/asm/appldata.h +++ b/arch/s390/include/asm/appldata.h @@ -9,6 +9,7 @@ #define _ASM_S390_APPLDATA_H #include <linux/io.h> +#include <asm/machine.h> #include <asm/diag.h> #define APPLDATA_START_INTERVAL_REC 0x80 @@ -48,7 +49,7 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list, { int ry; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; parm_list->diag = 0xdc; parm_list->function = fn; diff --git a/arch/s390/include/asm/asce.h b/arch/s390/include/asm/asce.h new file mode 100644 index 000000000000..f6dfaaba735a --- /dev/null +++ b/arch/s390/include/asm/asce.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_ASCE_H +#define _ASM_S390_ASCE_H + +#include <linux/thread_info.h> +#include <linux/irqflags.h> +#include <asm/lowcore.h> +#include <asm/ctlreg.h> + +static inline bool enable_sacf_uaccess(void) +{ + unsigned long flags; + + if (test_thread_flag(TIF_ASCE_PRIMARY)) + return true; + local_irq_save(flags); + local_ctl_load(1, &get_lowcore()->kernel_asce); + set_thread_flag(TIF_ASCE_PRIMARY); + local_irq_restore(flags); + return false; +} + +static inline void disable_sacf_uaccess(bool previous) +{ + unsigned long flags; + + if (previous) + return; + local_irq_save(flags); + local_ctl_load(1, &get_lowcore()->user_asce); + clear_thread_flag(TIF_ASCE_PRIMARY); + local_irq_restore(flags); +} + +#endif /* _ASM_S390_ASCE_H */ diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 4a6b0a8b6412..d23ea0c94e4e 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -9,11 +9,13 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 -#define EX_TYPE_UA_STORE 3 -#define EX_TYPE_UA_LOAD_MEM 4 +#define EX_TYPE_UA_FAULT 3 #define EX_TYPE_UA_LOAD_REG 5 #define EX_TYPE_UA_LOAD_REGPAIR 6 #define EX_TYPE_ZEROPAD 7 +#define EX_TYPE_FPC 8 +#define EX_TYPE_UA_MVCOS_TO 9 +#define EX_TYPE_UA_MVCOS_FROM 10 #define EX_DATA_REG_ERR_SHIFT 0 #define EX_DATA_REG_ERR GENMASK(3, 0) @@ -69,11 +71,8 @@ #define EX_TABLE_AMODE31(_fault, _target) \ __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0) -#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \ - __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0) - -#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \ - __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len) +#define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0) #define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0) @@ -84,4 +83,13 @@ #define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0) +#define EX_TABLE_FPC(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0) + +#define EX_TABLE_UA_MVCOS_TO(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_TO, __stringify(%%r0), __stringify(%%r0), 0) + +#define EX_TABLE_UA_MVCOS_FROM(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_FROM, __stringify(%%r0), __stringify(%%r0), 0) + #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h index ec011b94af2a..e9062b01e2a2 100644 --- a/arch/s390/include/asm/asm.h +++ b/arch/s390/include/asm/asm.h @@ -28,7 +28,7 @@ * [var] also contains the program mask. CC_TRANSFORM() moves the condition * code to the two least significant bits and sets all other bits to zero. */ -#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_GCC_ASM_FLAG_OUTPUT_BROKEN)) +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN)) #define __HAVE_ASM_FLAG_OUTPUTS__ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 6723fca64018..b36dd6a1d652 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -17,13 +17,13 @@ static __always_inline int arch_atomic_read(const atomic_t *v) { - return __atomic_read(v); + return __atomic_read(&v->counter); } #define arch_atomic_read arch_atomic_read static __always_inline void arch_atomic_set(atomic_t *v, int i) { - __atomic_set(v, i); + __atomic_set(&v->counter, i); } #define arch_atomic_set arch_atomic_set @@ -45,6 +45,36 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v) } #define arch_atomic_add arch_atomic_add +static __always_inline void arch_atomic_inc(atomic_t *v) +{ + __atomic_add_const(1, &v->counter); +} +#define arch_atomic_inc arch_atomic_inc + +static __always_inline void arch_atomic_dec(atomic_t *v) +{ + __atomic_add_const(-1, &v->counter); +} +#define arch_atomic_dec arch_atomic_dec + +static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) +{ + return __atomic_add_and_test_barrier(-i, &v->counter); +} +#define arch_atomic_sub_and_test arch_atomic_sub_and_test + +static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) +{ + return __atomic_add_const_and_test_barrier(-1, &v->counter); +} +#define arch_atomic_dec_and_test arch_atomic_dec_and_test + +static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) +{ + return __atomic_add_const_and_test_barrier(1, &v->counter); +} +#define arch_atomic_inc_and_test arch_atomic_inc_and_test + #define arch_atomic_sub(_i, _v) arch_atomic_add(-(int)(_i), _v) #define arch_atomic_sub_return(_i, _v) arch_atomic_add_return(-(int)(_i), _v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) @@ -94,13 +124,13 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { - return __atomic64_read(v); + return __atomic64_read((long *)&v->counter); } #define arch_atomic64_read arch_atomic64_read static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { - __atomic64_set(v, i); + __atomic64_set((long *)&v->counter, i); } #define arch_atomic64_set arch_atomic64_set @@ -122,6 +152,36 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) } #define arch_atomic64_add arch_atomic64_add +static __always_inline void arch_atomic64_inc(atomic64_t *v) +{ + __atomic64_add_const(1, (long *)&v->counter); +} +#define arch_atomic64_inc arch_atomic64_inc + +static __always_inline void arch_atomic64_dec(atomic64_t *v) +{ + __atomic64_add_const(-1, (long *)&v->counter); +} +#define arch_atomic64_dec arch_atomic64_dec + +static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) +{ + return __atomic64_add_and_test_barrier(-i, (long *)&v->counter); +} +#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test + +static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) +{ + return __atomic64_add_const_and_test_barrier(-1, (long *)&v->counter); +} +#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test + +static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) +{ + return __atomic64_add_const_and_test_barrier(1, (long *)&v->counter); +} +#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test + static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 1d6b2056fad8..21c26d842832 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -10,50 +10,51 @@ #include <linux/limits.h> #include <asm/march.h> +#include <asm/asm.h> -static __always_inline int __atomic_read(const atomic_t *v) +static __always_inline int __atomic_read(const int *ptr) { - int c; + int val; asm volatile( - " l %[c],%[counter]\n" - : [c] "=d" (c) : [counter] "R" (v->counter)); - return c; + " l %[val],%[ptr]\n" + : [val] "=d" (val) : [ptr] "R" (*ptr)); + return val; } -static __always_inline void __atomic_set(atomic_t *v, int i) +static __always_inline void __atomic_set(int *ptr, int val) { - if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvhi %[counter], %[i]\n" - : [counter] "=Q" (v->counter) : [i] "K" (i)); + " mvhi %[ptr],%[val]\n" + : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " st %[i],%[counter]\n" - : [counter] "=R" (v->counter) : [i] "d" (i)); + " st %[val],%[ptr]\n" + : [ptr] "=R" (*ptr) : [val] "d" (val)); } } -static __always_inline s64 __atomic64_read(const atomic64_t *v) +static __always_inline long __atomic64_read(const long *ptr) { - s64 c; + long val; asm volatile( - " lg %[c],%[counter]\n" - : [c] "=d" (c) : [counter] "RT" (v->counter)); - return c; + " lg %[val],%[ptr]\n" + : [val] "=d" (val) : [ptr] "RT" (*ptr)); + return val; } -static __always_inline void __atomic64_set(atomic64_t *v, s64 i) +static __always_inline void __atomic64_set(long *ptr, long val) { - if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvghi %[counter], %[i]\n" - : [counter] "=Q" (v->counter) : [i] "K" (i)); + " mvghi %[ptr],%[val]\n" + : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " stg %[i],%[counter]\n" - : [counter] "=RT" (v->counter) : [i] "d" (i)); + " stg %[val],%[ptr]\n" + : [ptr] "=RT" (*ptr) : [val] "d" (val)); } } @@ -73,7 +74,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \ } \ #define __ATOMIC_OPS(op_name, op_type, op_string) \ - __ATOMIC_OP(op_name, op_type, op_string, "\n") \ + __ATOMIC_OP(op_name, op_type, op_string, "") \ __ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") __ATOMIC_OPS(__atomic_add, int, "laa") @@ -99,7 +100,7 @@ static __always_inline void op_name(op_type val, op_type *ptr) \ } #define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \ - __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \ + __ATOMIC_CONST_OP(op_name, op_type, op_string, "") \ __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") @@ -162,11 +163,83 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #undef __ATOMIC64_OPS -#define __atomic_add_const(val, ptr) __atomic_add(val, ptr) -#define __atomic_add_const_barrier(val, ptr) __atomic_add(val, ptr) -#define __atomic64_add_const(val, ptr) __atomic64_add(val, ptr) -#define __atomic64_add_const_barrier(val, ptr) __atomic64_add(val, ptr) +#define __atomic_add_const(val, ptr) ((void)__atomic_add(val, ptr)) +#define __atomic_add_const_barrier(val, ptr) ((void)__atomic_add(val, ptr)) +#define __atomic64_add_const(val, ptr) ((void)__atomic64_add(val, ptr)) +#define __atomic64_add_const_barrier(val, ptr) ((void)__atomic64_add(val, ptr)) #endif /* MARCH_HAS_Z196_FEATURES */ +#if defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) + +#define __ATOMIC_TEST_OP(op_name, op_type, op_string, op_barrier) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + op_type tmp; \ + int cc; \ + \ + asm volatile( \ + op_string " %[tmp],%[val],%[ptr]\n" \ + op_barrier \ + : "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr) \ + : [val] "d" (val) \ + : "memory"); \ + return (cc == 0) || (cc == 2); \ +} \ + +#define __ATOMIC_TEST_OPS(op_name, op_type, op_string) \ + __ATOMIC_TEST_OP(op_name, op_type, op_string, "") \ + __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + +__ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal") +__ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg") + +#undef __ATOMIC_TEST_OPS +#undef __ATOMIC_TEST_OP + +#define __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, op_barrier) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + int cc; \ + \ + asm volatile( \ + op_string " %[ptr],%[val]\n" \ + op_barrier \ + : "=@cc" (cc), [ptr] "+QS" (*ptr) \ + : [val] "i" (val) \ + : "memory"); \ + return (cc == 0) || (cc == 2); \ +} + +#define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string) \ + __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "") \ + __ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + +__ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi") +__ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi") + +#undef __ATOMIC_CONST_TEST_OPS +#undef __ATOMIC_CONST_TEST_OP + +#else /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */ + +#define __ATOMIC_TEST_OP(op_name, op_func, op_type) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + return op_func(val, ptr) == -val; \ +} + +__ATOMIC_TEST_OP(__atomic_add_and_test, __atomic_add, int) +__ATOMIC_TEST_OP(__atomic_add_and_test_barrier, __atomic_add_barrier, int) +__ATOMIC_TEST_OP(__atomic_add_const_and_test, __atomic_add, int) +__ATOMIC_TEST_OP(__atomic_add_const_and_test_barrier, __atomic_add_barrier, int) +__ATOMIC_TEST_OP(__atomic64_add_and_test, __atomic64_add, long) +__ATOMIC_TEST_OP(__atomic64_add_and_test_barrier, __atomic64_add_barrier, long) +__ATOMIC_TEST_OP(__atomic64_add_const_and_test, __atomic64_add, long) +__ATOMIC_TEST_OP(__atomic64_add_const_and_test_barrier, __atomic64_add_barrier, long) + +#undef __ATOMIC_TEST_OP + +#endif /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */ + #endif /* __ARCH_S390_ATOMIC_OPS__ */ diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 54a079cd39ed..a5ca0a947691 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -36,184 +36,45 @@ #include <linux/typecheck.h> #include <linux/compiler.h> #include <linux/types.h> -#include <asm/atomic_ops.h> -#include <asm/barrier.h> - -#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) - -static inline unsigned long * -__bitops_word(unsigned long nr, const volatile unsigned long *ptr) -{ - unsigned long addr; - - addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3); - return (unsigned long *)addr; -} - -static inline unsigned long __bitops_mask(unsigned long nr) -{ - return 1UL << (nr & (BITS_PER_LONG - 1)); -} - -static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_or(mask, (long *)addr); -} - -static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_and(~mask, (long *)addr); -} - -static __always_inline void arch_change_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_xor(mask, (long *)addr); -} - -static inline bool arch_test_and_set_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_or_barrier(mask, (long *)addr); - return old & mask; -} - -static inline bool arch_test_and_clear_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_and_barrier(~mask, (long *)addr); - return old & mask; -} - -static inline bool arch_test_and_change_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_xor_barrier(mask, (long *)addr); - return old & mask; -} - -static __always_inline void -arch___set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p |= mask; -} - -static __always_inline void -arch___clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p &= ~mask; -} - -static __always_inline void -arch___change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p ^= mask; -} - -static __always_inline bool -arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p |= mask; - return old & mask; -} - -static __always_inline bool -arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p &= ~mask; - return old & mask; -} - -static __always_inline bool -arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p ^= mask; - return old & mask; -} - -#define arch_test_bit generic_test_bit -#define arch_test_bit_acquire generic_test_bit_acquire - -static inline bool arch_test_and_set_bit_lock(unsigned long nr, - volatile unsigned long *ptr) -{ - if (arch_test_bit(nr, ptr)) - return true; - return arch_test_and_set_bit(nr, ptr); -} - -static inline void arch_clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) -{ - smp_mb__before_atomic(); - arch_clear_bit(nr, ptr); -} - -static inline void arch___clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) -{ - smp_mb(); - arch___clear_bit(nr, ptr); -} - -static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, - volatile unsigned long *ptr) -{ - unsigned long old; - - old = __atomic64_xor_barrier(mask, (long *)ptr); - return old & BIT(7); +#include <asm/asm.h> + +#define arch___set_bit generic___set_bit +#define arch___clear_bit generic___clear_bit +#define arch___change_bit generic___change_bit +#define arch___test_and_set_bit generic___test_and_set_bit +#define arch___test_and_clear_bit generic___test_and_clear_bit +#define arch___test_and_change_bit generic___test_and_change_bit +#define arch_test_bit_acquire generic_test_bit_acquire + +static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsigned long *ptr) +{ +#ifdef __HAVE_ASM_FLAG_OUTPUTS__ + const volatile unsigned char *addr; + unsigned long mask; + int cc; + + /* + * With CONFIG_PROFILE_ALL_BRANCHES enabled gcc fails to + * handle __builtin_constant_p() in some cases. + */ + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && __builtin_constant_p(nr)) { + addr = (const volatile unsigned char *)ptr; + addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE; + mask = 1UL << (nr & (BITS_PER_BYTE - 1)); + asm volatile( + " tm %[addr],%[mask]\n" + : "=@cc" (cc) + : [addr] "Q" (*addr), [mask] "I" (mask) + ); + return cc == 3; + } +#endif + return generic_test_bit(nr, ptr); } -#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte -#include <asm-generic/bitops/instrumented-atomic.h> -#include <asm-generic/bitops/instrumented-non-atomic.h> -#include <asm-generic/bitops/instrumented-lock.h> +#include <asm-generic/bitops/atomic.h> +#include <asm-generic/bitops/non-instrumented-non-atomic.h> +#include <asm-generic/bitops/lock.h> /* * Functions which use MSB0 bit numbering. diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index f7eed27b3220..f55f8227058e 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_S390_BOOT_DATA_H +#include <linux/string.h> #include <asm/setup.h> #include <asm/ipl.h> @@ -15,4 +16,54 @@ extern unsigned long ipl_cert_list_size; extern unsigned long early_ipl_comp_list_addr; extern unsigned long early_ipl_comp_list_size; +extern char boot_rb[PAGE_SIZE * 2]; +extern bool boot_earlyprintk; +extern size_t boot_rb_off; +extern char bootdebug_filter[128]; +extern bool bootdebug; + +#define boot_rb_foreach(cb) \ + do { \ + size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1; \ + size_t len; \ + for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + } while (0) + +/* + * bootdebug_filter is a comma separated list of strings, + * where each string can be a prefix of the message. + */ +static inline bool bootdebug_filter_match(const char *buf) +{ + char *p = bootdebug_filter, *s; + char *end; + + if (!*p) + return true; + + end = p + strlen(p); + while (p < end) { + p = skip_spaces(p); + s = memscan(p, ',', end - p); + if (!strncmp(p, buf, s - p)) + return true; + p = s + 1; + } + return false; +} + +static inline const char *skip_timestamp(const char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + const char *p = memchr(buf, ']', strlen(buf)); + + if (p && p[1] == ' ') + return p + 2; +#endif + return buf; +} + #endif /* _ASM_S390_BOOT_DATA_H */ diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 46f5c9660616..d86dea5900e7 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -25,7 +25,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum) instrument_read(buff, len); kmsan_check_memory(buff, len); - asm volatile("\n" + asm volatile( "0: cksm %[sum],%[rp]\n" " jo 0b\n" : [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory"); diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 59ab1192e2d5..54cb97603ec0 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -649,18 +649,30 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len, * instruction * @func: the function code passed to PCC; see CPACF_KM_xxx defines * @param: address of parameter block; see POP for details on each func + * + * Returns the condition code, this is + * 0 - cc code 0 (normal completion) + * 1 - cc code 1 (protected key wkvp mismatch or src operand out of range) + * 2 - cc code 2 (something invalid, scalar multiply infinity, ...) + * Condition code 3 (partial completion) is handled within the asm code + * and never returned. */ -static inline void cpacf_pcc(unsigned long func, void *param) +static inline int cpacf_pcc(unsigned long func, void *param) { + int cc; + asm volatile( " lgr 0,%[fc]\n" " lgr 1,%[pba]\n" "0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */ " brc 1,0b\n" /* handle partial completion */ - : + CC_IPM(cc) + : CC_OUT(cc, cc) : [fc] "d" (func), [pba] "d" ((unsigned long)param), [opc] "i" (CPACF_PCC) - : "cc", "memory", "0", "1"); + : CC_CLOBBER_LIST("memory", "0", "1")); + + return CC_TRANSFORM(cc); } /** diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index e1a279e0d6a6..1798fbd59068 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -171,7 +171,7 @@ static inline int qctri(struct cpumf_ctr_info *info) { int rc = -EINVAL; - asm volatile ( + asm_inline volatile ( "0: qctri %1\n" "1: lhi %0,0\n" "2:\n" @@ -185,7 +185,7 @@ static inline int lcctl(u64 ctl) { int cc; - asm volatile ( + asm_inline volatile ( " lcctl %[ctl]\n" CC_IPM(cc) : CC_OUT(cc, cc) @@ -200,7 +200,7 @@ static inline int __ecctr(u64 ctr, u64 *content) u64 _content; int cc; - asm volatile ( + asm_inline volatile ( " ecctr %[_content],%[ctr]\n" CC_IPM(cc) : CC_OUT(cc, cc), [_content] "=d" (_content) diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h index 931204613753..6c6a99660e78 100644 --- a/arch/s390/include/asm/cpufeature.h +++ b/arch/s390/include/asm/cpufeature.h @@ -9,10 +9,13 @@ #ifndef __ASM_S390_CPUFEATURE_H #define __ASM_S390_CPUFEATURE_H +#include <asm/facility.h> + enum { S390_CPU_FEATURE_MSA, S390_CPU_FEATURE_VXRS, S390_CPU_FEATURE_UV, + S390_CPU_FEATURE_D288, MAX_CPU_FEATURES }; @@ -20,4 +23,16 @@ enum { int cpu_have_feature(unsigned int nr); +#define cpu_has_bear() test_facility(193) +#define cpu_has_edat1() test_facility(8) +#define cpu_has_edat2() test_facility(78) +#define cpu_has_gs() test_facility(133) +#define cpu_has_idte() test_facility(3) +#define cpu_has_nx() test_facility(130) +#define cpu_has_rdp() test_facility(194) +#define cpu_has_seq_insn() test_facility(85) +#define cpu_has_tlb_lc() test_facility(51) +#define cpu_has_topology() test_facility(11) +#define cpu_has_vx() test_facility(129) + #endif /* __ASM_S390_CPUFEATURE_H */ diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index 638137d46c85..a03f64033760 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -25,7 +25,7 @@ struct css_general_char { u64 : 2; u64 : 3; - u64 aif_osa : 1; /* bit 67 */ + u64 aif_qdio : 1;/* bit 67 */ u64 : 12; u64 eadm_rf : 1; /* bit 80 */ u64 : 1; diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h index d03a922c641e..f9529f7cf62c 100644 --- a/arch/s390/include/asm/current.h +++ b/arch/s390/include/asm/current.h @@ -11,9 +11,25 @@ #define _S390_CURRENT_H #include <asm/lowcore.h> +#include <asm/machine.h> struct task_struct; -#define current ((struct task_struct *const)get_lowcore()->current_task) +static __always_inline struct task_struct *get_current(void) +{ + unsigned long ptr, lc_current; + + lc_current = offsetof(struct lowcore, current_task); + asm_inline( + ALTERNATIVE(" lg %[ptr],%[offzero](%%r0)\n", + " lg %[ptr],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [ptr] "=d" (ptr) + : [offzero] "i" (lc_current), + [offalt] "i" (lc_current + LOWCORE_ALT_ADDRESS)); + return (struct task_struct *)ptr; +} + +#define current get_current() #endif /* !(_S390_CURRENT_H) */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index a7f7bdc9e19c..6375276d94ea 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -85,6 +85,10 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf, size_t out_buf_size); +#define DEBUG_SPRINTF_MAX_ARGS 10 +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, + const char *inbuf); struct debug_view { char name[DEBUG_MAX_NAME_LEN]; debug_prolog_proc_t *prolog_proc; @@ -114,6 +118,9 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid); +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size, bool reverse); + void debug_unregister(debug_info_t *id); void debug_set_level(debug_info_t *id, int new_level); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index e1316e181230..8db8db3b1018 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -36,8 +36,10 @@ enum diag_stat_enum { DIAG_STAT_X2FC, DIAG_STAT_X304, DIAG_STAT_X308, + DIAG_STAT_X310, DIAG_STAT_X318, DIAG_STAT_X320, + DIAG_STAT_X324, DIAG_STAT_X49C, DIAG_STAT_X500, NR_DIAG_STAT @@ -64,7 +66,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) end_addr = pfn_to_phys(start_pfn + num_pfn - 1); diag_stat_inc(DIAG_STAT_X010); - asm volatile( + asm_inline volatile( "0: diag %0,%1,0x10\n" "1: nopr %%r7\n" EX_TABLE(0b, 1b) diff --git a/arch/s390/include/asm/diag288.h b/arch/s390/include/asm/diag288.h new file mode 100644 index 000000000000..5e1b43cea9d6 --- /dev/null +++ b/arch/s390/include/asm/diag288.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_DIAG288_H +#define _ASM_S390_DIAG288_H + +#include <asm/asm-extable.h> +#include <asm/types.h> + +#define MIN_INTERVAL 15 /* Minimal time supported by diag288 */ +#define MAX_INTERVAL 3600 /* One hour should be enough - pure estimation */ + +#define WDT_DEFAULT_TIMEOUT 30 + +/* Function codes - init, change, cancel */ +#define WDT_FUNC_INIT 0 +#define WDT_FUNC_CHANGE 1 +#define WDT_FUNC_CANCEL 2 +#define WDT_FUNC_CONCEAL 0x80000000 + +/* Action codes for LPAR watchdog */ +#define LPARWDT_RESTART 0 + +static inline int __diag288(unsigned int func, unsigned int timeout, + unsigned long action, unsigned int len) +{ + union register_pair r1 = { .even = func, .odd = timeout, }; + union register_pair r3 = { .even = action, .odd = len, }; + int rc = -EINVAL; + + asm volatile( + " diag %[r1],%[r3],0x288\n" + "0: lhi %[rc],0\n" + "1:" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc) + : [r1] "d" (r1.pair), [r3] "d" (r3.pair) + : "cc", "memory"); + return rc; +} + +#endif /* _ASM_S390_DIAG288_H */ diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h index efb50fc6866c..7164cb658435 100644 --- a/arch/s390/include/asm/ebcdic.h +++ b/arch/s390/include/asm/ebcdic.h @@ -22,18 +22,18 @@ extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */ static inline void codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr) { - if (nr-- <= 0) + if (!nr--) return; asm volatile( - " bras 1,1f\n" - " tr 0(1,%0),0(%2)\n" - "0: tr 0(256,%0),0(%2)\n" + " j 2f\n" + "0: tr 0(1,%0),0(%2)\n" + "1: tr 0(256,%0),0(%2)\n" " la %0,256(%0)\n" - "1: ahi %1,-256\n" - " jnm 0b\n" - " ex %1,0(1)" + "2: aghi %1,-256\n" + " jnm 1b\n" + " exrl %1,0b" : "+&a" (addr), "+&a" (nr) - : "a" (codepage) : "cc", "memory", "1"); + : "a" (codepage) : "cc", "memory"); } #define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 8f2c23cc52b6..a03df312081e 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -158,9 +158,6 @@ enum { #define ELF_DATA ELFDATA2MSB #define ELF_ARCH EM_S390 -/* s390 specific phdr types */ -#define PT_S390_PGSTE 0x70000000 - /* * ELF register definitions.. */ @@ -191,35 +188,6 @@ typedef s390_compat_regs compat_elf_gregset_t; && (x)->e_ident[EI_CLASS] == ELF_CLASS) #define compat_start_thread start_thread31 -struct arch_elf_state { - int rc; -}; - -#define INIT_ARCH_ELF_STATE { .rc = 0 } - -#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0) -#ifdef CONFIG_PGSTE -#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ -({ \ - struct arch_elf_state *_state = state; \ - if ((phdr)->p_type == PT_S390_PGSTE && \ - !page_table_allocate_pgste && \ - !test_thread_flag(TIF_PGSTE) && \ - !current->mm->context.alloc_pgste) { \ - set_thread_flag(TIF_PGSTE); \ - set_pt_regs_flag(task_pt_regs(current), \ - PIF_EXECVE_PGSTE_RESTART); \ - _state->rc = -EAGAIN; \ - } \ - _state->rc; \ -}) -#else -#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state) \ -({ \ - (state)->rc; \ -}) -#endif - /* For SVR4/S390 the function pointer to be registered with `atexit` is passed in R14. */ #define ELF_PLAT_INIT(_r, load_addr) \ diff --git a/arch/s390/include/asm/fprobe.h b/arch/s390/include/asm/fprobe.h new file mode 100644 index 000000000000..5ef600b372f4 --- /dev/null +++ b/arch/s390/include/asm/fprobe.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_FPROBE_H +#define _ASM_S390_FPROBE_H + +#include <asm-generic/fprobe.h> + +#undef FPROBE_HEADER_MSB_PATTERN +#define FPROBE_HEADER_MSB_PATTERN 0 + +#endif /* _ASM_S390_FPROBE_H */ diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index c1e2e521d9af..f668bffd6dd3 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -100,19 +100,12 @@ static __always_inline void fpu_lfpc(unsigned int *fpc) */ static inline void fpu_lfpc_safe(unsigned int *fpc) { - u32 tmp; - instrument_read(fpc, sizeof(*fpc)); - asm volatile("\n" - "0: lfpc %[fpc]\n" - "1: nopr %%r7\n" - ".pushsection .fixup, \"ax\"\n" - "2: lghi %[tmp],0\n" - " sfpc %[tmp]\n" - " jg 1b\n" - ".popsection\n" - EX_TABLE(1b, 2b) - : [tmp] "=d" (tmp) + asm_inline volatile( + " lfpc %[fpc]\n" + "0: nopr %%r7\n" + EX_TABLE_FPC(0b, 0b) + : : [fpc] "Q" (*fpc) : "memory"); } @@ -183,33 +176,33 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vl(u8 v1, const void *vxr) { instrument_read(vxr, sizeof(__vector128)); - asm volatile("\n" - " la 1,%[vxr]\n" - " VL %[v1],0,,1\n" - : - : [vxr] "R" (*(__vector128 *)vxr), - [v1] "I" (v1) - : "memory", "1"); + asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" + : + : [vxr] "Q" (*(__vector128 *)vxr), + [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vl(u8 v1, const void *vxr) { instrument_read(vxr, sizeof(__vector128)); - asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" - : - : [vxr] "Q" (*(__vector128 *)vxr), - [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VL %[v1],0,,1\n" + : + : [vxr] "R" (*(__vector128 *)vxr), + [v1] "I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vleib(u8 v, s16 val, u8 index) { @@ -238,7 +231,7 @@ static __always_inline u64 fpu_vlgvf(u8 v, u16 index) return val; } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -246,17 +239,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("\n" - " la 1,%[vxr]\n" - " VLL %[v1],%[index],0,1\n" - : - : [vxr] "R" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory", "1"); + asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" + : + : [vxr] "Q" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -264,17 +255,19 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" - : - : [vxr] "Q" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VLL %[v1],%[index],0,1\n" + : + : [vxr] "R" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -284,17 +277,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("\n" \ - " la 1,%[vxrs]\n" \ - " VLM %[v1],%[v3],0,1\n" \ - : \ - : [vxrs] "R" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : \ + : [vxrs] "Q" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -304,15 +295,17 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : \ - : [vxrs] "Q" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VLM %[v1],%[v3],0,1\n" \ + : \ + : [vxrs] "R" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vlr(u8 v1, u8 v2) { @@ -362,33 +355,33 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vst(u8 v1, const void *vxr) { instrument_write(vxr, sizeof(__vector128)); - asm volatile("\n" - " la 1,%[vxr]\n" - " VST %[v1],0,,1\n" - : [vxr] "=R" (*(__vector128 *)vxr) - : [v1] "I" (v1) - : "memory", "1"); + asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" + : [vxr] "=Q" (*(__vector128 *)vxr) + : [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vst(u8 v1, const void *vxr) { instrument_write(vxr, sizeof(__vector128)); - asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" - : [vxr] "=Q" (*(__vector128 *)vxr) - : [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VST %[v1],0,,1\n" + : [vxr] "=R" (*(__vector128 *)vxr) + : [v1] "I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { @@ -396,15 +389,13 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_write(vxr, size); - asm volatile("\n" - " la 1,%[vxr]\n" - " VSTL %[v1],%[index],0,1\n" - : [vxr] "=R" (*(u8 *)vxr) - : [index] "d" (index), [v1] "I" (v1) - : "memory", "1"); + asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" + : [vxr] "=Q" (*(u8 *)vxr) + : [index] "d" (index), [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { @@ -412,15 +403,17 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_write(vxr, size); - asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" - : [vxr] "=Q" (*(u8 *)vxr) - : [index] "d" (index), [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VSTL %[v1],%[index],0,1\n" + : [vxr] "=R" (*(u8 *)vxr) + : [index] "d" (index), [v1] "I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -430,16 +423,14 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("\n" \ - " la 1,%[vxrs]\n" \ - " VSTM %[v1],%[v3],0,1\n" \ - : [vxrs] "=R" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : [vxrs] "=Q" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -449,14 +440,16 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : [vxrs] "=Q" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VSTM %[v1],%[v3],0,1\n" \ + : [vxrs] "=R" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vupllf(u8 v1, u8 v2) { diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h index c84cb33913e2..960c6c67ad6c 100644 --- a/arch/s390/include/asm/fpu.h +++ b/arch/s390/include/asm/fpu.h @@ -44,6 +44,7 @@ #ifndef _ASM_S390_FPU_H #define _ASM_S390_FPU_H +#include <linux/cpufeature.h> #include <linux/processor.h> #include <linux/preempt.h> #include <linux/string.h> @@ -51,12 +52,6 @@ #include <asm/sigcontext.h> #include <asm/fpu-types.h> #include <asm/fpu-insn.h> -#include <asm/facility.h> - -static inline bool cpu_has_vx(void) -{ - return likely(test_facility(129)); -} enum { KERNEL_FPC_BIT = 0, diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index fc97d75dc752..185331e91f83 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -39,6 +39,7 @@ struct dyn_arch_ftrace { }; struct module; struct dyn_ftrace; +struct ftrace_ops; bool ftrace_need_init_nop(void); #define ftrace_need_init_nop ftrace_need_init_nop @@ -50,6 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } +#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip)) #include <linux/ftrace_regs.h> @@ -62,30 +64,32 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs * return NULL; } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -struct fgraph_ret_regs { - unsigned long gpr2; - unsigned long fp; -}; - -static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs) +static __always_inline void +ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, + unsigned long ip) { - return ret_regs->gpr2; + arch_ftrace_regs(fregs)->regs.psw.addr = ip; } -static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs) +#undef ftrace_regs_get_frame_pointer +static __always_inline unsigned long +ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs) { - return ret_regs->fp; + return ftrace_regs_get_stack_pointer(fregs); } -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static __always_inline void -ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, - unsigned long ip) +static __always_inline unsigned long +ftrace_regs_get_return_address(const struct ftrace_regs *fregs) { - arch_ftrace_regs(fregs)->regs.psw.addr = ip; + return arch_ftrace_regs(fregs)->regs.gprs[14]; } +#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ + (_regs)->psw.mask = 0; \ + (_regs)->psw.addr = arch_ftrace_regs(fregs)->regs.psw.addr; \ + (_regs)->gprs[15] = arch_ftrace_regs(fregs)->regs.gprs[15]; \ + } while (0) + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* * When an ftrace registered caller is tracing a function that is @@ -126,6 +130,10 @@ static inline bool arch_syscall_match_sym_name(const char *sym, return !strcmp(sym + 7, name) || !strcmp(sym + 8, name); } +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#define ftrace_graph_func ftrace_graph_func + #endif /* __ASSEMBLY__ */ #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index eaeaeb3ff0be..942f21c39697 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -2,80 +2,101 @@ #ifndef _ASM_S390_FUTEX_H #define _ASM_S390_FUTEX_H +#include <linux/instrumented.h> #include <linux/uaccess.h> #include <linux/futex.h> #include <asm/asm-extable.h> #include <asm/mmu_context.h> #include <asm/errno.h> -#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ - asm volatile( \ - " sacf 256\n" \ - "0: l %1,0(%6)\n" \ - "1:"insn \ - "2: cs %1,%2,0(%6)\n" \ - "3: jl 1b\n" \ - " lhi %0,0\n" \ - "4: sacf 768\n" \ - EX_TABLE(0b,4b) EX_TABLE(1b,4b) \ - EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ - : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ - "=m" (*uaddr) \ - : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ - "m" (*uaddr) : "cc"); +#define FUTEX_OP_FUNC(name, insn) \ +static uaccess_kmsan_or_inline int \ +__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \ +{ \ + bool sacf_flag; \ + int rc, new; \ + \ + instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \ + sacf_flag = enable_sacf_uaccess(); \ + asm_inline volatile( \ + " sacf 256\n" \ + "0: l %[old],%[uaddr]\n" \ + "1:"insn \ + "2: cs %[old],%[new],%[uaddr]\n" \ + "3: jl 1b\n" \ + " lhi %[rc],0\n" \ + "4: sacf 768\n" \ + EX_TABLE_UA_FAULT(0b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(2b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(3b, 4b, %[rc]) \ + : [rc] "=d" (rc), [old] "=&d" (*old), \ + [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \ + : [oparg] "d" (oparg) \ + : "cc"); \ + disable_sacf_uaccess(sacf_flag); \ + if (!rc) \ + instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \ + return rc; \ +} + +FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n") +FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n") +FUTEX_OP_FUNC(or, "lr %[new],%[old]\n or %[new],%[oparg]\n") +FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n") +FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n") -static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, - u32 __user *uaddr) +static inline +int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { - int oldval = 0, newval, ret; + int old, rc; switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("lr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_set(oparg, &old, uaddr); break; case FUTEX_OP_ADD: - __futex_atomic_op("lr %2,%1\nar %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_add(oparg, &old, uaddr); break; case FUTEX_OP_OR: - __futex_atomic_op("lr %2,%1\nor %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_or(oparg, &old, uaddr); break; case FUTEX_OP_ANDN: - __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_and(~oparg, &old, uaddr); break; case FUTEX_OP_XOR: - __futex_atomic_op("lr %2,%1\nxr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_xor(oparg, &old, uaddr); break; default: - ret = -ENOSYS; + rc = -ENOSYS; } - - if (!ret) - *oval = oldval; - - return ret; + if (!rc) + *oval = old; + return rc; } -static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) +static uaccess_kmsan_or_inline +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - int ret; + bool sacf_flag; + int rc; - asm volatile( - " sacf 256\n" - "0: cs %1,%4,0(%5)\n" - "1: la %0,0\n" - "2: sacf 768\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) - : "=d" (ret), "+d" (oldval), "=m" (*uaddr) - : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + instrument_copy_from_user_before(uval, uaddr, sizeof(*uval)); + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( + " sacf 256\n" + "0: cs %[old],%[new],%[uaddr]\n" + "1: lhi %[rc],0\n" + "2: sacf 768\n" + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) + : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr) + : [new] "d" (newval) : "cc", "memory"); + disable_sacf_uaccess(sacf_flag); *uval = oldval; - return ret; + instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0); + return rc; } #endif /* _ASM_S390_FUTEX_H */ diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 13f51a6a5bb1..66c5808fd011 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -23,7 +23,6 @@ /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list - * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct * @guest_to_host: radix tree with guest to host address translation * @host_to_guest: radix tree with pointer to segment table entries @@ -35,7 +34,6 @@ * @guest_handle: protected virtual machine handle for the ultravisor * @host_to_rmap: radix tree with gmap_rmap lists * @children: list of shadow gmap structures - * @pt_list: list of all page tables used in the shadow guest address space * @shadow_lock: spinlock to protect the shadow gmap list * @parent: pointer to the parent gmap for shadow guest address spaces * @orig_asce: ASCE for which the shadow page table has been created @@ -45,7 +43,6 @@ */ struct gmap { struct list_head list; - struct list_head crst_list; struct mm_struct *mm; struct radix_tree_root guest_to_host; struct radix_tree_root host_to_guest; @@ -61,7 +58,6 @@ struct gmap { /* Additional data for shadow guest address spaces */ struct radix_tree_root host_to_rmap; struct list_head children; - struct list_head pt_list; spinlock_t shadow_lock; struct gmap *parent; unsigned long orig_asce; @@ -106,23 +102,20 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit); void gmap_remove(struct gmap *gmap); struct gmap *gmap_get(struct gmap *gmap); void gmap_put(struct gmap *gmap); +void gmap_free(struct gmap *gmap); +struct gmap *gmap_alloc(unsigned long limit); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long gmap_translate(struct gmap *, unsigned long gaddr); int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); -void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val); -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level); -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level); +void gmap_unshadow(struct gmap *sg); int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake); int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, @@ -131,24 +124,20 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake); int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake); -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, int *fake); int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte); void gmap_register_pte_notifier(struct gmap_notifier *); void gmap_unregister_pte_notifier(struct gmap_notifier *); -int gmap_mprotect_notify(struct gmap *, unsigned long start, - unsigned long len, int prot); +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits); void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); -int s390_disable_cow_sharing(void); -void s390_unlist_old_asce(struct gmap *gmap); int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool interruptible); +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); /** * s390_uv_destroy_range - Destroy a range of pages in the given mm. diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h new file mode 100644 index 000000000000..5356446a61c4 --- /dev/null +++ b/arch/s390/include/asm/gmap_helpers.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Helper functions for KVM guest address space mapping code + * + * Copyright IBM Corp. 2025 + */ + +#ifndef _ASM_S390_GMAP_HELPERS_H +#define _ASM_S390_GMAP_HELPERS_H + +void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr); +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end); +int gmap_helper_disable_cow_sharing(void); + +#endif /* _ASM_S390_GMAP_HELPERS_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index a40664b236e9..931fcc413598 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -9,23 +9,33 @@ #ifndef _ASM_S390_HUGETLB_H #define _ASM_S390_HUGETLB_H +#include <linux/cpufeature.h> #include <linux/pgtable.h> #include <linux/swap.h> #include <linux/swapops.h> #include <asm/page.h> -#define hugepages_supported() (MACHINE_HAS_EDAT1) +#define hugepages_supported() cpu_has_edat1() #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte); + #define __HAVE_ARCH_HUGE_PTEP_GET -extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned long sz) +{ + return __huge_ptep_get_and_clear(mm, addr, ptep); +} static inline void arch_clear_hugetlb_flags(struct folio *folio) { @@ -47,7 +57,7 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, address, ptep); + return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS @@ -56,8 +66,9 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t pte, int dirty) { int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte); + if (changed) { - huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + __huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } return changed; @@ -67,20 +78,9 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); - __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); -} - -#define __HAVE_ARCH_HUGE_PTE_NONE -static inline int huge_pte_none(pte_t pte) -{ - return pte_none(pte); -} + pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep); -#define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY -static inline int huge_pte_none_mostly(pte_t pte) -{ - return huge_pte_none(pte) || is_pte_marker(pte); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } #define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index fc9933a743d6..faddb9aef3b8 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -33,9 +33,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) #define ioremap_wc(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(pgprot_writecombine(PAGE_KERNEL))) -#define ioremap_wt(addr, size) \ - ioremap_prot((addr), (size), pgprot_val(pgprot_writethrough(PAGE_KERNEL))) + ioremap_prot((addr), (size), pgprot_writecombine(PAGE_KERNEL)) static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) { diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index d9e705f4a697..bde6a496df5f 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -54,7 +54,6 @@ enum interruption_class { IRQIO_C70, IRQIO_TAP, IRQIO_VMR, - IRQIO_LCS, IRQIO_CTC, IRQIO_ADM, IRQIO_CSC, diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h index e47fd8cbe701..e95e35eb8a3f 100644 --- a/arch/s390/include/asm/kfence.h +++ b/arch/s390/include/asm/kfence.h @@ -12,27 +12,16 @@ void __kernel_map_pages(struct page *page, int numpages, int enable); static __always_inline bool arch_kfence_init_pool(void) { - return true; -} - -#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK) - -/* - * Do not split kfence pool to 4k mapping with arch_kfence_init_pool(), - * but earlier where page table allocations still happen with memblock. - * Reason is that arch_kfence_init_pool() gets called when the system - * is still in a limbo state - disabling and enabling bottom halves is - * not yet allowed, but that is what our page_table_alloc() would do. - */ -static __always_inline void kfence_split_mapping(void) -{ #ifdef CONFIG_KFENCE unsigned long pool_pages = KFENCE_POOL_SIZE >> PAGE_SHIFT; set_memory_4k((unsigned long)__kfence_pool, pool_pages); #endif + return true; } +#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK) + static inline bool kfence_protect_page(unsigned long addr, bool protect) { __kernel_map_pages(virt_to_page((void *)addr), 1, !protect); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 97c7c8127543..cb89e54ada25 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -20,16 +20,17 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/mmu_notifier.h> +#include <asm/kvm_host_types.h> #include <asm/debug.h> #include <asm/cpu.h> #include <asm/fpu.h> #include <asm/isc.h> #include <asm/guarded_storage.h> -#define KVM_S390_BSCA_CPU_SLOTS 64 -#define KVM_S390_ESCA_CPU_SLOTS 248 #define KVM_MAX_VCPUS 255 +#define KVM_INTERNAL_MEM_SLOTS 1 + /* * These seem to be used for allocating ->chip in the routing table, which we * don't use. 1 is as small as we can get to reduce the needed memory. If we @@ -49,342 +50,6 @@ #define KVM_REQ_REFRESH_GUEST_PREFIX \ KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define SIGP_CTRL_C 0x80 -#define SIGP_CTRL_SCN_MASK 0x3f - -union bsca_sigp_ctrl { - __u8 value; - struct { - __u8 c : 1; - __u8 r : 1; - __u8 scn : 6; - }; -}; - -union esca_sigp_ctrl { - __u16 value; - struct { - __u8 c : 1; - __u8 reserved: 7; - __u8 scn; - }; -}; - -struct esca_entry { - union esca_sigp_ctrl sigp_ctrl; - __u16 reserved1[3]; - __u64 sda; - __u64 reserved2[6]; -}; - -struct bsca_entry { - __u8 reserved0; - union bsca_sigp_ctrl sigp_ctrl; - __u16 reserved[3]; - __u64 sda; - __u64 reserved2[2]; -}; - -union ipte_control { - unsigned long val; - struct { - unsigned long k : 1; - unsigned long kh : 31; - unsigned long kg : 32; - }; -}; - -/* - * Utility is defined as two bytes but having it four bytes wide - * generates more efficient code. Since the following bytes are - * reserved this makes no functional difference. - */ -union sca_utility { - __u32 val; - struct { - __u32 mtcr : 1; - __u32 : 31; - }; -}; - -struct bsca_block { - union ipte_control ipte_control; - __u64 reserved[5]; - __u64 mcn; - union sca_utility utility; - __u8 reserved2[4]; - struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; -}; - -struct esca_block { - union ipte_control ipte_control; - __u64 reserved1[6]; - union sca_utility utility; - __u8 reserved2[4]; - __u64 mcn[4]; - __u64 reserved3[20]; - struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; -}; - -/* - * This struct is used to store some machine check info from lowcore - * for machine checks that happen while the guest is running. - * This info in host's lowcore might be overwritten by a second machine - * check from host when host is in the machine check's high-level handling. - * The size is 24 bytes. - */ -struct mcck_volatile_info { - __u64 mcic; - __u64 failing_storage_address; - __u32 ext_damage_code; - __u32 reserved; -}; - -#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ - CR0_MEASUREMENT_ALERT_SUBMASK) -#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ - CR14_EXTERNAL_DAMAGE_SUBMASK) - -#define SIDAD_SIZE_MASK 0xff -#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) -#define sida_size(sie_block) \ - ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) - -#define CPUSTAT_STOPPED 0x80000000 -#define CPUSTAT_WAIT 0x10000000 -#define CPUSTAT_ECALL_PEND 0x08000000 -#define CPUSTAT_STOP_INT 0x04000000 -#define CPUSTAT_IO_INT 0x02000000 -#define CPUSTAT_EXT_INT 0x01000000 -#define CPUSTAT_RUNNING 0x00800000 -#define CPUSTAT_RETAINED 0x00400000 -#define CPUSTAT_TIMING_SUB 0x00020000 -#define CPUSTAT_SIE_SUB 0x00010000 -#define CPUSTAT_RRF 0x00008000 -#define CPUSTAT_SLSV 0x00004000 -#define CPUSTAT_SLSR 0x00002000 -#define CPUSTAT_ZARCH 0x00000800 -#define CPUSTAT_MCDS 0x00000100 -#define CPUSTAT_KSS 0x00000200 -#define CPUSTAT_SM 0x00000080 -#define CPUSTAT_IBS 0x00000040 -#define CPUSTAT_GED2 0x00000010 -#define CPUSTAT_G 0x00000008 -#define CPUSTAT_GED 0x00000004 -#define CPUSTAT_J 0x00000002 -#define CPUSTAT_P 0x00000001 - -struct kvm_s390_sie_block { - atomic_t cpuflags; /* 0x0000 */ - __u32 : 1; /* 0x0004 */ - __u32 prefix : 18; - __u32 : 1; - __u32 ibc : 12; - __u8 reserved08[4]; /* 0x0008 */ -#define PROG_IN_SIE (1<<0) - __u32 prog0c; /* 0x000c */ - union { - __u8 reserved10[16]; /* 0x0010 */ - struct { - __u64 pv_handle_cpu; - __u64 pv_handle_config; - }; - }; -#define PROG_BLOCK_SIE (1<<0) -#define PROG_REQUEST (1<<1) - atomic_t prog20; /* 0x0020 */ - __u8 reserved24[4]; /* 0x0024 */ - __u64 cputm; /* 0x0028 */ - __u64 ckc; /* 0x0030 */ - __u64 epoch; /* 0x0038 */ - __u32 svcc; /* 0x0040 */ -#define LCTL_CR0 0x8000 -#define LCTL_CR6 0x0200 -#define LCTL_CR9 0x0040 -#define LCTL_CR10 0x0020 -#define LCTL_CR11 0x0010 -#define LCTL_CR14 0x0002 - __u16 lctl; /* 0x0044 */ - __s16 icpua; /* 0x0046 */ -#define ICTL_OPEREXC 0x80000000 -#define ICTL_PINT 0x20000000 -#define ICTL_LPSW 0x00400000 -#define ICTL_STCTL 0x00040000 -#define ICTL_ISKE 0x00004000 -#define ICTL_SSKE 0x00002000 -#define ICTL_RRBE 0x00001000 -#define ICTL_TPROT 0x00000200 - __u32 ictl; /* 0x0048 */ -#define ECA_CEI 0x80000000 -#define ECA_IB 0x40000000 -#define ECA_SIGPI 0x10000000 -#define ECA_MVPGI 0x01000000 -#define ECA_AIV 0x00200000 -#define ECA_VX 0x00020000 -#define ECA_PROTEXCI 0x00002000 -#define ECA_APIE 0x00000008 -#define ECA_SII 0x00000001 - __u32 eca; /* 0x004c */ -#define ICPT_INST 0x04 -#define ICPT_PROGI 0x08 -#define ICPT_INSTPROGI 0x0C -#define ICPT_EXTREQ 0x10 -#define ICPT_EXTINT 0x14 -#define ICPT_IOREQ 0x18 -#define ICPT_WAIT 0x1c -#define ICPT_VALIDITY 0x20 -#define ICPT_STOP 0x28 -#define ICPT_OPEREXC 0x2C -#define ICPT_PARTEXEC 0x38 -#define ICPT_IOINST 0x40 -#define ICPT_KSS 0x5c -#define ICPT_MCHKREQ 0x60 -#define ICPT_INT_ENABLE 0x64 -#define ICPT_PV_INSTR 0x68 -#define ICPT_PV_NOTIFY 0x6c -#define ICPT_PV_PREF 0x70 - __u8 icptcode; /* 0x0050 */ - __u8 icptstatus; /* 0x0051 */ - __u16 ihcpu; /* 0x0052 */ - __u8 reserved54; /* 0x0054 */ -#define IICTL_CODE_NONE 0x00 -#define IICTL_CODE_MCHK 0x01 -#define IICTL_CODE_EXT 0x02 -#define IICTL_CODE_IO 0x03 -#define IICTL_CODE_RESTART 0x04 -#define IICTL_CODE_SPECIFICATION 0x10 -#define IICTL_CODE_OPERAND 0x11 - __u8 iictl; /* 0x0055 */ - __u16 ipa; /* 0x0056 */ - __u32 ipb; /* 0x0058 */ - __u32 scaoh; /* 0x005c */ -#define FPF_BPBC 0x20 - __u8 fpf; /* 0x0060 */ -#define ECB_GS 0x40 -#define ECB_TE 0x10 -#define ECB_SPECI 0x08 -#define ECB_SRSI 0x04 -#define ECB_HOSTPROTINT 0x02 -#define ECB_PTF 0x01 - __u8 ecb; /* 0x0061 */ -#define ECB2_CMMA 0x80 -#define ECB2_IEP 0x20 -#define ECB2_PFMFI 0x08 -#define ECB2_ESCA 0x04 -#define ECB2_ZPCI_LSI 0x02 - __u8 ecb2; /* 0x0062 */ -#define ECB3_AISI 0x20 -#define ECB3_AISII 0x10 -#define ECB3_DEA 0x08 -#define ECB3_AES 0x04 -#define ECB3_RI 0x01 - __u8 ecb3; /* 0x0063 */ -#define ESCA_SCAOL_MASK ~0x3fU - __u32 scaol; /* 0x0064 */ - __u8 sdf; /* 0x0068 */ - __u8 epdx; /* 0x0069 */ - __u8 cpnc; /* 0x006a */ - __u8 reserved6b; /* 0x006b */ - __u32 todpr; /* 0x006c */ -#define GISA_FORMAT1 0x00000001 - __u32 gd; /* 0x0070 */ - __u8 reserved74[12]; /* 0x0074 */ - __u64 mso; /* 0x0080 */ - __u64 msl; /* 0x0088 */ - psw_t gpsw; /* 0x0090 */ - __u64 gg14; /* 0x00a0 */ - __u64 gg15; /* 0x00a8 */ - __u8 reservedb0[8]; /* 0x00b0 */ -#define HPID_KVM 0x4 -#define HPID_VSIE 0x5 - __u8 hpid; /* 0x00b8 */ - __u8 reservedb9[7]; /* 0x00b9 */ - union { - struct { - __u32 eiparams; /* 0x00c0 */ - __u16 extcpuaddr; /* 0x00c4 */ - __u16 eic; /* 0x00c6 */ - }; - __u64 mcic; /* 0x00c0 */ - } __packed; - __u32 reservedc8; /* 0x00c8 */ - union { - struct { - __u16 pgmilc; /* 0x00cc */ - __u16 iprcc; /* 0x00ce */ - }; - __u32 edc; /* 0x00cc */ - } __packed; - union { - struct { - __u32 dxc; /* 0x00d0 */ - __u16 mcn; /* 0x00d4 */ - __u8 perc; /* 0x00d6 */ - __u8 peratmid; /* 0x00d7 */ - }; - __u64 faddr; /* 0x00d0 */ - } __packed; - __u64 peraddr; /* 0x00d8 */ - __u8 eai; /* 0x00e0 */ - __u8 peraid; /* 0x00e1 */ - __u8 oai; /* 0x00e2 */ - __u8 armid; /* 0x00e3 */ - __u8 reservede4[4]; /* 0x00e4 */ - union { - __u64 tecmc; /* 0x00e8 */ - struct { - __u16 subchannel_id; /* 0x00e8 */ - __u16 subchannel_nr; /* 0x00ea */ - __u32 io_int_parm; /* 0x00ec */ - __u32 io_int_word; /* 0x00f0 */ - }; - } __packed; - __u8 reservedf4[8]; /* 0x00f4 */ -#define CRYCB_FORMAT_MASK 0x00000003 -#define CRYCB_FORMAT0 0x00000000 -#define CRYCB_FORMAT1 0x00000001 -#define CRYCB_FORMAT2 0x00000003 - __u32 crycbd; /* 0x00fc */ - __u64 gcr[16]; /* 0x0100 */ - union { - __u64 gbea; /* 0x0180 */ - __u64 sidad; - }; - __u8 reserved188[8]; /* 0x0188 */ - __u64 sdnxo; /* 0x0190 */ - __u8 reserved198[8]; /* 0x0198 */ - __u32 fac; /* 0x01a0 */ - __u8 reserved1a4[20]; /* 0x01a4 */ - __u64 cbrlo; /* 0x01b8 */ - __u8 reserved1c0[8]; /* 0x01c0 */ -#define ECD_HOSTREGMGMT 0x20000000 -#define ECD_MEF 0x08000000 -#define ECD_ETOKENF 0x02000000 -#define ECD_ECC 0x00200000 -#define ECD_HMAC 0x00004000 - __u32 ecd; /* 0x01c8 */ - __u8 reserved1cc[18]; /* 0x01cc */ - __u64 pp; /* 0x01de */ - __u8 reserved1e6[2]; /* 0x01e6 */ - __u64 itdba; /* 0x01e8 */ - __u64 riccbd; /* 0x01f0 */ - __u64 gvrd; /* 0x01f8 */ -} __packed __aligned(512); - -struct kvm_s390_itdb { - __u8 data[256]; -}; - -struct sie_page { - struct kvm_s390_sie_block sie_block; - struct mcck_volatile_info mcck_info; /* 0x0200 */ - __u8 reserved218[360]; /* 0x0218 */ - __u64 pv_grregs[16]; /* 0x0380 */ - __u8 reserved400[512]; /* 0x0400 */ - struct kvm_s390_itdb itdb; /* 0x0600 */ - __u8 reserved700[2304]; /* 0x0700 */ -}; - struct kvm_vcpu_stat { struct kvm_vcpu_stat_generic generic; u64 exit_userspace; @@ -931,12 +596,14 @@ struct sie_page2 { u8 reserved928[0x1000 - 0x928]; /* 0x0928 */ }; +struct vsie_page; + struct kvm_s390_vsie { struct mutex mutex; struct radix_tree_root addr_to_page; int page_count; int next; - struct page *pages[KVM_MAX_VCPUS]; + struct vsie_page *pages[KVM_MAX_VCPUS]; }; struct kvm_s390_gisa_iam { @@ -1052,7 +719,6 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu); extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); -static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} diff --git a/arch/s390/include/asm/kvm_host_types.h b/arch/s390/include/asm/kvm_host_types.h new file mode 100644 index 000000000000..1394d3fb648f --- /dev/null +++ b/arch/s390/include/asm/kvm_host_types.h @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_KVM_HOST_TYPES_H +#define _ASM_KVM_HOST_TYPES_H + +#include <linux/atomic.h> +#include <linux/types.h> + +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 + +#define SIGP_CTRL_C 0x80 +#define SIGP_CTRL_SCN_MASK 0x3f + +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +}; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +}; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +}; + +struct bsca_entry { + __u8 reserved0; + union bsca_sigp_ctrl sigp_ctrl; + __u16 reserved[3]; + __u64 sda; + __u64 reserved2[2]; +}; + +union ipte_control { + unsigned long val; + struct { + unsigned long k : 1; + unsigned long kh : 31; + unsigned long kg : 32; + }; +}; + +/* + * Utility is defined as two bytes but having it four bytes wide + * generates more efficient code. Since the following bytes are + * reserved this makes no functional difference. + */ +union sca_utility { + __u32 val; + struct { + __u32 mtcr : 1; + __u32 : 31; + }; +}; + +struct bsca_block { + union ipte_control ipte_control; + __u64 reserved[5]; + __u64 mcn; + union sca_utility utility; + __u8 reserved2[4]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; +}; + +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[6]; + union sca_utility utility; + __u8 reserved2[4]; + __u64 mcn[4]; + __u64 reserved3[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +}; + +/* + * This struct is used to store some machine check info from lowcore + * for machine checks that happen while the guest is running. + * This info in host's lowcore might be overwritten by a second machine + * check from host when host is in the machine check's high-level handling. + * The size is 24 bytes. + */ +struct mcck_volatile_info { + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 reserved; +}; + +#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ + CR0_MEASUREMENT_ALERT_SUBMASK) +#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ + CR14_EXTERNAL_DAMAGE_SUBMASK) + +#define SIDAD_SIZE_MASK 0xff +#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) +#define sida_size(sie_block) \ + ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) + +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + +struct kvm_s390_sie_block { + atomic_t cpuflags; /* 0x0000 */ + __u32 : 1; /* 0x0004 */ + __u32 prefix : 18; + __u32 : 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE (1<<0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE (1<<0) +#define PROG_REQUEST (1<<1) + atomic_t prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + psw_t gpsw; /* 0x0090 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 +#define ECD_HMAC 0x00004000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +struct kvm_s390_itdb { + __u8 data[256]; +}; + +struct sie_page { + struct kvm_s390_sie_block sie_block; + struct mcck_volatile_info mcck_info; /* 0x0200 */ + __u8 reserved218[360]; /* 0x0218 */ + __u64 pv_grregs[16]; /* 0x0380 */ + __u8 reserved400[512]; /* 0x0400 */ + struct kvm_s390_itdb itdb; /* 0x0600 */ + __u8 reserved700[2304]; /* 0x0700 */ +}; + +#endif /* _ASM_KVM_HOST_TYPES_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 42a092fa1029..e99e9c87b1ce 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -10,6 +10,7 @@ #define _ASM_S390_LOWCORE_H #include <linux/types.h> +#include <asm/machine.h> #include <asm/ptrace.h> #include <asm/ctlreg.h> #include <asm/cpu.h> @@ -126,7 +127,7 @@ struct lowcore { __u64 int_clock; /* 0x0318 */ __u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */ __u64 clock_comparator; /* 0x0328 */ - __u64 boot_clock[2]; /* 0x0330 */ + __u8 pad_0x0330[0x0340-0x0330]; /* 0x0330 */ /* Current process. */ __u64 current_task; /* 0x0340 */ @@ -163,9 +164,7 @@ struct lowcore { __u32 spinlock_index; /* 0x03b0 */ __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b8 */ - __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */ - __u64 machine_flags; /* 0x03c8 */ - __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */ + __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */ __u32 return_lpswe; /* 0x0400 */ __u32 return_mcck_lpswe; /* 0x0404 */ @@ -222,9 +221,12 @@ static __always_inline struct lowcore *get_lowcore(void) if (__is_defined(__DECOMPRESSOR)) return NULL; - asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE) - : [lc] "=d" (lc) - : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); + asm_inline( + ALTERNATIVE(" lghi %[lc],0", + " llilh %[lc],%[alt]", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [lc] "=d" (lc) + : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); return lc; } @@ -238,15 +240,15 @@ static inline void set_prefix(__u32 address) #else /* __ASSEMBLY__ */ .macro GET_LC reg - ALTERNATIVE "llilh \reg,0", \ + ALTERNATIVE "lghi \reg,0", \ __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm .macro STMG_LC start, end, savearea ALTERNATIVE "stmg \start, \end, \savearea", \ __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/machine.h b/arch/s390/include/asm/machine.h new file mode 100644 index 000000000000..8abe5afdbfc4 --- /dev/null +++ b/arch/s390/include/asm/machine.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2024 + */ + +#ifndef __ASM_S390_MACHINE_H +#define __ASM_S390_MACHINE_H + +#include <linux/const.h> + +#define MFEATURE_LOWCORE 0 +#define MFEATURE_PCI_MIO 1 +#define MFEATURE_SCC 2 +#define MFEATURE_TLB_GUEST 3 +#define MFEATURE_TX 4 +#define MFEATURE_ESOP 5 +#define MFEATURE_DIAG9C 6 +#define MFEATURE_VM 7 +#define MFEATURE_KVM 8 +#define MFEATURE_LPAR 9 +#define MFEATURE_DIAG288 10 + +#ifndef __ASSEMBLY__ + +#include <linux/bitops.h> +#include <asm/alternative.h> + +extern unsigned long machine_features[1]; + +#define MAX_MFEATURE_BIT (sizeof(machine_features) * BITS_PER_BYTE) + +static inline void __set_machine_feature(unsigned int nr, unsigned long *mfeatures) +{ + if (nr >= MAX_MFEATURE_BIT) + return; + __set_bit(nr, mfeatures); +} + +static inline void set_machine_feature(unsigned int nr) +{ + __set_machine_feature(nr, machine_features); +} + +static inline void __clear_machine_feature(unsigned int nr, unsigned long *mfeatures) +{ + if (nr >= MAX_MFEATURE_BIT) + return; + __clear_bit(nr, mfeatures); +} + +static inline void clear_machine_feature(unsigned int nr) +{ + __clear_machine_feature(nr, machine_features); +} + +static bool __test_machine_feature(unsigned int nr, unsigned long *mfeatures) +{ + if (nr >= MAX_MFEATURE_BIT) + return false; + return test_bit(nr, mfeatures); +} + +static bool test_machine_feature(unsigned int nr) +{ + return __test_machine_feature(nr, machine_features); +} + +static __always_inline bool __test_machine_feature_constant(unsigned int nr) +{ + asm goto( + ALTERNATIVE("brcl 15,%l[l_no]", "brcl 0,0", ALT_FEATURE(%[nr])) + : + : [nr] "i" (nr) + : + : l_no); + return true; +l_no: + return false; +} + +#define DEFINE_MACHINE_HAS_FEATURE(name, feature) \ +static __always_inline bool machine_has_##name(void) \ +{ \ + if (!__is_defined(__DECOMPRESSOR) && __builtin_constant_p(feature)) \ + return __test_machine_feature_constant(feature); \ + return test_machine_feature(feature); \ +} + +DEFINE_MACHINE_HAS_FEATURE(relocated_lowcore, MFEATURE_LOWCORE) +DEFINE_MACHINE_HAS_FEATURE(scc, MFEATURE_SCC) +DEFINE_MACHINE_HAS_FEATURE(tlb_guest, MFEATURE_TLB_GUEST) +DEFINE_MACHINE_HAS_FEATURE(tx, MFEATURE_TX) +DEFINE_MACHINE_HAS_FEATURE(esop, MFEATURE_ESOP) +DEFINE_MACHINE_HAS_FEATURE(diag9c, MFEATURE_DIAG9C) +DEFINE_MACHINE_HAS_FEATURE(vm, MFEATURE_VM) +DEFINE_MACHINE_HAS_FEATURE(kvm, MFEATURE_KVM) +DEFINE_MACHINE_HAS_FEATURE(lpar, MFEATURE_LPAR) + +#define machine_is_vm machine_has_vm +#define machine_is_kvm machine_has_kvm +#define machine_is_lpar machine_has_lpar + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_S390_MACHINE_H */ diff --git a/arch/s390/include/asm/march.h b/arch/s390/include/asm/march.h index fd9eef3be44c..11a71bd14954 100644 --- a/arch/s390/include/asm/march.h +++ b/arch/s390/include/asm/march.h @@ -33,6 +33,10 @@ #define MARCH_HAS_Z16_FEATURES 1 #endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES +#define MARCH_HAS_Z17_FEATURES 1 +#endif + #endif /* __DECOMPRESSOR */ #endif /* __ASM_S390_MARCH_H */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 4c2dc7abc285..f07e49b419ab 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -22,10 +22,7 @@ typedef struct { * The following bitfields need a down_write on the mm * semaphore when they are written to. As they are only * written once, they can be read without a lock. - * - * The mmu context allocates 4K page tables. */ - unsigned int alloc_pgste:1; /* The mmu context uses extended page tables. */ unsigned int has_pgste:1; /* The mmu context uses storage keys. */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d56eb0a1f37b..d9b8501bc93d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -13,6 +13,7 @@ #include <linux/mm_types.h> #include <asm/tlbflush.h> #include <asm/ctlreg.h> +#include <asm/asce.h> #include <asm-generic/mm_hooks.h> #define init_new_context init_new_context @@ -29,9 +30,6 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.gmap_asce = 0; mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE - mm->context.alloc_pgste = page_table_allocate_pgste || - test_thread_flag(TIF_PGSTE) || - (current->mm && current->mm->context.alloc_pgste); mm->context.has_pgste = 0; mm->context.uses_skeys = 0; mm->context.uses_cmm = 0; @@ -80,7 +78,8 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct * else get_lowcore()->user_asce.val = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - /* Clear previous user-ASCE from CR7 */ + /* Clear previous user-ASCE from CR1 and CR7 */ + local_ctl_load(1, &s390_invalid_asce); local_ctl_load(7, &s390_invalid_asce); if (prev != next) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); @@ -102,6 +101,7 @@ static inline void finish_arch_post_lock_switch(void) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; + unsigned long flags; if (mm) { preempt_disable(); @@ -111,15 +111,25 @@ static inline void finish_arch_post_lock_switch(void) __tlb_flush_mm_lazy(mm); preempt_enable(); } + local_irq_save(flags); + if (test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &get_lowcore()->kernel_asce); + else + local_ctl_load(1, &get_lowcore()->user_asce); local_ctl_load(7, &get_lowcore()->user_asce); + local_irq_restore(flags); } #define activate_mm activate_mm static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + if (test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &get_lowcore()->kernel_asce); + else + local_ctl_load(1, &get_lowcore()->user_asce); local_ctl_load(7, &get_lowcore()->user_asce); } diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index 192835a3e24d..c7c96282f011 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -26,8 +26,6 @@ static inline bool nospec_uses_trampoline(void) return __is_defined(CC_USING_EXPOLINE) && !nospec_disable; } -#ifdef CONFIG_EXPOLINE_EXTERN - void __s390_indirect_jump_r1(void); void __s390_indirect_jump_r2(void); void __s390_indirect_jump_r3(void); @@ -44,8 +42,6 @@ void __s390_indirect_jump_r13(void); void __s390_indirect_jump_r14(void); void __s390_indirect_jump_r15(void); -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_EXPOLINE_H */ diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index 08fcbd628120..794fdb21500a 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -7,7 +7,6 @@ #ifndef PAGE_STATES_H #define PAGE_STATES_H -#include <asm/sections.h> #include <asm/page.h> #define ESSA_GET_STATE 0 @@ -21,7 +20,7 @@ #define ESSA_MAX ESSA_SET_STABLE_NODAT -extern int __bootdata_preserved(cmma_flag); +extern int cmma_flag; static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd) { diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 4f43cdd9835b..4e5dbabdf202 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -71,9 +71,11 @@ static inline void copy_page(void *to, void *from) #define vma_alloc_zeroed_movable_folio(vma, vaddr) \ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr) -/* - * These are used to make use of C type-checking.. - */ +#ifdef CONFIG_STRICT_MM_TYPECHECKS +#define STRICT_MM_TYPECHECKS +#endif + +#ifdef STRICT_MM_TYPECHECKS typedef struct { unsigned long pgprot; } pgprot_t; typedef struct { unsigned long pgste; } pgste_t; @@ -82,43 +84,48 @@ typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; typedef struct { unsigned long p4d; } p4d_t; typedef struct { unsigned long pgd; } pgd_t; -typedef pte_t *pgtable_t; -#define pgprot_val(x) ((x).pgprot) -#define pgste_val(x) ((x).pgste) - -static inline unsigned long pte_val(pte_t pte) -{ - return pte.pte; +#define DEFINE_PGVAL_FUNC(name) \ +static __always_inline unsigned long name ## _val(name ## _t name) \ +{ \ + return name.name; \ } -static inline unsigned long pmd_val(pmd_t pmd) -{ - return pmd.pmd; -} +#else /* STRICT_MM_TYPECHECKS */ -static inline unsigned long pud_val(pud_t pud) -{ - return pud.pud; -} +typedef unsigned long pgprot_t; +typedef unsigned long pgste_t; +typedef unsigned long pte_t; +typedef unsigned long pmd_t; +typedef unsigned long pud_t; +typedef unsigned long p4d_t; +typedef unsigned long pgd_t; -static inline unsigned long p4d_val(p4d_t p4d) -{ - return p4d.p4d; +#define DEFINE_PGVAL_FUNC(name) \ +static __always_inline unsigned long name ## _val(name ## _t name) \ +{ \ + return name; \ } -static inline unsigned long pgd_val(pgd_t pgd) -{ - return pgd.pgd; -} +#endif /* STRICT_MM_TYPECHECKS */ + +DEFINE_PGVAL_FUNC(pgprot) +DEFINE_PGVAL_FUNC(pgste) +DEFINE_PGVAL_FUNC(pte) +DEFINE_PGVAL_FUNC(pmd) +DEFINE_PGVAL_FUNC(pud) +DEFINE_PGVAL_FUNC(p4d) +DEFINE_PGVAL_FUNC(pgd) + +typedef pte_t *pgtable_t; +#define __pgprot(x) ((pgprot_t) { (x) } ) #define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) #define __pud(x) ((pud_t) { (x) } ) #define __p4d(x) ((p4d_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) static inline void page_set_storage_key(unsigned long addr, unsigned char skey, int mapped) @@ -184,7 +191,11 @@ extern struct vm_layout vm_layout; #define __kaslr_offset vm_layout.kaslr_offset #define __kaslr_offset_phys vm_layout.kaslr_offset_phys +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE #define __identity_base vm_layout.identity_base +#else +#define __identity_base 0UL +#endif #define ident_map_size vm_layout.identity_size static inline unsigned long kaslr_offset(void) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 474e1f8d1d3c..41f900f693d9 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -11,6 +11,9 @@ #include <asm/pci_insn.h> #include <asm/sclp.h> +#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 +#define arch_can_pci_mmap_wc() 1 + #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 @@ -144,7 +147,7 @@ struct zpci_dev { u8 util_str_avail : 1; u8 irqs_registered : 1; u8 tid_avail : 1; - u8 reserved : 1; + u8 rtr_avail : 1; /* Relaxed translation allowed */ unsigned int devfn; /* DEVFN part of the RID*/ u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ @@ -217,6 +220,7 @@ extern struct airq_iv *zpci_aif_sbv; struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state); int zpci_add_device(struct zpci_dev *zdev); int zpci_enable_device(struct zpci_dev *); +int zpci_reenable_device(struct zpci_dev *zdev); int zpci_disable_device(struct zpci_dev *); int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh); int zpci_deconfigure_device(struct zpci_dev *zdev); @@ -245,6 +249,7 @@ void update_uid_checking(bool new); /* IOMMU Interface */ int zpci_init_iommu(struct zpci_dev *zdev); void zpci_destroy_iommu(struct zpci_dev *zdev); +int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status); #ifdef CONFIG_PCI static inline bool zpci_use_mio(struct zpci_dev *zdev) diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index 3fff2f7095c8..7ebff39c84b3 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -156,7 +156,9 @@ struct clp_rsp_query_pci_grp { u16 : 4; u16 noi : 12; /* number of interrupts */ u8 version; - u8 : 6; + u8 : 2; + u8 rtr : 1; /* Relaxed translation requirement */ + u8 : 3; u8 frame : 1; u8 refresh : 1; /* TLB refresh mode */ u16 : 3; diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 42d7cc4262ca..d12e17201661 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -25,6 +25,7 @@ enum zpci_ioat_dtype { #define ZPCI_KEY (PAGE_DEFAULT_KEY << 5) #define ZPCI_TABLE_SIZE_RT (1UL << 42) +#define ZPCI_TABLE_SIZE_RS (1UL << 53) #define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST) #define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT) @@ -55,6 +56,8 @@ enum zpci_ioat_dtype { #define ZPCI_PT_BITS 8 #define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT) #define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS) +#define ZPCI_RS_SHIFT (ZPCI_RT_SHIFT + ZPCI_TABLE_BITS) +#define ZPCI_RF_SHIFT (ZPCI_RS_SHIFT + ZPCI_TABLE_BITS) #define ZPCI_RTE_FLAG_MASK 0x3fffUL #define ZPCI_RTE_ADDR_MASK (~ZPCI_RTE_FLAG_MASK) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 7b84ef6dc4b6..5345398df653 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -26,7 +26,6 @@ unsigned long *page_table_alloc(struct mm_struct *); struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_pgste(struct ptdesc *ptdesc); -extern int page_table_allocate_pgste; static inline void crst_table_init(unsigned long *crst, unsigned long entry) { @@ -53,29 +52,42 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) { unsigned long *table = crst_table_alloc(mm); - if (table) - crst_table_init(table, _REGION2_ENTRY_EMPTY); + if (!table) + return NULL; + crst_table_init(table, _REGION2_ENTRY_EMPTY); + pagetable_p4d_ctor(virt_to_ptdesc(table)); + return (p4d_t *) table; } static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) { - if (!mm_p4d_folded(mm)) - crst_table_free(mm, (unsigned long *) p4d); + if (mm_p4d_folded(mm)) + return; + + pagetable_dtor(virt_to_ptdesc(p4d)); + crst_table_free(mm, (unsigned long *) p4d); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { unsigned long *table = crst_table_alloc(mm); - if (table) - crst_table_init(table, _REGION3_ENTRY_EMPTY); + + if (!table) + return NULL; + crst_table_init(table, _REGION3_ENTRY_EMPTY); + pagetable_pud_ctor(virt_to_ptdesc(table)); + return (pud_t *) table; } static inline void pud_free(struct mm_struct *mm, pud_t *pud) { - if (!mm_pud_folded(mm)) - crst_table_free(mm, (unsigned long *) pud); + if (mm_pud_folded(mm)) + return; + + pagetable_dtor(virt_to_ptdesc(pud)); + crst_table_free(mm, (unsigned long *) pud); } static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) @@ -85,7 +97,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) if (!table) return NULL; crst_table_init(table, _SEGMENT_ENTRY_EMPTY); - if (!pagetable_pmd_ctor(virt_to_ptdesc(table))) { + if (!pagetable_pmd_ctor(mm, virt_to_ptdesc(table))) { crst_table_free(mm, table); return NULL; } @@ -96,7 +108,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { if (mm_pmd_folded(mm)) return; - pagetable_pmd_dtor(virt_to_ptdesc(pmd)); + pagetable_dtor(virt_to_ptdesc(pmd)); crst_table_free(mm, (unsigned long *) pmd); } @@ -117,11 +129,18 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return (pgd_t *) crst_table_alloc(mm); + unsigned long *table = crst_table_alloc(mm); + + if (!table) + return NULL; + pagetable_pgd_ctor(virt_to_ptdesc(table)); + + return (pgd_t *) table; } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { + pagetable_dtor(virt_to_ptdesc(pgd)); crst_table_free(mm, (unsigned long *) pgd); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 48268095b0a3..6d8bc27a366e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -14,10 +14,10 @@ #include <linux/sched.h> #include <linux/mm_types.h> +#include <linux/cpufeature.h> #include <linux/page-flags.h> #include <linux/radix-tree.h> #include <linux/atomic.h> -#include <asm/sections.h> #include <asm/ctlreg.h> #include <asm/bug.h> #include <asm/page.h> @@ -35,7 +35,7 @@ enum { PG_DIRECT_MAP_MAX }; -extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); +extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; static inline void update_page_count(int level, long count) { @@ -85,14 +85,14 @@ extern unsigned long zero_page_mask; * happen without trampolines and in addition the placement within a * 2GB frame is branch prediction unit friendly. */ -extern unsigned long __bootdata_preserved(VMALLOC_START); -extern unsigned long __bootdata_preserved(VMALLOC_END); +extern unsigned long VMALLOC_START; +extern unsigned long VMALLOC_END; #define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN) -extern struct page *__bootdata_preserved(vmemmap); -extern unsigned long __bootdata_preserved(vmemmap_size); +extern struct page *vmemmap; +extern unsigned long vmemmap_size; -extern unsigned long __bootdata_preserved(MODULES_VADDR); -extern unsigned long __bootdata_preserved(MODULES_END); +extern unsigned long MODULES_VADDR; +extern unsigned long MODULES_END; #define MODULES_VADDR MODULES_VADDR #define MODULES_END MODULES_END #define MODULES_LEN (1UL << 31) @@ -125,6 +125,8 @@ static inline int is_module_addr(void *addr) #define KASLR_LEN 0UL #endif +void setup_protection_map(void); + /* * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | @@ -419,9 +421,10 @@ static inline int is_module_addr(void *addr) #define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL -#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ -#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ +#define PGSTE_ST2_MASK 0x0000ffff00000000UL +#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */ +#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */ /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL @@ -443,98 +446,107 @@ static inline int is_module_addr(void *addr) /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_NONE (_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) +#define __PAGE_RO (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RX __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_RX (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RW __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RW (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RWX (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_INVALID | _PAGE_PROTECT) - -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_SHARED (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_KERNEL (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ +#define __PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ _PAGE_PROTECT | _PAGE_NOEXEC) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ - _PAGE_YOUNG | _PAGE_DIRTY) -/* - * On s390 the page table entry has an invalid bit and a read-only bit. - * Read permission implies execute permission and write permission - * implies read permission. - */ - /*xwr*/ +extern unsigned long page_noexec_mask; + +#define __pgprot_page_mask(x) __pgprot((x) & page_noexec_mask) + +#define PAGE_NONE __pgprot_page_mask(__PAGE_NONE) +#define PAGE_RO __pgprot_page_mask(__PAGE_RO) +#define PAGE_RX __pgprot_page_mask(__PAGE_RX) +#define PAGE_RW __pgprot_page_mask(__PAGE_RW) +#define PAGE_RWX __pgprot_page_mask(__PAGE_RWX) +#define PAGE_SHARED __pgprot_page_mask(__PAGE_SHARED) +#define PAGE_KERNEL __pgprot_page_mask(__PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot_page_mask(__PAGE_KERNEL_RO) /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_NONE (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RO (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ) -#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RW (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RWX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) -#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_DIRTY | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL_RO (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \ - _SEGMENT_ENTRY_LARGE | \ - _SEGMENT_ENTRY_READ | \ - _SEGMENT_ENTRY_WRITE | \ - _SEGMENT_ENTRY_YOUNG | \ - _SEGMENT_ENTRY_DIRTY) + +extern unsigned long segment_noexec_mask; + +#define __pgprot_segment_mask(x) __pgprot((x) & segment_noexec_mask) + +#define SEGMENT_NONE __pgprot_segment_mask(__SEGMENT_NONE) +#define SEGMENT_RO __pgprot_segment_mask(__SEGMENT_RO) +#define SEGMENT_RX __pgprot_segment_mask(__SEGMENT_RX) +#define SEGMENT_RW __pgprot_segment_mask(__SEGMENT_RW) +#define SEGMENT_RWX __pgprot_segment_mask(__SEGMENT_RWX) +#define SEGMENT_KERNEL __pgprot_segment_mask(__SEGMENT_KERNEL) +#define SEGMENT_KERNEL_RO __pgprot_segment_mask(__SEGMENT_KERNEL_RO) /* * Region3 entry (large page) protection definitions. */ -#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ +#define __REGION3_KERNEL (_REGION_ENTRY_TYPE_R3 | \ _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_WRITE | \ + _REGION3_ENTRY_YOUNG | \ _REGION3_ENTRY_DIRTY | \ _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ - _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_YOUNG | \ - _REGION_ENTRY_PROTECT | \ - _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \ +#define __REGION3_KERNEL_RO (_REGION_ENTRY_TYPE_R3 | \ _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ - _REGION3_ENTRY_DIRTY) + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_YOUNG | \ + _REGION_ENTRY_PROTECT | \ + _REGION_ENTRY_NOEXEC) + +extern unsigned long region_noexec_mask; + +#define __pgprot_region_mask(x) __pgprot((x) & region_noexec_mask) + +#define REGION3_KERNEL __pgprot_region_mask(__REGION3_KERNEL) +#define REGION3_KERNEL_RO __pgprot_region_mask(__REGION3_KERNEL_RO) static inline bool mm_p4d_folded(struct mm_struct *mm) { @@ -572,13 +584,14 @@ static inline int mm_is_protected(struct mm_struct *mm) return 0; } -static inline int mm_alloc_pgste(struct mm_struct *mm) +static inline pgste_t clear_pgste_bit(pgste_t pgste, unsigned long mask) { -#ifdef CONFIG_PGSTE - if (unlikely(mm->context.alloc_pgste)) - return 1; -#endif - return 0; + return __pgste(pgste_val(pgste) & ~mask); +} + +static inline pgste_t set_pgste_bit(pgste_t pgste, unsigned long mask) +{ + return __pgste(pgste_val(pgste) | mask); } static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) @@ -902,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } @@ -1328,7 +1341,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, * PTE does not have _PAGE_PROTECT set, to avoid unnecessary overhead. * A local RDP can be used to do the flush. */ - if (MACHINE_HAS_RDP && !(pte_val(*ptep) & _PAGE_PROTECT)) + if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT)) __ptep_rdp(address, ptep, 0, 0, 1); } #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault @@ -1343,7 +1356,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, { if (pte_same(*ptep, entry)) return 0; - if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry)) + if (cpu_has_rdp() && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry)) ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry); else ptep_xchg_direct(vma->vm_mm, addr, ptep, entry); @@ -1391,9 +1404,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); #define pgprot_writecombine pgprot_writecombine pgprot_t pgprot_writecombine(pgprot_t prot); -#define pgprot_writethrough pgprot_writethrough -pgprot_t pgprot_writethrough(pgprot_t prot); - #define PFN_PTE_SHIFT PAGE_SHIFT /* @@ -1435,21 +1445,9 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) pte_t __pte; __pte = __pte(physpage | pgprot_val(pgprot)); - if (!MACHINE_HAS_NX) - __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC)); return pte_mkyoung(__pte); } -static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) -{ - unsigned long physpage = page_to_phys(page); - pte_t __pte = mk_pte_phys(physpage, pgprot); - - if (pte_write(__pte) && PageDirty(page)) - __pte = pte_mkdirty(__pte); - return __pte; -} - #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) @@ -1804,8 +1802,6 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!MACHINE_HAS_NX) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmdp, entry); } @@ -1873,7 +1869,6 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, #define pmdp_collapse_flush pmdp_collapse_flush #define pfn_pmd(pfn, pgprot) mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot)) -#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) static inline int pmd_trans_huge(pmd_t pmd) { @@ -1883,7 +1878,7 @@ static inline int pmd_trans_huge(pmd_t pmd) #define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { - return MACHINE_HAS_EDAT1 ? 1 : 0; + return cpu_has_edat1() ? 1 : 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -2001,4 +1996,18 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define pmd_pgtable(pmd) \ ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) +static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt) +{ + unsigned long *pgstes, res; + + pgstes = pgt + _PAGE_ENTRIES; + + res = (pgstes[0] & PGSTE_ST2_MASK) << 16; + res |= pgstes[1] & PGSTE_ST2_MASK; + res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16; + res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32; + + return res; +} + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 51b68a43e195..7ef3bbec98b0 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -26,7 +26,7 @@ enum reserved_range_type { RR_AMODE31, RR_IPLREPORT, RR_CERT_COMP_LIST, - RR_MEM_DETECT_EXTENDED, + RR_MEM_DETECT_EXT, RR_VMEM, RR_MAX }; @@ -128,7 +128,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) RR_TYPE_NAME(AMODE31); RR_TYPE_NAME(IPLREPORT); RR_TYPE_NAME(CERT_COMP_LIST); - RR_TYPE_NAME(MEM_DETECT_EXTENDED); + RR_TYPE_NAME(MEM_DETECT_EXT); RR_TYPE_NAME(VMEM); default: return "UNKNOWN"; diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index 5dca1a46a9f6..b7b59faf16f4 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -20,9 +20,22 @@ * @param key pointer to a buffer containing the key blob * @param keylen size of the key blob in bytes * @param protkey pointer to buffer receiving the protected key + * @param xflags additional execution flags (see PKEY_XFLAG_* definitions below) + * As of now the only supported flag is PKEY_XFLAG_NOMEMALLOC. * @return 0 on success, negative errno value on failure */ int pkey_key2protkey(const u8 *key, u32 keylen, - u8 *protkey, u32 *protkeylen, u32 *protkeytype); + u8 *protkey, u32 *protkeylen, u32 *protkeytype, + u32 xflags); + +/* + * If this flag is given in the xflags parameter, the pkey implementation + * is not allowed to allocate memory but instead should fall back to use + * preallocated memory or simple fail with -ENOMEM. + * This flag is for protected key derive within a cipher or similar + * which must not allocate memory which would cause io operations - see + * also the CRYPTO_ALG_ALLOCATES_MEMORY flag in crypto.h. + */ +#define PKEY_XFLAG_NOMEMALLOC 0x0001 #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 2c29bdf12127..6ccd033acfe5 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -8,12 +8,19 @@ #include <asm/cmpxchg.h> #include <asm/march.h> -#ifdef MARCH_HAS_Z196_FEATURES - /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 + +/* + * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such + * that a decrement hitting 0 means we can and should reschedule. + */ #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ static __always_inline int preempt_count(void) { return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED; @@ -29,6 +36,15 @@ static __always_inline void preempt_count_set(int pc) } while (!arch_try_cmpxchg(&get_lowcore()->preempt_count, &old, new)); } +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * short instruction sequence. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + static __always_inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count); @@ -64,67 +80,24 @@ static __always_inline void __preempt_count_sub(int val) __preempt_count_add(-val); } +/* + * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ static __always_inline bool __preempt_count_dec_and_test(void) { - return __atomic_add(-1, &get_lowcore()->preempt_count) == 1; -} - -static __always_inline bool should_resched(int preempt_offset) -{ - return unlikely(READ_ONCE(get_lowcore()->preempt_count) == - preempt_offset); -} - -#else /* MARCH_HAS_Z196_FEATURES */ - -#define PREEMPT_ENABLED (0) - -static __always_inline int preempt_count(void) -{ - return READ_ONCE(get_lowcore()->preempt_count); -} - -static __always_inline void preempt_count_set(int pc) -{ - get_lowcore()->preempt_count = pc; -} - -static __always_inline void set_preempt_need_resched(void) -{ -} - -static __always_inline void clear_preempt_need_resched(void) -{ -} - -static __always_inline bool test_preempt_need_resched(void) -{ - return false; -} - -static __always_inline void __preempt_count_add(int val) -{ - get_lowcore()->preempt_count += val; -} - -static __always_inline void __preempt_count_sub(int val) -{ - get_lowcore()->preempt_count -= val; -} - -static __always_inline bool __preempt_count_dec_and_test(void) -{ - return !--get_lowcore()->preempt_count && tif_need_resched(); + return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count); } +/* + * Returns true when we need to resched and can (barring IRQ state). + */ static __always_inline bool should_resched(int preempt_offset) { - return unlikely(preempt_count() == preempt_offset && - tif_need_resched()); + return unlikely(READ_ONCE(get_lowcore()->preempt_count) == preempt_offset); } -#endif /* MARCH_HAS_Z196_FEATURES */ - #define init_task_preempt_count(p) do { } while (0) /* Deferred to CPU bringup time */ #define init_idle_preempt_count(p, cpu) do { } while (0) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 8761fd01a9f0..6c8063cb8fe7 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -31,6 +31,7 @@ #include <linux/cpumask.h> #include <linux/linkage.h> #include <linux/irqflags.h> +#include <linux/bitops.h> #include <asm/fpu-types.h> #include <asm/cpu.h> #include <asm/page.h> @@ -62,33 +63,27 @@ static __always_inline struct pcpu *this_pcpu(void) static __always_inline void set_cpu_flag(int flag) { - this_pcpu()->flags |= (1UL << flag); + set_bit(flag, &this_pcpu()->flags); } static __always_inline void clear_cpu_flag(int flag) { - this_pcpu()->flags &= ~(1UL << flag); + clear_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_cpu_flag(int flag) { - return this_pcpu()->flags & (1UL << flag); + return test_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_and_set_cpu_flag(int flag) { - if (test_cpu_flag(flag)) - return true; - set_cpu_flag(flag); - return false; + return test_and_set_bit(flag, &this_pcpu()->flags); } static __always_inline bool test_and_clear_cpu_flag(int flag) { - if (!test_cpu_flag(flag)) - return false; - clear_cpu_flag(flag); - return true; + return test_and_clear_bit(flag, &this_pcpu()->flags); } /* @@ -97,7 +92,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag) */ static __always_inline bool test_cpu_flag_of(int flag, int cpu) { - return per_cpu(pcpu_devices, cpu).flags & (1UL << flag); + return test_bit(flag, &per_cpu(pcpu_devices, cpu).flags); } #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) @@ -163,8 +158,7 @@ static __always_inline void __stackleak_poison(unsigned long erase_low, " la %[addr],256(%[addr])\n" " brctg %[tmp],0b\n" "1: stg %[poison],0(%[addr])\n" - " larl %[tmp],3f\n" - " ex %[count],0(%[tmp])\n" + " exrl %[count],3f\n" " j 4f\n" "2: stg %[poison],0(%[addr])\n" " j 4f\n" @@ -417,7 +411,11 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) static __always_inline void bpon(void) { - asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82))); + asm_inline volatile( + ALTERNATIVE(" nop\n", + " .insn rrf,0xb2e80000,0,0,13,0\n", + ALT_SPEC(82)) + ); } #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 788bc4467445..0905fa99a31e 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -9,16 +9,15 @@ #include <linux/bits.h> #include <uapi/asm/ptrace.h> +#include <asm/thread_info.h> #include <asm/tpi.h> #define PIF_SYSCALL 0 /* inside a system call */ -#define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */ #define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */ #define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */ #define _PIF_SYSCALL BIT(PIF_SYSCALL) -#define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART) #define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET) #define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT) #define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS) @@ -128,7 +127,6 @@ struct pt_regs { struct tpi_info tpi_info; }; unsigned long flags; - unsigned long cr1; unsigned long last_break; }; @@ -231,8 +229,44 @@ static inline void instruction_pointer_set(struct pt_regs *regs, int regs_query_register_offset(const char *name); const char *regs_query_register_name(unsigned int offset); -unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); -unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); + +static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->gprs[15]; +} + +static __always_inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + if (offset >= NUM_GPRS) + return 0; + return regs->gprs[offset]; +} + +static __always_inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + unsigned long ksp = kernel_stack_pointer(regs); + + return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs:pt_regs which contains kernel stack pointer. + * @n:stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long addr; + + addr = kernel_stack_pointer(regs) + n * sizeof(long); + if (!regs_within_kernel_stack(regs, addr)) + return 0; + return READ_ONCE_NOCHECK(*(unsigned long *)addr); +} /** * regs_get_kernel_argument() - get Nth function argument in kernel @@ -253,11 +287,6 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, return regs_get_kernel_stack_nth(regs, argoffset + n); } -static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) -{ - return regs->gprs[15]; -} - static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) { regs->gprs[2] = rc; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index eb00fa1771da..1e62919bacf4 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -16,6 +16,11 @@ /* 24 + 16 * SCLP_MAX_CORES */ #define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) +#define SCLP_ERRNOTIFY_AQ_RESET 0 +#define SCLP_ERRNOTIFY_AQ_REPAIR 1 +#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2 +#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3 + #ifndef __ASSEMBLY__ #include <linux/uio.h> #include <asm/chpid.h> @@ -87,8 +92,10 @@ struct sclp_info { unsigned char has_kss : 1; unsigned char has_diag204_bif : 1; unsigned char has_gisaf : 1; + unsigned char has_diag310 : 1; unsigned char has_diag318 : 1; unsigned char has_diag320 : 1; + unsigned char has_diag324 : 1; unsigned char has_sipl : 1; unsigned char has_sipl_eckd : 1; unsigned char has_dirq : 1; @@ -111,6 +118,34 @@ struct sclp_info { }; extern struct sclp_info sclp; +struct sccb_header { + u16 length; + u8 function_code; + u8 control_mask[3]; + u16 response_code; +} __packed; + +struct evbuf_header { + u16 length; + u8 type; + u8 flags; + u16 _reserved; +} __packed; + +struct err_notify_evbuf { + struct evbuf_header header; + u8 action; + u8 atype; + u32 fh; + u32 fid; + u8 data[]; +} __packed; + +struct err_notify_sccb { + struct sccb_header header; + struct err_notify_evbuf evbuf; +} __packed; + struct zpci_report_error_header { u8 version; /* Interface version byte */ u8 action; /* Action qualifier byte @@ -133,10 +168,12 @@ int sclp_early_read_storage_info(void); int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); +void sclp_early_detect_machine_features(void); void sclp_early_printk(const char *s); void __sclp_early_printk(const char *s, unsigned int len); void sclp_emergency_printk(const char *s); +int sclp_init(void); int sclp_early_get_memsize(unsigned long *mem); int sclp_early_get_hsa_size(unsigned long *hsa_size); int _sclp_get_core_info(struct sclp_core_info *info); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 70b920b32827..031e881b4d88 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -13,28 +13,6 @@ #define PARMAREA 0x10400 #define COMMAND_LINE_SIZE CONFIG_COMMAND_LINE_SIZE -/* - * Machine features detected in early.c - */ - -#define MACHINE_FLAG_VM BIT(0) -#define MACHINE_FLAG_KVM BIT(1) -#define MACHINE_FLAG_LPAR BIT(2) -#define MACHINE_FLAG_DIAG9C BIT(3) -#define MACHINE_FLAG_ESOP BIT(4) -#define MACHINE_FLAG_IDTE BIT(5) -#define MACHINE_FLAG_EDAT1 BIT(7) -#define MACHINE_FLAG_EDAT2 BIT(8) -#define MACHINE_FLAG_TOPOLOGY BIT(10) -#define MACHINE_FLAG_TE BIT(11) -#define MACHINE_FLAG_TLB_LC BIT(12) -#define MACHINE_FLAG_TLB_GUEST BIT(14) -#define MACHINE_FLAG_NX BIT(15) -#define MACHINE_FLAG_GS BIT(16) -#define MACHINE_FLAG_SCC BIT(17) -#define MACHINE_FLAG_PCI_MIO BIT(18) -#define MACHINE_FLAG_RDP BIT(19) -#define MACHINE_FLAG_SEQ_INSN BIT(20) #define LPP_MAGIC BIT(31) #define LPP_PID_MASK _AC(0xffffffff, UL) @@ -78,26 +56,6 @@ extern unsigned long max_mappable; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; -#define MACHINE_IS_VM (get_lowcore()->machine_flags & MACHINE_FLAG_VM) -#define MACHINE_IS_KVM (get_lowcore()->machine_flags & MACHINE_FLAG_KVM) -#define MACHINE_IS_LPAR (get_lowcore()->machine_flags & MACHINE_FLAG_LPAR) - -#define MACHINE_HAS_DIAG9C (get_lowcore()->machine_flags & MACHINE_FLAG_DIAG9C) -#define MACHINE_HAS_ESOP (get_lowcore()->machine_flags & MACHINE_FLAG_ESOP) -#define MACHINE_HAS_IDTE (get_lowcore()->machine_flags & MACHINE_FLAG_IDTE) -#define MACHINE_HAS_EDAT1 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT1) -#define MACHINE_HAS_EDAT2 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT2) -#define MACHINE_HAS_TOPOLOGY (get_lowcore()->machine_flags & MACHINE_FLAG_TOPOLOGY) -#define MACHINE_HAS_TE (get_lowcore()->machine_flags & MACHINE_FLAG_TE) -#define MACHINE_HAS_TLB_LC (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_LC) -#define MACHINE_HAS_TLB_GUEST (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_GUEST) -#define MACHINE_HAS_NX (get_lowcore()->machine_flags & MACHINE_FLAG_NX) -#define MACHINE_HAS_GS (get_lowcore()->machine_flags & MACHINE_FLAG_GS) -#define MACHINE_HAS_SCC (get_lowcore()->machine_flags & MACHINE_FLAG_SCC) -#define MACHINE_HAS_PCI_MIO (get_lowcore()->machine_flags & MACHINE_FLAG_PCI_MIO) -#define MACHINE_HAS_RDP (get_lowcore()->machine_flags & MACHINE_FLAG_RDP) -#define MACHINE_HAS_SEQ_INSN (get_lowcore()->machine_flags & MACHINE_FLAG_SEQ_INSN) - /* * Console mode. Override with conmode= */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 7feca96c48c6..03f4d01664f8 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -7,11 +7,29 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H -#include <asm/sigp.h> -#include <asm/lowcore.h> #include <asm/processor.h> +#include <asm/lowcore.h> +#include <asm/machine.h> +#include <asm/sigp.h> + +static __always_inline unsigned int raw_smp_processor_id(void) +{ + unsigned long lc_cpu_nr; + unsigned int cpu; + + BUILD_BUG_ON(sizeof_field(struct lowcore, cpu_nr) != sizeof(cpu)); + lc_cpu_nr = offsetof(struct lowcore, cpu_nr); + asm_inline( + ALTERNATIVE(" ly %[cpu],%[offzero](%%r0)\n", + " ly %[cpu],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [cpu] "=d" (cpu) + : [offzero] "i" (lc_cpu_nr), + [offalt] "i" (lc_cpu_nr + LOWCORE_ALT_ADDRESS), + "m" (((struct lowcore *)0)->cpu_nr)); + return cpu; +} -#define raw_smp_processor_id() (get_lowcore()->cpu_nr) #define arch_scale_cpu_capacity smp_cpu_get_capacity extern struct mutex smp_cpu_state_mutex; diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index f87dd0a84855..f9935db9fd76 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -16,7 +16,23 @@ #include <asm/processor.h> #include <asm/alternative.h> -#define SPINLOCK_LOCKVAL (get_lowcore()->spinlock_lockval) +static __always_inline unsigned int spinlock_lockval(void) +{ + unsigned long lc_lockval; + unsigned int lockval; + + BUILD_BUG_ON(sizeof_field(struct lowcore, spinlock_lockval) != sizeof(lockval)); + lc_lockval = offsetof(struct lowcore, spinlock_lockval); + asm_inline( + ALTERNATIVE(" ly %[lockval],%[offzero](%%r0)\n", + " ly %[lockval],%[offalt](%%r0)\n", + ALT_FEATURE(MFEATURE_LOWCORE)) + : [lockval] "=d" (lockval) + : [offzero] "i" (lc_lockval), + [offalt] "i" (lc_lockval + LOWCORE_ALT_ADDRESS), + "m" (((struct lowcore *)0)->spinlock_lockval)); + return lockval; +} extern int spin_retry; @@ -60,7 +76,7 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) int old = 0; barrier(); - return likely(arch_try_cmpxchg(&lp->lock, &old, SPINLOCK_LOCKVAL)); + return likely(arch_try_cmpxchg(&lp->lock, &old, spinlock_lockval())); } static inline void arch_spin_lock(arch_spinlock_t *lp) diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 2ab868cbae6c..f8f68f4ef255 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -26,11 +26,9 @@ void *memmove(void *dest, const void *src, size_t n); #define __HAVE_ARCH_MEMSCAN /* inline & arch function */ #define __HAVE_ARCH_STRCAT /* inline & arch function */ #define __HAVE_ARCH_STRCMP /* arch function */ -#define __HAVE_ARCH_STRCPY /* inline & arch function */ #define __HAVE_ARCH_STRLCAT /* arch function */ #define __HAVE_ARCH_STRLEN /* inline & arch function */ #define __HAVE_ARCH_STRNCAT /* arch function */ -#define __HAVE_ARCH_STRNCPY /* arch function */ #define __HAVE_ARCH_STRNLEN /* inline & arch function */ #define __HAVE_ARCH_STRSTR /* arch function */ #define __HAVE_ARCH_MEMSET16 /* arch function */ @@ -42,7 +40,6 @@ int memcmp(const void *s1, const void *s2, size_t n); int strcmp(const char *s1, const char *s2); size_t strlcat(char *dest, const char *src, size_t n); char *strncat(char *dest, const char *src, size_t n); -char *strncpy(char *dest, const char *src, size_t n); char *strstr(const char *s1, const char *s2); #endif /* !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN) */ @@ -155,22 +152,6 @@ static inline char *strcat(char *dst, const char *src) } #endif -#ifdef __HAVE_ARCH_STRCPY -static inline char *strcpy(char *dst, const char *src) -{ - char *ret = dst; - - asm volatile( - " lghi 0,0\n" - "0: mvst %[dst],%[src]\n" - " jo 0b" - : [dst] "+&a" (dst), [src] "+&a" (src) - : - : "cc", "memory", "0"); - return ret; -} -#endif - #if defined(__HAVE_ARCH_STRLEN) || (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)) static inline size_t __no_sanitize_prefix_strfunc(strlen)(const char *s) { @@ -208,7 +189,6 @@ static inline size_t strnlen(const char * s, size_t n) void *memchr(const void * s, int c, size_t n); void *memscan(void *s, int c, size_t n); char *strcat(char *dst, const char *src); -char *strcpy(char *dst, const char *src); size_t strlen(const char *s); size_t strnlen(const char * s, size_t n); #endif /* !IN_ARCH_STRING_C */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 27e3d804b311..bd4cb00ccd5e 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -24,6 +24,18 @@ static inline long syscall_get_nr(struct task_struct *task, (regs->int_code & 0xffff) : -1; } +static inline void syscall_set_nr(struct task_struct *task, + struct pt_regs *regs, + int nr) +{ + /* + * Unlike syscall_get_nr(), syscall_set_nr() can be called only when + * the target task is stopped for tracing on entering syscall, so + * there is no need to have the same check syscall_get_nr() has. + */ + regs->int_code = (regs->int_code & ~0xffff) | (nr & 0xffff); +} + static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { @@ -65,19 +77,26 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { unsigned long mask = -1UL; - unsigned int n = 6; #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) mask = 0xffffffff; #endif - while (n-- > 0) - if (n > 0) - args[n] = regs->gprs[2 + n] & mask; + for (int i = 1; i < 6; i++) + args[i] = regs->gprs[2 + i] & mask; args[0] = regs->orig_gpr2 & mask; } +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + const unsigned long *args) +{ + regs->orig_gpr2 = args[0]; + for (int n = 1; n < 6; n++) + regs->gprs[2 + n] = args[n]; +} + static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_COMPAT diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index edca5a751df4..9088c5267f35 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -11,8 +11,34 @@ #ifndef __ASM_S390_SYSINFO_H #define __ASM_S390_SYSINFO_H -#include <asm/bitsperlong.h> #include <linux/uuid.h> +#include <asm/bitsperlong.h> +#include <asm/asm.h> + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +static inline int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + int r0 = (fc << 28) | sel1; + int cc; + + asm volatile( + " lr %%r0,%[r0]\n" + " lr %%r1,%[r1]\n" + " stsi %[sysinfo]\n" + " lr %[r0],%%r0\n" + CC_IPM(cc) + : CC_OUT(cc, cc), [r0] "+d" (r0), [sysinfo] "=Q" (*(char *)sysinfo) + : [r1] "d" (sel2) + : CC_CLOBBER_LIST("0", "1", "memory")); + if (cc == 3) + return -EOPNOTSUPP; + return fc ? 0 : (unsigned int)r0 >> 28; +} struct sysinfo_1_1_1 { unsigned char p:1; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index c33f7144d1b9..391eb04d26d8 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -9,9 +9,7 @@ #define _ASM_THREAD_INFO_H #include <linux/bits.h> -#ifndef ASM_OFFSETS_C -#include <asm/asm-offsets.h> -#endif +#include <vdso/page.h> /* * General size of kernel stacks @@ -27,8 +25,6 @@ #define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE) #ifndef __ASSEMBLY__ -#include <asm/lowcore.h> -#include <asm/page.h> /* * low level task data that entry.S needs immediate access to @@ -67,7 +63,7 @@ void arch_setup_new_exec(void); #define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling needed */ #define TIF_UPROBE 4 /* breakpointed or single-stepping */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ -#define TIF_PGSTE 6 /* New mm's will use 4K page tables */ +#define TIF_ASCE_PRIMARY 6 /* primary asce is kernel asce */ #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_GUARDED_STORAGE 8 /* load guarded storage control block */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ @@ -89,7 +85,7 @@ void arch_setup_new_exec(void); #define _TIF_NEED_RESCHED_LAZY BIT(TIF_NEED_RESCHED_LAZY) #define _TIF_UPROBE BIT(TIF_UPROBE) #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) -#define _TIF_PGSTE BIT(TIF_PGSTE) +#define _TIF_ASCE_PRIMARY BIT(TIF_ASCE_PRIMARY) #define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) #define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index a9460bd6555b..bed8d0b5a282 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -13,6 +13,7 @@ #include <linux/preempt.h> #include <linux/time64.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/asm.h> /* The value of the TOD clock for 1.1.1970. */ @@ -267,7 +268,7 @@ static __always_inline u128 eitod_to_ns(u128 todval) */ static inline int tod_after(unsigned long a, unsigned long b) { - if (MACHINE_HAS_SCC) + if (machine_has_scc()) return (long) a > (long) b; return a > b; } @@ -281,7 +282,7 @@ static inline int tod_after(unsigned long a, unsigned long b) */ static inline int tod_after_eq(unsigned long a, unsigned long b) { - if (MACHINE_HAS_SCC) + if (machine_has_scc()) return (long) a >= (long) b; return a >= b; } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index e95b2c8081eb..1e50f6f1ad9d 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -22,7 +22,6 @@ * Pages used for the page tables is a different story. FIXME: more */ -void __tlb_remove_table(void *_table); static inline void tlb_flush(struct mmu_gather *tlb); static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, bool delay_rmap, int page_size); @@ -37,11 +36,12 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, #include <asm/tlbflush.h> #include <asm-generic/tlb.h> +#include <asm/gmap.h> /* * Release the page cache reference for a pte removed by * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page - * has already been freed, so just do free_page_and_swap_cache. + * has already been freed, so just do free_folio_and_swap_cache. * * s390 doesn't delay rmap removal. */ @@ -50,7 +50,7 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, { VM_WARN_ON_ONCE(delay_rmap); - free_page_and_swap_cache(page); + free_folio_and_swap_cache(page_folio(page)); return false; } @@ -85,9 +85,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_pmds = 1; - if (mm_alloc_pgste(tlb->mm)) + if (mm_has_pgste(tlb->mm)) gmap_unlink(tlb->mm, (unsigned long *)pte, address); - tlb_remove_ptdesc(tlb, pte); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte)); } /* @@ -102,12 +102,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, { if (mm_pmd_folded(tlb->mm)) return; - pagetable_pmd_dtor(virt_to_ptdesc(pmd)); __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_puds = 1; - tlb_remove_ptdesc(tlb, pmd); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); } /* @@ -125,7 +124,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; - tlb_remove_ptdesc(tlb, p4d); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); } /* @@ -140,11 +139,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, { if (mm_pud_folded(tlb->mm)) return; + __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_p4ds = 1; - tlb_remove_ptdesc(tlb, pud); + tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); } - #endif /* _S390_TLB_H */ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 9dfd46dd03c6..75491baa2197 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -2,9 +2,11 @@ #ifndef _S390_TLBFLUSH_H #define _S390_TLBFLUSH_H +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/sched.h> #include <asm/processor.h> +#include <asm/machine.h> /* * Flush all TLB entries on the local CPU. @@ -22,7 +24,7 @@ static inline void __tlb_flush_idte(unsigned long asce) unsigned long opt; opt = IDTE_PTOA; - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) opt |= IDTE_GUEST_ASCE; /* Global TLB flush for the mm */ asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc"); @@ -52,7 +54,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); barrier(); gmap_asce = READ_ONCE(mm->context.gmap_asce); - if (MACHINE_HAS_IDTE && gmap_asce != -1UL) { + if (cpu_has_idte() && gmap_asce != -1UL) { if (gmap_asce) __tlb_flush_idte(gmap_asce); __tlb_flush_idte(mm->context.asce); @@ -66,7 +68,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) static inline void __tlb_flush_kernel(void) { - if (MACHINE_HAS_IDTE) + if (cpu_has_idte()) __tlb_flush_idte(init_mm.context.asce); else __tlb_flush_global(); diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index cef06bffad80..44110847342a 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -61,6 +61,12 @@ static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return smp_get_base_cpu(cpu) == cpu; +} +#define topology_is_primary_thread topology_is_primary_thread + #define POLARIZATION_UNKNOWN (-1) #define POLARIZATION_HRZ (0) #define POLARIZATION_VL (1) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a81f897a81ce..a43fc88c0050 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -13,25 +13,81 @@ /* * User space memory access functions */ +#include <linux/pgtable.h> #include <asm/asm-extable.h> #include <asm/processor.h> #include <asm/extable.h> #include <asm/facility.h> #include <asm-generic/access_ok.h> +#include <asm/asce.h> #include <linux/instrumented.h> void debug_user_asce(int exit); -unsigned long __must_check -raw_copy_from_user(void *to, const void __user *from, unsigned long n); - -unsigned long __must_check -raw_copy_to_user(void __user *to, const void *from, unsigned long n); +#ifdef CONFIG_KMSAN +#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory +#else +#define uaccess_kmsan_or_inline __always_inline +#endif -#ifndef CONFIG_KASAN #define INLINE_COPY_FROM_USER #define INLINE_COPY_TO_USER -#endif + +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long size) +{ + unsigned long osize; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " lhi %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_FROM(0b, 0b) + EX_TABLE_UA_MVCOS_FROM(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to) + : [spec] "I" (0x81), [from] "Q" (*(const char __user *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (__builtin_constant_p(osize) && osize <= 4096) + return osize - size; + if (likely(CC_TRANSFORM(cc) == 0)) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } +} + +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long size) +{ + unsigned long osize; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " llilh %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "I" (0x81), [from] "Q" (*(const char *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (__builtin_constant_p(osize) && osize <= 4096) + return osize - size; + if (likely(CC_TRANSFORM(cc) == 0)) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } +} unsigned long __must_check _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); @@ -55,63 +111,65 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo return n; } -union oac { - unsigned int val; - struct { - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac1; - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac2; - }; -}; - int __noreturn __put_user_bad(void); -#ifdef CONFIG_KMSAN -#define get_put_user_noinstr_attributes \ - noinline __maybe_unused __no_sanitize_memory -#else -#define get_put_user_noinstr_attributes __always_inline -#endif +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT -#define DEFINE_PUT_USER(type) \ -static get_put_user_noinstr_attributes int \ +#define DEFINE_PUT_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ +__put_user_##type##_noinstr(unsigned type __user *to, \ + unsigned type *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " llilh %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_PUT_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ __put_user_##type##_noinstr(unsigned type __user *to, \ unsigned type *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac1.as = PSW_BITS_AS_SECONDARY, \ - .oac1.a = 1, \ - }; \ int rc; \ \ - asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos %[_to],%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + asm_inline volatile( \ + " llilh %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ - : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [spec] "d" (__oac_spec.val) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ + : [rc] "=d" (rc), [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ : "cc", "0"); \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_PUT_USER_NOINSTR(char); +DEFINE_PUT_USER_NOINSTR(short); +DEFINE_PUT_USER_NOINSTR(int); +DEFINE_PUT_USER_NOINSTR(long); + +#define DEFINE_PUT_USER(type) \ static __always_inline int \ __put_user_##type(unsigned type __user *to, unsigned type *from, \ unsigned long size) \ @@ -128,69 +186,111 @@ DEFINE_PUT_USER(short); DEFINE_PUT_USER(int); DEFINE_PUT_USER(long); -static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) -{ - int rc; +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __x = (x); \ + int __prc; \ + \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + __prc = __put_user_char((unsigned char __user *)(ptr), \ + (unsigned char *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 2: \ + __prc = __put_user_short((unsigned short __user *)(ptr),\ + (unsigned short *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 4: \ + __prc = __put_user_int((unsigned int __user *)(ptr), \ + (unsigned int *)&__x, \ + sizeof(*(ptr))); \ + break; \ + case 8: \ + __prc = __put_user_long((unsigned long __user *)(ptr), \ + (unsigned long *)&__x, \ + sizeof(*(ptr))); \ + break; \ + default: \ + __prc = __put_user_bad(); \ + break; \ + } \ + __builtin_expect(__prc, 0); \ +}) - switch (size) { - case 1: - rc = __put_user_char((unsigned char __user *)ptr, - (unsigned char *)x, - size); - break; - case 2: - rc = __put_user_short((unsigned short __user *)ptr, - (unsigned short *)x, - size); - break; - case 4: - rc = __put_user_int((unsigned int __user *)ptr, - (unsigned int *)x, - size); - break; - case 8: - rc = __put_user_long((unsigned long __user *)ptr, - (unsigned long *)x, - size); - break; - default: - __put_user_bad(); - break; - } - return rc; -} +#define put_user(x, ptr) \ +({ \ + might_fault(); \ + __put_user(x, ptr); \ +}) int __noreturn __get_user_bad(void); -#define DEFINE_GET_USER(type) \ -static get_put_user_noinstr_attributes int \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define DEFINE_GET_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ __get_user_##type##_noinstr(unsigned type *to, \ - unsigned type __user *from, \ + const unsigned type __user *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " lhi %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + *to = 0; \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_GET_USER_NOINSTR(type) \ +static uaccess_kmsan_or_inline int \ +__get_user_##type##_noinstr(unsigned type *to, \ + const unsigned type __user *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac2.as = PSW_BITS_AS_SECONDARY, \ - .oac2.a = 1, \ - }; \ int rc; \ \ - asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + asm_inline volatile( \ + " lhi %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \ - EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \ - : [rc] "=&d" (rc), "=Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [spec] "d" (__oac_spec.val), [_to] "a" (to), \ - [_ksize] "K" (size) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ + : [rc] "=d" (rc), [to] "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ : "cc", "0"); \ + if (likely(!rc)) \ + return 0; \ + *to = 0; \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_GET_USER_NOINSTR(char); +DEFINE_GET_USER_NOINSTR(short); +DEFINE_GET_USER_NOINSTR(int); +DEFINE_GET_USER_NOINSTR(long); + +#define DEFINE_GET_USER(type) \ static __always_inline int \ -__get_user_##type(unsigned type *to, unsigned type __user *from, \ +__get_user_##type(unsigned type *to, const unsigned type __user *from, \ unsigned long size) \ { \ int rc; \ @@ -205,107 +305,50 @@ DEFINE_GET_USER(short); DEFINE_GET_USER(int); DEFINE_GET_USER(long); -static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) -{ - int rc; - - switch (size) { - case 1: - rc = __get_user_char((unsigned char *)x, - (unsigned char __user *)ptr, - size); - break; - case 2: - rc = __get_user_short((unsigned short *)x, - (unsigned short __user *)ptr, - size); - break; - case 4: - rc = __get_user_int((unsigned int *)x, - (unsigned int __user *)ptr, - size); - break; - case 8: - rc = __get_user_long((unsigned long *)x, - (unsigned long __user *)ptr, - size); - break; - default: - __get_user_bad(); - break; - } - return rc; -} - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - */ -#define __put_user(x, ptr) \ -({ \ - __typeof__(*(ptr)) __x = (x); \ - int __pu_err = -EFAULT; \ - \ - __chk_user_ptr(ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - case 2: \ - case 4: \ - case 8: \ - __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \ - break; \ - default: \ - __put_user_bad(); \ - break; \ - } \ - __builtin_expect(__pu_err, 0); \ -}) - -#define put_user(x, ptr) \ -({ \ - might_fault(); \ - __put_user(x, ptr); \ -}) - #define __get_user(x, ptr) \ ({ \ - int __gu_err = -EFAULT; \ + const __user void *____guptr = (ptr); \ + int __grc; \ \ __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ + const unsigned char __user *__guptr = ____guptr; \ unsigned char __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_char(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 2: { \ + const unsigned short __user *__guptr = ____guptr; \ unsigned short __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 4: { \ + const unsigned int __user *__guptr = ____guptr; \ unsigned int __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_int(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 8: { \ + const unsigned long __user *__guptr = ____guptr; \ unsigned long __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_long(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ default: \ - __get_user_bad(); \ + __grc = __get_user_bad(); \ break; \ } \ - __builtin_expect(__gu_err, 0); \ + __builtin_expect(__grc, 0); \ }) #define get_user(x, ptr) \ @@ -321,12 +364,34 @@ long __must_check strncpy_from_user(char *dst, const char __user *src, long coun long __must_check strnlen_user(const char __user *src, long count); -/* - * Zero Userspace - */ -unsigned long __must_check __clear_user(void __user *to, unsigned long size); +static uaccess_kmsan_or_inline __must_check unsigned long +__clear_user(void __user *to, unsigned long size) +{ + unsigned long osize; + int cc; + + while (1) { + osize = size; + asm_inline volatile( + " llilh %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "I" (0x81), [from] "Q" (*(const char *)empty_zero_page) + : CC_CLOBBER_LIST("memory", "0")); + if (__builtin_constant_p(osize) && osize <= 4096) + return osize - size; + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + } +} -static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) +static __always_inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { might_fault(); return __clear_user(to, n); @@ -341,109 +406,71 @@ static inline void *s390_kernel_write(void *dst, const void *src, size_t size) return __s390_kernel_write(dst, src, size); } -int __noreturn __put_kernel_bad(void); +void __noreturn __mvc_kernel_nofault_bad(void); -#define __put_kernel_asm(val, to, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_to]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ - : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \ - : [_val] "d" (val) \ - : "cc"); \ - __rc; \ -}) +#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS) -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ - unsigned long __x = (unsigned long)(*((type *)(src))); \ - int __pk_err; \ - \ switch (sizeof(type)) { \ case 1: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \ - break; \ case 2: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \ - break; \ case 4: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \ - break; \ case 8: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \ + asm goto( \ + "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[err_label]) \ + EX_TABLE(1b, %l[err_label]) \ + : [_dst] "=Q" (*(type *)dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_len] "I" (sizeof(type)) \ + : \ + : err_label); \ break; \ default: \ - __pk_err = __put_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__pk_err)) \ - goto err_label; \ } while (0) -int __noreturn __get_kernel_bad(void); +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#define __get_kernel_asm(val, from, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_from]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \ - EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \ - : [rc] "=d" (__rc), [_val] "=d" (val) \ - : [_from] "Q" (*(from)) \ - : "cc"); \ - __rc; \ -}) - -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ - int __gk_err; \ + type *(__dst) = (type *)(dst); \ + int __rc; \ \ switch (sizeof(type)) { \ - case 1: { \ - unsigned char __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 2: { \ - unsigned short __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 4: { \ - unsigned int __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 8: { \ - unsigned long __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \ - *((type *)(dst)) = (type)__x; \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + asm_inline volatile( \ + "0: mvc 0(%[_len],%[_dst]),%[_src]\n" \ + "1: lhi %[_rc],0\n" \ + "2:\n" \ + EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \ + : [_rc] "=d" (__rc), \ + "=m" (*__dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_dst] "a" (__dst), \ + [_len] "I" (sizeof(type))); \ + if (__rc) \ + goto err_label; \ break; \ - }; \ default: \ - __gk_err = __get_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__gk_err)) \ - goto err_label; \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ + +#define __get_kernel_nofault __mvc_kernel_nofault +#define __put_kernel_nofault __mvc_kernel_nofault + void __cmpxchg_user_key_called_with_bad_pointer(void); #define CMPXCHG_USER_KEY_MAX_LOOPS 128 @@ -452,6 +479,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, __uint128_t old, __uint128_t new, unsigned long key, int size) { + bool sacf_flag; int rc = 0; switch (size) { @@ -464,7 +492,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, _old = ((unsigned int)old & 0xff) << shift; _new = ((unsigned int)new & 0xff) << shift; mask = ~(0xff << shift); - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" " llill %[count],%[max_loops]\n" @@ -498,6 +527,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [default_key] "J" (PAGE_DEFAULT_KEY), [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(unsigned char *)uval = prev >> shift; if (!count) rc = -EAGAIN; @@ -512,7 +542,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, _old = ((unsigned int)old & 0xffff) << shift; _new = ((unsigned int)new & 0xffff) << shift; mask = ~(0xffff << shift); - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" " llill %[count],%[max_loops]\n" @@ -546,6 +577,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [default_key] "J" (PAGE_DEFAULT_KEY), [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(unsigned short *)uval = prev >> shift; if (!count) rc = -EAGAIN; @@ -554,7 +586,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, case 4: { unsigned int prev = old; - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" "0: cs %[prev],%[new],%[address]\n" @@ -569,13 +602,15 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [key] "a" (key << 4), [default_key] "J" (PAGE_DEFAULT_KEY) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(unsigned int *)uval = prev; return rc; } case 8: { unsigned long prev = old; - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" "0: csg %[prev],%[new],%[address]\n" @@ -590,13 +625,15 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [key] "a" (key << 4), [default_key] "J" (PAGE_DEFAULT_KEY) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(unsigned long *)uval = prev; return rc; } case 16: { __uint128_t prev = old; - asm volatile( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile( " spka 0(%[key])\n" " sacf 256\n" "0: cdsg %[prev],%[new],%[address]\n" @@ -611,6 +648,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [key] "a" (key << 4), [default_key] "J" (PAGE_DEFAULT_KEY) : "memory", "cc"); + disable_sacf_uaccess(sacf_flag); *(__uint128_t *)uval = prev; return rc; } diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index dc332609f2c3..8018549a1ad2 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -16,7 +16,6 @@ #include <linux/bug.h> #include <linux/sched.h> #include <asm/page.h> -#include <asm/gmap.h> #include <asm/asm.h> #define UVC_CC_OK 0 @@ -616,8 +615,9 @@ static inline int uv_remove_shared(unsigned long addr) return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); } -int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list_item_hdr *secret); +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret); int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size); extern int prot_virt_host; @@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void) } int uv_pin_shared(unsigned long paddr); -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); int uv_convert_from_secure_pte(pte_t pte); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb); +int uv_convert_from_secure(unsigned long paddr); +int uv_convert_from_secure_folio(struct folio *folio); void setup_uv(void); diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 92c73e4d97a9..420a073fdde5 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -6,13 +6,11 @@ #ifndef __ASSEMBLY__ -extern struct vdso_data *vdso_data; - int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #define VDSO_VERSION_STRING LINUX_2.6.29 diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h index 36355af7160b..f8713ce39bb2 100644 --- a/arch/s390/include/asm/vdso/getrandom.h +++ b/arch/s390/include/asm/vdso/getrandom.h @@ -23,18 +23,6 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags); } -static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) -{ - /* - * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace - * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ - * PAGE_OFFSET points to the real VVAR page. - */ - if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) - return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - return &_vdso_rng_data; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index 7937765ccfa5..fb4564308e9d 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -14,12 +14,7 @@ #include <linux/compiler.h> -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) +static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd) { u64 adj, now; @@ -49,12 +44,4 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) return syscall2(__NR_clock_getres, (long)clkid, (long)ts); } -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif - #endif diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h index 3eb576ecd3bd..d346ebe51301 100644 --- a/arch/s390/include/asm/vdso/vsyscall.h +++ b/arch/s390/include/asm/vdso/vsyscall.h @@ -2,32 +2,12 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#define __VDSO_RND_DATA_OFFSET 768 - #ifndef __ASSEMBLY__ #include <linux/hrtimer.h> #include <vdso/datapage.h> #include <asm/vdso.h> -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES -}; - -static __always_inline struct vdso_data *__s390_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __s390_get_k_vdso_data - -static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void) -{ - return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; -} -#define __arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h index 203acd6e431b..eaa19dee7699 100644 --- a/arch/s390/include/asm/word-at-a-time.h +++ b/arch/s390/include/asm/word-at-a-time.h @@ -52,7 +52,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) { unsigned long data; - asm volatile( + asm_inline volatile( "0: lg %[data],0(%[addr])\n" "1: nopr %%r7\n" EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr]) diff --git a/arch/s390/include/uapi/asm/diag.h b/arch/s390/include/uapi/asm/diag.h new file mode 100644 index 000000000000..b7e6ccb4ff6e --- /dev/null +++ b/arch/s390/include/uapi/asm/diag.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Diag ioctls and its associated structures definitions. + * + * Copyright IBM Corp. 2024 + */ + +#ifndef __S390_UAPI_ASM_DIAG_H +#define __S390_UAPI_ASM_DIAG_H + +#include <linux/types.h> + +#define DIAG_MAGIC_STR 'D' + +struct diag324_pib { + __u64 address; + __u64 sequence; +}; + +struct diag310_memtop { + __u64 address; + __u64 nesting_lvl; +}; + +/* Diag ioctl definitions */ +#define DIAG324_GET_PIBBUF _IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib) +#define DIAG324_GET_PIBLEN _IOR(DIAG_MAGIC_STR, 0x78, size_t) +#define DIAG310_GET_STRIDE _IOR(DIAG_MAGIC_STR, 0x79, size_t) +#define DIAG310_GET_MEMTOPLEN _IOWR(DIAG_MAGIC_STR, 0x7a, size_t) +#define DIAG310_GET_MEMTOPBUF _IOWR(DIAG_MAGIC_STR, 0x7b, struct diag310_memtop) + +#endif /* __S390_UAPI_ASM_DIAG_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 48caae8c7e10..ea5ed6654050 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -38,14 +38,15 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o +obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o +obj-y += diag/ -extra-y += vmlinux.lds +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index 09cd24cbe74e..6252b7d115dd 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -2,9 +2,9 @@ #include <linux/pgtable.h> #include <asm/abs_lowcore.h> +#include <asm/sections.h> unsigned long __bootdata_preserved(__abs_lowcore); -int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 8d5d0de35de0..90c0e6408992 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,41 +1,90 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef pr_fmt +#define pr_fmt(fmt) "alt: " fmt +#endif + #include <linux/uaccess.h> +#include <linux/printk.h> #include <asm/nospec-branch.h> #include <asm/abs_lowcore.h> #include <asm/alternative.h> #include <asm/facility.h> +#include <asm/sections.h> +#include <asm/machine.h> + +#ifndef a_debug +#define a_debug pr_debug +#endif + +#ifndef __kernel_va +#define __kernel_va(x) (void *)(x) +#endif + +unsigned long __bootdata_preserved(machine_features[1]); + +struct alt_debug { + unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG]; + unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG]; + int spec; +}; + +static struct alt_debug __bootdata_preserved(alt_debug); + +static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data) +{ + char oinsn[33], ninsn[33]; + unsigned long kptr; + unsigned int pos; + + for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++) + hex_byte_pack(&oinsn[2 * pos], old[pos]); + oinsn[2 * pos] = 0; + for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++) + hex_byte_pack(&ninsn[2 * pos], new[pos]); + ninsn[2 * pos] = 0; + kptr = (unsigned long)__kernel_va(old); + a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn); +} void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - u8 *instr, *replacement; + struct alt_debug *d; struct alt_instr *a; - bool replace; + bool debug, replace; + u8 *old, *new; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. */ + d = &alt_debug; for (a = start; a < end; a++) { if (!(a->ctx & ctx)) continue; switch (a->type) { case ALT_TYPE_FACILITY: replace = test_facility(a->data); + debug = __test_facility(a->data, d->facilities); + break; + case ALT_TYPE_FEATURE: + replace = test_machine_feature(a->data); + debug = __test_machine_feature(a->data, d->mfeatures); break; case ALT_TYPE_SPEC: replace = nobp_enabled(); - break; - case ALT_TYPE_LOWCORE: - replace = have_relocated_lowcore(); + debug = d->spec; break; default: replace = false; + debug = false; } if (!replace) continue; - instr = (u8 *)&a->instr_offset + a->instr_offset; - replacement = (u8 *)&a->repl_offset + a->repl_offset; - s390_kernel_write(instr, replacement, a->instrlen); + old = (u8 *)&a->instr_offset + a->instr_offset; + new = (u8 *)&a->repl_offset + a->repl_offset; + if (debug) + alternative_dump(old, new, a->instrlen, a->type, a->data); + s390_kernel_write(old, new, a->instrlen); } } diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 862a9140528e..95ecad9c7d7d 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -5,15 +5,14 @@ * and format the required data. */ -#define ASM_OFFSETS_C - #include <linux/kbuild.h> -#include <linux/kvm_host.h> #include <linux/sched.h> #include <linux/purgatory.h> #include <linux/pgtable.h> -#include <linux/ftrace.h> +#include <linux/ftrace_regs.h> +#include <asm/kvm_host_types.h> #include <asm/stacktrace.h> +#include <asm/ptrace.h> int main(void) { @@ -49,8 +48,8 @@ int main(void) OFFSET(__PT_R14, pt_regs, gprs[14]); OFFSET(__PT_R15, pt_regs, gprs[15]); OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); + OFFSET(__PT_INT_CODE, pt_regs, int_code); OFFSET(__PT_FLAGS, pt_regs, flags); - OFFSET(__PT_CR1, pt_regs, cr1); OFFSET(__PT_LAST_BREAK, pt_regs, last_break); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); @@ -76,7 +75,8 @@ int main(void) OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); - OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_CODE, lowcore, pgm_code); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code); OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); OFFSET(__LC_PER_CODE, lowcore, per_code); @@ -122,7 +122,6 @@ int main(void) OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); OFFSET(__LC_INT_CLOCK, lowcore, int_clock); - OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); OFFSET(__LC_CURRENT, lowcore, current_task); OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); @@ -175,12 +174,6 @@ int main(void) DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size)); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* function graph return value tracing */ - OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2); - OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); - DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); -#endif OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs); DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs)); diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c index bf983513dd33..c217a5e64094 100644 --- a/arch/s390/kernel/cert_store.c +++ b/arch/s390/kernel/cert_store.c @@ -138,7 +138,7 @@ static void cert_store_key_describe(const struct key *key, struct seq_file *m) * First 64 bytes of the key description is key name in EBCDIC CP 500. * Convert it to ASCII for displaying in /proc/keys. */ - strscpy(ascii, key->description, sizeof(ascii)); + strscpy(ascii, key->description); EBCASC_500(ascii, VC_NAME_LEN_BYTES); seq_puts(m, ascii); @@ -235,7 +235,7 @@ static int __diag320(unsigned long subcode, void *addr) { union register_pair rp = { .even = (unsigned long)addr, }; - asm volatile( + asm_inline volatile( " diag %[rp],%[subcode],0x320\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c index c8575dbc890d..4b9b34f95d72 100644 --- a/arch/s390/kernel/cpacf.c +++ b/arch/s390/kernel/cpacf.c @@ -14,7 +14,7 @@ #define CPACF_QUERY(name, instruction) \ static ssize_t name##_query_raw_read(struct file *fp, \ struct kobject *kobj, \ - struct bin_attribute *attr, \ + const struct bin_attribute *attr, \ char *buf, loff_t offs, \ size_t count) \ { \ @@ -24,7 +24,7 @@ static ssize_t name##_query_raw_read(struct file *fp, \ return -EOPNOTSUPP; \ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \ } \ -static BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) +static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) CPACF_QUERY(km, KM); CPACF_QUERY(kmc, KMC); @@ -40,20 +40,20 @@ CPACF_QUERY(prno, PRNO); CPACF_QUERY(kma, KMA); CPACF_QUERY(kdsa, KDSA); -#define CPACF_QAI(name, instruction) \ -static ssize_t name##_query_auth_info_raw_read( \ - struct file *fp, struct kobject *kobj, \ - struct bin_attribute *attr, char *buf, loff_t offs, \ - size_t count) \ -{ \ - cpacf_qai_t qai; \ - \ - if (!cpacf_qai(CPACF_##instruction, &qai)) \ - return -EOPNOTSUPP; \ - return memory_read_from_buffer(buf, count, &offs, &qai, \ - sizeof(qai)); \ -} \ -static BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) +#define CPACF_QAI(name, instruction) \ +static ssize_t name##_query_auth_info_raw_read( \ + struct file *fp, struct kobject *kobj, \ + const struct bin_attribute *attr, char *buf, loff_t offs, \ + size_t count) \ +{ \ + cpacf_qai_t qai; \ + \ + if (!cpacf_qai(CPACF_##instruction, &qai)) \ + return -EOPNOTSUPP; \ + return memory_read_from_buffer(buf, count, &offs, &qai, \ + sizeof(qai)); \ +} \ +static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) CPACF_QAI(km, KM); CPACF_QAI(kmc, KMC); @@ -69,7 +69,7 @@ CPACF_QAI(prno, PRNO); CPACF_QAI(kma, KMA); CPACF_QAI(kdsa, KDSA); -static struct bin_attribute *cpacf_attrs[] = { +static const struct bin_attribute *const cpacf_attrs[] = { &bin_attr_km_query_raw, &bin_attr_kmc_query_raw, &bin_attr_kimd_query_raw, @@ -101,7 +101,7 @@ static struct bin_attribute *cpacf_attrs[] = { static const struct attribute_group cpacf_attr_grp = { .name = "cpacf", - .bin_attrs = cpacf_attrs, + .bin_attrs_new = cpacf_attrs, }; static int __init cpacf_init(void) diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c index 1b2ae42a0c15..76210f001028 100644 --- a/arch/s390/kernel/cpufeature.c +++ b/arch/s390/kernel/cpufeature.c @@ -5,11 +5,13 @@ #include <linux/cpufeature.h> #include <linux/bug.h> +#include <asm/machine.h> #include <asm/elf.h> enum { TYPE_HWCAP, TYPE_FACILITY, + TYPE_MACHINE, }; struct s390_cpu_feature { @@ -21,6 +23,7 @@ static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = { [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA}, [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS}, [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158}, + [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288}, }; /* @@ -38,6 +41,8 @@ int cpu_have_feature(unsigned int num) return !!(elf_hwcap & BIT(feature->num)); case TYPE_FACILITY: return test_facility(feature->num); + case TYPE_MACHINE: + return test_machine_feature(feature->num); default: WARN_ON_ONCE(1); return 0; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index cd0c93a8fb8b..adb164223f8c 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -63,9 +63,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu) { struct save_area *sa; - sa = memblock_alloc(sizeof(*sa), 8); - if (!sa) - return NULL; + sa = memblock_alloc_or_panic(sizeof(*sa), 8); if (is_boot_cpu) list_add(&sa->list, &dump_save_areas); @@ -248,15 +246,6 @@ bool is_kdump_kernel(void) } EXPORT_SYMBOL_GPL(is_kdump_kernel); -static const char *nt_name(Elf64_Word type) -{ - const char *name = "LINUX"; - - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - name = KEXEC_CORE_NOTE_NAME; - return name; -} - /* * Initialize ELF note */ @@ -281,10 +270,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, return PTR_ADD(buf, len); } -static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) -{ - return nt_init_name(buf, type, desc, d_len, nt_name(type)); -} +#define nt_init(buf, type, desc) \ + nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type) /* * Calculate the size of ELF note @@ -300,10 +287,7 @@ static size_t nt_size_name(int d_len, const char *name) return size; } -static inline size_t nt_size(Elf64_Word type, int d_len) -{ - return nt_size_name(d_len, nt_name(type)); -} +#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type) /* * Fill ELF notes for one CPU with save area registers @@ -324,18 +308,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); /* Create ELF notes for the CPU */ - ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); - ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); - ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); - ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); - ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); - ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); - ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + ptr = nt_init(ptr, PRSTATUS, nt_prstatus); + ptr = nt_init(ptr, PRFPREG, nt_fpregset); + ptr = nt_init(ptr, S390_TIMER, sa->timer); + ptr = nt_init(ptr, S390_TODCMP, sa->todcmp); + ptr = nt_init(ptr, S390_TODPREG, sa->todpreg); + ptr = nt_init(ptr, S390_CTRS, sa->ctrs); + ptr = nt_init(ptr, S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - ptr = nt_init(ptr, NT_S390_VXRS_HIGH, - &sa->vxrs_high, sizeof(sa->vxrs_high)); - ptr = nt_init(ptr, NT_S390_VXRS_LOW, - &sa->vxrs_low, sizeof(sa->vxrs_low)); + ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high); + ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low); } return ptr; } @@ -348,16 +330,16 @@ static size_t get_cpu_elf_notes_size(void) struct save_area *sa = NULL; size_t size; - size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); - size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); - size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); - size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); - size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); - size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); - size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + size = nt_size(PRSTATUS, struct elf_prstatus); + size += nt_size(PRFPREG, elf_fpregset_t); + size += nt_size(S390_TIMER, sa->timer); + size += nt_size(S390_TODCMP, sa->todcmp); + size += nt_size(S390_TODPREG, sa->todpreg); + size += nt_size(S390_CTRS, sa->ctrs); + size += nt_size(S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); - size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + size += nt_size(S390_VXRS_HIGH, sa->vxrs_high); + size += nt_size(S390_VXRS_LOW, sa->vxrs_low); } return size; @@ -372,8 +354,8 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; - strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); + strscpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, PRPSINFO, prpsinfo); } /* @@ -508,6 +490,19 @@ static int get_mem_chunk_cnt(void) return cnt; } +static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr, + unsigned long vaddr, unsigned long size) +{ + phdr->p_type = PT_LOAD; + phdr->p_vaddr = vaddr; + phdr->p_offset = paddr; + phdr->p_paddr = paddr; + phdr->p_filesz = size; + phdr->p_memsz = size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; +} + /* * Initialize ELF loads (new kernel) */ @@ -520,14 +515,8 @@ static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm) if (os_info_has_vm) old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); for_each_physmem_range(idx, &oldmem_type, &start, &end) { - phdr->p_type = PT_LOAD; - phdr->p_vaddr = old_identity_base + start; - phdr->p_offset = start; - phdr->p_paddr = start; - phdr->p_filesz = end - start; - phdr->p_memsz = end - start; - phdr->p_flags = PF_R | PF_W | PF_X; - phdr->p_align = PAGE_SIZE; + fill_ptload(phdr, start, old_identity_base + start, + end - start); phdr++; } } @@ -537,6 +526,22 @@ static bool os_info_has_vm(void) return os_info_old_value(OS_INFO_KASLR_OFFSET); } +#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM +/* + * Fill PT_LOAD for a physical memory range owned by a device and detected by + * its device driver. + */ +void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr, + unsigned long long paddr, unsigned long long size) +{ + unsigned long old_identity_base = 0; + + if (os_info_has_vm()) + old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE); + fill_ptload(phdr, paddr, old_identity_base + paddr, size); +} +#endif + /* * Prepare PT_LOAD type program header for kernel image region */ @@ -589,7 +594,7 @@ static size_t get_elfcorehdr_size(int phdr_count) /* PT_NOTES */ size += sizeof(Elf64_Phdr); /* nt_prpsinfo */ - size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + size += nt_size(PRPSINFO, struct elf_prpsinfo); /* regsets */ size += get_cpu_cnt() * get_cpu_elf_notes_size(); /* nt_vmcoreinfo */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index de19fd8a6a95..2a41be2f7925 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -24,6 +24,7 @@ #include <linux/export.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/math.h> #include <linux/minmax.h> #include <linux/debugfs.h> @@ -94,9 +95,6 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, size_t out_buf_size, const char *in_buf); -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, - const char *inbuf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -253,7 +251,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area, rc->level = level; rc->buf_size = buf_size; rc->entry_size = sizeof(debug_entry_t) + buf_size; - strscpy(rc->name, name, sizeof(rc->name)); + strscpy(rc->name, name); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); refcount_set(&(rc->ref_count), 0); @@ -354,7 +352,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode) for (i = 0; i < in->nr_areas; i++) { for (j = 0; j < in->pages_per_area; j++) memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + rc->active_pages[i] = in->active_pages[i]; + rc->active_entries[i] = in->active_entries[i]; } + rc->active_area = in->active_area; out: spin_unlock_irqrestore(&in->lock, flags); return rc; @@ -422,11 +423,17 @@ out: return len; } -/* - * debug_next_entry: - * - goto next entry in p_info +/** + * debug_next_entry - Go to the next entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the next entry. If no further entry + * exists the current position is set to one after the end the return value + * indicates that no further entries exist. + * + * Return: True if there are more following entries, false otherwise */ -static inline int debug_next_entry(file_private_info_t *p_info) +static inline bool debug_next_entry(file_private_info_t *p_info) { debug_info_t *id; @@ -434,10 +441,10 @@ static inline int debug_next_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { p_info->act_entry = 0; p_info->act_page = 0; - goto out; + return true; } if (!id->areas) - return 1; + return false; p_info->act_entry += id->entry_size; /* switch to next page, if we reached the end of the page */ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { @@ -450,10 +457,87 @@ static inline int debug_next_entry(file_private_info_t *p_info) p_info->act_page = 0; } if (p_info->act_area >= id->nr_areas) - return 1; + return false; } -out: - return 0; + return true; +} + +/** + * debug_to_act_entry - Go to the currently active entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the currently active + * entry of @p_info->debug_info_snap + */ +static void debug_to_act_entry(file_private_info_t *p_info) +{ + debug_info_t *snap_id; + + snap_id = p_info->debug_info_snap; + p_info->act_area = snap_id->active_area; + p_info->act_page = snap_id->active_pages[snap_id->active_area]; + p_info->act_entry = snap_id->active_entries[snap_id->active_area]; +} + +/** + * debug_prev_entry - Go to the previous entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the previous entry. If no previous entry + * exists the current position is set left as DEBUG_PROLOG_ENTRY and the return value + * indicates that no previous entries exist. + * + * Return: True if there are more previous entries, false otherwise + */ + +static inline bool debug_prev_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) + debug_to_act_entry(p_info); + if (!id->areas) + return false; + p_info->act_entry -= id->entry_size; + /* switch to prev page, if we reached the beginning of the page */ + if (p_info->act_entry < 0) { + /* end of previous page */ + p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size; + p_info->act_page--; + if (p_info->act_page < 0) { + /* previous area */ + p_info->act_area--; + p_info->act_page = id->pages_per_area - 1; + } + if (p_info->act_area < 0) + p_info->act_area = (id->nr_areas - 1) % id->nr_areas; + } + /* check full circle */ + if (id->active_area == p_info->act_area && + id->active_pages[id->active_area] == p_info->act_page && + id->active_entries[id->active_area] == p_info->act_entry) + return false; + return true; +} + +/** + * debug_move_entry - Go to next entry in either the forward or backward direction + * @p_info: Private info that is manipulated + * @reverse: If true go to the next entry in reverse i.e. previous + * + * Sets the current position in @p_info to the next (@reverse == false) or + * previous (@reverse == true) entry. + * + * Return: True if there are further entries in that direction, + * false otherwise. + */ +static bool debug_move_entry(file_private_info_t *p_info, bool reverse) +{ + if (reverse) + return debug_prev_entry(p_info); + else + return debug_next_entry(p_info); } /* @@ -495,7 +579,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */ } if (copy_size == formatted_line_residue) { entry_offset = 0; - if (debug_next_entry(p_info)) + if (!debug_next_entry(p_info)) goto out; } } @@ -530,6 +614,42 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, return rc; /* number of input characters */ } +static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info, + struct debug_view *view) +{ + debug_info_t *debug_info_snapshot; + file_private_info_t *p_info; + + /* + * Make snapshot of current debug areas to get it consistent. + * To copy all the areas is only needed, if we have a view which + * formats the debug areas. + */ + if (!view->format_proc && !view->header_proc) + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + else + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + + if (!debug_info_snapshot) + return NULL; + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { + debug_info_free(debug_info_snapshot); + return NULL; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = view; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + debug_info_get(debug_info); + + return p_info; +} + /* * debug_open: * - called for user open() @@ -538,7 +658,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, */ static int debug_open(struct inode *inode, struct file *file) { - debug_info_t *debug_info, *debug_info_snapshot; + debug_info_t *debug_info; file_private_info_t *p_info; int i, rc = 0; @@ -556,42 +676,26 @@ static int debug_open(struct inode *inode, struct file *file) goto out; found: - - /* Make snapshot of current debug areas to get it consistent. */ - /* To copy all the areas is only needed, if we have a view which */ - /* formats the debug areas. */ - - if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc) - debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); - else - debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); - - if (!debug_info_snapshot) { - rc = -ENOMEM; - goto out; - } - p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + p_info = debug_file_private_alloc(debug_info, debug_info->views[i]); if (!p_info) { - debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } - p_info->offset = 0; - p_info->debug_info_snap = debug_info_snapshot; - p_info->debug_info_org = debug_info; - p_info->view = debug_info->views[i]; - p_info->act_area = 0; - p_info->act_page = 0; - p_info->act_entry = DEBUG_PROLOG_ENTRY; - p_info->act_entry_offset = 0; file->private_data = p_info; - debug_info_get(debug_info); nonseekable_open(inode, file); out: mutex_unlock(&debug_mutex); return rc; } +static void debug_file_private_free(file_private_info_t *p_info) +{ + if (p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); + debug_info_put(p_info->debug_info_org); + kfree(p_info); +} + /* * debug_close: * - called for user close() @@ -602,13 +706,59 @@ static int debug_close(struct inode *inode, struct file *file) file_private_info_t *p_info; p_info = (file_private_info_t *) file->private_data; - if (p_info->debug_info_snap) - debug_info_free(p_info->debug_info_snap); - debug_info_put(p_info->debug_info_org); - kfree(file->private_data); + debug_file_private_free(p_info); + file->private_data = NULL; return 0; /* success */ } +/** + * debug_dump - Get a textual representation of debug info, or as much as fits + * @id: Debug information to use + * @view: View with which to dump the debug information + * @buf: Buffer the textual debug data representation is written to + * @buf_size: Size of the buffer, including the trailing '\0' byte + * @reverse: Go backwards from the last written entry + * + * This function may be used whenever a textual representation of the debug + * information is required without using an s390dbf file. + * + * Note: It is the callers responsibility to supply a view that is compatible + * with the debug information data. + * + * Return: On success returns the number of bytes written to the buffer not + * including the trailing '\0' byte. If bug_size == 0 the function returns 0. + * On failure an error code less than 0 is returned. + */ +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size, bool reverse) +{ + file_private_info_t *p_info; + size_t size, offset = 0; + + /* Need space for '\0' byte */ + if (buf_size < 1) + return 0; + buf_size--; + + p_info = debug_file_private_alloc(id, view); + if (!p_info) + return -ENOMEM; + + /* There is always at least the DEBUG_PROLOG_ENTRY */ + do { + size = debug_format_entry(p_info); + size = min(size, buf_size - offset); + memcpy(buf + offset, p_info->temp_buf, size); + offset += size; + if (offset >= buf_size) + break; + } while (debug_move_entry(p_info, reverse)); + debug_file_private_free(p_info); + buf[offset] = '\0'; + + return offset; +} + /* Create debugfs entries and add to internal list. */ static void _debug_register(debug_info_t *id) { @@ -972,7 +1122,7 @@ static int s390dbf_procactive(const struct ctl_table *table, int write, return 0; } -static struct ctl_table s390dbf_table[] = { +static const struct ctl_table s390dbf_table[] = { { .procname = "debug_stoppable", .data = &debug_stoppable, @@ -1532,8 +1682,8 @@ EXPORT_SYMBOL(debug_dflt_header_fn); #define DEBUG_SPRINTF_MAX_ARGS 10 -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, const char *inbuf) +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; @@ -1570,6 +1720,7 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, out: return rc; } +EXPORT_SYMBOL(debug_sprintf_format_fn); /* * debug_init: diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile new file mode 100644 index 000000000000..956aee6c4090 --- /dev/null +++ b/arch/s390/kernel/diag/Makefile @@ -0,0 +1 @@ +obj-y := diag_misc.o diag324.o diag.o diag310.o diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c index cdd6e31344fa..56b862ba9be8 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -17,7 +17,7 @@ #include <asm/trace/diag.h> #include <asm/sections.h> #include <asm/asm.h> -#include "entry.h" +#include "../entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; @@ -51,8 +51,10 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, + [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" }, [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; @@ -193,7 +195,7 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad { union register_pair rp = { .even = *subcode, .odd = size }; - asm volatile( + asm_inline volatile( " diag %[addr],%[rp],0x204\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -284,7 +286,7 @@ int diag224(void *ptr) int rc = -EOPNOTSUPP; diag_stat_inc(DIAG_STAT_X224); - asm volatile("\n" + asm_inline volatile("\n" " diag %[type],%[addr],0x224\n" "0: lhi %[rc],0\n" "1:\n" diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c new file mode 100644 index 000000000000..d6a34454aa5a --- /dev/null +++ b/arch/s390/kernel/diag/diag310.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request memory topology information via diag0x310. + * + * Copyright IBM Corp. 2025 + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <asm/diag.h> +#include <asm/sclp.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +#define DIAG310_LEVELMIN 1 +#define DIAG310_LEVELMAX 6 + +enum diag310_sc { + DIAG310_SUBC_0 = 0, + DIAG310_SUBC_1 = 1, + DIAG310_SUBC_4 = 4, + DIAG310_SUBC_5 = 5 +}; + +enum diag310_retcode { + DIAG310_RET_SUCCESS = 0x0001, + DIAG310_RET_BUSY = 0x0101, + DIAG310_RET_OPNOTSUPP = 0x0102, + DIAG310_RET_SC4_INVAL = 0x0401, + DIAG310_RET_SC4_NODATA = 0x0402, + DIAG310_RET_SC5_INVAL = 0x0501, + DIAG310_RET_SC5_NODATA = 0x0502, + DIAG310_RET_SC5_ESIZE = 0x0503 +}; + +union diag310_response { + u64 response; + struct { + u64 result : 32; + u64 : 16; + u64 rc : 16; + }; +}; + +union diag310_req_subcode { + u64 subcode; + struct { + u64 : 48; + u64 st : 8; + u64 sc : 8; + }; +}; + +union diag310_req_size { + u64 size; + struct { + u64 page_count : 32; + u64 : 32; + }; +}; + +static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, .odd = size }; + + diag_stat_inc(DIAG_STAT_X310); + asm volatile("diag %[rp],%[subcode],0x310\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static int diag310_result_to_errno(unsigned int result) +{ + switch (result) { + case DIAG310_RET_BUSY: + return -EBUSY; + case DIAG310_RET_OPNOTSUPP: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int diag310_get_subcode_mask(unsigned long *mask) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_0, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *mask = res.response; + return 0; +} + +static int diag310_get_memtop_stride(unsigned long *stride) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_1, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *stride = res.result; + return 0; +} + +static int diag310_get_memtop_size(unsigned long *pages, unsigned long level) +{ + union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level }; + union diag310_response res; + + res.response = diag310(req.subcode, 0, NULL); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + *pages = res.result; + return 0; + case DIAG310_RET_SC4_NODATA: + return -ENODATA; + case DIAG310_RET_SC4_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level) +{ + union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level }; + union diag310_req_size req_size = { .page_count = pages }; + union diag310_response res; + + res.response = diag310(req_sc.subcode, req_size.size, buf); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + return 0; + case DIAG310_RET_SC5_NODATA: + return -ENODATA; + case DIAG310_RET_SC5_ESIZE: + return -EOVERFLOW; + case DIAG310_RET_SC5_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_check_features(void) +{ + static int features_available; + unsigned long mask; + int rc; + + if (READ_ONCE(features_available)) + return 0; + if (!sclp.has_diag310) + return -EOPNOTSUPP; + rc = diag310_get_subcode_mask(&mask); + if (rc) + return rc; + if (!test_bit_inv(DIAG310_SUBC_1, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_4, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_5, &mask)) + return -EOPNOTSUPP; + WRITE_ONCE(features_available, 1); + return 0; +} + +static int memtop_get_stride_len(unsigned long *res) +{ + static unsigned long memtop_stride; + unsigned long stride; + int rc; + + stride = READ_ONCE(memtop_stride); + if (!stride) { + rc = diag310_get_memtop_stride(&stride); + if (rc) + return rc; + WRITE_ONCE(memtop_stride, stride); + } + *res = stride; + return 0; +} + +static int memtop_get_page_count(unsigned long *res, unsigned long level) +{ + static unsigned long memtop_pages[DIAG310_LEVELMAX]; + unsigned long pages; + int rc; + + if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN) + return -EINVAL; + pages = READ_ONCE(memtop_pages[level - 1]); + if (!pages) { + rc = diag310_get_memtop_size(&pages, level); + if (rc) + return rc; + WRITE_ONCE(memtop_pages[level - 1], pages); + } + *res = pages; + return 0; +} + +long diag310_memtop_stride(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long stride; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + rc = memtop_get_stride_len(&stride); + if (rc) + return rc; + if (put_user(stride, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_len(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long pages, level; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, argp)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + if (put_user(pages * PAGE_SIZE, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_buf(unsigned long arg) +{ + struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg; + unsigned long level, pages, data_size; + u64 address; + void *buf; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, &udata->nesting_lvl)) + return -EFAULT; + if (get_user(address, &udata->address)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + data_size = pages * PAGE_SIZE; + buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, + NUMA_NO_NODE, __builtin_return_address(0)); + if (!buf) + return -ENOMEM; + rc = diag310_store_topology_map(buf, pages, level); + if (rc) + goto out; + if (copy_to_user((void __user *)address, buf, data_size)) + rc = -EFAULT; +out: + vfree(buf); + return rc; +} diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c new file mode 100644 index 000000000000..7fa4c0b7eb6c --- /dev/null +++ b/arch/s390/kernel/diag/diag324.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request power readings for resources in a computing environment via + * diag 0x324. diag 0x324 stores the power readings in the power information + * block (pib). + * + * Copyright IBM Corp. 2024 + */ + +#define pr_fmt(fmt) "diag324: " fmt +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/ioctl.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/ktime.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#include <asm/diag.h> +#include <asm/sclp.h> +#include <asm/timex.h> +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +enum subcode { + DIAG324_SUBC_0 = 0, + DIAG324_SUBC_1 = 1, + DIAG324_SUBC_2 = 2, +}; + +enum retcode { + DIAG324_RET_SUCCESS = 0x0001, + DIAG324_RET_SUBC_NOTAVAIL = 0x0103, + DIAG324_RET_INSUFFICIENT_SIZE = 0x0104, + DIAG324_RET_READING_UNAVAILABLE = 0x0105, +}; + +union diag324_response { + u64 response; + struct { + u64 installed : 32; + u64 : 16; + u64 rc : 16; + } sc0; + struct { + u64 format : 16; + u64 : 16; + u64 pib_len : 16; + u64 rc : 16; + } sc1; + struct { + u64 : 48; + u64 rc : 16; + } sc2; +}; + +union diag324_request { + u64 request; + struct { + u64 : 32; + u64 allocated : 16; + u64 : 12; + u64 sc : 4; + } sc2; +}; + +struct pib { + u32 : 8; + u32 num : 8; + u32 len : 16; + u32 : 24; + u32 hlen : 8; + u64 : 64; + u64 intv; + u8 r[]; +} __packed; + +struct pibdata { + struct pib *pib; + ktime_t expire; + u64 sequence; + size_t len; + int rc; +}; + +static DEFINE_MUTEX(pibmutex); +static struct pibdata pibdata; + +#define PIBWORK_DELAY (5 * NSEC_PER_SEC) + +static void pibwork_handler(struct work_struct *work); +static DECLARE_DELAYED_WORK(pibwork, pibwork_handler); + +static unsigned long diag324(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr }; + + diag_stat_inc(DIAG_STAT_X324); + asm volatile("diag %[rp],%[subcode],0x324\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static void pibwork_handler(struct work_struct *work) +{ + struct pibdata *data = &pibdata; + ktime_t timedout; + + mutex_lock(&pibmutex); + timedout = ktime_add_ns(data->expire, PIBWORK_DELAY); + if (ktime_before(ktime_get(), timedout)) { + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + goto out; + } + vfree(data->pib); + data->pib = NULL; +out: + mutex_unlock(&pibmutex); +} + +static void pib_update(struct pibdata *data) +{ + union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len }; + union diag324_response res; + int rc; + + memset(data->pib, 0, data->len); + res.response = diag324(req.request, data->pib); + switch (res.sc2.rc) { + case DIAG324_RET_SUCCESS: + rc = 0; + break; + case DIAG324_RET_SUBC_NOTAVAIL: + rc = -ENOENT; + break; + case DIAG324_RET_INSUFFICIENT_SIZE: + rc = -EMSGSIZE; + break; + case DIAG324_RET_READING_UNAVAILABLE: + rc = -EBUSY; + break; + default: + rc = -EINVAL; + } + data->rc = rc; +} + +long diag324_pibbuf(unsigned long arg) +{ + struct diag324_pib __user *udata = (struct diag324_pib __user *)arg; + struct pibdata *data = &pibdata; + static bool first = true; + u64 address; + int rc; + + if (!data->len) + return -EOPNOTSUPP; + if (get_user(address, &udata->address)) + return -EFAULT; + mutex_lock(&pibmutex); + rc = -ENOMEM; + if (!data->pib) + data->pib = vmalloc(data->len); + if (!data->pib) + goto out; + if (first || ktime_after(ktime_get(), data->expire)) { + pib_update(data); + data->sequence++; + data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv)); + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + first = false; + } + rc = data->rc; + if (rc != 0 && rc != -EBUSY) + goto out; + rc = copy_to_user((void __user *)address, data->pib, data->pib->len); + rc |= put_user(data->sequence, &udata->sequence); + if (rc) + rc = -EFAULT; +out: + mutex_unlock(&pibmutex); + return rc; +} + +long diag324_piblen(unsigned long arg) +{ + struct pibdata *data = &pibdata; + + if (!data->len) + return -EOPNOTSUPP; + if (put_user(data->len, (size_t __user *)arg)) + return -EFAULT; + return 0; +} + +static int __init diag324_init(void) +{ + union diag324_response res; + unsigned long installed; + + if (!sclp.has_diag324) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_0, NULL); + if (res.sc0.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + installed = res.response; + if (!test_bit_inv(DIAG324_SUBC_1, &installed)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG324_SUBC_2, &installed)) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_1, NULL); + if (res.sc1.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + pibdata.len = res.sc1.pib_len; + return 0; +} +device_initcall(diag324_init); diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h new file mode 100644 index 000000000000..7080be946785 --- /dev/null +++ b/arch/s390/kernel/diag/diag_ioctl.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DIAG_IOCTL_H +#define _DIAG_IOCTL_H + +#include <linux/types.h> + +long diag324_pibbuf(unsigned long arg); +long diag324_piblen(unsigned long arg); + +long diag310_memtop_stride(unsigned long arg); +long diag310_memtop_len(unsigned long arg); +long diag310_memtop_buf(unsigned long arg); + +#endif /* _DIAG_IOCTL_H */ diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c new file mode 100644 index 000000000000..efffe02ea02e --- /dev/null +++ b/arch/s390/kernel/diag/diag_misc.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide diagnose information via misc device /dev/diag. + * + * Copyright IBM Corp. 2024 + */ + +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/types.h> + +#include <uapi/asm/diag.h> +#include "diag_ioctl.h" + +static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long rc; + + switch (cmd) { + case DIAG324_GET_PIBLEN: + rc = diag324_piblen(arg); + break; + case DIAG324_GET_PIBBUF: + rc = diag324_pibbuf(arg); + break; + case DIAG310_GET_STRIDE: + rc = diag310_memtop_stride(arg); + break; + case DIAG310_GET_MEMTOPLEN: + rc = diag310_memtop_len(arg); + break; + case DIAG310_GET_MEMTOPBUF: + rc = diag310_memtop_buf(arg); + break; + default: + rc = -ENOIOCTLCMD; + break; + } + return rc; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = diag_ioctl, +}; + +static struct miscdevice diagdev = { + .name = "diag", + .minor = MISC_DYNAMIC_MINOR, + .fops = &fops, + .mode = 0444, +}; + +static int diag_init(void) +{ + return misc_register(&diagdev); +} + +device_initcall(diag_init); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 1ecd0580561f..dd410962ecbe 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -17,6 +17,7 @@ #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> +#include <asm/asm-offsets.h> #include <asm/processor.h> #include <asm/debug.h> #include <asm/dis.h> @@ -198,13 +199,8 @@ void __noreturn die(struct pt_regs *regs, const char *str) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, + printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); -#ifdef CONFIG_PREEMPT - pr_cont("PREEMPT "); -#elif defined(CONFIG_PREEMPT_RT) - pr_cont("PREEMPT_RT "); -#endif pr_cont("SMP "); if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 62f8f5a750a3..54cf0923050f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/sched/debug.h> +#include <linux/cpufeature.h> #include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> @@ -21,6 +22,8 @@ #include <asm/asm-extable.h> #include <linux/memblock.h> #include <asm/access-regs.h> +#include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/fpu.h> @@ -36,12 +39,14 @@ #include <asm/boot_data.h> #include "entry.h" -#define decompressor_handled_param(param) \ -static int __init ignore_decompressor_param_##param(char *s) \ +#define __decompressor_handled_param(func, param) \ +static int __init ignore_decompressor_param_##func(char *s) \ { \ return 0; \ } \ -early_param(#param, ignore_decompressor_param_##param) +early_param(#param, ignore_decompressor_param_##func) + +#define decompressor_handled_param(param) __decompressor_handled_param(param, param) decompressor_handled_param(mem); decompressor_handled_param(vmalloc); @@ -50,6 +55,8 @@ decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); decompressor_handled_param(relocate_lowcore); +decompressor_handled_param(bootdebug); +__decompressor_handled_param(debug_alternative, debug-alternative); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif @@ -58,25 +65,10 @@ static void __init kasan_early_init(void) { #ifdef CONFIG_KASAN init_task.kasan_depth = 0; - sclp_early_printk("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized\n"); #endif } -static void __init reset_tod_clock(void) -{ - union tod_clock clk; - - if (store_tod_clock_ext_cc(&clk) == 0) - return; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) - disabled_wait(); - - memset(&tod_clock_base, 0, sizeof(tod_clock_base)); - tod_clock_base.tod = TOD_UNIX_EPOCH; - get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; -} - /* * Initialize storage key for kernel pages */ @@ -95,26 +87,6 @@ static noinline __init void init_kernel_storage_key(void) static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); -static noinline __init void detect_machine_type(void) -{ - struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; - - /* Check current-configuration-level */ - if (stsi(NULL, 0, 0, 0) <= 2) { - get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR; - return; - } - /* Get virtual-machine cpu information. */ - if (stsi(vmms, 3, 2, 2) || !vmms->count) - return; - - /* Detect known hypervisors */ - if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_KVM; - else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) - get_lowcore()->machine_flags |= MACHINE_FLAG_VM; -} - /* Remove leading, trailing and double whitespace. */ static inline void strim_all(char *str) { @@ -155,9 +127,9 @@ static noinline __init void setup_arch_string(void) strim_all(hvstr); } else { sprintf(hvstr, "%s", - MACHINE_IS_LPAR ? "LPAR" : - MACHINE_IS_VM ? "z/VM" : - MACHINE_IS_KVM ? "KVM" : "unknown"); + machine_is_lpar() ? "LPAR" : + machine_is_vm() ? "z/VM" : + machine_is_kvm() ? "KVM" : "unknown"); } dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); } @@ -166,9 +138,8 @@ static __init void setup_topology(void) { int max_mnest; - if (!test_facility(11)) + if (!cpu_has_topology()) return; - get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY; for (max_mnest = 6; max_mnest > 1; max_mnest--) { if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) break; @@ -217,65 +188,10 @@ static noinline __init void setup_lowcore_early(void) lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); } -static __init void detect_diag9c(void) -{ - unsigned int cpu_address; - int rc; - - cpu_address = stap(); - diag_stat_inc(DIAG_STAT_X09C); - asm volatile( - " diag %2,0,0x9c\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); - if (!rc) - get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C; -} - -static __init void detect_machine_facilities(void) -{ - if (test_facility(8)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1; - system_ctl_set_bit(0, CR0_EDAT_BIT); - } - if (test_facility(78)) - get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2; - if (test_facility(3)) - get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(50) && test_facility(73)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_TE; - system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); - } - if (test_facility(51)) - get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC; - if (test_facility(129)) - system_ctl_set_bit(0, CR0_VECTOR_BIT); - if (test_facility(130)) - get_lowcore()->machine_flags |= MACHINE_FLAG_NX; - if (test_facility(133)) - get_lowcore()->machine_flags |= MACHINE_FLAG_GS; - if (test_facility(139) && (tod_clock_base.tod >> 63)) { - /* Enabled signed clock comparator comparisons */ - get_lowcore()->machine_flags |= MACHINE_FLAG_SCC; - clock_comparator_max = -1ULL >> 1; - system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); - } - if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { - get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO; - /* the control bit is set during PCI initialization */ - } - if (test_facility(194)) - get_lowcore()->machine_flags |= MACHINE_FLAG_RDP; - if (test_facility(85)) - get_lowcore()->machine_flags |= MACHINE_FLAG_SEQ_INSN; -} - static inline void save_vector_registers(void) { #ifdef CONFIG_CRASH_DUMP - if (test_facility(129)) + if (cpu_has_vx()) save_vx_regs(boot_cpu_vector_save_area); #endif } @@ -307,17 +223,13 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { kasan_early_init(); - reset_tod_clock(); time_early_init(); init_kernel_storage_key(); lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - detect_machine_type(); setup_arch_string(); setup_boot_command_line(); - detect_diag9c(); - detect_machine_facilities(); save_vector_registers(); setup_topology(); sclp_early_detect(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 960c08700cf6..0f00f4b06d51 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -29,6 +29,7 @@ #include <asm/nmi.h> #include <asm/nospec-insn.h> #include <asm/lowcore.h> +#include <asm/machine.h> _LPP_OFFSET = __LC_LPP @@ -44,7 +45,7 @@ _LPP_OFFSET = __LC_LPP ALTERNATIVE_2 "b \lpswe;nopr", \ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ - ALT_LOWCORE + ALT_FEATURE(MFEATURE_LOWCORE) .endm .macro MBEAR reg, lowcore @@ -52,16 +53,7 @@ _LPP_OFFSET = __LC_LPP ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea, lowcore -#ifdef CONFIG_CHECK_STACK - tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - la %r14,\savearea(\lowcore) - jz stack_overflow -#endif - .endm - .macro CHECK_VMAP_STACK savearea, lowcore, oklabel -#ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET @@ -76,10 +68,7 @@ _LPP_OFFSET = __LC_LPP clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel la %r14,\savearea(\lowcore) - j stack_overflow -#else - j \oklabel -#endif + j stack_invalid .endm /* @@ -127,7 +116,7 @@ _LPP_OFFSET = __LC_LPP .macro SIEEXIT sie_control,lowcore lg %r9,\sie_control # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce lg %r9,__LC_CURRENT(\lowcore) mvi __TI_sie(%r9),0 larl %r9,sie_exit # skip forward to sie_exit @@ -219,7 +208,7 @@ SYM_FUNC_START(__sie64a) lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE GET_LC %r14 - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce + lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce lg %r14,__LC_CURRENT(%r14) mvi __TI_sie(%r14),0 SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) @@ -251,7 +240,6 @@ SYM_CODE_START(system_call) lghi %r14,0 .Lsysc_per: STBEAR __LC_LAST_BREAK(%r13) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) lg %r15,__LC_KERNEL_STACK(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) @@ -272,7 +260,6 @@ SYM_CODE_START(system_call) lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -289,7 +276,6 @@ SYM_CODE_START(ret_from_fork) brasl %r14,__ret_from_fork STACKLEAK_ERASE GET_LC %r13 - lctlg %c1,%c1,__LC_USER_ASCE(%r13) mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) @@ -310,10 +296,7 @@ SYM_CODE_START(pgm_check_handler) lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) xgr %r10,%r10 tmhh %r8,0x0001 # coming from user space? - jno .Lpgm_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - j 3f # -> fault in user space -.Lpgm_skip_asce: + jo 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) lg %r11,__LC_CURRENT(%r13) tm __TI_sie(%r11),0xff @@ -326,9 +309,8 @@ SYM_CODE_START(pgm_check_handler) jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + # CHECK_VMAP_STACK branches to stack_invalid or 4f CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f 3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) @@ -352,7 +334,6 @@ SYM_CODE_START(pgm_check_handler) tmhh %r8,0x0001 # returning to user space? jno .Lpgm_exit_kernel STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) .Lpgm_exit_kernel: @@ -394,11 +375,9 @@ SYM_CODE_START(\name) BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) - lg %r15,__LC_KERNEL_STACK(%r13) +1: lg %r15,__LC_KERNEL_STACK(%r13) 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -421,7 +400,6 @@ SYM_CODE_START(\name) tmhh %r8,0x0001 # returning to user ? jno 2f STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON stpt __LC_EXIT_TIMER(%r13) 2: LBEAR __PT_LAST_BREAK(%r11) @@ -481,7 +459,7 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - lg %r10,__LC_PCPU + lg %r10,__LC_PCPU(%r13) oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 @@ -489,8 +467,6 @@ SYM_CODE_START(mcck_int_handler) .Lmcck_user: lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) - stctg %c1,%c1,__PT_CR1(%r11) - lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lay %r14,__LC_GPREGS_SAVE_AREA(%r13) mvc __PT_R0(128,%r11),0(%r14) @@ -508,7 +484,6 @@ SYM_CODE_START(mcck_int_handler) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? @@ -603,25 +578,24 @@ SYM_CODE_END(early_pgm_check_handler) .section .kprobes.text, "ax" -#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK) /* - * The synchronous or the asynchronous stack overflowed. We are dead. + * The synchronous or the asynchronous stack pointer is invalid. We are dead. * No need to properly save the registers, we are going to panic anyway. * Setup a pt_regs so that show_trace can provide a good call trace. */ -SYM_CODE_START(stack_overflow) +SYM_CODE_START(stack_invalid) GET_LC %r15 lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs - jg kernel_stack_overflow -SYM_CODE_END(stack_overflow) -#endif + jg kernel_stack_invalid +SYM_CODE_END(stack_invalid) .section .data, "aw" .balign 4 @@ -637,7 +611,7 @@ SYM_DATA_END(daton_psw) .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame SYM_DATA_START(sys_call_table) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table) #undef SYSCALL @@ -645,7 +619,7 @@ SYM_DATA_END(sys_call_table) #define SYSCALL(esame,emu) .quad __s390_ ## emu SYM_DATA_START(sys_call_table_emu) -#include "asm/syscall_table.h" +#include <asm/syscall_table.h> SYM_DATA_END(sys_call_table_emu) #undef SYSCALL #endif diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 21969520f947..dd55cc6bbc28 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -31,7 +31,7 @@ void do_secure_storage_access(struct pt_regs *regs); void do_non_secure_storage_access(struct pt_regs *regs); void do_secure_storage_violation(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); -void kernel_stack_overflow(struct pt_regs * regs); +void kernel_stack_invalid(struct pt_regs *regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); @@ -41,7 +41,6 @@ void do_restart(void *arg); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); struct s390_mmap_arg_struct; struct fadvise64_64_args; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 51439a71e392..e94bb98f5231 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/kmsan-checks.h> +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/execmem.h> #include <trace/syscall.h> @@ -69,7 +70,7 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) bool ftrace_need_init_nop(void) { - return !MACHINE_HAS_SEQ_INSN; + return !cpu_has_seq_insn(); } int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) @@ -189,7 +190,7 @@ static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec, int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, old_addr, addr); else return ftrace_modify_trampoline_call(rec, old_addr, addr); @@ -213,8 +214,8 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - /* Expect brcl 0xf,... for the !MACHINE_HAS_SEQ_INSN case */ - if (MACHINE_HAS_SEQ_INSN) + /* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */ + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, addr, 0); else return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); @@ -234,7 +235,7 @@ static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long add int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - if (MACHINE_HAS_SEQ_INSN) + if (cpu_has_seq_insn()) return ftrace_patch_branch_insn(rec->ip, 0, addr); else return ftrace_make_trampoline_call(rec, addr); @@ -261,43 +262,19 @@ void ftrace_arch_code_modify_post_process(void) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* - * Hook the return address and push it in the stack of return addresses - * in current thread info. - */ -unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp, - unsigned long ip) -{ - if (unlikely(ftrace_graph_is_dead())) - goto out; - if (unlikely(atomic_read(¤t->tracing_graph_pause))) - goto out; - ip -= MCOUNT_INSN_SIZE; - if (!function_graph_enter(ra, ip, 0, (void *) sp)) - ra = (unsigned long) return_to_handler; -out: - return ra; -} -NOKPROBE_SYMBOL(prepare_ftrace_return); -/* - * Patch the kernel code at ftrace_graph_caller location. The instruction - * there is branch relative on condition. To enable the ftrace graph code - * block, we simply patch the mask field of the instruction to zero and - * turn the instruction into a nop. - * To disable the ftrace graph code the mask field will be patched to - * all ones, which turns the instruction into an unconditional branch. - */ -int ftrace_enable_ftrace_graph_caller(void) +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { - /* Expect brc 0xf,... */ - return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false); -} + unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15]; -int ftrace_disable_ftrace_graph_caller(void) -{ - /* Expect brc 0x0,... */ - return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true); + if (unlikely(ftrace_graph_is_dead())) + return; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs)) + *parent = (unsigned long)&return_to_handler; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c index 0b68168d9566..cf26d7a37425 100644 --- a/arch/s390/kernel/guarded_storage.c +++ b/arch/s390/kernel/guarded_storage.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/signal.h> @@ -109,7 +110,7 @@ static int gs_broadcast(void) SYSCALL_DEFINE2(s390_guarded_storage, int, command, struct gs_cb __user *, gs_cb) { - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -EOPNOTSUPP; switch (command) { case GS_ENABLE: diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 396034b2fe67..7edb9ded199c 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -18,12 +18,10 @@ __HEAD SYM_CODE_START(startup_continue) - larl %r1,tod_clock_base - GET_LC %r2 - mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # + GET_LC %r2 larl %r14,init_task stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c index 2a99a216ab62..e7b66d046e8d 100644 --- a/arch/s390/kernel/hiperdispatch.c +++ b/arch/s390/kernel/hiperdispatch.c @@ -45,6 +45,7 @@ * therefore delaying the throughput loss caused by using SMP threads. */ +#include <linux/cpufeature.h> #include <linux/cpumask.h> #include <linux/debugfs.h> #include <linux/device.h> @@ -87,7 +88,7 @@ static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn); static int hd_set_hiperdispatch_mode(int enable) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) enable = 0; if (hd_enabled == enable) return 0; @@ -292,7 +293,7 @@ static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write, return 0; } -static struct ctl_table hiperdispatch_ctl_table[] = { +static const struct ctl_table hiperdispatch_ctl_table[] = { { .procname = "hiperdispatch", .mode = 0644, diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 7d12a1305fc9..ff15f91affde 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -22,6 +22,7 @@ #include <linux/debug_locks.h> #include <linux/vmalloc.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/ipl.h> #include <asm/smp.h> @@ -185,7 +186,7 @@ static inline int __diag308(unsigned long subcode, unsigned long addr) r1.even = addr; r1.odd = 0; - asm volatile( + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" "0: nopr %%r7\n" EX_TABLE(0b,0b) @@ -269,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ { \ if (len >= sizeof(_value)) \ return -E2BIG; \ - len = strscpy(_value, buf, sizeof(_value)); \ + len = strscpy(_value, buf); \ if ((ssize_t)len < 0) \ return len; \ strim(_value); \ @@ -280,58 +281,58 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) -#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t size = _ipl_block.scp_data_len; \ - void *scp_data = _ipl_block.scp_data; \ - \ - return memory_read_from_buffer(buf, count, &off, \ - scp_data, size); \ +#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ +static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t size = _ipl_block.scp_data_len; \ + void *scp_data = _ipl_block.scp_data; \ + \ + return memory_read_from_buffer(buf, count, &off, \ + scp_data, size); \ } #define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t scpdata_len = count; \ - size_t padding; \ - \ - if (off) \ - return -EINVAL; \ - \ - memcpy(_ipl_block.scp_data, buf, count); \ - if (scpdata_len % 8) { \ - padding = 8 - (scpdata_len % 8); \ - memset(_ipl_block.scp_data + scpdata_len, \ - 0, padding); \ - scpdata_len += padding; \ - } \ - \ - _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ - _ipl_block.len = _ipl_bp0_len + scpdata_len; \ - _ipl_block.scp_data_len = scpdata_len; \ - \ - return count; \ +static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t scpdata_len = count; \ + size_t padding; \ + \ + if (off) \ + return -EINVAL; \ + \ + memcpy(_ipl_block.scp_data, buf, count); \ + if (scpdata_len % 8) { \ + padding = 8 - (scpdata_len % 8); \ + memset(_ipl_block.scp_data + scpdata_len, \ + 0, padding); \ + scpdata_len += padding; \ + } \ + \ + _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ + _ipl_block.len = _ipl_bp0_len + scpdata_len; \ + _ipl_block.scp_data_len = scpdata_len; \ + \ + return count; \ } #define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \ NULL, _size) #define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \ sys_##_prefix##_scp_data_store, _size) @@ -434,19 +435,19 @@ static struct kobj_attribute sys_ipl_device_attr = __ATTR(device, 0444, sys_ipl_device_show, NULL); static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return memory_read_from_buffer(buf, count, &off, &ipl_block, ipl_block.hdr.len); } -static struct bin_attribute sys_ipl_parameter_attr = +static const struct bin_attribute sys_ipl_parameter_attr = __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL, PAGE_SIZE); DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE); -static struct bin_attribute *ipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const ipl_fcp_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_fcp_scp_data_attr, NULL, @@ -454,7 +455,7 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE); -static struct bin_attribute *ipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const ipl_nvme_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_nvme_scp_data_attr, NULL, @@ -462,7 +463,7 @@ static struct bin_attribute *ipl_nvme_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE); -static struct bin_attribute *ipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const ipl_eckd_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_eckd_scp_data_attr, NULL, @@ -593,9 +594,9 @@ static struct attribute *ipl_fcp_attrs[] = { NULL, }; -static struct attribute_group ipl_fcp_attr_group = { +static const struct attribute_group ipl_fcp_attr_group = { .attrs = ipl_fcp_attrs, - .bin_attrs = ipl_fcp_bin_attrs, + .bin_attrs_new = ipl_fcp_bin_attrs, }; static struct attribute *ipl_nvme_attrs[] = { @@ -607,9 +608,9 @@ static struct attribute *ipl_nvme_attrs[] = { NULL, }; -static struct attribute_group ipl_nvme_attr_group = { +static const struct attribute_group ipl_nvme_attr_group = { .attrs = ipl_nvme_attrs, - .bin_attrs = ipl_nvme_bin_attrs, + .bin_attrs_new = ipl_nvme_bin_attrs, }; static struct attribute *ipl_eckd_attrs[] = { @@ -620,9 +621,9 @@ static struct attribute *ipl_eckd_attrs[] = { NULL, }; -static struct attribute_group ipl_eckd_attr_group = { +static const struct attribute_group ipl_eckd_attr_group = { .attrs = ipl_eckd_attrs, - .bin_attrs = ipl_eckd_bin_attrs, + .bin_attrs_new = ipl_eckd_bin_attrs, }; /* CCW ipl device attributes */ @@ -640,11 +641,11 @@ static struct attribute *ipl_ccw_attrs_lpar[] = { NULL, }; -static struct attribute_group ipl_ccw_attr_group_vm = { +static const struct attribute_group ipl_ccw_attr_group_vm = { .attrs = ipl_ccw_attrs_vm, }; -static struct attribute_group ipl_ccw_attr_group_lpar = { +static const struct attribute_group ipl_ccw_attr_group_lpar = { .attrs = ipl_ccw_attrs_lpar }; @@ -655,7 +656,7 @@ static struct attribute *ipl_common_attrs[] = { NULL, }; -static struct attribute_group ipl_common_attr_group = { +static const struct attribute_group ipl_common_attr_group = { .attrs = ipl_common_attrs, }; @@ -685,7 +686,7 @@ static int __init ipl_init(void) goto out; switch (ipl_info.type) { case IPL_TYPE_CCW: - if (MACHINE_IS_VM) + if (machine_is_vm()) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group_vm); else @@ -808,7 +809,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr, IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const reipl_fcp_bin_attrs[] = { &sys_reipl_fcp_scp_data_attr, NULL, }; @@ -917,9 +918,9 @@ static struct attribute *reipl_fcp_attrs[] = { NULL, }; -static struct attribute_group reipl_fcp_attr_group = { +static const struct attribute_group reipl_fcp_attr_group = { .attrs = reipl_fcp_attrs, - .bin_attrs = reipl_fcp_bin_attrs, + .bin_attrs_new = reipl_fcp_bin_attrs, }; static struct kobj_attribute sys_reipl_fcp_clear_attr = @@ -932,7 +933,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr, IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const reipl_nvme_bin_attrs[] = { &sys_reipl_nvme_scp_data_attr, NULL, }; @@ -955,9 +956,9 @@ static struct attribute *reipl_nvme_attrs[] = { NULL, }; -static struct attribute_group reipl_nvme_attr_group = { +static const struct attribute_group reipl_nvme_attr_group = { .attrs = reipl_nvme_attrs, - .bin_attrs = reipl_nvme_bin_attrs + .bin_attrs_new = reipl_nvme_bin_attrs }; static ssize_t reipl_nvme_clear_show(struct kobject *kobj, @@ -1031,7 +1032,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr, IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const reipl_eckd_bin_attrs[] = { &sys_reipl_eckd_scp_data_attr, NULL, }; @@ -1048,9 +1049,9 @@ static struct attribute *reipl_eckd_attrs[] = { NULL, }; -static struct attribute_group reipl_eckd_attr_group = { +static const struct attribute_group reipl_eckd_attr_group = { .attrs = reipl_eckd_attrs, - .bin_attrs = reipl_eckd_bin_attrs + .bin_attrs_new = reipl_eckd_bin_attrs }; static ssize_t reipl_eckd_clear_show(struct kobject *kobj, @@ -1272,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM; /* VM PARM */ - if (MACHINE_IS_VM && ipl_block_valid && + if (machine_is_vm() && ipl_block_valid && (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) { ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; @@ -1286,7 +1287,7 @@ static int __init reipl_nss_init(void) { int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 0; reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); @@ -1311,8 +1312,8 @@ static int __init reipl_ccw_init(void) return -ENOMEM; rc = sysfs_create_group(&reipl_kset->kobj, - MACHINE_IS_VM ? &reipl_ccw_attr_group_vm - : &reipl_ccw_attr_group_lpar); + machine_is_vm() ? &reipl_ccw_attr_group_vm + : &reipl_ccw_attr_group_lpar); if (rc) return rc; @@ -1587,15 +1588,15 @@ static struct attribute *dump_fcp_attrs[] = { NULL, }; -static struct bin_attribute *dump_fcp_bin_attrs[] = { +static const struct bin_attribute *const dump_fcp_bin_attrs[] = { &sys_dump_fcp_scp_data_attr, NULL, }; -static struct attribute_group dump_fcp_attr_group = { +static const struct attribute_group dump_fcp_attr_group = { .name = IPL_FCP_STR, .attrs = dump_fcp_attrs, - .bin_attrs = dump_fcp_bin_attrs, + .bin_attrs_new = dump_fcp_bin_attrs, }; /* NVME dump device attributes */ @@ -1621,15 +1622,15 @@ static struct attribute *dump_nvme_attrs[] = { NULL, }; -static struct bin_attribute *dump_nvme_bin_attrs[] = { +static const struct bin_attribute *const dump_nvme_bin_attrs[] = { &sys_dump_nvme_scp_data_attr, NULL, }; -static struct attribute_group dump_nvme_attr_group = { +static const struct attribute_group dump_nvme_attr_group = { .name = IPL_NVME_STR, .attrs = dump_nvme_attrs, - .bin_attrs = dump_nvme_bin_attrs, + .bin_attrs_new = dump_nvme_bin_attrs, }; /* ECKD dump device attributes */ @@ -1655,15 +1656,15 @@ static struct attribute *dump_eckd_attrs[] = { NULL, }; -static struct bin_attribute *dump_eckd_bin_attrs[] = { +static const struct bin_attribute *const dump_eckd_bin_attrs[] = { &sys_dump_eckd_scp_data_attr, NULL, }; -static struct attribute_group dump_eckd_attr_group = { +static const struct attribute_group dump_eckd_attr_group = { .name = IPL_ECKD_STR, .attrs = dump_eckd_attrs, - .bin_attrs = dump_eckd_bin_attrs, + .bin_attrs_new = dump_eckd_bin_attrs, }; /* CCW dump device attributes */ @@ -1987,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger) static int vmcmd_init(void) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -EOPNOTSUPP; vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); if (!vmcmd_kset) @@ -2248,26 +2249,28 @@ static int __init s390_ipl_init(void) __initcall(s390_ipl_init); -static void __init strncpy_skip_quote(char *dst, char *src, int n) +static void __init strscpy_skip_quote(char *dst, char *src, int n) { int sx, dx; - dx = 0; - for (sx = 0; src[sx] != 0; sx++) { + if (!n) + return; + for (sx = 0, dx = 0; src[sx]; sx++) { if (src[sx] == '"') continue; - dst[dx++] = src[sx]; - if (dx >= n) + dst[dx] = src[sx]; + if (dx + 1 == n) break; + dx++; } + dst[dx] = '\0'; } static int __init vmcmd_on_reboot_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_reboot, str, VMCMD_MAX_SIZE); - vmcmd_on_reboot[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot)); on_reboot_trigger.action = &vmcmd_action; return 1; } @@ -2275,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup); static int __init vmcmd_on_panic_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_panic, str, VMCMD_MAX_SIZE); - vmcmd_on_panic[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic)); on_panic_trigger.action = &vmcmd_action; return 1; } @@ -2286,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup); static int __init vmcmd_on_halt_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_halt, str, VMCMD_MAX_SIZE); - vmcmd_on_halt[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt)); on_halt_trigger.action = &vmcmd_action; return 1; } @@ -2297,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup); static int __init vmcmd_on_poff_setup(char *str) { - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return 1; - strncpy_skip_quote(vmcmd_on_poff, str, VMCMD_MAX_SIZE); - vmcmd_on_poff[VMCMD_MAX_SIZE] = 0; + strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff)); on_poff_trigger.action = &vmcmd_action; return 1; } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ef7be599e1f7..bdf9c7cb5685 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> @@ -25,6 +26,7 @@ #include <asm/irq_regs.h> #include <asm/cputime.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/irq.h> #include <asm/hw_irq.h> #include <asm/stacktrace.h> @@ -84,7 +86,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, @@ -149,7 +150,7 @@ void noinstr do_io_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } @@ -164,7 +165,7 @@ void noinstr do_io_irq(struct pt_regs *regs) do_irq_async(regs, THIN_INTERRUPT); else do_irq_async(regs, IO_INTERRUPT); - } while (MACHINE_IS_LPAR && irq_pending(regs)); + } while (machine_is_lpar() && irq_pending(regs)); irq_exit_rcu(); @@ -185,7 +186,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) if (user_mode(regs)) { update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) + if (cpu_has_bear()) current->thread.last_break = regs->last_break; } diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 8b80ea57125f..c450120b4474 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -13,6 +13,7 @@ #include <linux/ptrace.h> #include <linux/preempt.h> #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/kdebug.h> #include <linux/uaccess.h> #include <linux/extable.h> @@ -153,7 +154,7 @@ void arch_arm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { @@ -166,7 +167,7 @@ void arch_disarm_kprobe(struct kprobe *p) { struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; - if (MACHINE_HAS_SEQ_INSN) { + if (cpu_has_seq_insn()) { swap_instruction(&args); text_poke_sync(); } else { diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 6652e54cf3db..6d1ffca5f798 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -166,7 +166,7 @@ static struct timer_list lgr_timer; */ static void lgr_timer_set(void) { - mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC)); + mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS)); } /* diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 8f681ccfb83a..baeb3dcfc1c8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,7 +13,9 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/debug_locks.h> +#include <linux/cpufeature.h> #include <asm/guarded_storage.h> +#include <asm/machine.h> #include <asm/pfault.h> #include <asm/cio.h> #include <asm/fpu.h> @@ -94,7 +96,7 @@ static noinline void __machine_kdump(void *image) mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK); if (cpu_has_vx()) save_vx_regs((__vector128 *) mcesa->vector_save_area); - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { local_ctl_store(2, &cr2_old.reg); cr2_new = cr2_old; cr2_new.gse = 1; @@ -178,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void) static int machine_kexec_prepare_kdump(void) { #ifdef CONFIG_CRASH_DUMP - if (MACHINE_IS_VM) + if (machine_is_vm()) diag10_range(PFN_DOWN(crashk_res.start), PFN_DOWN(crashk_res.end - crashk_res.start + 1)); return 0; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 7e267ef63a7f..1fec370fecf4 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -104,17 +104,6 @@ SYM_CODE_START(ftrace_common) lgr %r3,%r14 la %r5,STACK_FREGS(%r15) BASR_EX %r14,%r1 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -# The j instruction gets runtime patched to a nop instruction. -# See ftrace_enable_ftrace_graph_caller. -SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL) - j .Lftrace_graph_caller_end - lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) - lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) -.Lftrace_graph_caller_end: -#endif lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) #ifdef MARCH_HAS_Z196_FEATURES ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) @@ -134,14 +123,14 @@ SYM_CODE_END(ftrace_common) SYM_FUNC_START(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 - aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE) + # allocate ftrace_regs and stack frame for ftrace_return_to_handler + aghi %r15,-STACK_FRAME_SIZE_FREGS stg %r1,__SF_BACKCHAIN(%r15) - la %r3,STACK_FRAME_OVERHEAD(%r15) - stg %r1,__FGRAPH_RET_FP(%r3) - stg %r2,__FGRAPH_RET_GPR2(%r3) - lgr %r2,%r3 + stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) + stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15) + la %r2,STACK_FRAME_OVERHEAD(%r15) brasl %r14,ftrace_return_to_handler - aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE + aghi %r15,STACK_FRAME_SIZE_FREGS lgr %r14,%r2 lmg %r2,%r5,32(%r15) BR_EX %r14 diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index fbd218b6fc8e..3da371c144eb 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -9,6 +9,7 @@ */ #include <linux/kernel_stat.h> +#include <linux/cpufeature.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/entry-common.h> @@ -45,7 +46,7 @@ static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); static inline int nmi_needs_mcesa(void) { - return cpu_has_vx() || MACHINE_HAS_GS; + return cpu_has_vx() || cpu_has_gs(); } /* @@ -61,7 +62,7 @@ void __init nmi_alloc_mcesa_early(u64 *mcesad) if (!nmi_needs_mcesa()) return; *mcesad = __pa(&boot_mcesa); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); } @@ -73,14 +74,14 @@ int nmi_alloc_mcesa(u64 *mcesad) *mcesad = 0; if (!nmi_needs_mcesa()) return 0; - size = MACHINE_HAS_GS ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; + size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE; origin = kmalloc(size, GFP_KERNEL); if (!origin) return -ENOMEM; /* The pointer is stored with mcesa_bits ORed in */ kmemleak_not_leak(origin); *mcesad = __pa(origin); - if (MACHINE_HAS_GS) + if (cpu_has_gs()) *mcesad |= ilog2(MCESA_MAX_SIZE); return 0; } diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c index ddc1448ea2e1..2fc40f97c0ad 100644 --- a/arch/s390/kernel/numa.c +++ b/arch/s390/kernel/numa.c @@ -21,12 +21,8 @@ void __init numa_setup(void) nodes_clear(node_possible_map); node_set(0, node_possible_map); node_set_online(0); - for (nid = 0; nid < MAX_NUMNODES; nid++) { - NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); - if (!NODE_DATA(nid)) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(pg_data_t), 8); - } + for (nid = 0; nid < MAX_NUMNODES; nid++) + NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8); NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; NODE_DATA(0)->node_id = 0; } diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 29080d6d5d8d..c2a468986212 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -18,6 +18,7 @@ #include <asm/physmem_info.h> #include <asm/maccess.h> #include <asm/asm-offsets.h> +#include <asm/sections.h> #include <asm/ipl.h> /* diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index b0bc68da6a11..6a262e198e35 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -442,7 +442,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) ctrset_size = 48; else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) ctrset_size = 128; - else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) + else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8) ctrset_size = 160; break; case CPUMF_CTR_SET_MT_DIAG: @@ -858,18 +858,13 @@ static int cpumf_pmu_event_type(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; - int err; + int err = -ENOENT; if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); - else - return -ENOENT; - - if (unlikely(err) && event->destroy) - event->destroy(event); return err; } @@ -981,12 +976,10 @@ static int cfdiag_push_sample(struct perf_event *event, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = cpuhw->usedss; raw.frag.data = cpuhw->stop; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); - if (overflow) - event->pmu->stop(event, 0); perf_event_update_userpage(event); return overflow; @@ -1819,8 +1812,6 @@ static int cfdiag_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); - if (unlikely(err)) - event->destroy(event); out: return err; } diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index e4a6bfc91080..7ace1f9e4ccf 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); - CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); @@ -291,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080); @@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb); +CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc); +CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd); +CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce); +CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112); +CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), @@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { +static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), @@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z17, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION), + CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z17, SORTL), + CPUMF_EVENT_PTR(cf_z17, DFLT_CC), + CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF), + CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -859,7 +1016,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void) if (ci.csvn >= 1 && ci.csvn <= 5) csvn = cpumcf_svn_12345_pmu_event_attr; else if (ci.csvn >= 6) - csvn = cpumcf_svn_67_pmu_event_attr; + csvn = cpumcf_svn_678_pmu_event_attr; /* Determine model-specific counter set(s) */ get_cpu_id(&cpu_id); @@ -892,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x3932: model = cpumcf_z16_pmu_event_attr; break; + case 0x9175: + case 0x9176: + model = cpumcf_z17_pmu_event_attr; + break; default: model = none; break; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1e99514fb7ae..91469401f2c9 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -885,9 +885,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event) event->attr.exclude_idle = 0; err = __hw_perf_event_init(event); - if (unlikely(err)) - if (event->destroy) - event->destroy(event); return err; } @@ -981,7 +978,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } -/* perf_exclude_event() - Filter event +/* perf_event_exclude() - Filter event * @event: The perf event * @regs: pt_regs structure * @sde_regs: Sample-data-entry (sde) regs structure @@ -990,7 +987,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) * * Return non-zero if the event shall be excluded. */ -static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, +static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs, struct perf_sf_sde_regs *sde_regs) { if (event->attr.exclude_user && user_mode(regs)) @@ -1073,12 +1070,9 @@ static int perf_push_sample(struct perf_event *event, data.tid_entry.pid = basic->hpp & LPP_PID_MASK; overflow = 0; - if (perf_exclude_event(event, ®s, sde_regs)) + if (perf_event_exclude(event, ®s, sde_regs)) goto out; - if (perf_event_overflow(event, &data, ®s)) { - overflow = 1; - event->pmu->stop(event, 0); - } + overflow = perf_event_overflow(event, &data, ®s); perf_event_update_userpage(event); out: return overflow; diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index fa7325454266..63875270941b 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -478,7 +478,7 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); @@ -518,7 +518,8 @@ static void paicrypt_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ -static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 7f462bef1fc0..fd14d5ebccbc 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -503,7 +503,7 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); @@ -542,7 +542,8 @@ static void paiext_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ -static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5ce9a795a0fe..11f70c1e2797 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/stop_machine.h> +#include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/random.h> @@ -19,6 +20,7 @@ #include <linux/cpu.h> #include <linux/smp.h> #include <asm/text-patching.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/facility.h> #include <asm/elf.h> @@ -72,7 +74,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) this_cpu = smp_processor_id(); if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { __this_cpu_write(cpu_relax_retry, 0); - cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + cpu = cpumask_next_wrap(this_cpu, cpumask); if (cpu >= nr_cpu_ids) return; if (arch_vcpu_is_preempted(cpu)) @@ -209,14 +211,14 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_DFP; /* huge page support */ - if (MACHINE_HAS_EDAT1) + if (cpu_has_edat1()) elf_hwcap |= HWCAP_HPAGE; /* 64-bit register support for 31-bit processes */ elf_hwcap |= HWCAP_HIGH_GPRS; /* transactional execution */ - if (MACHINE_HAS_TE) + if (machine_has_tx()) elf_hwcap |= HWCAP_TE; /* vector */ @@ -244,10 +246,10 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_NNPA; /* guarded storage */ - if (MACHINE_HAS_GS) + if (cpu_has_gs()) elf_hwcap |= HWCAP_GS; - if (MACHINE_HAS_PCI_MIO) + if (test_machine_feature(MFEATURE_PCI_MIO)) elf_hwcap |= HWCAP_PCI_MIO; /* virtualization support */ @@ -266,31 +268,35 @@ static int __init setup_elf_platform(void) add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { default: /* Use "z10" as default. */ - strcpy(elf_platform, "z10"); + strscpy(elf_platform, "z10"); break; case 0x2817: case 0x2818: - strcpy(elf_platform, "z196"); + strscpy(elf_platform, "z196"); break; case 0x2827: case 0x2828: - strcpy(elf_platform, "zEC12"); + strscpy(elf_platform, "zEC12"); break; case 0x2964: case 0x2965: - strcpy(elf_platform, "z13"); + strscpy(elf_platform, "z13"); break; case 0x3906: case 0x3907: - strcpy(elf_platform, "z14"); + strscpy(elf_platform, "z14"); break; case 0x8561: case 0x8562: - strcpy(elf_platform, "z15"); + strscpy(elf_platform, "z15"); break; case 0x3931: case 0x3932: - strcpy(elf_platform, "z16"); + strscpy(elf_platform, "z16"); + break; + case 0x9175: + case 0x9176: + strscpy(elf_platform, "z17"); break; } return 0; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1cfed8b710b8..e1240f6b29fa 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -7,10 +7,10 @@ * Martin Schwidefsky (schwidefsky@de.ibm.com) */ -#include "asm/ptrace.h" #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> @@ -31,6 +31,9 @@ #include <asm/unistd.h> #include <asm/runtime_instr.h> #include <asm/facility.h> +#include <asm/machine.h> +#include <asm/ptrace.h> +#include <asm/rwonce.h> #include <asm/fpu.h> #include "entry.h" @@ -60,7 +63,7 @@ void update_cr_regs(struct task_struct *task) cr0_new = cr0_old; cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { + if (machine_has_tx()) { /* Set or clear transaction execution TXC bit 8. */ cr0_new.tcx = 1; if (task->thread.per_flags & PER_FLAG_NO_TE) @@ -75,7 +78,7 @@ void update_cr_regs(struct task_struct *task) } } /* Take care of enable/disable of guarded storage. */ - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { cr2_new.gse = 0; if (task->thread.gs_cb) cr2_new.gse = 1; @@ -470,18 +473,18 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_GET_LAST_BREAK: return put_user(child->thread.last_break, (unsigned long __user *)data); case PTRACE_ENABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags &= ~PER_FLAG_NO_TE; return 0; case PTRACE_DISABLE_TE: - if (!MACHINE_HAS_TE) + if (!machine_has_tx()) return -EIO; child->thread.per_flags |= PER_FLAG_NO_TE; child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; return 0; case PTRACE_TE_ABORT_RAND: - if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE)) return -EIO; switch (data) { case 0UL: @@ -1033,7 +1036,7 @@ static int s390_gs_cb_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1050,7 +1053,7 @@ static int s390_gs_cb_set(struct task_struct *target, struct gs_cb gs_cb = { }, *data = NULL; int rc; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!target->thread.gs_cb) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1087,7 +1090,7 @@ static int s390_gs_bc_get(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) return -ENODATA; @@ -1101,7 +1104,7 @@ static int s390_gs_bc_set(struct task_struct *target, { struct gs_cb *data = target->thread.gs_bc_cb; - if (!MACHINE_HAS_GS) + if (!cpu_has_gs()) return -ENODEV; if (!data) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1521,13 +1524,6 @@ static const char *gpr_names[NUM_GPRS] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", }; -unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) -{ - if (offset >= NUM_GPRS) - return 0; - return regs->gprs[offset]; -} - int regs_query_register_offset(const char *name) { unsigned long offset; @@ -1547,29 +1543,3 @@ const char *regs_query_register_name(unsigned int offset) return NULL; return gpr_names[offset]; } - -static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) -{ - unsigned long ksp = kernel_stack_pointer(regs); - - return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); -} - -/** - * regs_get_kernel_stack_nth() - get Nth entry of the stack - * @regs:pt_regs which contains kernel stack pointer. - * @n:stack entry number. - * - * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specifined by @regs. If the @n th entry is NOT in the kernel stack, - * this returns 0. - */ -unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) -{ - unsigned long addr; - - addr = kernel_stack_pointer(regs) + n * sizeof(long); - if (!regs_within_kernel_stack(regs, addr)) - return 0; - return *(unsigned long *)addr; -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a3fea683b227..f244c5560e7f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -54,6 +54,7 @@ #include <asm/archrandom.h> #include <asm/boot_data.h> +#include <asm/machine.h> #include <asm/ipl.h> #include <asm/facility.h> #include <asm/smp.h> @@ -157,25 +158,29 @@ u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); struct oldmem_data __bootdata_preserved(oldmem_data); -unsigned long VMALLOC_START; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); -unsigned long VMALLOC_END; +unsigned long __bootdata_preserved(VMALLOC_END); EXPORT_SYMBOL(VMALLOC_END); -struct page *vmemmap; +struct page *__bootdata_preserved(vmemmap); EXPORT_SYMBOL(vmemmap); -unsigned long vmemmap_size; +unsigned long __bootdata_preserved(vmemmap_size); -unsigned long MODULES_VADDR; -unsigned long MODULES_END; +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); /* An array with a pointer to the lowcore of every CPU. */ struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); -DEFINE_STATIC_KEY_FALSE(cpu_has_bear); - /* * The Write Back bit position in the physaddr is given by the SLPC PCI. * Leaving the mask zero always uses write through which is safe @@ -245,7 +250,7 @@ static void __init conmode_default(void) char query_buffer[1024]; char *ptr; - if (MACHINE_IS_VM) { + if (machine_is_vm()) { cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); @@ -283,7 +288,7 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } - } else if (MACHINE_IS_KVM) { + } else if (machine_is_kvm()) { if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) SET_CONSOLE_VT220; else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) @@ -359,36 +364,24 @@ void *restart_stack; unsigned long stack_alloc(void) { -#ifdef CONFIG_VMAP_STACK - void *ret; + void *stack; - ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, - NUMA_NO_NODE, __builtin_return_address(0)); - kmemleak_not_leak(ret); - return (unsigned long)ret; -#else - return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); -#endif + stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(stack); + return (unsigned long)stack; } void stack_free(unsigned long stack) { -#ifdef CONFIG_VMAP_STACK - vfree((void *) stack); -#else - free_pages(stack, THREAD_SIZE_ORDER); -#endif + vfree((void *)stack); } static unsigned long __init stack_alloc_early(void) { unsigned long stack; - stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); - if (!stack) { - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, THREAD_SIZE, THREAD_SIZE); - } + stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE); return stack; } @@ -421,7 +414,6 @@ static void __init setup_lowcore(void) lc->clock_comparator = clock_comparator_max; lc->current_task = (unsigned long)&init_task; lc->lpp = LPP_MAGIC; - lc->machine_flags = get_lowcore()->machine_flags; lc->preempt_count = get_lowcore()->preempt_count; nmi_alloc_mcesa_early(&lc->mcesad); lc->sys_enter_timer = get_lowcore()->sys_enter_timer; @@ -512,10 +504,7 @@ static void __init setup_resources(void) bss_resource.end = __pa_symbol(__bss_stop) - 1; for_each_mem_range(i, &start, &end) { - res = memblock_alloc(sizeof(*res), 8); - if (!res) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*res), 8); + res = memblock_alloc_or_panic(sizeof(*res), 8); res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; @@ -534,10 +523,7 @@ static void __init setup_resources(void) std_res->start > res->end) continue; if (std_res->end > res->end) { - sub_res = memblock_alloc(sizeof(*sub_res), 8); - if (!sub_res) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*sub_res), 8); + sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8); *sub_res = *std_res; sub_res->end = res->end; std_res->start = res->end + 1; @@ -664,7 +650,7 @@ static void __init reserve_crashkernel(void) return; } - if (!oldmem_data.start && MACHINE_IS_VM) + if (!oldmem_data.start && machine_is_vm()) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; @@ -704,7 +690,7 @@ static void __init reserve_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_reserve(addr, size); } @@ -712,7 +698,7 @@ static void __init free_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_phys_free(addr, size); } @@ -742,7 +728,7 @@ static void __init reserve_lowcore(void) void *lowcore_end = lowcore_start + sizeof(struct lowcore); void *start, *end; - if ((void *)__identity_base < lowcore_end) { + if (absolute_pointer(__identity_base) < lowcore_end) { start = max(lowcore_start, (void *)__identity_base); end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); memblock_reserve(__pa(start), __pa(end)); @@ -824,9 +810,7 @@ static void __init setup_randomness(void) { struct sysinfo_3_2_2 *vmms; - vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!vmms) - panic("Failed to allocate memory for sysinfo structure\n"); + vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free(vmms, PAGE_SIZE); @@ -886,6 +870,23 @@ static void __init log_component_list(void) } /* + * Print avoiding interpretation of % in buf and taking bootdebug option + * into consideration. + */ +static void __init print_rb_entry(const char *buf) +{ + char fmt[] = KERN_SOH "0boot: %s"; + int level = printk_get_level(buf); + + buf = skip_timestamp(printk_skip_level(buf)); + if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf))) + return; + + fmt[1] = level; + printk(fmt, buf); +} + +/* * Setup function called from init/main.c just after the banner * was printed. */ @@ -895,17 +896,20 @@ void __init setup_arch(char **cmdline_p) /* * print what head.S has found out about the machine */ - if (MACHINE_IS_VM) + if (machine_is_vm()) pr_info("Linux is running as a z/VM " "guest operating system in 64-bit mode\n"); - else if (MACHINE_IS_KVM) + else if (machine_is_kvm()) pr_info("Linux is running under KVM in 64-bit mode\n"); - else if (MACHINE_IS_LPAR) + else if (machine_is_lpar()) pr_info("Linux is running natively in 64-bit mode\n"); else pr_info("Linux is running as a guest in 64-bit mode\n"); + /* Print decompressor messages if not already printed */ + if (!boot_earlyprintk) + boot_rb_foreach(print_rb_entry); - if (have_relocated_lowcore()) + if (machine_has_relocated_lowcore()) pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); log_component_list(); @@ -955,7 +959,7 @@ void __init setup_arch(char **cmdline_p) setup_uv(); dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); - if (MACHINE_HAS_EDAT2) + if (cpu_has_edat2()) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); reserve_crashkernel(); @@ -975,10 +979,7 @@ void __init setup_arch(char **cmdline_p) numa_setup(); smp_detect_cpus(); topology_init_early(); - - if (test_facility(193)) - static_branch_enable(&cpu_has_bear); - + setup_protection_map(); /* * Create kernel page tables. */ @@ -1006,3 +1007,8 @@ void __init setup_arch(char **cmdline_p) /* Add system specific data to the random pool */ setup_randomness(); } + +void __init arch_cpu_finalize_init(void) +{ + sclp_init(); +} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 822d8e6f8717..81f12bb77f62 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -18,6 +18,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/export.h> @@ -38,6 +39,7 @@ #include <linux/kprobes.h> #include <asm/access-regs.h> #include <asm/asm-offsets.h> +#include <asm/machine.h> #include <asm/ctlreg.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -97,13 +99,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; static unsigned int smp_max_threads __initdata = -1U; cpumask_t cpu_setup_mask; -static int __init early_nosmt(char *s) -{ - smp_max_threads = 1; - return 0; -} -early_param("nosmt", early_nosmt); - static int __init early_smt(char *s) { get_option(&s, &smp_max_threads); @@ -263,13 +258,12 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = get_lowcore()->kernel_asce; lc->user_asce = s390_invalid_asce; - lc->machine_flags = get_lowcore()->machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; abs_lc = get_abs_lowcore(); memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area)); put_abs_lowcore(abs_lc); - lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[1] = lc->user_asce; lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); arch_spin_lock_setup(cpu); @@ -416,7 +410,7 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void notrace smp_yield_cpu(int cpu) { - if (!MACHINE_HAS_DIAG9C) + if (!machine_has_diag9c()) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" @@ -561,10 +555,10 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!cpu_has_vx() && !MACHINE_HAS_GS) + if (!cpu_has_vx() && !cpu_has_gs()) return 0; pa = lc->mcesad & MCESA_ORIGIN_MASK; - if (MACHINE_HAS_GS) + if (cpu_has_gs()) pa |= lc->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) @@ -611,9 +605,7 @@ void __init smp_save_dump_ipl_cpu(void) if (!dump_available()) return; sa = save_area_alloc(true); - regs = memblock_alloc(512, 8); - if (!sa || !regs) - panic("could not allocate memory for boot CPU save area\n"); + regs = memblock_alloc_or_panic(512, 8); copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); save_area_add_regs(sa, regs); memblock_free(regs, 512); @@ -646,8 +638,6 @@ void __init smp_save_dump_secondary_cpus(void) SIGP_CC_NOT_OPERATIONAL) continue; sa = save_area_alloc(false); - if (!sa) - panic("could not allocate memory for save area\n"); __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page)); save_area_add_regs(sa, page); if (cpu_has_vx()) { @@ -792,10 +782,7 @@ void __init smp_detect_cpus(void) u16 address; /* Get CPU information */ - info = memblock_alloc(sizeof(*info), 8); - if (!info) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*info), 8); + info = memblock_alloc_or_panic(sizeof(*info), 8); smp_get_core_info(info, 1); /* Find boot CPU type */ if (sclp.has_core_type) { @@ -814,6 +801,7 @@ void __init smp_detect_cpus(void) mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; pcpu_set_smt(mtid); + cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1); /* Print number of CPUs */ c_cpus = s_cpus = 0; diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 40edfde25f5b..b153a395f46d 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -9,6 +9,7 @@ #include <linux/stacktrace.h> #include <linux/uaccess.h> #include <linux/compat.h> +#include <asm/asm-offsets.h> #include <asm/stacktrace.h> #include <asm/unwind.h> #include <asm/kprobes.h> diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c index 5ec28028315b..4fee74553ca2 100644 --- a/arch/s390/kernel/syscall.c +++ b/arch/s390/kernel/syscall.c @@ -12,6 +12,7 @@ * platform. */ +#include <linux/cpufeature.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> @@ -81,25 +82,35 @@ SYSCALL_DEFINE0(ni_syscall) return -ENOSYS; } -static void do_syscall(struct pt_regs *regs) +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) { unsigned long nr; + add_random_kstack_offset(); + enter_from_user_mode(regs); + regs->psw = get_lowcore()->svc_old_psw; + regs->int_code = get_lowcore()->svc_int_code; + update_timer_sys(); + if (cpu_has_bear()) + current->thread.last_break = regs->last_break; + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + if (unlikely(per_trap)) + set_thread_flag(TIF_PER_TRAP); + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); nr = regs->int_code & 0xffff; - if (!nr) { + if (likely(!nr)) { nr = regs->gprs[1] & 0xffff; regs->int_code &= ~0xffffUL; regs->int_code |= nr; } - regs->gprs[2] = nr; - if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { regs->psw.addr = current->restart_block.arch_data; current->restart_block.arch_data = 1; } nr = syscall_enter_from_user_mode_work(regs, nr); - /* * In the s390 ptrace ABI, both the syscall number and the return value * use gpr2. However, userspace puts the syscall number either in the @@ -107,37 +118,11 @@ static void do_syscall(struct pt_regs *regs) * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here * and if set, the syscall will be skipped. */ - if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) goto out; regs->gprs[2] = -ENOSYS; - if (likely(nr >= NR_syscalls)) - goto out; - do { + if (likely(nr < NR_syscalls)) regs->gprs[2] = current->thread.sys_call_table[nr](regs); - } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART)); out: - syscall_exit_to_user_mode_work(regs); -} - -void noinstr __do_syscall(struct pt_regs *regs, int per_trap) -{ - add_random_kstack_offset(); - enter_from_user_mode(regs); - regs->psw = get_lowcore()->svc_old_psw; - regs->int_code = get_lowcore()->svc_int_code; - update_timer_sys(); - if (static_branch_likely(&cpu_has_bear)) - current->thread.last_break = regs->last_break; - - local_irq_enable(); - regs->orig_gpr2 = regs->gprs[2]; - - if (per_trap) - set_thread_flag(TIF_PER_TRAP); - - regs->flags = 0; - set_pt_regs_flag(regs, PIF_SYSCALL); - do_syscall(regs); - exit_to_user_mode(); + syscall_exit_to_user_mode(regs); } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 88055f58fbda..1ea84e942bd4 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -5,6 +5,7 @@ * Martin Schwidefsky <schwidefsky@de.ibm.com>, */ +#include <linux/cpufeature.h> #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -15,54 +16,17 @@ #include <linux/export.h> #include <linux/slab.h> #include <asm/asm-extable.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/debug.h> #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> #include <asm/fpu.h> +#include <asm/asm.h> int topology_max_mnest; -static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) -{ - int r0 = (fc << 28) | sel1; - int rc = 0; - - asm volatile( - " lr 0,%[r0]\n" - " lr 1,%[r1]\n" - " stsi 0(%[sysinfo])\n" - "0: jz 2f\n" - "1: lhi %[rc],%[retval]\n" - "2: lr %[r0],0\n" - EX_TABLE(0b, 1b) - : [r0] "+d" (r0), [rc] "+d" (rc) - : [r1] "d" (sel2), - [sysinfo] "a" (sysinfo), - [retval] "K" (-EOPNOTSUPP) - : "cc", "0", "1", "memory"); - *lvl = ((unsigned int) r0) >> 28; - return rc; -} - -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. - */ -int stsi(void *sysinfo, int fc, int sel1, int sel2) -{ - int lvl, rc; - - rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); - if (rc) - return rc; - return fc ? 0 : lvl; -} -EXPORT_SYMBOL(stsi); - #ifdef CONFIG_PROC_FS static bool convert_ext_name(unsigned char encoding, char *name, size_t len) @@ -154,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) int i; seq_putc(m, '\n'); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; if (stsi(info, 15, 1, topology_max_mnest)) return; @@ -415,7 +379,7 @@ static struct service_level service_level_vm = { static __init int create_proc_service_level(void) { proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); - if (MACHINE_IS_VM) + if (machine_is_vm()) register_service_level(&service_level_vm); return 0; } @@ -559,7 +523,7 @@ static __init int stsi_init_debugfs(void) sf = &stsi_file[i]; debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); } - if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) { char link_to[10]; sprintf(link_to, "15_1_%d", topology_mnest_limit()); diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index c0a70efa2426..26f2981aa09e 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -18,8 +18,7 @@ * affects a few functions that are not performance-relevant. */ .macro BR_EX_AMODE31_r14 - larl %r1,0f - ex 0,0(%r1) + exrl 0,0f j . 0: br %r14 .endm diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 34a65c141ea0..fed17d407a44 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -54,10 +54,10 @@ #include <asm/cio.h> #include "entry.h" -union tod_clock tod_clock_base __section(".data"); +union tod_clock __bootdata_preserved(tod_clock_base); EXPORT_SYMBOL_GPL(tod_clock_base); -u64 clock_comparator_max = -1ULL; +u64 __bootdata_preserved(clock_comparator_max); EXPORT_SYMBOL_GPL(clock_comparator_max); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -79,12 +79,10 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; - int cs; /* Initialize TOD steering parameters */ tod_steering_end = tod_clock_base.tod; - for (cs = 0; cs < CS_BASES; cs++) - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -373,7 +371,6 @@ static void clock_sync_global(long delta) { unsigned long now, adj; struct ptff_qto qto; - int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; @@ -389,10 +386,8 @@ static void clock_sync_global(long delta) panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - for (cs = 0; cs < CS_BASES; cs++) { - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; - } + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta; /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) @@ -662,12 +657,12 @@ static void stp_check_leap(void) if (ret < 0) pr_err("failed to set leap second flags\n"); /* arm Timer to clear leap second flags */ - mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC)); + mod_timer(&stp_timer, jiffies + secs_to_jiffies(14400)); } else { /* The day the leap second is scheduled for hasn't been reached. Retry * in one hour. */ - mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC)); + mod_timer(&stp_timer, jiffies + secs_to_jiffies(3600)); } } @@ -685,7 +680,7 @@ static void stp_work_fn(struct work_struct *work) if (!stp_online) { chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); - del_timer_sync(&stp_timer); + timer_delete_sync(&stp_timer); goto out_unlock; } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4f9c301a705b..3df048e190b1 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -6,6 +6,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/workqueue.h> #include <linux/memblock.h> #include <linux/uaccess.h> @@ -240,7 +241,7 @@ int topology_set_cpu_management(int fc) { int cpu, rc; - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return -EOPNOTSUPP; if (fc) rc = ptf(PTF_VERTICAL); @@ -315,13 +316,13 @@ static int __arch_update_cpu_topology(void) hd_status = 0; rc = 0; mutex_lock(&smp_cpu_state_mutex); - if (MACHINE_HAS_TOPOLOGY) { + if (cpu_has_topology()) { rc = 1; store_topology(info); tl_to_masks(info); } update_cpu_masks(); - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) topology_update_polarization_simple(); if (cpu_management == 1) hd_status = hd_enable_hiperdispatch(); @@ -371,12 +372,12 @@ static void set_topology_timer(void) if (atomic_add_unless(&topology_poll, -1, 0)) mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); else - mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); + mod_timer(&topology_timer, jiffies + secs_to_jiffies(60)); } void topology_expect_change(void) { - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) return; /* This is racy, but it doesn't matter since it is just a heuristic. * Worst case is that we poll in a higher frequency for a bit longer. @@ -500,7 +501,7 @@ int topology_cpu_init(struct cpu *cpu) int rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); - if (rc || !MACHINE_HAS_TOPOLOGY) + if (rc || !cpu_has_topology()) return rc; rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); if (rc) @@ -548,33 +549,38 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info, nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; nr_masks = max(nr_masks, 1); for (i = 0; i < nr_masks; i++) { - mask->next = memblock_alloc(sizeof(*mask->next), 8); - if (!mask->next) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*mask->next), 8); + mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8); mask = mask->next; } } +static int __init detect_polarization(union topology_entry *tle) +{ + struct topology_core *tl_core; + + while (tle->nl) + tle = next_tle(tle); + tl_core = (struct topology_core *)tle; + return tl_core->pp != POLARIZATION_HRZ; +} + void __init topology_init_early(void) { struct sysinfo_15_1_x *info; set_sched_topology(s390_topology); if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) topology_mode = TOPOLOGY_MODE_HW; else topology_mode = TOPOLOGY_MODE_SINGLE; } - if (!MACHINE_HAS_TOPOLOGY) + if (!cpu_has_topology()) goto out; - tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!tl_info) - panic("%s: Failed to allocate %lu bytes align=0x%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); + tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE); info = tl_info; store_topology(info); + cpu_management = detect_polarization(info->tle); pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", info->mag[0], info->mag[1], info->mag[2], info->mag[3], info->mag[4], info->mag[5], info->mnest); @@ -591,7 +597,7 @@ static inline int topology_get_mode(int enabled) { if (!enabled) return TOPOLOGY_MODE_SINGLE; - return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; + return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; } static inline int topology_is_enabled(void) @@ -662,7 +668,7 @@ static int polarization_ctl_handler(const struct ctl_table *ctl, int write, return set_polarization(polarization); } -static struct ctl_table topology_ctl_table[] = { +static const struct ctl_table topology_ctl_table[] = { { .procname = "topology", .mode = 0644, @@ -681,7 +687,7 @@ static int __init topology_init(void) int rc = 0; timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); - if (MACHINE_HAS_TOPOLOGY) + if (cpu_has_topology()) set_topology_timer(); else topology_update_polarization_simple(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 24fee11b030d..19687dab32f7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -3,18 +3,13 @@ * S390 version * Copyright IBM Corp. 1999, 2000 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * * Derived from "arch/i386/kernel/traps.c" * Copyright (C) 1991, 1992 Linus Torvalds */ -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'asm.s'. - */ -#include "asm/irqflags.h" -#include "asm/ptrace.h" +#include <linux/cpufeature.h> #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/randomize_kstack.h> @@ -29,6 +24,8 @@ #include <linux/entry-common.h> #include <linux/kmsan.h> #include <asm/asm-extable.h> +#include <asm/irqflags.h> +#include <asm/ptrace.h> #include <asm/vtime.h> #include <asm/fpu.h> #include <asm/fault.h> @@ -42,7 +39,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) address = current->thread.trap_tdb.data[3]; else address = regs->psw.addr; - return (void __user *) (address - (regs->int_code >> 16)); + return (void __user *)(address - (regs->int_code >> 16)); } #ifdef CONFIG_GENERIC_BUG @@ -57,16 +54,15 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) if (user_mode(regs)) { force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); - } else { + } else { if (!fixup_exception(regs)) die(regs, str); - } + } } static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) + if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP) return; do_report_trap(regs, si_signo, si_code, str); } @@ -78,8 +74,7 @@ void do_per_trap(struct pt_regs *regs) return; if (!current->ptrace) return; - force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __force __user *) current->thread.per_event.address); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address); } NOKPROBE_SYMBOL(do_per_trap); @@ -98,36 +93,25 @@ static void name(struct pt_regs *regs) \ do_trap(regs, signr, sicode, str); \ } -DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, - "addressing exception") -DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, - "execute exception") -DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, - "fixpoint divide exception") -DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, - "fixpoint overflow exception") -DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, - "HFP overflow exception") -DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, - "HFP underflow exception") -DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, - "HFP significance exception") -DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, - "HFP divide exception") -DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, - "HFP square root exception") -DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, - "operand exception") -DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, - "privileged operation") -DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, - "special operation exception") -DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, - "transaction constraint exception") +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception") +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception") static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) { int si_code = 0; + /* FPC[2] is Data Exception Code */ if ((fpc & 0x00000300) == 0) { /* bits 6 and 7 of DXC are 0 iff IEEE exception */ @@ -153,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs) static void illegal_op(struct pt_regs *regs) { - __u8 opcode[6]; - __u16 __user *location; int is_uprobe_insn = 0; + u16 __user *location; int signal = 0; + u16 opcode; location = get_trap_ip(regs); - if (user_mode(regs)) { - if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + if (get_user(opcode, location)) return; - if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { + if (opcode == S390_BREAKPOINT_U16) { if (current->ptrace) force_sig_fault(SIGTRAP, TRAP_BRKPT, location); else signal = SIGILL; #ifdef CONFIG_UPROBES - } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + } else if (opcode == UPROBE_SWBP_INSN) { is_uprobe_insn = 1; #endif - } else + } else { signal = SIGILL; + } } /* - * We got either an illegal op in kernel mode, or user space trapped + * This is either an illegal op in kernel mode, or user space trapped * on a uprobes illegal instruction. See if kprobes or uprobes picks * it up. If not, SIGILL. */ if (is_uprobe_insn || !user_mode(regs)) { - if (notify_die(DIE_BPT, "bpt", regs, 0, - 3, SIGTRAP) != NOTIFY_STOP) + if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } if (signal) @@ -190,18 +173,10 @@ static void illegal_op(struct pt_regs *regs) } NOKPROBE_SYMBOL(illegal_op); -DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, - "specification exception"); - static void vector_exception(struct pt_regs *regs) { int si_code, vic; - if (!cpu_has_vx()) { - do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); - return; - } - /* get vector interrupt code from fpc */ save_user_fpu_regs(); vic = (current->thread.ufpu.fpc & 0xf00) >> 8; @@ -249,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs) { if (user_mode(regs)) return; - switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { case BUG_TRAP_TYPE_NONE: fixup_exception(regs); @@ -262,7 +236,7 @@ static void monitor_event_exception(struct pt_regs *regs) } } -void kernel_stack_overflow(struct pt_regs *regs) +void kernel_stack_invalid(struct pt_regs *regs) { /* * Normally regs are unpoisoned by the generic entry code, but @@ -270,12 +244,12 @@ void kernel_stack_overflow(struct pt_regs *regs) */ kmsan_unpoison_entry_regs(regs); bust_spinlocks(1); - printk("Kernel stack overflow.\n"); + pr_emerg("Kernel stack pointer invalid\n"); show_regs(regs); bust_spinlocks(0); - panic("Corrupt kernel stack, can't continue."); + panic("Invalid kernel stack pointer, cannot continue"); } -NOKPROBE_SYMBOL(kernel_stack_overflow); +NOKPROBE_SYMBOL(kernel_stack_invalid); static void __init test_monitor_call(void) { @@ -283,12 +257,12 @@ static void __init test_monitor_call(void) if (!IS_ENABLED(CONFIG_BUG)) return; - asm volatile( + asm_inline volatile( " mc 0,0\n" - "0: xgr %0,%0\n" + "0: lhi %[val],0\n" "1:\n" - EX_TABLE(0b,1b) - : "+d" (val)); + EX_TABLE(0b, 1b) + : [val] "+d" (val)); if (!val) panic("Monitor call doesn't work!\n"); } @@ -323,7 +297,6 @@ void noinstr __do_pgm_check(struct pt_regs *regs) teid.val = lc->trans_exc_code; regs->int_code = lc->pgm_int_code; regs->int_parm_long = teid.val; - /* * In case of a guest fault, short-circuit the fault handler and return. * This way the sie64a() function will return 0; fault address and @@ -336,23 +309,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs) current->thread.gmap_int_code = regs->int_code & 0xffff; return; } - state = irqentry_enter(regs); - if (user_mode(regs)) { update_timer_sys(); - if (!static_branch_likely(&cpu_has_bear)) { + if (!cpu_has_bear()) { if (regs->last_break < 4096) regs->last_break = 1; } current->thread.last_break = regs->last_break; } - if (lc->pgm_code & 0x0200) { /* transaction abort */ current->thread.trap_tdb = lc->pgm_tdb; } - if (lc->pgm_code & PGM_INT_CODE_PER) { if (user_mode(regs)) { struct per_event *ev = ¤t->thread.per_event; @@ -368,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs) goto out; } } - if (!irqs_disabled_flags(regs->psw.mask)) trace_hardirqs_on(); __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); - trapnr = regs->int_code & PGM_INT_CODE_MASK; if (trapnr) pgm_check_table[trapnr](regs); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 6f9654a191ad..b99478e84da4 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -15,23 +15,11 @@ #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/pagewalk.h> +#include <linux/backing-dev.h> #include <asm/facility.h> #include <asm/sections.h> #include <asm/uv.h> -#if !IS_ENABLED(CONFIG_KVM) -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - return 0; -} - -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) -{ - return 0; -} -#endif - /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); @@ -148,7 +136,7 @@ int uv_destroy_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -159,6 +147,7 @@ int uv_destroy_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL(uv_destroy_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -175,7 +164,7 @@ int uv_destroy_pte(pte_t pte) * * @paddr: Absolute host address of page to be exported */ -static int uv_convert_from_secure(unsigned long paddr) +int uv_convert_from_secure(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, @@ -187,15 +176,16 @@ static int uv_convert_from_secure(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure); /* * The caller must already hold a reference to the folio. */ -static int uv_convert_from_secure_folio(struct folio *folio) +int uv_convert_from_secure_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -206,6 +196,7 @@ static int uv_convert_from_secure_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -216,6 +207,39 @@ int uv_convert_from_secure_pte(pte_t pte) return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -237,13 +261,31 @@ static int expected_folio_refs(struct folio *folio) return res; } -static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +/** + * __make_folio_secure() - make a folio secure + * @folio: the folio to make secure + * @uvcb: the uvcb that describes the UVC to be used + * + * The folio @folio will be made secure if possible, @uvcb will be passed + * as-is to the UVC. + * + * Return: 0 on success; + * -EBUSY if the folio is in writeback or has too many references; + * -EAGAIN if the UVC needs to be attempted again; + * -ENXIO if the address is not mapped; + * -EINVAL if the UVC failed for other reasons. + * + * Context: The caller must hold exactly one extra reference on the folio + * (it's the same logic as split_folio()), and the folio must be + * locked. + */ +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; if (folio_test_writeback(folio)) - return -EAGAIN; - expected = expected_folio_refs(folio); + return -EBUSY; + expected = expected_folio_refs(folio) + 1; if (!folio_ref_freeze(folio, expected)) return -EBUSY; set_bit(PG_arch_1, &folio->flags); @@ -268,262 +310,167 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) { - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} + int rc; -/* - * Drain LRU caches: the local one on first invocation and the ones of all - * CPUs on successive invocations. Returns "true" on the first invocation. - */ -static bool drain_lru(bool *drain_lru_called) -{ - /* - * If we have tried a local drain and the folio refcount - * still does not match our expected safe value, try with a - * system wide drain. This is needed if the pagevecs holding - * the page are on a different CPU. - */ - if (*drain_lru_called) { - lru_add_drain_all(); - /* We give up here, don't retry immediately. */ - return false; - } - /* - * We are here if the folio refcount does not match the - * expected safe value. The main culprits are usually - * pagevecs. With lru_add_drain() we drain the pagevecs - * on the local CPU so that hopefully the refcount will - * reach the expected safe value. - */ - lru_add_drain(); - *drain_lru_called = true; - /* The caller should try again immediately */ - return true; + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; } -/* - * Requests the Ultravisor to make a page accessible to a guest. - * If it's brought in the first time, it will be cleared. If - * it has been exported before, it will be decrypted and integrity - * checked. +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and + * split the folio if it is large. + * @mm: the mm containing the folio to work on + * @folio: the folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the operation was successful; + * -EAGAIN if splitting the large folio was not successful, + * but another attempt can be made; + * -EINVAL in case of other folio splitting errors. See split_folio(). */ -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) { - struct vm_area_struct *vma; - bool drain_lru_called = false; - spinlock_t *ptelock; - unsigned long uaddr; - struct folio *folio; - pte_t *ptep; - int rc; + int rc, tried_splits; -again: - rc = -EFAULT; - mmap_read_lock(gmap->mm); + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. If - * userspace is playing dirty tricky with mapping huge pages later - * on this will result in a segmentation fault. - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = -ENXIO; - ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); - if (!ptep) - goto out; - if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { - folio = page_folio(pte_page(*ptep)); - rc = -EAGAIN; - if (folio_test_large(folio)) { - rc = -E2BIG; - } else if (folio_trylock(folio)) { - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(PFN_PHYS(folio_pfn(folio))); - rc = make_folio_secure(folio, uvcb); + if (!folio_test_large(folio)) + return 0; + + for (tried_splits = 0; tried_splits < 2; tried_splits++) { + struct address_space *mapping; + loff_t lstart, lend; + struct inode *inode; + + folio_lock(folio); + rc = split_folio(folio); + if (rc != -EBUSY) { folio_unlock(folio); + return rc; } /* - * Once we drop the PTL, the folio may get unmapped and - * freed immediately. We need a temporary reference. + * Splitting with -EBUSY can fail for various reasons, but we + * have to handle one case explicitly for now: some mappings + * don't allow for splitting dirty folios; writeback will + * mark them clean again, including marking all page table + * entries mapping the folio read-only, to catch future write + * attempts. + * + * While the system should be writing back dirty folios in the + * background, we obtained this folio by looking up a writable + * page table entry. On these problematic mappings, writable + * page table entries imply dirty folios, preventing the + * split in the first place. + * + * To prevent a livelock when trigger writeback manually and + * letting the caller look up the folio again in the page + * table (turning it dirty), immediately try to split again. + * + * This is only a problem for some mappings (e.g., XFS); + * mappings that do not support writeback (e.g., shmem) do not + * apply. */ - if (rc == -EAGAIN || rc == -E2BIG) - folio_get(folio); - } - pte_unmap_unlock(ptep, ptelock); -out: - mmap_read_unlock(gmap->mm); - - switch (rc) { - case -E2BIG: - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - folio_put(folio); - - switch (rc) { - case 0: - /* Splitting succeeded, try again immediately. */ - goto again; - case -EAGAIN: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -EBUSY: - /* Unexpected race. */ - return -EAGAIN; + if (!folio_test_dirty(folio) || folio_test_anon(folio) || + !folio->mapping || !mapping_can_writeback(folio->mapping)) { + folio_unlock(folio); + break; } - WARN_ON_ONCE(1); - return -ENXIO; - case -EAGAIN: + /* - * If we are here because the UVC returned busy or partial - * completion, this is just a useless check, but it is safe. + * Ideally, we'd only trigger writeback on this exact folio. But + * there is no easy way to do that, so we'll stabilize the + * mapping while we still hold the folio lock, so we can drop + * the folio lock to trigger writeback on the range currently + * covered by the folio instead. */ - folio_wait_writeback(folio); - folio_put(folio); - return -EAGAIN; - case -EBUSY: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -ENXIO: - if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) - return -EFAULT; - return -EAGAIN; - } - return rc; -} -EXPORT_SYMBOL_GPL(gmap_make_secure); + mapping = folio->mapping; + lstart = folio_pos(folio); + lend = lstart + folio_size(folio) - 1; + inode = igrab(mapping->host); + folio_unlock(folio); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) -{ - struct uv_cb_cts uvcb = { - .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, - .header.len = sizeof(uvcb), - .guest_handle = gmap->guest_handle, - .gaddr = gaddr, - }; + if (unlikely(!inode)) + break; - return gmap_make_secure(gmap, gaddr, &uvcb); + filemap_write_and_wait_range(mapping, lstart, lend); + iput(mapping->host); + } + return -EAGAIN; } -EXPORT_SYMBOL_GPL(gmap_convert_to_secure); -/** - * gmap_destroy_page - Destroy a guest page. - * @gmap: the gmap of the guest - * @gaddr: the guest address to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - */ -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) { struct vm_area_struct *vma; struct folio_walk fw; - unsigned long uaddr; struct folio *folio; int rc; - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Huge pages should not be able to become secure - */ - if (is_vm_hugetlb_page(vma)) - goto out; + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } - rc = 0; - folio = folio_walk_start(&fw, vma, uaddr, 0); - if (!folio) - goto out; - /* - * See gmap_make_secure(): large folios cannot be secure. Small - * folio implies FW_LEVEL_PTE. - */ - if (folio_test_large(folio) || !pte_write(fw.pte)) - goto out_walk_end; - rc = uv_destroy_folio(folio); + folio_get(folio); /* - * Fault handlers can race; it is possible that two CPUs will fault - * on the same secure page. One CPU can destroy the page, reboot, - * re-enter secure mode and import it, while the second CPU was - * stuck at the beginning of the handler. At some point the second - * CPU will be able to progress, and it will not be able to destroy - * the page. In that case we do not want to terminate the process, - * we instead try to export the page. + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. */ - if (rc) - rc = uv_convert_from_secure_folio(folio); -out_walk_end: + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); folio_walk_end(&fw, vma); -out: - mmap_read_unlock(gmap->mm); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) { + rc = s390_wiggle_split_folio(mm, folio); + if (!rc) + rc = -EAGAIN; + } + folio_put(folio); + return rc; } -EXPORT_SYMBOL_GPL(gmap_destroy_page); +EXPORT_SYMBOL_GPL(make_hva_secure); /* * To be called with the folio locked or with an extra reference! This will - * prevent gmap_make_secure from touching the folio concurrently. Having 2 - * parallel arch_make_folio_accessible is fine, as the UV calls will become a - * no-op if the folio is already exported. + * prevent kvm_s390_pv_make_secure() from touching the folio concurrently. + * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will + * become a no-op if the folio is already exported. */ int arch_make_folio_accessible(struct folio *folio) { int rc = 0; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -894,7 +841,12 @@ out_kobj: device_initcall(uv_sysfs_init); /* - * Find the secret with the secret_id in the provided list. + * Locate a secret in the list by its id. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. + * + * Search for a secret with the given secret_id in the Ultravisor secret store. * * Context: might sleep. */ @@ -915,12 +867,15 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN], /* * Do the actual search for `uv_get_secret_metadata`. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. * * Context: might sleep. */ -static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list *list, - struct uv_secret_list_item_hdr *secret) +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret) { u16 start_idx = 0; u16 list_rc; @@ -942,36 +897,7 @@ static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], return -ENOENT; } - -/** - * uv_get_secret_metadata() - get secret metadata for a given secret id. - * @secret_id: search pattern. - * @secret: output data, containing the secret's metadata. - * - * Search for a secret with the given secret_id in the Ultravisor secret store. - * - * Context: might sleep. - * - * Return: - * * %0: - Found entry; secret->idx and secret->type are valid. - * * %ENOENT - No entry found. - * * %ENODEV: - Not supported: UV not available or command not available. - * * %EIO: - Other unexpected UV error. - */ -int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list_item_hdr *secret) -{ - struct uv_secret_list *buf; - int rc; - - buf = kzalloc(sizeof(*buf), GFP_KERNEL); - if (!buf) - return -ENOMEM; - rc = find_secret(secret_id, buf, secret); - kfree(buf); - return rc; -} -EXPORT_SYMBOL_GPL(uv_get_secret_metadata); +EXPORT_SYMBOL_GPL(uv_find_secret); /** * uv_retrieve_secret() - get the secret value for the secret index. diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 598b512cde01..430feb1a5013 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -16,8 +16,8 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/time_namespace.h> #include <linux/random.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/vdso/vsyscall.h> #include <asm/alternative.h> @@ -26,85 +26,6 @@ extern char vdso64_start[], vdso64_end[]; extern char vdso32_start[], vdso32_end[]; -static struct vm_special_mapping vvar_mapping; - -static union vdso_data_store vdso_data_store __page_aligned_data; - -struct vdso_data *vdso_data = vdso_data_store.data; - -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The VVAR page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (!vma_is_special_mapping(vma, &vvar_mapping)) - continue; - zap_vma_pages(vma); - break; - } - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - if (timens_page) { - /* - * Fault in VVAR page too, since it will be accessed - * to get clock data anyway. - */ - addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - pfn = page_to_pfn(timens_page); - } - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - return vmf_insert_pfn(vma, vmf->address, pfn); -} - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -112,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping vdso64_mapping = { .name = "[vdso]", .mremap = vdso_mremap, @@ -142,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) struct vm_area_struct *vma; int rc; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); if (mmap_write_lock_killable(mm)) return -EINTR; @@ -157,17 +73,14 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, vvar_start); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|VM_SEALED_SYSMAP| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_mapping); if (IS_ERR(vma)) { @@ -220,7 +133,7 @@ unsigned long vdso_text_size(void) unsigned long vdso_size(void) { - return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 2c5afb88d298..1e4ddd1a683f 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = vdso_user_wrapper-32.o note-32.o # Build rules diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index c916c4f73f76..9630d58c2080 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -6,16 +6,15 @@ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") OUTPUT_ARCH(s390:31-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 37bb4b761229..d8f0df742809 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -2,10 +2,10 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o -VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index ec42b7d9cb53..e4f6551ae898 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -7,17 +7,15 @@ #include <asm/vdso/vsyscall.h> #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c index 23f7d7619a99..cc8933e04ff7 100644 --- a/arch/s390/kernel/vmcore_info.c +++ b/arch/s390/kernel/vmcore_info.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/vmcore_info.h> -#include <asm/abs_lowcore.h> #include <linux/mm.h> +#include <asm/abs_lowcore.h> +#include <asm/sections.h> #include <asm/setup.h> void arch_crash_save_vmcoreinfo(void) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 377b9aaf8c92..ff1ddba96352 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -52,7 +52,6 @@ SECTIONS SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) - *(.fixup) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */ diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 02217fb4ae10..9a723c48b05a 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o +kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 74f73141f9b9..53233dec8cad 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -11,12 +11,30 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/gmap.h> +#include <asm/gmap_helpers.h> #include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" #include "gaccess.h" +static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end) +{ + struct kvm_memslot_iter iter; + struct kvm_memory_slot *slot; + struct kvm_memslots *slots; + unsigned long start, end; + + slots = kvm_vcpu_memslots(vcpu); + + kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) { + slot = iter.slot; + start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn)); + end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages)); + gmap_helper_discard(vcpu->kvm->mm, start, end); + } +} + static int diag_release_pages(struct kvm_vcpu *vcpu) { unsigned long start, end; @@ -32,12 +50,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + mmap_read_lock(vcpu->kvm->mm); /* * We checked for start >= end above, so lets check for the * fast path (no prefix swap page involved) */ if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) { - gmap_discard(vcpu->arch.gmap, start, end); + do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end)); } else { /* * This is slow path. gmap_discard will check for start @@ -45,13 +64,14 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) * prefix and let gmap_discard make some of these calls * NOPs. */ - gmap_discard(vcpu->arch.gmap, start, prefix); + do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix)); if (start <= prefix) - gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE); + do_discard_gfn_range(vcpu, 0, 1); if (end > prefix + PAGE_SIZE) - gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE); - gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end); + do_discard_gfn_range(vcpu, 1, 2); + do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end)); } + mmap_read_unlock(vcpu->kvm->mm); return 0; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9816b0060fbe..21c2e61fece4 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -18,6 +18,8 @@ #include "kvm-s390.h" #include "gaccess.h" +#define GMAP_SHADOW_FAKE_TABLE 1ULL + /* * vaddress union in order to easily decode a virtual address into its * region first index, region second index etc. parts. @@ -317,7 +319,7 @@ enum prot_type { PROT_TYPE_DAT = 3, PROT_TYPE_IEP = 4, /* Dummy value for passing an initialized value when code != PGM_PROTECTION */ - PROT_NONE, + PROT_TYPE_DUMMY, }; static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, @@ -333,7 +335,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, switch (code) { case PGM_PROTECTION: switch (prot) { - case PROT_NONE: + case PROT_TYPE_DUMMY: /* We should never get here, acts like termination */ WARN_ON_ONCE(1); break; @@ -803,7 +805,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, gpa = kvm_s390_real_to_abs(vcpu, ga); if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) { rc = PGM_ADDRESSING; - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; } } if (rc) @@ -961,7 +963,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, if (rc == PGM_PROTECTION) prot = PROT_TYPE_KEYC; else - prot = PROT_NONE; + prot = PROT_TYPE_DUMMY; rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); } out_unlock: @@ -1393,6 +1395,44 @@ shadow_pgt: } /** + * shadow_pgt_lookup() - find a shadow page table + * @sg: pointer to the shadow guest address space structure + * @saddr: the address in the shadow aguest address space + * @pgt: parent gmap address of the page table to get shadowed + * @dat_protection: if the pgtable is marked as protected by dat + * @fake: pgt references contiguous guest memory block, not a pgtable + * + * Returns 0 if the shadow page table was found and -EAGAIN if the page + * table was not found. + * + * Called with sg->mm->mmap_lock in read. + */ +static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt, + int *dat_protection, int *fake) +{ + unsigned long pt_index; + unsigned long *table; + struct page *page; + int rc; + + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ + if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { + /* Shadow page tables are full pages (pte+pgste) */ + page = pfn_to_page(*table >> PAGE_SHIFT); + pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page)); + *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE; + *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); + *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE); + rc = 0; + } else { + rc = -EAGAIN; + } + spin_unlock(&sg->guest_table_lock); + return rc; +} + +/** * kvm_s390_shadow_fault - handle fault on a shadow page table * @vcpu: virtual cpu * @sg: pointer to the shadow guest address space structure @@ -1415,6 +1455,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, int dat_protection, fake; int rc; + if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm)) + return -EFAULT; + mmap_read_lock(sg->mm); /* * We don't want any guest-2 tables to change - so the parent @@ -1423,7 +1466,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, */ ipte_lock(vcpu->kvm); - rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); + rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); if (rc) rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection, &fake); diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c new file mode 100644 index 000000000000..56ef153eb8fe --- /dev/null +++ b/arch/s390/kvm/gmap-vsie.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest memory management for KVM/s390 nested VMs. + * + * Copyright IBM Corp. 2008, 2020, 2024 + * + * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * David Hildenbrand <david@redhat.com> + * Janosch Frank <frankja@linux.vnet.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/pgtable.h> +#include <linux/pagemap.h> +#include <linux/mman.h> + +#include <asm/lowcore.h> +#include <asm/gmap.h> +#include <asm/uv.h> + +#include "kvm-s390.h" + +/** + * gmap_find_shadow - find a specific asce in the list of shadow tables + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * Returns the pointer to a gmap if a shadow table with the given asce is + * already available, ERR_PTR(-EAGAIN) if another one is just being created, + * otherwise NULL + * + * Context: Called with parent->shadow_lock held + */ +static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg; + + lockdep_assert_held(&parent->shadow_lock); + list_for_each_entry(sg, &parent->children, list) { + if (!gmap_shadow_valid(sg, asce, edat_level)) + continue; + if (!sg->initialized) + return ERR_PTR(-EAGAIN); + refcount_inc(&sg->ref_count); + return sg; + } + return NULL; +} + +/** + * gmap_shadow - create/find a shadow guest address space + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * The pages of the top level page table referred by the asce parameter + * will be set to read-only and marked in the PGSTEs of the kvm process. + * The shadow table will be removed automatically on any change to the + * PTE mapping for the source table. + * + * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, + * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the + * parent gmap table could not be protected. + */ +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg, *new; + unsigned long limit; + int rc; + + if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) || + KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private)) + return ERR_PTR(-EFAULT); + spin_lock(&parent->shadow_lock); + sg = gmap_find_shadow(parent, asce, edat_level); + spin_unlock(&parent->shadow_lock); + if (sg) + return sg; + /* Create a new shadow gmap */ + limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); + if (asce & _ASCE_REAL_SPACE) + limit = -1UL; + new = gmap_alloc(limit); + if (!new) + return ERR_PTR(-ENOMEM); + new->mm = parent->mm; + new->parent = gmap_get(parent); + new->private = parent->private; + new->orig_asce = asce; + new->edat_level = edat_level; + new->initialized = false; + spin_lock(&parent->shadow_lock); + /* Recheck if another CPU created the same shadow */ + sg = gmap_find_shadow(parent, asce, edat_level); + if (sg) { + spin_unlock(&parent->shadow_lock); + gmap_free(new); + return sg; + } + if (asce & _ASCE_REAL_SPACE) { + /* only allow one real-space gmap shadow */ + list_for_each_entry(sg, &parent->children, list) { + if (sg->orig_asce & _ASCE_REAL_SPACE) { + spin_lock(&sg->guest_table_lock); + gmap_unshadow(sg); + spin_unlock(&sg->guest_table_lock); + list_del(&sg->list); + gmap_put(sg); + break; + } + } + } + refcount_set(&new->ref_count, 2); + list_add(&new->list, &parent->children); + if (asce & _ASCE_REAL_SPACE) { + /* nothing to protect, return right away */ + new->initialized = true; + spin_unlock(&parent->shadow_lock); + return new; + } + spin_unlock(&parent->shadow_lock); + /* protect after insertion, so it will get properly invalidated */ + mmap_read_lock(parent->mm); + rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN, + ((asce & _ASCE_TABLE_LENGTH) + 1), + PROT_READ, GMAP_NOTIFY_SHADOW); + mmap_read_unlock(parent->mm); + spin_lock(&parent->shadow_lock); + new->initialized = true; + if (rc) { + list_del(&new->list); + gmap_free(new); + new = ERR_PTR(rc); + } + spin_unlock(&parent->shadow_lock); + return new; +} diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 5bbaadf75dc6..c7908950c1f4 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -94,7 +94,7 @@ static int handle_validity(struct kvm_vcpu *vcpu) vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy, + KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy, current->pid, vcpu->kvm); /* do not warn on invalid runtime instrumentation mode */ @@ -367,7 +367,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg2, &srcaddr, GACC_FETCH, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0); + rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0); if (rc != 0) return rc; @@ -376,7 +376,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg1, &dstaddr, GACC_STORE, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE); + rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE); if (rc != 0) return rc; @@ -544,12 +544,12 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu) guest_uvcb->header.cmd); return 0; } - rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb); + rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb); /* * If the unpin did not succeed, the guest will exit again for the UVC * and we will retry the unpin. */ - if (rc == -EINVAL) + if (rc == -EINVAL || rc == -ENXIO) return 0; /* * If we got -EAGAIN here, we simply return it. It will eventually @@ -652,10 +652,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) break; case ICPT_PV_PREF: rc = 0; - gmap_convert_to_secure(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu)); - gmap_convert_to_secure(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu) + PAGE_SIZE); + kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu)); + kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE); break; default: return -EOPNOTSUPP; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d4f031e086fc..60c360c18690 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -10,6 +10,7 @@ #define KMSG_COMPONENT "kvm-s390" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/interrupt.h> #include <linux/kvm_host.h> #include <linux/hrtimer.h> @@ -577,7 +578,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, /* take care of lazy register loading */ kvm_s390_fpu_store(vcpu->run); save_access_regs(vcpu->run->s.regs.acrs); - if (MACHINE_HAS_GS && vcpu->arch.gs_enabled) + if (cpu_has_gs() && vcpu->arch.gs_enabled) save_gs_cb(current->thread.gs_cb); /* Extended save area */ @@ -948,8 +949,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, (u64 *) __LC_PGM_LAST_BREAK); - rc |= put_guest_lc(vcpu, pgm_info.code, - (u16 *)__LC_PGM_INT_CODE); + rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW, @@ -2893,7 +2893,8 @@ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { - u64 uaddr; + u64 uaddr_s, uaddr_i; + int idx; switch (ue->type) { /* we store the userspace addresses instead of the guest addresses */ @@ -2901,14 +2902,16 @@ int kvm_set_routing_entry(struct kvm *kvm, if (kvm_is_ucontrol(kvm)) return -EINVAL; e->set = set_adapter_int; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr); - if (uaddr == -EFAULT) - return -EFAULT; - e->adapter.summary_addr = uaddr; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr); - if (uaddr == -EFAULT) + + idx = srcu_read_lock(&kvm->srcu); + uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr); + uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr); + srcu_read_unlock(&kvm->srcu, idx); + + if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i)) return -EFAULT; - e->adapter.ind_addr = uaddr; + e->adapter.summary_addr = uaddr_s; + e->adapter.ind_addr = uaddr_i; e->adapter.summary_offset = ue->u.adapter.summary_offset; e->adapter.ind_offset = ue->u.adapter.ind_offset; e->adapter.adapter_id = ue->u.adapter.adapter_id; @@ -3158,7 +3161,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm) if (!gi->origin) return; gisa_clear_ipm(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin); } void kvm_s390_gisa_init(struct kvm *kvm) @@ -3171,11 +3174,10 @@ void kvm_s390_gisa_init(struct kvm *kvm) gi->alert.mask = 0; spin_lock_init(&gi->alert.ref_lock); gi->expires = 50 * 1000; /* 50 usec */ - hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - gi->timer.function = gisa_vcpu_kicker; + hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL); memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); gi->origin->next_alert = (u32)virt_to_phys(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin); } void kvm_s390_gisa_enable(struct kvm *kvm) @@ -3216,7 +3218,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) process_gib_alert_list(); hrtimer_cancel(&gi->timer); gi->origin = NULL; - VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); + VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa); } void kvm_s390_gisa_disable(struct kvm *kvm) @@ -3465,7 +3467,7 @@ int __init kvm_s390_gib_init(u8 nisc) } } - KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc); + KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc); goto out; out_unreg_gal: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d8080c27d45b..d5ad10791c25 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -23,6 +23,7 @@ #include <linux/mman.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/cpufeature.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/timer.h> @@ -36,8 +37,10 @@ #include <asm/access-regs.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> +#include <asm/machine.h> #include <asm/stp.h> #include <asm/gmap.h> +#include <asm/gmap_helpers.h> #include <asm/nmi.h> #include <asm/isc.h> #include <asm/sclp.h> @@ -442,13 +445,13 @@ static void __init kvm_s390_cpu_feat_init(void) if (test_facility(201)) /* PFCR */ pfcr_query(&kvm_s390_available_subfunc.pfcr); - if (MACHINE_HAS_ESOP) + if (machine_has_esop()) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); /* * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). */ - if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || + if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao || !test_facility(3) || !nested) return; allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); @@ -637,7 +640,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = min_t(unsigned int, num_online_cpus(), r); break; case KVM_CAP_S390_COW: - r = MACHINE_HAS_ESOP; + r = machine_has_esop(); break; case KVM_CAP_S390_VECTOR_REGISTERS: r = test_facility(129); @@ -1019,7 +1022,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); - VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + VM_EVENT(kvm, 3, "New guest asce: 0x%p", (void *) kvm->arch.gmap->asce); break; } @@ -2671,7 +2674,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) if (r) break; - r = s390_disable_cow_sharing(); + mmap_write_lock(kvm->mm); + r = gmap_helper_disable_cow_sharing(); + mmap_write_unlock(kvm->mm); if (r) break; @@ -3395,7 +3400,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* we emulate STHYI in kvm */ set_kvm_facility(kvm->arch.model.fac_mask, 74); set_kvm_facility(kvm->arch.model.fac_list, 74); - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { set_kvm_facility(kvm->arch.model.fac_mask, 147); set_kvm_facility(kvm->arch.model.fac_list, 147); } @@ -3428,8 +3433,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) VM_EVENT(kvm, 3, "vm created with type %lu", type); if (type & KVM_VM_S390_UCONTROL) { + struct kvm_userspace_memory_region2 fake_memslot = { + .slot = KVM_S390_UCONTROL_MEMSLOT, + .guest_phys_addr = 0, + .userspace_addr = 0, + .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE), + .flags = 0, + }; + kvm->arch.gmap = NULL; kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; + /* one flat fake memslot covering the whole address-space */ + mutex_lock(&kvm->slots_lock); + KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm); + mutex_unlock(&kvm->slots_lock); } else { if (sclp.hamax == U64_MAX) kvm->arch.mem_limit = TASK_SIZE_MAX; @@ -3451,7 +3468,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_gisa_init(kvm); INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); kvm->arch.pv.set_aside = NULL; - KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); return 0; out_err: @@ -3514,7 +3531,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); - KVM_EVENT(3, "vm 0x%pK destroyed", kvm); + KVM_EVENT(3, "vm 0x%p destroyed", kvm); } /* Section: vcpu related */ @@ -3635,7 +3652,7 @@ static int sca_switch_to_extended(struct kvm *kvm) free_page((unsigned long)old_sca); - VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)", old_sca, kvm->arch.sca); return 0; } @@ -3879,8 +3896,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) kvm_s390_vcpu_setup_model(vcpu); - /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ - if (MACHINE_HAS_ESOP) + /* pgste_set_pte has special handling for !machine_has_esop() */ + if (machine_has_esop()) vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; if (test_kvm_facility(vcpu->kvm, 9)) vcpu->arch.sie_block->ecb |= ECB_SRSI; @@ -3930,8 +3947,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) if (rc) return rc; } - hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; + hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); vcpu->arch.sie_block->hpid = HPID_KVM; @@ -4012,7 +4029,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto out_free_sie_block; } - VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", + VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p", vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); @@ -4498,6 +4515,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu) return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); } +static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags) +{ + struct kvm *kvm = gmap->private; + gfn_t gfn = gpa_to_gfn(gaddr); + bool unlocked; + hva_t vmaddr; + gpa_t tmp; + int rc; + + if (kvm_is_ucontrol(kvm)) { + tmp = __gmap_translate(gmap, gaddr); + gfn = gpa_to_gfn(tmp); + } + + vmaddr = gfn_to_hva(kvm, gfn); + rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!rc) + rc = __gmap_link(gmap, gaddr, vmaddr); + return rc; +} + +/** + * __kvm_s390_mprotect_many() - Apply specified protection to guest pages + * @gmap: the gmap of the guest + * @gpa: the starting guest address + * @npages: how many pages to protect + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: pgste notification bits to set + * + * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one() + * + * Context: kvm->srcu and gmap->mm need to be held in read mode + */ +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits) +{ + unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0; + gpa_t end = gpa + npages * PAGE_SIZE; + int rc; + + for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) { + rc = gmap_protect_one(gmap, gpa, prot, bits); + if (rc == -EAGAIN) { + __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag); + rc = gmap_protect_one(gmap, gpa, prot, bits); + } + if (rc < 0) + return rc; + } + + return 0; +} + +static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu) +{ + gpa_t gaddr = kvm_s390_get_prefix(vcpu); + int idx, rc; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + mmap_read_lock(vcpu->arch.gmap->mm); + + rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT); + + mmap_read_unlock(vcpu->arch.gmap->mm); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return rc; +} + static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: @@ -4513,9 +4599,8 @@ retry: */ if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { int rc; - rc = gmap_mprotect_notify(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu), - PAGE_SIZE * 2, PROT_WRITE); + + rc = kvm_s390_mprotect_notify_prefix(vcpu); if (rc) { kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); return rc; @@ -4766,11 +4851,112 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } +static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) +{ + KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, + "Unexpected program interrupt 0x%x, TEID 0x%016lx", + current->thread.gmap_int_code, current->thread.gmap_teid.val); +} + +/* + * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu + * @vcpu: the vCPU whose gmap is to be fixed up + * @gfn: the guest frame number used for memslots (including fake memslots) + * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps + * @flags: FOLL_* flags + * + * Return: 0 on success, < 0 in case of error. + * Context: The mm lock must not be held before calling. May sleep. + */ +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags) +{ + struct kvm_memory_slot *slot; + unsigned int fault_flags; + bool writable, unlocked; + unsigned long vmaddr; + struct page *page; + kvm_pfn_t pfn; + int rc; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return vcpu_post_run_addressing_exception(vcpu); + + fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; + if (vcpu->arch.gmap->pfault_enabled) + flags |= FOLL_NOWAIT; + vmaddr = __gfn_to_hva_memslot(slot, gfn); + +try_again: + pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page); + + /* Access outside memory, inject addressing exception */ + if (is_noslot_pfn(pfn)) + return vcpu_post_run_addressing_exception(vcpu); + /* Signal pending: try again */ + if (pfn == KVM_PFN_ERR_SIGPENDING) + return -EAGAIN; + + /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */ + if (pfn == KVM_PFN_ERR_NEEDS_IO) { + trace_kvm_s390_major_guest_pfault(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + vcpu->stat.pfault_sync++; + /* Could not setup async pfault, try again synchronously */ + flags &= ~FOLL_NOWAIT; + goto try_again; + } + /* Any other error */ + if (is_error_pfn(pfn)) + return -EFAULT; + + /* Success */ + mmap_read_lock(vcpu->arch.gmap->mm); + /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */ + rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked); + if (!rc) + rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr); + scoped_guard(spinlock, &vcpu->kvm->mmu_lock) { + kvm_release_faultin_page(vcpu->kvm, page, false, writable); + } + mmap_read_unlock(vcpu->arch.gmap->mm); + return rc; +} + +static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags) +{ + unsigned long gaddr_tmp; + gfn_t gfn; + + gfn = gpa_to_gfn(gaddr); + if (kvm_is_ucontrol(vcpu->kvm)) { + /* + * This translates the per-vCPU guest address into a + * fake guest address, which can then be used with the + * fake memslots that are identity mapping userspace. + * This allows ucontrol VMs to use the normal fault + * resolution path, like normal VMs. + */ + mmap_read_lock(vcpu->arch.gmap->mm); + gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr); + mmap_read_unlock(vcpu->arch.gmap->mm); + if (gaddr_tmp == -EFAULT) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = gaddr; + vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION; + return -EREMOTE; + } + gfn = gpa_to_gfn(gaddr_tmp); + } + return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags); +} + static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { unsigned int flags = 0; unsigned long gaddr; - int rc = 0; + int rc; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) @@ -4780,30 +4966,16 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case 0: vcpu->stat.exit_null++; break; - case PGM_NON_SECURE_STORAGE_ACCESS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); - /* - * This is normal operation; a page belonging to a protected - * guest has not been imported yet. Try to import the page into - * the protected guest. - */ - if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL) - send_sig(SIGSEGV, current, 0); - break; case PGM_SECURE_STORAGE_ACCESS: case PGM_SECURE_STORAGE_VIOLATION: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); + kvm_s390_assert_primary_as(vcpu); /* * This can happen after a reboot with asynchronous teardown; * the new guest (normal or protected) will run on top of the * previous protected guest. The old pages need to be destroyed * so the new guest can use them. */ - if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) { + if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) { /* * Either KVM messed up the secure guest mapping or the * same page is mapped into multiple secure guests. @@ -4818,6 +4990,20 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) send_sig(SIGSEGV, current, 0); } break; + case PGM_NON_SECURE_STORAGE_ACCESS: + kvm_s390_assert_primary_as(vcpu); + /* + * This is normal operation; a page belonging to a protected + * guest has not been imported yet. Try to import the page into + * the protected guest. + */ + rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr); + if (rc == -EINVAL) + send_sig(SIGSEGV, current, 0); + if (rc != -ENXIO) + break; + flags = FAULT_FLAG_WRITE; + fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: case PGM_PAGE_TRANSLATION: @@ -4825,40 +5011,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_FIRST_TRANS: case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); - if (vcpu->arch.gmap->pfault_enabled) { - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT); - if (rc == -EFAULT) - return vcpu_post_run_addressing_exception(vcpu); - if (rc == -EAGAIN) { - trace_kvm_s390_major_guest_pfault(vcpu); - if (kvm_arch_setup_async_pf(vcpu)) - return 0; - vcpu->stat.pfault_sync++; - } else { - return rc; - } - } - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags); - if (rc == -EFAULT) { - if (kvm_is_ucontrol(vcpu->kvm)) { - vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; - vcpu->run->s390_ucontrol.trans_exc_code = gaddr; - vcpu->run->s390_ucontrol.pgm_code = 0x10; - return -EREMOTE; - } - return vcpu_post_run_addressing_exception(vcpu); - } - break; + kvm_s390_assert_primary_as(vcpu); + return vcpu_dat_fault_handler(vcpu, gaddr, flags); default: KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", current->thread.gmap_int_code, current->thread.gmap_teid.val); send_sig(SIGSEGV, current, 0); break; } - return rc; + return 0; } static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) @@ -5019,7 +5180,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; } - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { preempt_disable(); local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (current->thread.gs_cb) { @@ -5085,7 +5246,7 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu) kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; - if (MACHINE_HAS_GS) { + if (cpu_has_gs()) { preempt_disable(); local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT); if (vcpu->arch.gs_enabled) @@ -5737,7 +5898,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(vcpu->arch.gmap, arg, 0); + idx = srcu_read_lock(&vcpu->kvm->srcu); + r = vcpu_dat_fault_handler(vcpu, arg, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_ENABLE_CAP: @@ -5853,7 +6016,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { gpa_t size; - if (kvm_is_ucontrol(kvm)) + if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS) return -EINVAL; /* When we are protected, we should not change the memory slots */ @@ -5905,6 +6068,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, { int rc = 0; + if (kvm_is_ucontrol(kvm)) + return; + switch (change) { case KVM_MR_DELETE: rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 597d7a71deeb..c44fe0c3a097 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -20,6 +20,8 @@ #include <asm/processor.h> #include <asm/sclp.h> +#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0) + static inline void kvm_s390_fpu_store(struct kvm_run *run) { fpu_stfpc(&run->s.regs.fpc); @@ -279,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) return gd; } +static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa) +{ + hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + + if (!kvm_is_error_hva(hva)) + hva |= offset_in_page(gpa); + return hva; +} + /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); @@ -297,6 +308,9 @@ int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user, u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc); int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user, u16 *rc, u16 *rrc); +int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr); +int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr); +int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb); static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm) { @@ -308,6 +322,41 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu) return vcpu->arch.pv.handle; } +/** + * __kvm_s390_pv_destroy_page() - Destroy a guest page. + * @page: the page to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: must be called holding the mm lock for gmap->mm + */ +static inline int __kvm_s390_pv_destroy_page(struct page *page) +{ + struct folio *folio = page_folio(page); + int rc; + + /* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. */ + if (folio_test_large(folio)) + return -EFAULT; + + rc = uv_destroy_folio(folio); + /* + * Fault handlers can race; it is possible that two CPUs will fault + * on the same secure page. One CPU can destroy the page, reboot, + * re-enter secure mode and import it, while the second CPU was + * stuck at the beginning of the handler. At some point the second + * CPU will be able to progress, and it will not be able to destroy + * the page. In that case we do not want to terminate the process, + * we instead try to export the page. + */ + if (rc) + rc = uv_convert_from_secure_folio(folio); + + return rc; +} + /* implemented in interrupt.c */ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); @@ -387,6 +436,10 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); void kvm_s390_vsie_init(struct kvm *kvm); void kvm_s390_vsie_destroy(struct kvm *kvm); +int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level); + +/* implemented in gmap-vsie.c */ +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); @@ -408,6 +461,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc); +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags); +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits); + +static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags) +{ + return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags); +} /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index 9b9e7fdd5380..8c40154ff50f 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -433,7 +433,6 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) { struct zpci_dev *zdev = opaque; - u8 status; int rc; if (!zdev) @@ -480,13 +479,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) */ zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); - rc = zpci_enable_device(zdev); - if (rc) - goto clear_gisa; - - /* Re-register the IOMMU that was already created */ - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); + rc = zpci_reenable_device(zdev); if (rc) goto clear_gisa; @@ -516,7 +509,6 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) { struct zpci_dev *zdev = opaque; struct kvm *kvm; - u8 status; if (!zdev) return; @@ -550,12 +542,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque) goto out; } - if (zpci_enable_device(zdev)) - goto out; - - /* Re-register the IOMMU that was already created */ - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); + zpci_reenable_device(zdev); out: spin_lock(&kvm->arch.kzdev_list_lock); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 1a49b89706f8..9253c70897a8 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1248,6 +1248,8 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc) static int handle_essa(struct kvm_vcpu *vcpu) { + lockdep_assert_held(&vcpu->kvm->srcu); + /* entries expected to be 1FF */ int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; unsigned long *cbrlo; @@ -1297,12 +1299,8 @@ static int handle_essa(struct kvm_vcpu *vcpu) /* Retry the ESSA instruction */ kvm_s390_retry_instr(vcpu); } else { - int srcu_idx; - mmap_read_lock(vcpu->kvm->mm); - srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); i = __do_essa(vcpu, orc); - srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); mmap_read_unlock(vcpu->kvm->mm); if (i < 0) return i; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 75e81ba26d04..14c330ec8ceb 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -33,6 +33,64 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected); /** + * kvm_s390_pv_make_secure() - make one guest page secure + * @kvm: the guest + * @gaddr: the guest address that needs to be made secure + * @uvcb: the UVCB specifying which operation needs to be performed + * + * Context: needs to be called with kvm->srcu held. + * Return: 0 on success, < 0 in case of error. + */ +int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb) +{ + unsigned long vmaddr; + + lockdep_assert_held(&kvm->srcu); + + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) + return -EFAULT; + return make_hva_secure(kvm->mm, vmaddr, uvcb); +} + +int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr) +{ + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, + .header.len = sizeof(uvcb), + .guest_handle = kvm_s390_pv_get_handle(kvm), + .gaddr = gaddr, + }; + + return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb); +} + +/** + * kvm_s390_pv_destroy_page() - Destroy a guest page. + * @kvm: the guest + * @gaddr: the guest address to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: may sleep. + */ +int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr) +{ + struct page *page; + int rc = 0; + + mmap_read_lock(kvm->mm); + page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); + if (page) + rc = __kvm_s390_pv_destroy_page(page); + kvm_release_page_clean(page); + mmap_read_unlock(kvm->mm); + return rc; +} + +/** * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to * be destroyed * @@ -637,11 +695,29 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak, .tweak[0] = tweak, .tweak[1] = offset, }; - int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb); + int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb); + unsigned long vmaddr; + bool unlocked; *rc = uvcb.header.rc; *rrc = uvcb.header.rrc; + if (ret == -ENXIO) { + mmap_read_lock(kvm->mm); + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(vmaddr)) { + ret = -EFAULT; + } else { + ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!ret) + ret = __gmap_link(kvm->arch.gmap, addr, vmaddr); + } + mmap_read_unlock(kvm->mm); + if (!ret) + return -EAGAIN; + return ret; + } + if (ret && ret != -EAGAIN) KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x", uvcb.gaddr, *rc, *rrc); @@ -660,6 +736,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size, KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx", addr, size); + guard(srcu)(&kvm->srcu); + while (offset < size) { ret = unpack_one(kvm, addr, tweak, offset, rc, rrc); if (ret == -EAGAIN) { diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 9ac92dbf680d..9e28f165c114 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + TP_printk("create cpu %d at 0x%p, sie block at 0x%p", __entry->id, __entry->vcpu, __entry->sie_block) ); @@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %pK)\n", + TP_printk("enabling channel I/O support (kvm @ %p)\n", __entry->kvm) ); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a687695d8f68..13a9661d2b28 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -13,6 +13,7 @@ #include <linux/bitmap.h> #include <linux/sched/signal.h> #include <linux/io.h> +#include <linux/mman.h> #include <asm/gmap.h> #include <asm/mmu_context.h> @@ -23,6 +24,10 @@ #include "kvm-s390.h" #include "gaccess.h" +enum vsie_page_flags { + VSIE_PAGE_IN_USE = 0, +}; + struct vsie_page { struct kvm_s390_sie_block scb_s; /* 0x0000 */ /* @@ -46,11 +51,40 @@ struct vsie_page { gpa_t gvrd_gpa; /* 0x0240 */ gpa_t riccbd_gpa; /* 0x0248 */ gpa_t sdnx_gpa; /* 0x0250 */ - __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */ + /* + * guest address of the original SCB. Remains set for free vsie + * pages, so we can properly look them up in our addr_to_page + * radix tree. + */ + gpa_t scb_gpa; /* 0x0258 */ + /* + * Flags: must be set/cleared atomically after the vsie page can be + * looked up by other CPUs. + */ + unsigned long flags; /* 0x0260 */ + __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */ struct kvm_s390_crypto_cb crycb; /* 0x0700 */ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ }; +/** + * gmap_shadow_valid() - check if a shadow guest address space matches the + * given properties and is still valid + * @sg: pointer to the shadow guest address space structure + * @asce: ASCE for which the shadow table is requested + * @edat_level: edat level to be used for the shadow translation + * + * Returns 1 if the gmap shadow is still valid and matches the given + * properties, the caller can continue using it. Returns 0 otherwise; the + * caller has to request a new shadow gmap in this case. + */ +int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) +{ + if (sg->removed) + return 0; + return sg->orig_asce == asce && sg->edat_level == edat_level; +} + /* trigger a validity icpt for the given scb */ static int set_validity_icpt(struct kvm_s390_sie_block *scb, __u16 reason_code) @@ -584,7 +618,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, struct kvm *kvm = gmap->private; struct vsie_page *cur; unsigned long prefix; - struct page *page; int i; if (!gmap_is_shadow(gmap)) @@ -594,10 +627,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, * therefore we can safely reference them all the time. */ for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = READ_ONCE(kvm->arch.vsie.pages[i]); - if (!page) + cur = READ_ONCE(kvm->arch.vsie.pages[i]); + if (!cur) continue; - cur = page_to_virt(page); if (READ_ONCE(cur->gmap) != gmap) continue; prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT; @@ -1345,6 +1377,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) return rc; } +/* Try getting a given vsie page, returning "true" on success. */ +static inline bool try_get_vsie_page(struct vsie_page *vsie_page) +{ + if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags)) + return false; + return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + +/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */ +static void put_vsie_page(struct vsie_page *vsie_page) +{ + clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + /* * Get or create a vsie page for a scb address. * @@ -1355,16 +1401,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) { struct vsie_page *vsie_page; - struct page *page; int nr_vcpus; rcu_read_lock(); - page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); + vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); rcu_read_unlock(); - if (page) { - if (page_ref_inc_return(page) == 2) - return page_to_virt(page); - page_ref_dec(page); + if (vsie_page) { + if (try_get_vsie_page(vsie_page)) { + if (vsie_page->scb_gpa == addr) + return vsie_page; + /* + * We raced with someone reusing + putting this vsie + * page before we grabbed it. + */ + put_vsie_page(vsie_page); + } } /* @@ -1375,36 +1426,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_lock(&kvm->arch.vsie.mutex); if (kvm->arch.vsie.page_count < nr_vcpus) { - page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); - if (!page) { + vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); + if (!vsie_page) { mutex_unlock(&kvm->arch.vsie.mutex); return ERR_PTR(-ENOMEM); } - page_ref_inc(page); - kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page; + __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); + kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page; kvm->arch.vsie.page_count++; } else { /* reuse an existing entry that belongs to nobody */ while (true) { - page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; - if (page_ref_inc_return(page) == 2) + vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; + if (try_get_vsie_page(vsie_page)) break; - page_ref_dec(page); kvm->arch.vsie.next++; kvm->arch.vsie.next %= nr_vcpus; } - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + if (vsie_page->scb_gpa != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + vsie_page->scb_gpa >> 9); } - page->index = addr; - /* double use of the same address */ - if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { - page_ref_dec(page); + /* Mark it as invalid until it resides in the tree. */ + vsie_page->scb_gpa = ULONG_MAX; + + /* Double use of the same address or allocation failure. */ + if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, + vsie_page)) { + put_vsie_page(vsie_page); mutex_unlock(&kvm->arch.vsie.mutex); return NULL; } + vsie_page->scb_gpa = addr; mutex_unlock(&kvm->arch.vsie.mutex); - vsie_page = page_to_virt(page); memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block)); release_gmap_shadow(vsie_page); vsie_page->fault_addr = 0; @@ -1412,14 +1467,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) return vsie_page; } -/* put a vsie page acquired via get_vsie_page */ -static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page) -{ - struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT); - - page_ref_dec(page); -} - int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) { struct vsie_page *vsie_page; @@ -1470,7 +1517,7 @@ out_unshadow: out_unpin_scb: unpin_scb(vcpu, vsie_page, scb_addr); out_put: - put_vsie_page(vcpu->kvm, vsie_page); + put_vsie_page(vsie_page); return rc < 0 ? rc : 0; } @@ -1486,18 +1533,18 @@ void kvm_s390_vsie_init(struct kvm *kvm) void kvm_s390_vsie_destroy(struct kvm *kvm) { struct vsie_page *vsie_page; - struct page *page; int i; mutex_lock(&kvm->arch.vsie.mutex); for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = kvm->arch.vsie.pages[i]; + vsie_page = kvm->arch.vsie.pages[i]; kvm->arch.vsie.pages[i] = NULL; - vsie_page = page_to_virt(page); release_gmap_shadow(vsie_page); /* free the radix tree entry */ - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); - __free_page(page); + if (vsie_page->scb_gpa != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + vsie_page->scb_gpa >> 9); + free_page((unsigned long)vsie_page); } kvm->arch.vsie.page_count = 0; mutex_unlock(&kvm->arch.vsie.mutex); diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index f43f897d3fc0..cd35cdbfa871 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,6 +3,7 @@ # Makefile for s390-specific library files.. # +obj-y += crypto/ lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o lib-y += csum-partial.o obj-y += mem.o xor.o @@ -24,3 +25,6 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o + +obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o +crc32-s390-y := crc32.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/crypto/crc32-vx.h b/arch/s390/lib/crc32-vx.h index 652c96e1a822..652c96e1a822 100644 --- a/arch/s390/crypto/crc32-vx.h +++ b/arch/s390/lib/crc32-vx.h diff --git a/arch/s390/lib/crc32.c b/arch/s390/lib/crc32.c new file mode 100644 index 000000000000..3c4b344417c1 --- /dev/null +++ b/arch/s390/lib/crc32.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CRC-32 implemented with the z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/cpufeature.h> +#include <linux/crc32.h> +#include <asm/fpu.h> +#include "crc32-vx.h" + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementation + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. + */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + u32 ___fname(u32 crc, const u8 *data, size_t datalen) \ + { \ + unsigned long prealign, aligned, remaining; \ + DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ + \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx()) \ + return ___crc32_sw(crc, data, datalen); \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign = VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -= prealign; \ + crc = ___crc32_sw(crc, data, prealign); \ + data = (void *)((unsigned long)data + prealign); \ + } \ + \ + aligned = datalen & ~VX_ALIGN_MASK; \ + remaining = datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc = ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ + \ + if (remaining) \ + crc = ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } \ + EXPORT_SYMBOL(___fname); + +DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) +DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) +DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) + +u32 crc32_optimizations(void) +{ + if (cpu_has_vx()) { + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + } + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c index fed7c9c70d05..fed7c9c70d05 100644 --- a/arch/s390/crypto/crc32be-vx.c +++ b/arch/s390/lib/crc32be-vx.c diff --git a/arch/s390/crypto/crc32le-vx.c b/arch/s390/lib/crc32le-vx.c index 2f629f394df7..2f629f394df7 100644 --- a/arch/s390/crypto/crc32le-vx.c +++ b/arch/s390/lib/crc32le-vx.c diff --git a/arch/s390/lib/crypto/Kconfig b/arch/s390/lib/crypto/Kconfig new file mode 100644 index 000000000000..e3f855ef4393 --- /dev/null +++ b/arch/s390/lib/crypto/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config CRYPTO_CHACHA_S390 + tristate + default CRYPTO_LIB_CHACHA + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA + +config CRYPTO_SHA256_S390 + tristate + default CRYPTO_LIB_SHA256 + select CRYPTO_ARCH_HAVE_LIB_SHA256 + select CRYPTO_LIB_SHA256_GENERIC diff --git a/arch/s390/lib/crypto/Makefile b/arch/s390/lib/crypto/Makefile new file mode 100644 index 000000000000..5df30f1e7930 --- /dev/null +++ b/arch/s390/lib/crypto/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o +chacha_s390-y := chacha-glue.o chacha-s390.o + +obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256-s390.o +sha256-s390-y := sha256.o diff --git a/arch/s390/lib/crypto/chacha-glue.c b/arch/s390/lib/crypto/chacha-glue.c new file mode 100644 index 000000000000..f95ba3483bbc --- /dev/null +++ b/arch/s390/lib/crypto/chacha-glue.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ChaCha stream cipher (s390 optimized) + * + * Copyright IBM Corp. 2021 + */ + +#define KMSG_COMPONENT "chacha_s390" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <crypto/chacha.h> +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sizes.h> +#include <asm/fpu.h> +#include "chacha-s390.h" + +void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + /* TODO: implement hchacha_block_arch() in assembly */ + hchacha_block_generic(state, out, nrounds); +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + /* s390 chacha20 implementation has 20 rounds hard-coded, + * it cannot handle a block of data or less, but otherwise + * it can handle data of arbitrary size + */ + if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) { + chacha_crypt_generic(state, dst, src, bytes, nrounds); + } else { + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); + + kernel_fpu_begin(&vxstate, KERNEL_VXR); + chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]); + kernel_fpu_end(&vxstate, KERNEL_VXR); + + state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) / + CHACHA_BLOCK_SIZE; + } +} +EXPORT_SYMBOL(chacha_crypt_arch); + +bool chacha_is_arch_optimized(void) +{ + return cpu_has_vx(); +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + +MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/lib/crypto/chacha-s390.S index 63f3102678c0..63f3102678c0 100644 --- a/arch/s390/crypto/chacha-s390.S +++ b/arch/s390/lib/crypto/chacha-s390.S diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/lib/crypto/chacha-s390.h index 733744ce30f5..733744ce30f5 100644 --- a/arch/s390/crypto/chacha-s390.h +++ b/arch/s390/lib/crypto/chacha-s390.h diff --git a/arch/s390/lib/crypto/sha256.c b/arch/s390/lib/crypto/sha256.c new file mode 100644 index 000000000000..7dfe120fafab --- /dev/null +++ b/arch/s390/lib/crypto/sha256.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SHA-256 optimized using the CP Assist for Cryptographic Functions (CPACF) + * + * Copyright 2025 Google LLC + */ +#include <asm/cpacf.h> +#include <crypto/internal/sha2.h> +#include <linux/cpufeature.h> +#include <linux/kernel.h> +#include <linux/module.h> + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_cpacf_sha256); + +void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], + const u8 *data, size_t nblocks) +{ + if (static_branch_likely(&have_cpacf_sha256)) + cpacf_kimd(CPACF_KIMD_SHA_256, state, data, + nblocks * SHA256_BLOCK_SIZE); + else + sha256_blocks_generic(state, data, nblocks); +} +EXPORT_SYMBOL_GPL(sha256_blocks_arch); + +bool sha256_is_arch_optimized(void) +{ + return static_key_enabled(&have_cpacf_sha256); +} +EXPORT_SYMBOL_GPL(sha256_is_arch_optimized); + +static int __init sha256_s390_mod_init(void) +{ + if (cpu_have_feature(S390_CPU_FEATURE_MSA) && + cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) + static_branch_enable(&have_cpacf_sha256); + return 0; +} +subsys_initcall(sha256_s390_mod_init); + +static void __exit sha256_s390_mod_exit(void) +{ +} +module_exit(sha256_s390_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA-256 using the CP Assist for Cryptographic Functions (CPACF)"); diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 08f60a42b9a6..d026debf250c 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -34,8 +34,7 @@ SYM_FUNC_START(__memmove) la %r3,256(%r3) brctg %r0,.Lmemmove_forward_loop .Lmemmove_forward_remainder: - larl %r5,.Lmemmove_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemmove_mvc .Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: @@ -83,8 +82,7 @@ SYM_FUNC_START(__memset) la %r1,256(%r1) brctg %r3,.Lmemset_clear_loop .Lmemset_clear_remainder: - larl %r3,.Lmemset_xc - ex %r4,0(%r3) + exrl %r4,.Lmemset_xc .Lmemset_exit: BR_EX %r14 .Lmemset_fill: @@ -102,8 +100,7 @@ SYM_FUNC_START(__memset) brctg %r5,.Lmemset_fill_loop .Lmemset_fill_remainder: stc %r3,0(%r1) - larl %r5,.Lmemset_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemset_mvc BR_EX %r14 .Lmemset_fill_exit: stc %r3,0(%r1) @@ -132,8 +129,7 @@ SYM_FUNC_START(__memcpy) lgr %r1,%r2 jnz .Lmemcpy_loop .Lmemcpy_remainder: - larl %r5,.Lmemcpy_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemcpy_mvc .Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: @@ -175,8 +171,7 @@ SYM_FUNC_START(__memset\bits) brctg %r5,.L__memset_loop\bits .L__memset_remainder\bits: \insn %r3,0(%r1) - larl %r5,.L__memset_mvc\bits - ex %r4,0(%r5) + exrl %r4,.L__memset_mvc\bits BR_EX %r14 .L__memset_store\bits: \insn %r3,0(%r2) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index a81a01c44927..ad9da4038511 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -10,11 +10,13 @@ #include <linux/export.h> #include <linux/spinlock.h> #include <linux/jiffies.h> +#include <linux/sysctl.h> #include <linux/init.h> #include <linux/smp.h> #include <linux/percpu.h> #include <linux/io.h> #include <asm/alternative.h> +#include <asm/machine.h> #include <asm/asm.h> int spin_retry = -1; @@ -37,6 +39,23 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); +static const struct ctl_table s390_spin_sysctl_table[] = { + { + .procname = "spin_retry", + .data = &spin_retry, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int __init init_s390_spin_sysctls(void) +{ + register_sysctl_init("kernel", s390_spin_sysctl_table); + return 0; +} +arch_initcall(init_s390_spin_sysctls); + struct spin_wait { struct spin_wait *next, *prev; int node_id; @@ -141,7 +160,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) ix = get_lowcore()->spinlock_index++; barrier(); - lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ + lockval = spinlock_lockval(); /* cpu + 1 */ node = this_cpu_ptr(&spin_wait[ix]); node->prev = node->next = NULL; node_id = node->node_id; @@ -212,7 +231,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp) if (count-- >= 0) continue; count = spin_retry; - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1)) smp_yield_cpu(owner - 1); } @@ -232,7 +251,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) { int lockval, old, new, owner, count; - lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */ + lockval = spinlock_lockval(); /* cpu + 1 */ /* Pass the virtual CPU to the lock holder if it is not running */ owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL); @@ -255,7 +274,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) if (count-- >= 0) continue; count = spin_retry; - if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1)) + if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1)) smp_yield_cpu(owner - 1); } } @@ -271,7 +290,7 @@ EXPORT_SYMBOL(arch_spin_lock_wait); int arch_spin_trylock_retry(arch_spinlock_t *lp) { - int cpu = SPINLOCK_LOCKVAL; + int cpu = spinlock_lockval(); int owner, count; for (count = spin_retry; count > 0; count--) { @@ -337,7 +356,7 @@ void arch_spin_relax(arch_spinlock_t *lp) cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK; if (!cpu) return; - if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1)) + if (machine_is_lpar() && !arch_vcpu_is_preempted(cpu - 1)) return; smp_yield_cpu(cpu - 1); } diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 373fa1f01937..099de76e8b1a 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -78,50 +78,6 @@ EXPORT_SYMBOL(strnlen); #endif /** - * strcpy - Copy a %NUL terminated string - * @dest: Where to copy the string to - * @src: Where to copy the string from - * - * returns a pointer to @dest - */ -#ifdef __HAVE_ARCH_STRCPY -char *strcpy(char *dest, const char *src) -{ - char *ret = dest; - - asm volatile( - " lghi 0,0\n" - "0: mvst %[dest],%[src]\n" - " jo 0b\n" - : [dest] "+&a" (dest), [src] "+&a" (src) - : - : "cc", "memory", "0"); - return ret; -} -EXPORT_SYMBOL(strcpy); -#endif - -/** - * strncpy - Copy a length-limited, %NUL-terminated string - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @n: The maximum number of bytes to copy - * - * The result is not %NUL-terminated if the source exceeds - * @n bytes. - */ -#ifdef __HAVE_ARCH_STRNCPY -char *strncpy(char *dest, const char *src, size_t n) -{ - size_t len = __strnend(src, n) - src; - memset(dest + len, 0, n - len); - memcpy(dest, src, len); - return dest; -} -EXPORT_SYMBOL(strncpy); -#endif - -/** * strcat - Append one %NUL-terminated string to another * @dest: The string to be appended to * @src: The string to append to it @@ -181,9 +137,6 @@ EXPORT_SYMBOL(strlcat); * @n: The maximum numbers of bytes to copy * * returns a pointer to @dest - * - * Note that in contrast to strncpy, strncat ensures the result is - * terminated. */ #ifdef __HAVE_ARCH_STRNCAT char *strncat(char *dest, const char *src, size_t n) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index c7c269d5c491..fa7d98fa1320 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -17,64 +17,74 @@ #ifdef CONFIG_DEBUG_ENTRY void debug_user_asce(int exit) { + struct lowcore *lc = get_lowcore(); struct ctlreg cr1, cr7; local_ctl_store(1, &cr1); local_ctl_store(7, &cr7); - if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val) + if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val) return; panic("incorrect ASCE on kernel %s\n" "cr1: %016lx cr7: %016lx\n" "kernel: %016lx user: %016lx\n", exit ? "exit" : "entry", cr1.val, cr7.val, - get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val); + lc->kernel_asce.val, lc->user_asce.val); } #endif /*CONFIG_DEBUG_ENTRY */ -static unsigned long raw_copy_from_user_key(void *to, const void __user *from, - unsigned long size, unsigned long key) +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; + +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) { - unsigned long rem; + unsigned long osize; union oac spec = { .oac2.key = key, .oac2.as = PSW_BITS_AS_SECONDARY, .oac2.k = 1, .oac2.a = 1, }; + int cc; - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[from],%[val]\n" - " slgr %[to],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ - " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ - " slgr %[rem],%[from]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - return raw_copy_from_user_key(to, from, n, 0); + while (1) { + osize = size; + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_FROM(0b, 0b) + EX_TABLE_UA_MVCOS_FROM(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to) + : [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } } -EXPORT_SYMBOL(raw_copy_from_user); unsigned long _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) @@ -93,50 +103,37 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, } EXPORT_SYMBOL(_copy_from_user_key); -static unsigned long raw_copy_to_user_key(void __user *to, const void *from, - unsigned long size, unsigned long key) +static uaccess_kmsan_or_inline __must_check unsigned long +raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) { - unsigned long rem; + unsigned long osize; union oac spec = { .oac1.key = key, .oac1.as = PSW_BITS_AS_SECONDARY, .oac1.k = 1, .oac1.a = 1, }; + int cc; - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[to],%[val]\n" - " slgr %[from],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ - " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ - " slgr %[rem],%[to]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - return raw_copy_to_user_key(to, from, n, 0); + while (1) { + osize = size; + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos %[to],%[from],%[size]\n" + "1: nopr %%r7\n" + CC_IPM(cc) + EX_TABLE_UA_MVCOS_TO(0b, 0b) + EX_TABLE_UA_MVCOS_TO(1b, 0b) + : CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to) + : [spec] "d" (spec.val), [from] "Q" (*(const char *)from) + : CC_CLOBBER_LIST("memory", "0")); + if (CC_TRANSFORM(cc) == 0) + return osize - size; + size -= 4096; + to += 4096; + from += 4096; + } } -EXPORT_SYMBOL(raw_copy_to_user); unsigned long _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) @@ -148,39 +145,3 @@ unsigned long _copy_to_user_key(void __user *to, const void *from, return raw_copy_to_user_key(to, from, n, key); } EXPORT_SYMBOL(_copy_to_user_key); - -unsigned long __clear_user(void __user *to, unsigned long size) -{ - unsigned long rem; - union oac spec = { - .oac1.as = PSW_BITS_AS_SECONDARY, - .oac1.a = 1, - }; - - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[zeropg]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[to],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ - " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ - " slgr %[rem],%[to]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[zeropg]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [zeropg] "a" (empty_zero_page), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} -EXPORT_SYMBOL(__clear_user); diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index fb924a8041dc..ce7bcf7c0032 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -15,7 +15,6 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" @@ -25,12 +24,12 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, " la %1,256(%1)\n" " la %2,256(%2)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" + "1: exrl %0,2f\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:\n" : : "d" (bytes), "a" (p1), "a" (p2) - : "0", "1", "cc", "memory"); + : "0", "cc", "memory"); } static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, @@ -38,9 +37,8 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p3) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 4f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -50,14 +48,14 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, " la %2,256(%2)\n" " la %3,256(%3)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " j 4f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, @@ -66,9 +64,8 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p4) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 5f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -80,16 +77,16 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, " la %3,256(%3)\n" " la %4,256(%4)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " j 5f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, @@ -101,7 +98,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, asm volatile( " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 6f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -115,19 +112,19 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, " la %4,256(%4)\n" " la %5,256(%5)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " ex %0,18(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " exrl %0,5f\n" + " j 6f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - " xc 0(1,%1),0(%5)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5: xc 0(1,%1),0(%5)\n" + "6:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), "+a" (p5) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } struct xor_block_template xor_block_xc = { diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f6c2db7a8669..bd0401cc7ca5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -9,6 +9,8 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o obj-$(CONFIG_PFAULT) += pfault.o + +obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index d01724a715d0..e2a6eb92420f 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -201,10 +201,10 @@ static void cmm_set_timer(void) { if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { if (timer_pending(&cmm_timer)) - del_timer(&cmm_timer); + timer_delete(&cmm_timer); return; } - mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC)); + mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds)); } static void cmm_timer_fn(struct timer_list *unused) @@ -332,7 +332,7 @@ static int cmm_timeout_handler(const struct ctl_table *ctl, int write, return 0; } -static struct ctl_table cmm_table[] = { +static const struct ctl_table cmm_table[] = { { .procname = "cmm_pages", .mode = 0644, @@ -424,7 +424,7 @@ out_smsg: #endif unregister_sysctl_table(cmm_sysctl_header); out_sysctl: - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); return rc; } module_init(cmm_init); @@ -437,7 +437,7 @@ static void __exit cmm_exit(void) #endif unregister_oom_notifier(&cmm_oom_nb); kthread_stop(cmm_thread_ptr); - del_timer_sync(&cmm_timer); + timer_delete_sync(&cmm_timer); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index fa54f3bc0c8d..ac604b176660 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 + +#include <linux/cpufeature.h> #include <linux/set_memory.h> #include <linux/ptdump.h> #include <linux/seq_file.h> @@ -82,7 +84,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) * in which case we have two lpswe instructions in lowcore that need * to be executable. */ - if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear))) + if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !cpu_has_bear())) return; WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX), "s390/mm: Found insecure W+X mapping at address %pS\n", @@ -145,11 +147,48 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } +static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte) +{ + note_page(pt_st, addr, 4, pte_val(pte)); +} + +static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd) +{ + note_page(pt_st, addr, 3, pmd_val(pmd)); +} + +static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud) +{ + note_page(pt_st, addr, 2, pud_val(pud)); +} + +static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d) +{ + note_page(pt_st, addr, 1, p4d_val(p4d)); +} + +static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd) +{ + note_page(pt_st, addr, 0, pgd_val(pgd)); +} + +static void note_page_flush(struct ptdump_state *pt_st) +{ + pte_t pte_zero = {0}; + + note_page(pt_st, 0, -1, pte_val(pte_zero)); +} + bool ptdump_check_wx(void) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 0, .end = max_addr}, {.start = 0, .end = 0}, @@ -167,7 +206,7 @@ bool ptdump_check_wx(void) }, }; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) return true; ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); if (st.wx_pages) { @@ -176,7 +215,7 @@ bool ptdump_check_wx(void) return false; } else { pr_info("Checked W+X mappings: passed, no %sW+X pages found\n", - (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ? + (nospec_uses_trampoline() || !cpu_has_bear()) ? "unexpected " : ""); return true; @@ -188,7 +227,12 @@ static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { .ptdump = { - .note_page = note_page, + .note_page_pte = note_page_pte, + .note_page_pmd = note_page_pmd, + .note_page_pud = note_page_pud, + .note_page_p4d = note_page_p4d, + .note_page_pgd = note_page_pgd, + .note_page_flush = note_page_flush, .range = (struct ptdump_range[]) { {.start = 0, .end = max_addr}, {.start = 0, .end = 0}, diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 0a0738a473af..7498e858c401 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -7,6 +7,7 @@ #include <linux/panic.h> #include <asm/asm-extable.h> #include <asm/extable.h> +#include <asm/fpu.h> const struct exception_table_entry *s390_search_extables(unsigned long addr) { @@ -26,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r return true; } -static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs) +static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs) { unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); @@ -35,18 +36,6 @@ static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct p return true; } -static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs) -{ - unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); - unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); - size_t len = FIELD_GET(EX_DATA_LEN, ex->data); - - regs->gprs[reg_err] = -EFAULT; - memset((void *)regs->gprs[reg_addr], 0, len); - regs->psw.addr = extable_fixup(ex); - return true; -} - static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, bool pair, struct pt_regs *regs) { @@ -77,6 +66,56 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt return true; } +static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + fpu_sfpc(0); + regs->psw.addr = extable_fixup(ex); + return true; +} + +struct insn_ssf { + u64 opc1 : 8; + u64 r3 : 4; + u64 opc2 : 4; + u64 b1 : 4; + u64 d1 : 12; + u64 b2 : 4; + u64 d2 : 12; +} __packed; + +static bool ex_handler_ua_mvcos(const struct exception_table_entry *ex, + bool from, struct pt_regs *regs) +{ + unsigned long uaddr, remainder; + struct insn_ssf *insn; + + /* + * If the faulting user space access crossed a page boundary retry by + * limiting the access to the first page (adjust length accordingly). + * Then the mvcos instruction will either complete with condition code + * zero, or generate another fault where the user space access did not + * cross a page boundary. + * If the faulting user space access did not cross a page boundary set + * length to zero and retry. In this case no user space access will + * happen, and the mvcos instruction will complete with condition code + * zero. + * In both cases the instruction will complete with condition code + * zero (copying finished), and the register which contains the + * length, indicates the number of bytes copied. + */ + regs->psw.addr = extable_fixup(ex); + insn = (struct insn_ssf *)regs->psw.addr; + if (from) + uaddr = regs->gprs[insn->b2] + insn->d2; + else + uaddr = regs->gprs[insn->b1] + insn->d1; + remainder = PAGE_SIZE - (uaddr & (PAGE_SIZE - 1)); + if (regs->gprs[insn->r3] <= remainder) + remainder = 0; + regs->gprs[insn->r3] = remainder; + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -89,16 +128,20 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); - case EX_TYPE_UA_STORE: - return ex_handler_ua_store(ex, regs); - case EX_TYPE_UA_LOAD_MEM: - return ex_handler_ua_load_mem(ex, regs); + case EX_TYPE_UA_FAULT: + return ex_handler_ua_fault(ex, regs); case EX_TYPE_UA_LOAD_REG: return ex_handler_ua_load_reg(ex, false, regs); case EX_TYPE_UA_LOAD_REGPAIR: return ex_handler_ua_load_reg(ex, true, regs); case EX_TYPE_ZEROPAD: return ex_handler_zeropad(ex, regs); + case EX_TYPE_FPC: + return ex_handler_fpc(ex, regs); + case EX_TYPE_UA_MVCOS_TO: + return ex_handler_ua_mvcos(ex, false, regs); + case EX_TYPE_UA_MVCOS_FROM: + return ex_handler_ua_mvcos(ex, true, regs); } panic("invalid exception table entry"); } diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 4692136c0af1..f7da53e212f5 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -21,6 +21,7 @@ #include <linux/ioport.h> #include <linux/refcount.h> #include <linux/pgtable.h> +#include <asm/machine.h> #include <asm/diag.h> #include <asm/page.h> #include <asm/ebcdic.h> @@ -255,7 +256,7 @@ segment_type (char* name) int rc; struct dcss_segment seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; dcss_mkname(name, seg.dcss_name); @@ -418,7 +419,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr, struct dcss_segment *seg; int rc; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return -ENOSYS; mutex_lock(&dcss_lock); @@ -529,6 +530,14 @@ segment_modify_shared (char *name, int do_nonshared) return rc; } +static void __dcss_diag_purge_on_cpu_0(void *data) +{ + struct dcss_segment *seg = (struct dcss_segment *)data; + unsigned long dummy; + + dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); +} + /* * Decrease the use count of a DCSS segment and remove * it from the address space if nobody is using it @@ -537,10 +546,9 @@ segment_modify_shared (char *name, int do_nonshared) void segment_unload(char *name) { - unsigned long dummy; struct dcss_segment *seg; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); @@ -555,7 +563,14 @@ segment_unload(char *name) kfree(seg->res); vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); - dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); + /* + * Workaround for z/VM issue, where calling the DCSS unload diag on + * a non-IPL CPU would cause bogus sclp maximum memory detection on + * next IPL. + * IPL CPU 0 cannot be set offline, so the dcss_diag() call can + * directly be scheduled to that CPU. + */ + smp_call_function_single(0, __dcss_diag_purge_on_cpu_0, seg, 1); kfree(seg); out_unlock: mutex_unlock(&dcss_lock); @@ -572,7 +587,7 @@ segment_save(char *name) char cmd2[80]; int i, response; - if (!MACHINE_IS_VM) + if (!machine_is_vm()) return; mutex_lock(&dcss_lock); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9b681f74dccc..e1ad05bfd28a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -11,11 +11,11 @@ #include <linux/kernel_stat.h> #include <linux/mmu_context.h> +#include <linux/cpufeature.h> #include <linux/perf_event.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/debug.h> -#include <linux/jump_label.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> @@ -40,22 +40,11 @@ #include <asm/ptrace.h> #include <asm/fault.h> #include <asm/diag.h> -#include <asm/gmap.h> #include <asm/irq.h> #include <asm/facility.h> #include <asm/uv.h> #include "../kernel/entry.h" -static DEFINE_STATIC_KEY_FALSE(have_store_indication); - -static int __init fault_init(void) -{ - if (test_facility(75)) - static_branch_enable(&have_store_indication); - return 0; -} -early_initcall(fault_init); - /* * Find out which address space caused the exception. */ @@ -81,7 +70,7 @@ static __always_inline bool fault_is_write(struct pt_regs *regs) { union teid teid = { .val = regs->int_parm_long }; - if (static_branch_likely(&have_store_indication)) + if (test_facility(75)) return teid.fsi == TEID_FSI_STORE; return false; } @@ -175,6 +164,23 @@ static void dump_fault_info(struct pt_regs *regs) int show_unhandled_signals = 1; +static const struct ctl_table s390_fault_sysctl_table[] = { + { + .procname = "userprocess_debug", + .data = &show_unhandled_signals, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +}; + +static int __init init_s390_fault_sysctls(void) +{ + register_sysctl_init("kernel", s390_fault_sysctl_table); + return 0; +} +arch_initcall(init_s390_fault_sysctls); + void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -369,6 +375,7 @@ void do_protection_exception(struct pt_regs *regs) if (unlikely(!teid.b61)) { if (user_mode(regs)) { /* Low-address protection in user mode: cannot happen */ + dump_fault_info(regs); die(regs, "Low-address protection"); } /* @@ -377,7 +384,7 @@ void do_protection_exception(struct pt_regs *regs) */ return handle_fault_error_nolock(regs, 0); } - if (unlikely(MACHINE_HAS_NX && teid.b56)) { + if (unlikely(cpu_has_nx() && teid.b56)) { regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK); return handle_fault_error_nolock(regs, SEGV_ACCERR); } @@ -434,6 +441,8 @@ void do_secure_storage_access(struct pt_regs *regs) if (rc) BUG(); } else { + if (faulthandler_disabled()) + return handle_fault_error_nolock(regs, 0); mm = current->mm; mmap_read_lock(mm); vma = find_vma(mm, addr); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 16b8a36c56de..012a4366a2ad 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -8,6 +8,7 @@ * Janosch Frank <frankja@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/pagewalk.h> #include <linux/swap.h> @@ -20,9 +21,20 @@ #include <linux/pgtable.h> #include <asm/page-states.h> #include <asm/pgalloc.h> +#include <asm/machine.h> +#include <asm/gmap_helpers.h> #include <asm/gmap.h> #include <asm/page.h> -#include <asm/tlb.h> + +/* + * The address is saved in a radix tree directly; NULL would be ambiguous, + * since 0 is a valid address, and NULL is returned when nothing was found. + * The lower bits are ignored by all users of the macro, so it can be used + * to distinguish a valid address 0 from a NULL. + */ +#define VALID_GADDR_FLAG 1 +#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG) +#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG) #define GMAP_SHADOW_FAKE_TABLE 1ULL @@ -43,7 +55,7 @@ static struct page *gmap_alloc_crst(void) * * Returns a guest address space structure. */ -static struct gmap *gmap_alloc(unsigned long limit) +struct gmap *gmap_alloc(unsigned long limit) { struct gmap *gmap; struct page *page; @@ -70,9 +82,7 @@ static struct gmap *gmap_alloc(unsigned long limit) gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); if (!gmap) goto out; - INIT_LIST_HEAD(&gmap->crst_list); INIT_LIST_HEAD(&gmap->children); - INIT_LIST_HEAD(&gmap->pt_list); INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); @@ -82,8 +92,6 @@ static struct gmap *gmap_alloc(unsigned long limit) page = gmap_alloc_crst(); if (!page) goto out_free; - page->index = 0; - list_add(&page->lru, &gmap->crst_list); table = page_to_virt(page); crst_table_init(table, etype); gmap->table = table; @@ -97,6 +105,7 @@ out_free: out: return NULL; } +EXPORT_SYMBOL_GPL(gmap_alloc); /** * gmap_create - create a guest address space @@ -128,7 +137,7 @@ EXPORT_SYMBOL_GPL(gmap_create); static void gmap_flush_tlb(struct gmap *gmap) { - if (MACHINE_HAS_IDTE) + if (cpu_has_idte()) __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); @@ -185,32 +194,46 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) } while (nr > 0); } +static void gmap_free_crst(unsigned long *table, bool free_ptes) +{ + bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0; + int i; + + if (is_segment) { + if (!free_ptes) + goto out; + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _SEGMENT_ENTRY_INVALID)) + page_table_free_pgste(page_ptdesc(phys_to_page(table[i]))); + } else { + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _REGION_ENTRY_INVALID)) + gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes); + } + +out: + free_pages((unsigned long)table, CRST_ALLOC_ORDER); +} + /** * gmap_free - free a guest address space * @gmap: pointer to the guest address space structure * * No locks required. There are no references to this gmap anymore. */ -static void gmap_free(struct gmap *gmap) +void gmap_free(struct gmap *gmap) { - struct page *page, *next; - /* Flush tlb of all gmaps (if not already done for shadows) */ if (!(gmap_is_shadow(gmap) && gmap->removed)) gmap_flush_tlb(gmap); /* Free all segment & region tables. */ - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, CRST_ALLOC_ORDER); + gmap_free_crst(gmap->table, gmap_is_shadow(gmap)); + gmap_radix_tree_free(&gmap->guest_to_host); gmap_radix_tree_free(&gmap->host_to_guest); /* Free additional data for a shadow gmap */ if (gmap_is_shadow(gmap)) { - struct ptdesc *ptdesc, *n; - - /* Free all page tables. */ - list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list) - page_table_free_pgste(ptdesc); gmap_rmap_radix_tree_free(&gmap->host_to_rmap); /* Release reference to the parent */ gmap_put(gmap->parent); @@ -218,6 +241,7 @@ static void gmap_free(struct gmap *gmap) kfree(gmap); } +EXPORT_SYMBOL_GPL(gmap_free); /** * gmap_get - increase reference counter for guest address space @@ -298,10 +322,8 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, crst_table_init(new, init); spin_lock(&gmap->guest_table_lock); if (*table & _REGION_ENTRY_INVALID) { - list_add(&page->lru, &gmap->crst_list); *table = __pa(new) | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); - page->index = gaddr; page = NULL; } spin_unlock(&gmap->guest_table_lock); @@ -310,21 +332,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, return 0; } -/** - * __gmap_segment_gaddr - find virtual address from segment pointer - * @entry: pointer to a segment table entry in the guest address space - * - * Returns the virtual address in the guest address space for the segment - */ -static unsigned long __gmap_segment_gaddr(unsigned long *entry) +static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr) { - struct page *page; - unsigned long offset; + return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} + +static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr) +{ + return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} - offset = (unsigned long) entry / sizeof(unsigned long); - offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - page = pmd_pgtable_page((pmd_t *) entry); - return page->index + offset; +static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr, + unsigned long *gaddr) +{ + *gaddr = host_to_guest_delete(gmap, vmaddr); + if (IS_GADDR_VALID(*gaddr)) + return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1); + return NULL; } /** @@ -336,16 +360,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry) */ static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) { - unsigned long *entry; + unsigned long gaddr; int flush = 0; + pmd_t *pmdp; BUG_ON(gmap_is_shadow(gmap)); spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (entry) { - flush = (*entry != _SEGMENT_ENTRY_EMPTY); - *entry = _SEGMENT_ENTRY_EMPTY; + + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { + flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY); + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } + spin_unlock(&gmap->guest_table_lock); return flush; } @@ -464,26 +491,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) EXPORT_SYMBOL_GPL(__gmap_translate); /** - * gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - */ -unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_translate(gmap, gaddr); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_translate); - -/** * gmap_unlink - disconnect a page table from the gmap shadow tables * @mm: pointer to the parent mm_struct * @table: pointer to the host page table @@ -582,7 +589,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) spin_lock(&gmap->guest_table_lock); if (*table == _SEGMENT_ENTRY_EMPTY) { rc = radix_tree_insert(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT, table); + vmaddr >> PMD_SHIFT, + (void *)MAKE_VALID_GADDR(gaddr)); if (!rc) { if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & @@ -605,193 +613,27 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) radix_tree_preload_end(); return rc; } - -/** - * fixup_user_fault_nowait - manually resolve a user page fault without waiting - * @mm: mm_struct of target mm - * @address: user address - * @fault_flags:flags to pass down to handle_mm_fault() - * @unlocked: did we unlock the mmap_lock while retrying - * - * This function behaves similarly to fixup_user_fault(), but it guarantees - * that the fault will be resolved without waiting. The function might drop - * and re-acquire the mm lock, in which case @unlocked will be set to true. - * - * The guarantee is that the fault is handled without waiting, but the - * function itself might sleep, due to the lock. - * - * Context: Needs to be called with mm->mmap_lock held in read mode, and will - * return with the lock held in read mode; @unlocked will indicate whether - * the lock has been dropped and re-acquired. This is the same behaviour as - * fixup_user_fault(). - * - * Return: 0 on success, -EAGAIN if the fault cannot be resolved without - * waiting, -EFAULT if the fault cannot be resolved, -ENOMEM if out of - * memory. - */ -static int fixup_user_fault_nowait(struct mm_struct *mm, unsigned long address, - unsigned int fault_flags, bool *unlocked) -{ - struct vm_area_struct *vma; - unsigned int test_flags; - vm_fault_t fault; - int rc; - - fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; - test_flags = fault_flags & FAULT_FLAG_WRITE ? VM_WRITE : VM_READ; - - vma = find_vma(mm, address); - if (unlikely(!vma || address < vma->vm_start)) - return -EFAULT; - if (unlikely(!(vma->vm_flags & test_flags))) - return -EFAULT; - - fault = handle_mm_fault(vma, address, fault_flags, NULL); - /* the mm lock has been dropped, take it again */ - if (fault & VM_FAULT_COMPLETED) { - *unlocked = true; - mmap_read_lock(mm); - return 0; - } - /* the mm lock has not been dropped */ - if (fault & VM_FAULT_ERROR) { - rc = vm_fault_to_errno(fault, 0); - BUG_ON(!rc); - return rc; - } - /* the mm lock has not been dropped because of FAULT_FLAG_RETRY_NOWAIT */ - if (fault & VM_FAULT_RETRY) - return -EAGAIN; - /* nothing needed to be done and the mm lock has not been dropped */ - return 0; -} - -/** - * __gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Context: Needs to be called with mm->mmap_lock held in read mode. Might - * drop and re-acquire the lock. Will always return with the lock held. - */ -static int __gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - unsigned long vmaddr; - bool unlocked; - int rc = 0; - -retry: - unlocked = false; - - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - - if (fault_flags & FAULT_FLAG_RETRY_NOWAIT) - rc = fixup_user_fault_nowait(gmap->mm, vmaddr, fault_flags, &unlocked); - else - rc = fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked); - if (rc) - return rc; - /* - * In the case that fixup_user_fault unlocked the mmap_lock during - * fault-in, redo __gmap_translate() to avoid racing with a - * map/unmap_segment. - * In particular, __gmap_translate(), fixup_user_fault{,_nowait}(), - * and __gmap_link() must all be called atomically in one go; if the - * lock had been dropped in between, a retry is needed. - */ - if (unlocked) - goto retry; - - return __gmap_link(gmap, gaddr, vmaddr); -} - -/** - * gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the - * vm address is already mapped to a different guest segment, and -EAGAIN if - * FAULT_FLAG_RETRY_NOWAIT was specified and the fault could not be processed - * immediately. - */ -int gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - int rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_fault(gmap, gaddr, fault_flags); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_fault); +EXPORT_SYMBOL(__gmap_link); /* * this function is assumed to be called with mmap_lock held */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { - struct vm_area_struct *vma; unsigned long vmaddr; - spinlock_t *ptl; - pte_t *ptep; + + mmap_assert_locked(gmap->mm); /* Find the vm address for the guest address */ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); if (vmaddr) { vmaddr |= gaddr & ~PMD_MASK; - - vma = vma_lookup(gmap->mm, vmaddr); - if (!vma || is_vm_hugetlb_page(vma)) - return; - - /* Get pointer to the page table entry */ - ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) { - ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); - } + gmap_helper_zap_one_page(gmap->mm, vmaddr); } } EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) -{ - unsigned long gaddr, vmaddr, size; - struct vm_area_struct *vma; - - mmap_read_lock(gmap->mm); - for (gaddr = from; gaddr < to; - gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - continue; - vmaddr |= gaddr & ~PMD_MASK; - /* Find vma in the parent mm */ - vma = find_vma(gmap->mm, vmaddr); - if (!vma) - continue; - /* - * We do not discard pages that are backed by - * hugetlbfs, so we don't have to refault them. - */ - if (is_vm_hugetlb_page(vma)) - continue; - size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range_single(vma, vmaddr, size, NULL); - } - mmap_read_unlock(gmap->mm); -} -EXPORT_SYMBOL_GPL(gmap_discard); - static LIST_HEAD(gmap_notifier_list); static DEFINE_SPINLOCK(gmap_notifier_lock); @@ -853,8 +695,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start, * * Note: Can also be called for shadow gmaps. */ -static inline unsigned long *gmap_table_walk(struct gmap *gmap, - unsigned long gaddr, int level) +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { const int asce_type = gmap->asce & _ASCE_TYPE_MASK; unsigned long *table = gmap->table; @@ -905,6 +746,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, } return table; } +EXPORT_SYMBOL(gmap_table_walk); /** * gmap_pte_op_walk - walk the gmap page table, get the page table lock @@ -1101,86 +943,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE * @bits: pgste notification bits to set * - * Returns 0 if successfully protected, -ENOMEM if out of memory and - * -EFAULT if gaddr is invalid (or mapping for shadows is missing). + * Returns: + * PAGE_SIZE if a small page was successfully protected; + * HPAGE_SIZE if a large page was successfully protected; + * -ENOMEM if out of memory; + * -EFAULT if gaddr is invalid (or mapping for shadows is missing); + * -EAGAIN if the guest mapping is missing and should be fixed by the caller. * - * Called with sg->mm->mmap_lock in read. + * Context: Called with sg->mm->mmap_lock in read. */ -static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot, unsigned long bits) +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits) { - unsigned long vmaddr, dist; pmd_t *pmdp; - int rc; + int rc = 0; BUG_ON(gmap_is_shadow(gmap)); - while (len) { - rc = -EAGAIN; - pmdp = gmap_pmd_op_walk(gmap, gaddr); - if (pmdp) { - if (!pmd_leaf(*pmdp)) { - rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - len -= PAGE_SIZE; - gaddr += PAGE_SIZE; - } - } else { - rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); - len = len < dist ? 0 : len - dist; - gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; - } - } - gmap_pmd_op_end(gmap, pmdp); - } - if (rc) { - if (rc == -EINVAL) - return rc; - /* -EAGAIN, fixup of userspace mm and gmap */ - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); - if (rc) - return rc; - } - } - return 0; -} + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (!pmdp) + return -EAGAIN; -/** - * gmap_mprotect_notify - change access rights for a range of ptes and - * call the notifier if any pte changes again - * @gmap: pointer to guest mapping meta data structure - * @gaddr: virtual address in the guest address space - * @len: size of area - * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE - * - * Returns 0 if for each page in the given range a gmap mapping exists, - * the new access rights could be set and the notifier could be armed. - * If the gmap mapping is missing for one or more pages -EFAULT is - * returned. If no memory could be allocated -ENOMEM is returned. - * This function establishes missing page table entries. - */ -int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot) -{ - int rc; + if (!pmd_leaf(*pmdp)) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = PAGE_SIZE; + } else { + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = HPAGE_SIZE; + } + gmap_pmd_op_end(gmap, pmdp); - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap)) - return -EINVAL; - if (!MACHINE_HAS_ESOP && prot == PROT_READ) - return -EINVAL; - mmap_read_lock(gmap->mm); - rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); - mmap_read_unlock(gmap->mm); return rc; } -EXPORT_SYMBOL_GPL(gmap_mprotect_notify); +EXPORT_SYMBOL_GPL(gmap_protect_one); /** * gmap_read_table - get an unsigned long value from a guest page table using @@ -1414,7 +1210,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } @@ -1442,7 +1237,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } } @@ -1472,7 +1266,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1500,7 +1293,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1530,7 +1322,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1558,7 +1349,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1588,7 +1378,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1620,7 +1409,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, r1t[i] = _REGION1_ENTRY_EMPTY; /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1631,7 +1419,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, * * Called with sg->guest_table_lock */ -static void gmap_unshadow(struct gmap *sg) +void gmap_unshadow(struct gmap *sg) { unsigned long *table; @@ -1657,143 +1445,7 @@ static void gmap_unshadow(struct gmap *sg) break; } } - -/** - * gmap_find_shadow - find a specific asce in the list of shadow tables - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * Returns the pointer to a gmap if a shadow table with the given asce is - * already available, ERR_PTR(-EAGAIN) if another one is just being created, - * otherwise NULL - */ -static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg; - - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce != asce || sg->edat_level != edat_level || - sg->removed) - continue; - if (!sg->initialized) - return ERR_PTR(-EAGAIN); - refcount_inc(&sg->ref_count); - return sg; - } - return NULL; -} - -/** - * gmap_shadow_valid - check if a shadow guest address space matches the - * given properties and is still valid - * @sg: pointer to the shadow guest address space structure - * @asce: ASCE for which the shadow table is requested - * @edat_level: edat level to be used for the shadow translation - * - * Returns 1 if the gmap shadow is still valid and matches the given - * properties, the caller can continue using it. Returns 0 otherwise, the - * caller has to request a new shadow gmap in this case. - * - */ -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) -{ - if (sg->removed) - return 0; - return sg->orig_asce == asce && sg->edat_level == edat_level; -} -EXPORT_SYMBOL_GPL(gmap_shadow_valid); - -/** - * gmap_shadow - create/find a shadow guest address space - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * The pages of the top level page table referred by the asce parameter - * will be set to read-only and marked in the PGSTEs of the kvm process. - * The shadow table will be removed automatically on any change to the - * PTE mapping for the source table. - * - * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, - * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the - * parent gmap table could not be protected. - */ -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg, *new; - unsigned long limit; - int rc; - - BUG_ON(parent->mm->context.allow_gmap_hpage_1m); - BUG_ON(gmap_is_shadow(parent)); - spin_lock(&parent->shadow_lock); - sg = gmap_find_shadow(parent, asce, edat_level); - spin_unlock(&parent->shadow_lock); - if (sg) - return sg; - /* Create a new shadow gmap */ - limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); - if (asce & _ASCE_REAL_SPACE) - limit = -1UL; - new = gmap_alloc(limit); - if (!new) - return ERR_PTR(-ENOMEM); - new->mm = parent->mm; - new->parent = gmap_get(parent); - new->private = parent->private; - new->orig_asce = asce; - new->edat_level = edat_level; - new->initialized = false; - spin_lock(&parent->shadow_lock); - /* Recheck if another CPU created the same shadow */ - sg = gmap_find_shadow(parent, asce, edat_level); - if (sg) { - spin_unlock(&parent->shadow_lock); - gmap_free(new); - return sg; - } - if (asce & _ASCE_REAL_SPACE) { - /* only allow one real-space gmap shadow */ - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce & _ASCE_REAL_SPACE) { - spin_lock(&sg->guest_table_lock); - gmap_unshadow(sg); - spin_unlock(&sg->guest_table_lock); - list_del(&sg->list); - gmap_put(sg); - break; - } - } - } - refcount_set(&new->ref_count, 2); - list_add(&new->list, &parent->children); - if (asce & _ASCE_REAL_SPACE) { - /* nothing to protect, return right away */ - new->initialized = true; - spin_unlock(&parent->shadow_lock); - return new; - } - spin_unlock(&parent->shadow_lock); - /* protect after insertion, so it will get properly invalidated */ - mmap_read_lock(parent->mm); - rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, - ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, - PROT_READ, GMAP_NOTIFY_SHADOW); - mmap_read_unlock(parent->mm); - spin_lock(&parent->shadow_lock); - new->initialized = true; - if (rc) { - list_del(&new->list); - gmap_free(new); - new = ERR_PTR(rc); - } - spin_unlock(&parent->shadow_lock); - return new; -} -EXPORT_SYMBOL_GPL(gmap_shadow); +EXPORT_SYMBOL(gmap_unshadow); /** * gmap_shadow_r2t - create an empty shadow region 2 table @@ -1827,9 +1479,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r2t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r2t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1851,7 +1500,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r2t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1911,9 +1559,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r3t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r3t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1935,7 +1580,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r3t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1995,9 +1639,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = sgt & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_sgt = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -2019,7 +1660,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= sgt & _REGION_ENTRY_PROTECT; - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -2052,45 +1692,22 @@ out_free: } EXPORT_SYMBOL_GPL(gmap_shadow_sgt); -/** - * gmap_shadow_pgt_lookup - find a shadow page table - * @sg: pointer to the shadow guest address space structure - * @saddr: the address in the shadow aguest address space - * @pgt: parent gmap address of the page table to get shadowed - * @dat_protection: if the pgtable is marked as protected by dat - * @fake: pgt references contiguous guest memory block, not a pgtable - * - * Returns 0 if the shadow page table was found and -EAGAIN if the page - * table was not found. - * - * Called with sg->mm->mmap_lock in read. - */ -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, - int *fake) +static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr) { - unsigned long *table; - struct page *page; - int rc; + unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc)); - BUG_ON(!gmap_is_shadow(sg)); - spin_lock(&sg->guest_table_lock); - table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ - if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { - /* Shadow page tables are full pages (pte+pgste) */ - page = pfn_to_page(*table >> PAGE_SHIFT); - *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE; - *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); - *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE); - rc = 0; - } else { - rc = -EAGAIN; - } - spin_unlock(&sg->guest_table_lock); - return rc; + pgstes += _PAGE_ENTRIES; + + pgstes[0] &= ~PGSTE_ST2_MASK; + pgstes[1] &= ~PGSTE_ST2_MASK; + pgstes[2] &= ~PGSTE_ST2_MASK; + pgstes[3] &= ~PGSTE_ST2_MASK; + pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK; + pgstes[1] |= pgt_addr & PGSTE_ST2_MASK; + pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK; + pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK; } -EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup); /** * gmap_shadow_pgt - instantiate a shadow page table @@ -2119,9 +1736,10 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, ptdesc = page_table_alloc_pgste(sg->mm); if (!ptdesc) return -ENOMEM; - ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN; + origin = pgt & _SEGMENT_ENTRY_ORIGIN; if (fake) - ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE; + origin |= GMAP_SHADOW_FAKE_TABLE; + gmap_pgste_set_pgt_addr(ptdesc, origin); s_pgt = page_to_phys(ptdesc_page(ptdesc)); /* Install shadow page table */ spin_lock(&sg->guest_table_lock); @@ -2140,7 +1758,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, /* mark as invalid as long as the parent table is not protected */ *table = (unsigned long) s_pgt | _SEGMENT_ENTRY | (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID; - list_add(&ptdesc->pt_list, &sg->pt_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_SEGMENT_ENTRY_INVALID; @@ -2318,7 +1935,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte, unsigned long bits) { unsigned long offset, gaddr = 0; - unsigned long *table; struct gmap *gmap, *sg, *next; offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); @@ -2326,12 +1942,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - table = radix_tree_lookup(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (table) - gaddr = __gmap_segment_gaddr(table) + offset; + gaddr = host_to_guest_lookup(gmap, vmaddr) + offset; spin_unlock(&gmap->guest_table_lock); - if (!table) + if (!IS_GADDR_VALID(gaddr)) continue; if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { @@ -2371,10 +1984,10 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, gaddr &= HPAGE_MASK; pmdp_notify_gmap(gmap, pmdp, gaddr); new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN)); - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); @@ -2391,10 +2004,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); if (pmdp) { - gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | @@ -2438,28 +2049,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_csp); */ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2474,30 +2082,27 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local); */ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); - if (MACHINE_HAS_TLB_GUEST) + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); + if (machine_has_tlb_guest()) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2612,9 +2217,6 @@ int s390_enable_sie(void) /* Do we have pgstes? if yes, we are done */ if (mm_has_pgste(mm)) return 0; - /* Fail if the page tables are 2K */ - if (!mm_alloc_pgste(mm)) - return -EINVAL; mmap_write_lock(mm); mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ @@ -2624,138 +2226,6 @@ int s390_enable_sie(void) } EXPORT_SYMBOL_GPL(s390_enable_sie); -static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, - unsigned long end, struct mm_walk *walk) -{ - unsigned long *found_addr = walk->private; - - /* Return 1 of the page is a zeropage. */ - if (is_zero_pfn(pte_pfn(*pte))) { - /* - * Shared zeropage in e.g., a FS DAX mapping? We cannot do the - * right thing and likely don't care: FAULT_FLAG_UNSHARE - * currently only works in COW mappings, which is also where - * mm_forbids_zeropage() is checked. - */ - if (!is_cow_mapping(walk->vma->vm_flags)) - return -EFAULT; - - *found_addr = addr; - return 1; - } - return 0; -} - -static const struct mm_walk_ops find_zeropage_ops = { - .pte_entry = find_zeropage_pte_entry, - .walk_lock = PGWALK_WRLOCK, -}; - -/* - * Unshare all shared zeropages, replacing them by anonymous pages. Note that - * we cannot simply zap all shared zeropages, because this could later - * trigger unexpected userfaultfd missing events. - * - * This must be called after mm->context.allow_cow_sharing was - * set to 0, to avoid future mappings of shared zeropages. - * - * mm contracts with s390, that even if mm were to remove a page table, - * and racing with walk_page_range_vma() calling pte_offset_map_lock() - * would fail, it will never insert a page table containing empty zero - * pages once mm_forbids_zeropage(mm) i.e. - * mm->context.allow_cow_sharing is set to 0. - */ -static int __s390_unshare_zeropages(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - unsigned long addr; - vm_fault_t fault; - int rc; - - for_each_vma(vmi, vma) { - /* - * We could only look at COW mappings, but it's more future - * proof to catch unexpected zeropages in other mappings and - * fail. - */ - if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) - continue; - addr = vma->vm_start; - -retry: - rc = walk_page_range_vma(vma, addr, vma->vm_end, - &find_zeropage_ops, &addr); - if (rc < 0) - return rc; - else if (!rc) - continue; - - /* addr was updated by find_zeropage_pte_entry() */ - fault = handle_mm_fault(vma, addr, - FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, - NULL); - if (fault & VM_FAULT_OOM) - return -ENOMEM; - /* - * See break_ksm(): even after handle_mm_fault() returned 0, we - * must start the lookup from the current address, because - * handle_mm_fault() may back out if there's any difficulty. - * - * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but - * maybe they could trigger in the future on concurrent - * truncation. In that case, the shared zeropage would be gone - * and we can simply retry and make progress. - */ - cond_resched(); - goto retry; - } - - return 0; -} - -static int __s390_disable_cow_sharing(struct mm_struct *mm) -{ - int rc; - - if (!mm->context.allow_cow_sharing) - return 0; - - mm->context.allow_cow_sharing = 0; - - /* Replace all shared zeropages by anonymous pages. */ - rc = __s390_unshare_zeropages(mm); - /* - * Make sure to disable KSM (if enabled for the whole process or - * individual VMAs). Note that nothing currently hinders user space - * from re-enabling it. - */ - if (!rc) - rc = ksm_disable(mm); - if (rc) - mm->context.allow_cow_sharing = 1; - return rc; -} - -/* - * Disable most COW-sharing of memory pages for the whole process: - * (1) Disable KSM and unmerge/unshare any KSM pages. - * (2) Disallow shared zeropages and unshare any zerpages that are mapped. - * - * Not that we currently don't bother with COW-shared pages that are shared - * with parent/child processes due to fork(). - */ -int s390_disable_cow_sharing(void) -{ - int rc; - - mmap_write_lock(current->mm); - rc = __s390_disable_cow_sharing(current->mm); - mmap_write_unlock(current->mm); - return rc; -} -EXPORT_SYMBOL_GPL(s390_disable_cow_sharing); - /* * Enable storage key handling from now on and initialize the storage * keys with the default key. @@ -2823,7 +2293,7 @@ int s390_enable_skey(void) goto out_up; mm->context.uses_skeys = 1; - rc = __s390_disable_cow_sharing(mm); + rc = gmap_helper_disable_cow_sharing(); if (rc) { mm->context.uses_skeys = 0; goto out_up; @@ -2943,49 +2413,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, EXPORT_SYMBOL_GPL(__s390_uv_destroy_range); /** - * s390_unlist_old_asce - Remove the topmost level of page tables from the - * list of page tables of the gmap. - * @gmap: the gmap whose table is to be removed - * - * On s390x, KVM keeps a list of all pages containing the page tables of the - * gmap (the CRST list). This list is used at tear down time to free all - * pages that are now not needed anymore. - * - * This function removes the topmost page of the tree (the one pointed to by - * the ASCE) from the CRST list. - * - * This means that it will not be freed when the VM is torn down, and needs - * to be handled separately by the caller, unless a leak is actually - * intended. Notice that this function will only remove the page from the - * list, the page will still be used as a top level page table (and ASCE). - */ -void s390_unlist_old_asce(struct gmap *gmap) -{ - struct page *old; - - old = virt_to_page(gmap->table); - spin_lock(&gmap->guest_table_lock); - list_del(&old->lru); - /* - * Sometimes the topmost page might need to be "removed" multiple - * times, for example if the VM is rebooted into secure mode several - * times concurrently, or if s390_replace_asce fails after calling - * s390_remove_old_asce and is attempted again later. In that case - * the old asce has been removed from the list, and therefore it - * will not be freed when the VM terminates, but the ASCE is still - * in use and still pointed to. - * A subsequent call to replace_asce will follow the pointer and try - * to remove the same page from the list again. - * Therefore it's necessary that the page of the ASCE has valid - * pointers, so list_del can work (and do nothing) without - * dereferencing stale or invalid pointers. - */ - INIT_LIST_HEAD(&old->lru); - spin_unlock(&gmap->guest_table_lock); -} -EXPORT_SYMBOL_GPL(s390_unlist_old_asce); - -/** * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy * @gmap: the gmap whose ASCE needs to be replaced * @@ -3004,8 +2431,6 @@ int s390_replace_asce(struct gmap *gmap) struct page *page; void *table; - s390_unlist_old_asce(gmap); - /* Replacing segment type ASCEs would cause serious issues */ if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) return -EINVAL; @@ -3013,19 +2438,9 @@ int s390_replace_asce(struct gmap *gmap) page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = 0; table = page_to_virt(page); memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT)); - /* - * The caller has to deal with the old ASCE, but here we make sure - * the new one is properly added to the CRST list, so that - * it will be freed when the VM is torn down. - */ - spin_lock(&gmap->guest_table_lock); - list_add(&page->lru, &gmap->crst_list); - spin_unlock(&gmap->guest_table_lock); - /* Set new table origin while preserving existing ASCE control bits */ asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table); WRITE_ONCE(gmap->asce, asce); diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c new file mode 100644 index 000000000000..a45d417ad951 --- /dev/null +++ b/arch/s390/mm/gmap_helpers.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helper functions for KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2025 + */ +#include <linux/mm_types.h> +#include <linux/mmap_lock.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/pagewalk.h> +#include <linux/ksm.h> +#include <asm/gmap_helpers.h> + +/** + * ptep_zap_swap_entry() - discard a swap entry. + * @mm: the mm + * @entry: the swap entry that needs to be zapped + * + * Discards the given swap entry. If the swap entry was an actual swap + * entry (and not a migration entry, for example), the actual swapped + * page is also discarded from swap. + */ +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +{ + if (!non_swap_entry(entry)) + dec_mm_counter(mm, MM_SWAPENTS); + else if (is_migration_entry(entry)) + dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry))); + free_swap_and_cache(entry); +} + +/** + * gmap_helper_zap_one_page() - discard a page if it was swapped. + * @mm: the mm + * @vmaddr: the userspace virtual address that needs to be discarded + * + * If the given address maps to a swap entry, discard it. + * + * Context: needs to be called while holding the mmap lock. + */ +void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) +{ + struct vm_area_struct *vma; + spinlock_t *ptl; + pte_t *ptep; + + mmap_assert_locked(mm); + + /* Find the vm address for the guest address */ + vma = vma_lookup(mm, vmaddr); + if (!vma || is_vm_hugetlb_page(vma)) + return; + + /* Get pointer to the page table entry */ + ptep = get_locked_pte(mm, vmaddr, &ptl); + if (unlikely(!ptep)) + return; + if (pte_swap(*ptep)) + ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); + pte_unmap_unlock(ptep, ptl); +} +EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page); + +/** + * gmap_helper_discard() - discard user pages in the given range + * @mm: the mm + * @vmaddr: starting userspace address + * @end: end address (first address outside the range) + * + * All userpace pages in the range [@vamddr, @end) are discarded and unmapped. + * + * Context: needs to be called while holding the mmap lock. + */ +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end) +{ + struct vm_area_struct *vma; + + mmap_assert_locked(mm); + + while (vmaddr < end) { + vma = find_vma_intersection(mm, vmaddr, end); + if (!vma) + return; + if (!is_vm_hugetlb_page(vma)) + zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL); + vmaddr = vma->vm_end; + } +} +EXPORT_SYMBOL_GPL(gmap_helper_discard); + +static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + unsigned long *found_addr = walk->private; + + /* Return 1 of the page is a zeropage. */ + if (is_zero_pfn(pte_pfn(*pte))) { + /* + * Shared zeropage in e.g., a FS DAX mapping? We cannot do the + * right thing and likely don't care: FAULT_FLAG_UNSHARE + * currently only works in COW mappings, which is also where + * mm_forbids_zeropage() is checked. + */ + if (!is_cow_mapping(walk->vma->vm_flags)) + return -EFAULT; + + *found_addr = addr; + return 1; + } + return 0; +} + +static const struct mm_walk_ops find_zeropage_ops = { + .pte_entry = find_zeropage_pte_entry, + .walk_lock = PGWALK_WRLOCK, +}; + +/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages + * @mm: the mm whose zero pages are to be unshared + * + * Unshare all shared zeropages, replacing them by anonymous pages. Note that + * we cannot simply zap all shared zeropages, because this could later + * trigger unexpected userfaultfd missing events. + * + * This must be called after mm->context.allow_cow_sharing was + * set to 0, to avoid future mappings of shared zeropages. + * + * mm contracts with s390, that even if mm were to remove a page table, + * and racing with walk_page_range_vma() calling pte_offset_map_lock() + * would fail, it will never insert a page table containing empty zero + * pages once mm_forbids_zeropage(mm) i.e. + * mm->context.allow_cow_sharing is set to 0. + */ +static int __gmap_helper_unshare_zeropages(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + unsigned long addr; + vm_fault_t fault; + int rc; + + for_each_vma(vmi, vma) { + /* + * We could only look at COW mappings, but it's more future + * proof to catch unexpected zeropages in other mappings and + * fail. + */ + if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma)) + continue; + addr = vma->vm_start; + +retry: + rc = walk_page_range_vma(vma, addr, vma->vm_end, + &find_zeropage_ops, &addr); + if (rc < 0) + return rc; + else if (!rc) + continue; + + /* addr was updated by find_zeropage_pte_entry() */ + fault = handle_mm_fault(vma, addr, + FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, + NULL); + if (fault & VM_FAULT_OOM) + return -ENOMEM; + /* + * See break_ksm(): even after handle_mm_fault() returned 0, we + * must start the lookup from the current address, because + * handle_mm_fault() may back out if there's any difficulty. + * + * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but + * maybe they could trigger in the future on concurrent + * truncation. In that case, the shared zeropage would be gone + * and we can simply retry and make progress. + */ + cond_resched(); + goto retry; + } + + return 0; +} + +/** + * gmap_helper_disable_cow_sharing() - disable all COW sharing + * + * Disable most COW-sharing of memory pages for the whole process: + * (1) Disable KSM and unmerge/unshare any KSM pages. + * (2) Disallow shared zeropages and unshare any zerpages that are mapped. + * + * Not that we currently don't bother with COW-shared pages that are shared + * with parent/child processes due to fork(). + */ +int gmap_helper_disable_cow_sharing(void) +{ + struct mm_struct *mm = current->mm; + int rc; + + mmap_assert_write_locked(mm); + + if (!mm->context.allow_cow_sharing) + return 0; + + mm->context.allow_cow_sharing = 0; + + /* Replace all shared zeropages by anonymous pages. */ + rc = __gmap_helper_unshare_zeropages(mm); + /* + * Make sure to disable KSM (if enabled for the whole process or + * individual VMAs). Note that nothing currently hinders user space + * from re-enabling it. + */ + if (!rc) + rc = ksm_disable(mm); + if (rc) + mm->context.allow_cow_sharing = 1; + return rc; +} +EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index d9ce199953de..e88c02c9e642 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -9,12 +9,13 @@ #define KMSG_COMPONENT "hugetlb" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include <asm/pgalloc.h> +#include <linux/cpufeature.h> #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/sched/mm.h> #include <linux/security.h> +#include <asm/pgalloc.h> /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -188,8 +189,8 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) return __rste_to_pte(pte_val(*ptep)); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get(mm, addr, ptep); pmd_t *pmdp = (pmd_t *) ptep; @@ -248,9 +249,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, bool __init arch_hugetlb_valid_size(unsigned long size) { - if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) + if (cpu_has_edat1() && size == PMD_SIZE) return true; - else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) + else if (cpu_has_edat2() && size == PUD_SIZE) return true; else return false; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7a96623a9d2e..074bf4fb4ce2 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -8,6 +8,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include <linux/cpufeature.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -39,7 +40,6 @@ #include <asm/kfence.h> #include <asm/dma.h> #include <asm/abs_lowcore.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/sclp.h> @@ -56,6 +56,15 @@ pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); struct ctlreg __bootdata_preserved(s390_invalid_asce); +unsigned long __bootdata_preserved(page_noexec_mask); +EXPORT_SYMBOL(page_noexec_mask); + +unsigned long __bootdata_preserved(segment_noexec_mask); +EXPORT_SYMBOL(segment_noexec_mask); + +unsigned long __bootdata_preserved(region_noexec_mask); +EXPORT_SYMBOL(region_noexec_mask); + unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); @@ -64,8 +73,6 @@ static void __init setup_zero_pages(void) { unsigned long total_pages = memblock_estimated_nr_free_pages(); unsigned int order; - struct page *page; - int i; /* Latest machines require a mapping granularity of 512KB */ order = 7; @@ -74,16 +81,7 @@ static void __init setup_zero_pages(void) while (order > 2 && (total_pages >> 10) < (1UL << order)) order--; - empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!empty_zero_page) - panic("Out of memory in setup_zero_pages"); - - page = virt_to_page((void *) empty_zero_page); - split_page(page, order); - for (i = 1 << order; i > 0; i--) { - mark_page_reserved(page); - page++; - } + empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE); zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; } @@ -108,7 +106,7 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; - if (MACHINE_HAS_NX) + if (cpu_has_nx()) system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); @@ -156,19 +154,13 @@ static void pv_init(void) swiotlb_update_mem_attributes(); } -void __init mem_init(void) +void __init arch_mm_preinit(void) { cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - set_max_mapnr(max_low_pfn); - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - pv_init(); - kfence_split_mapping(); - /* this will put all low memory onto the freelists */ - memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. */ } @@ -230,16 +222,13 @@ struct s390_cma_mem_data { static int s390_cma_check_range(struct cma *cma, void *data) { struct s390_cma_mem_data *mem_data; - unsigned long start, end; mem_data = data; - start = cma_get_base(cma); - end = start + cma_get_size(cma); - if (end < mem_data->start) - return 0; - if (start >= mem_data->end) - return 0; - return -EBUSY; + + if (cma_intersects(cma, mem_data->start, mem_data->end)) + return -EBUSY; + + return 0; } static int s390_cma_mem_notifier(struct notifier_block *nb, @@ -276,7 +265,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long size_pages = PFN_DOWN(size); int rc; - if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + if (WARN_ON_ONCE(pgprot_val(params->pgprot) != pgprot_val(PAGE_KERNEL))) return -EINVAL; VM_BUG_ON(!mhp_range_allowed(start, size, true)); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 28a18c42ba99..44426e0f2944 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -17,6 +17,7 @@ #include <asm/asm-extable.h> #include <asm/abs_lowcore.h> #include <asm/stacktrace.h> +#include <asm/sections.h> #include <asm/maccess.h> #include <asm/ctlreg.h> diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 33f3504be90b..40a526d28184 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -51,7 +51,6 @@ static inline unsigned long mmap_base(unsigned long rnd, { unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size() + stack_guard_gap; - unsigned long gap_min, gap_max; /* Values close to RLIM_INFINITY can overflow. */ if (gap + pad > gap) @@ -61,13 +60,7 @@ static inline unsigned long mmap_base(unsigned long rnd, * Top of mmap area (just below the process stack). * Leave at least a ~128 MB hole. */ - gap_min = SZ_128M; - gap_max = (STACK_TOP / 6) * 5; - - if (gap < gap_min) - gap = gap_min; - else if (gap > gap_max) - gap = gap_max; + gap = clamp(gap, SZ_128M, (STACK_TOP / 6) * 5); return PAGE_ALIGN(STACK_TOP - gap - rnd); } @@ -196,22 +189,28 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) } } -static const pgprot_t protection_map[16] = { - [VM_NONE] = PAGE_NONE, - [VM_READ] = PAGE_RO, - [VM_WRITE] = PAGE_RO, - [VM_WRITE | VM_READ] = PAGE_RO, - [VM_EXEC] = PAGE_RX, - [VM_EXEC | VM_READ] = PAGE_RX, - [VM_EXEC | VM_WRITE] = PAGE_RX, - [VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX, - [VM_SHARED] = PAGE_NONE, - [VM_SHARED | VM_READ] = PAGE_RO, - [VM_SHARED | VM_WRITE] = PAGE_RW, - [VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW, - [VM_SHARED | VM_EXEC] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX, - [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX -}; +static pgprot_t protection_map[16] __ro_after_init; + +void __init setup_protection_map(void) +{ + pgprot_t *pm = protection_map; + + pm[VM_NONE] = PAGE_NONE; + pm[VM_READ] = PAGE_RO; + pm[VM_WRITE] = PAGE_RO; + pm[VM_WRITE | VM_READ] = PAGE_RO; + pm[VM_EXEC] = PAGE_RX; + pm[VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_EXEC | VM_WRITE] = PAGE_RX; + pm[VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX; + pm[VM_SHARED] = PAGE_NONE; + pm[VM_SHARED | VM_READ] = PAGE_RO; + pm[VM_SHARED | VM_WRITE] = PAGE_RW; + pm[VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW; + pm[VM_SHARED | VM_EXEC] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX; + pm[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX; +} + DECLARE_VM_GET_PAGE_PROT diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 8f56a21a077f..348e759840e7 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -3,6 +3,7 @@ * Copyright IBM Corp. 2011 * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/hugetlb.h> #include <linux/proc_fs.h> #include <linux/vmalloc.h> @@ -27,7 +28,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end) unsigned long boundary, size; while (start < end) { - if (MACHINE_HAS_EDAT1) { + if (cpu_has_edat1()) { /* set storage keys for a 1MB frame */ size = 1UL << 20; boundary = (start + size) & ~(size - 1); @@ -63,7 +64,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, unsigned long *table, mask; mask = 0; - if (MACHINE_HAS_EDAT2) { + if (cpu_has_edat2()) { switch (dtt) { case CRDTE_DTT_REGION3: mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); @@ -77,7 +78,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, } table = (unsigned long *)((unsigned long)old & mask); crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { cspg(old, *old, new); } else { csp((unsigned int *)old + 1, *old, new); @@ -109,8 +110,6 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, } else if (flags & SET_MEMORY_DEF) { new = __pte(pte_val(new) & PAGE_MASK); new = set_pte_bit(new, PAGE_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); } pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); ptep++; @@ -167,8 +166,6 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pmd(pmd_val(new) & PMD_MASK); new = set_pmd_bit(new, SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); } @@ -256,8 +253,6 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pud(pud_val(new) & PUD_MASK); new = set_pud_bit(new, REGION3_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } @@ -379,7 +374,7 @@ int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags unsigned long end; int rc; - if (!MACHINE_HAS_NX) + if (!cpu_has_nx()) flags &= ~(SET_MEMORY_NX | SET_MEMORY_X); if (!flags) return 0; diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c index 1aac13bb8f53..e6175d75e4b0 100644 --- a/arch/s390/mm/pfault.c +++ b/arch/s390/mm/pfault.c @@ -9,6 +9,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <asm/asm-extable.h> +#include <asm/asm-offsets.h> #include <asm/pfault.h> #include <asm/diag.h> @@ -56,7 +57,7 @@ int __pfault_init(void) if (pfault_disable) return rc; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " diag %[refbk],%[rc],0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) @@ -78,7 +79,7 @@ void __pfault_fini(void) if (pfault_disable) return; diag_stat_inc(DIAG_STAT_X258); - asm volatile( + asm_inline volatile( " diag %[refbk],0,0x258\n" "0: nopr %%r7\n" EX_TABLE(0b, 0b) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 58696a0c4e4a..b449fd2605b0 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -12,35 +12,8 @@ #include <asm/mmu_context.h> #include <asm/page-states.h> #include <asm/pgalloc.h> -#include <asm/gmap.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> -#ifdef CONFIG_PGSTE - -int page_table_allocate_pgste = 0; -EXPORT_SYMBOL(page_table_allocate_pgste); - -static struct ctl_table page_table_sysctl[] = { - { - .procname = "allocate_pgste", - .data = &page_table_allocate_pgste, - .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -}; - -static int __init page_table_register_sysctl(void) -{ - return register_sysctl("vm", page_table_sysctl) ? 0 : -ENOMEM; -} -__initcall(page_table_register_sysctl); - -#endif /* CONFIG_PGSTE */ - unsigned long *crst_table_alloc(struct mm_struct *mm) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER); @@ -63,11 +36,15 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; + struct ctlreg asce; /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { - get_lowcore()->user_asce.val = mm->context.asce; - local_ctl_load(7, &get_lowcore()->user_asce); + asce.val = mm->context.asce; + get_lowcore()->user_asce = asce; + local_ctl_load(7, &asce); + if (!test_thread_flag(TIF_ASCE_PRIMARY)) + local_ctl_load(1, &asce); } __tlb_flush_local(); } @@ -77,6 +54,8 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) unsigned long *pgd = NULL, *p4d = NULL, *__pgd; unsigned long asce_limit = mm->context.asce_limit; + mmap_assert_write_locked(mm); + /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ VM_BUG_ON(asce_limit < _REGION2_SIZE); @@ -88,23 +67,18 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) if (unlikely(!p4d)) goto err_p4d; crst_table_init(p4d, _REGION2_ENTRY_EMPTY); + pagetable_p4d_ctor(virt_to_ptdesc(p4d)); } if (end > _REGION1_SIZE) { pgd = crst_table_alloc(mm); if (unlikely(!pgd)) goto err_pgd; crst_table_init(pgd, _REGION1_ENTRY_EMPTY); + pagetable_pgd_ctor(virt_to_ptdesc(pgd)); } spin_lock_bh(&mm->page_table_lock); - /* - * This routine gets called with mmap_lock lock held and there is - * no reason to optimize for the case of otherwise. However, if - * that would ever change, the below check will let us know. - */ - VM_BUG_ON(asce_limit != mm->context.asce_limit); - if (p4d) { __pgd = (unsigned long *) mm->pgd; p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd); @@ -130,6 +104,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) return 0; err_pgd: + pagetable_dtor(virt_to_ptdesc(p4d)); crst_table_free(mm, p4d); err_p4d: return -ENOMEM; @@ -167,43 +142,22 @@ unsigned long *page_table_alloc(struct mm_struct *mm) ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; - if (!pagetable_pte_ctor(ptdesc)) { + if (!pagetable_pte_ctor(mm, ptdesc)) { pagetable_free(ptdesc); return NULL; } table = ptdesc_to_virt(ptdesc); __arch_set_page_dat(table, 1); - /* pt_list is used by gmap only */ - INIT_LIST_HEAD(&ptdesc->pt_list); memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); return table; } -static void pagetable_pte_dtor_free(struct ptdesc *ptdesc) -{ - pagetable_pte_dtor(ptdesc); - pagetable_free(ptdesc); -} - void page_table_free(struct mm_struct *mm, unsigned long *table) { struct ptdesc *ptdesc = virt_to_ptdesc(table); - pagetable_pte_dtor_free(ptdesc); -} - -void __tlb_remove_table(void *table) -{ - struct ptdesc *ptdesc = virt_to_ptdesc(table); - struct page *page = ptdesc_page(ptdesc); - - if (compound_order(page) == CRST_ALLOC_ORDER) { - /* pmd, pud, or p4d */ - pagetable_free(ptdesc); - return; - } - pagetable_pte_dtor_free(ptdesc); + pagetable_dtor_free(ptdesc); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -211,7 +165,7 @@ static void pte_free_now(struct rcu_head *head) { struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head); - pagetable_pte_dtor_free(ptdesc); + pagetable_dtor_free(ptdesc); } void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index cea5dba80468..7df70cd8f739 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -4,6 +4,7 @@ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> */ +#include <linux/cpufeature.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -19,10 +20,10 @@ #include <linux/ksm.h> #include <linux/mman.h> -#include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/page-states.h> +#include <asm/machine.h> pgprot_t pgprot_writecombine(pgprot_t prot) { @@ -34,22 +35,12 @@ pgprot_t pgprot_writecombine(pgprot_t prot) } EXPORT_SYMBOL_GPL(pgprot_writecombine); -pgprot_t pgprot_writethrough(pgprot_t prot) -{ - /* - * mio_wb_bit_mask may be set on a different CPU, but it is only set - * once at init and only read afterwards. - */ - return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask); -} -EXPORT_SYMBOL_GPL(pgprot_writethrough); - static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int nodat) { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -69,7 +60,7 @@ static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr, { unsigned long opt, asce; - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { opt = 0; asce = READ_ONCE(mm->context.gmap_asce); if (asce == 0UL || nodat) @@ -94,7 +85,7 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) ptep_ipte_local(mm, addr, ptep, nodat); else @@ -173,10 +164,10 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */ /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); #endif return pgste; @@ -210,7 +201,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) if ((pte_val(entry) & _PAGE_PRESENT) && (pte_val(entry) & _PAGE_WRITE) && !(pte_val(entry) & _PAGE_INVALID)) { - if (!MACHINE_HAS_ESOP) { + if (!machine_has_esop()) { /* * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. @@ -220,7 +211,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) } if (!(pte_val(entry) & _PAGE_PROTECT)) /* This pte allows write access, set user-dirty */ - pgste_val(pgste) |= PGSTE_UC_BIT; + pgste = set_pgste_bit(pgste, PGSTE_UC_BIT); } #endif set_pte(ptep, entry); @@ -236,7 +227,7 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm, bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT); if (bits) { - pgste_val(pgste) ^= bits; + pgste = __pgste(pgste_val(pgste) ^ bits); ptep_notify(mm, addr, ptep, bits); } #endif @@ -360,8 +351,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pgste_t pgste; struct mm_struct *mm = vma->vm_mm; - if (!MACHINE_HAS_NX) - pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC)); if (mm_has_pgste(mm)) { pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte, mm); @@ -376,7 +365,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, static inline void pmdp_idte_local(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -388,12 +377,12 @@ static inline void pmdp_idte_local(struct mm_struct *mm, static inline void pmdp_idte_global(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) { + if (machine_has_tlb_guest()) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); - } else if (MACHINE_HAS_IDTE) { + } else if (cpu_has_idte()) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); @@ -413,7 +402,7 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pmdp_idte_local(mm, addr, pmdp); else @@ -507,7 +496,7 @@ EXPORT_SYMBOL(pmdp_xchg_lazy); static inline void pudp_idte_local(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_LOCAL); else @@ -517,10 +506,10 @@ static inline void pudp_idte_local(struct mm_struct *mm, static inline void pudp_idte_global(struct mm_struct *mm, unsigned long addr, pud_t *pudp) { - if (MACHINE_HAS_TLB_GUEST) + if (machine_has_tlb_guest()) __pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + else if (cpu_has_idte()) __pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL); else /* @@ -539,7 +528,7 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm, if (pud_val(old) & _REGION_ENTRY_INVALID) return old; atomic_inc(&mm->context.flush_count); - if (MACHINE_HAS_TLB_LC && + if (cpu_has_tlb_lc() && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) pudp_idte_local(mm, addr, pudp); else @@ -611,7 +600,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, /* the mm_has_pgste() check is done in set_pte_at() */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO); pgste_set_key(ptep, pgste, entry, mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); @@ -624,7 +613,7 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= PGSTE_IN_BIT; + pgste = set_pgste_bit(pgste, PGSTE_IN_BIT); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -669,7 +658,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID)); entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } - pgste_val(pgste) |= bit; + pgste = set_pgste_bit(pgste, bit); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); return 0; @@ -689,7 +678,7 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, if (!(pte_val(spte) & _PAGE_INVALID) && !((pte_val(spte) & _PAGE_PROTECT) && !(pte_val(pte) & _PAGE_PROTECT))) { - pgste_val(spgste) |= PGSTE_VSIE_BIT; + spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT); tpgste = pgste_get_lock(tptep); tpte = __pte((pte_val(spte) & PAGE_MASK) | (pte_val(pte) & _PAGE_PROTECT)); @@ -747,7 +736,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); + pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -760,8 +749,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* Clear storage key ACC and F, but set R/C */ preempt_disable(); pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT); ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); @@ -782,13 +771,13 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, pgste = pgste_get_lock(ptep); dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; + pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT); pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { pgste = pgste_pte_notify(mm, addr, ptep, pgste); nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); ptep_ipte_global(mm, addr, ptep, nodat); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE)) pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); else pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID)); @@ -844,11 +833,11 @@ again: if (!ptep) goto again; new = old = pgste_get_lock(ptep); - pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | - PGSTE_ACC_BITS | PGSTE_FP_BIT); + new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); keyul = (unsigned long) key; - pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; - pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48); + new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56); if (!(pte_val(*ptep) & _PAGE_INVALID)) { unsigned long bits, skey; @@ -859,12 +848,12 @@ again: /* Set storage key ACC and FP */ page_set_storage_key(paddr, skey, !nq); /* Merge host changed & referenced into pgste */ - pgste_val(new) |= bits << 52; + new = set_pgste_bit(new, bits << 52); } /* changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -952,19 +941,19 @@ again: goto again; new = old = pgste_get_lock(ptep); /* Reset guest reference bit only */ - pgste_val(new) &= ~PGSTE_GR_BIT; + new = clear_pgste_bit(new, PGSTE_GR_BIT); if (!(pte_val(*ptep) & _PAGE_INVALID)) { paddr = pte_val(*ptep) & PAGE_MASK; cc = page_reset_referenced(paddr); /* Merge real referenced bit into host-set */ - pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; + new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT); } /* Reflect guest's logical view, not physical */ cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; /* Changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) - pgste_val(new) |= PGSTE_UC_BIT; + new = set_pgste_bit(new, PGSTE_UC_BIT); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); @@ -1128,7 +1117,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, if (res) pgstev |= _PGSTE_GPS_ZERO; - pgste_val(pgste) = pgstev; + pgste = __pgste(pgstev); pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); return res; @@ -1161,8 +1150,8 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long hva, return -EFAULT; new = pgste_get_lock(ptep); - pgste_val(new) &= ~bits; - pgste_val(new) |= value & bits; + new = clear_pgste_bit(new, bits); + new = set_pgste_bit(new, value & bits); pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 665b8228afeb..448dd6ed1069 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,6 +4,7 @@ */ #include <linux/memory_hotplug.h> +#include <linux/cpufeature.h> #include <linux/memblock.h> #include <linux/pfn.h> #include <linux/mm.h> @@ -171,9 +172,6 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, pte_t *pte; prot = pgprot_val(PAGE_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_PAGE_NOEXEC; - pte = pte_offset_kernel(pmd, addr); for (; addr < end; addr += PAGE_SIZE, pte++) { if (!add) { @@ -230,9 +228,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, pte_t *pte; prot = pgprot_val(SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_SEGMENT_ENTRY_NOEXEC; - pmd = pmd_offset(pud, addr); for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); @@ -255,12 +250,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, } else if (pmd_none(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE) && - MACHINE_HAS_EDAT1 && direct && + cpu_has_edat1() && direct && !debug_pagealloc_enabled()) { set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; continue; - } else if (!direct && MACHINE_HAS_EDAT1) { + } else if (!direct && cpu_has_edat1()) { void *new_page; /* @@ -324,8 +319,6 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, pmd_t *pmd; prot = pgprot_val(REGION3_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_REGION_ENTRY_NOEXEC; pud = pud_offset(p4d, addr); for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); @@ -343,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE) && - MACHINE_HAS_EDAT2 && direct && + cpu_has_edat2() && direct && !debug_pagealloc_enabled()) { set_pud(pud, __pud(__pa(addr) | prot)); pages++; @@ -667,10 +660,10 @@ void __init vmem_map_init(void) * prefix page is used to return to the previous context with * an LPSWE instruction and therefore must be executable. */ - if (!static_key_enabled(&cpu_has_bear)) + if (!cpu_has_bear()) set_memory_x(0, 1); if (debug_pagealloc_enabled()) - __set_memory_4k(__va(0), __va(0) + ident_map_size); + __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9d440a0b729e..c7f8313ba449 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -48,8 +48,6 @@ struct bpf_jit { int lit64; /* Current position in 64-bit literal pool */ int base_ip; /* Base address for literal pool */ int exit_ip; /* Address of exit */ - int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ - int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ int excnt; /* Number of exception table entries */ int prologue_plt_ret; /* Return address for prologue hotpatch PLT */ @@ -127,6 +125,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) jit->seen_regs |= (1 << r1); } +static s32 off_to_pcrel(struct bpf_jit *jit, u32 off) +{ + return off - jit->prg; +} + +static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr) +{ + if (jit->prg_buf) + return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg); + return 0; +} + #define REG_SET_SEEN(b1) \ ({ \ reg_set_seen(jit, b1); \ @@ -201,7 +211,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT4_PCREL_RIC(op, mask, target) \ ({ \ - int __rel = ((target) - jit->prg) / 2; \ + int __rel = off_to_pcrel(jit, target) / 2; \ _EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \ }) @@ -239,7 +249,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \ ({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ + unsigned int rel = off_to_pcrel(jit, target) / 2; \ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \ (op2) | (mask) << 12); \ REG_SET_SEEN(b1); \ @@ -248,7 +258,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \ ({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ + unsigned int rel = off_to_pcrel(jit, target) / 2; \ _EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \ (rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \ REG_SET_SEEN(b1); \ @@ -257,29 +267,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \ ({ \ - int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \ + int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\ REG_SET_SEEN(b1); \ REG_SET_SEEN(b2); \ }) +static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel) +{ + u32 pc32dbl = (s32)(pcrel / 2); + + _EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff); +} + +static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel) +{ + emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel); + REG_SET_SEEN(b); +} + #define EMIT6_PCREL_RILB(op, b, target) \ -({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ - _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\ - REG_SET_SEEN(b); \ -}) + emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target)) -#define EMIT6_PCREL_RIL(op, target) \ -({ \ - unsigned int rel = (int)((target) - jit->prg) / 2; \ - _EMIT6((op) | rel >> 16, rel & 0xffff); \ -}) +#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr) \ + emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr)) + +static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel) +{ + emit6_pcrel_ril(jit, op | mask << 20, pcrel); +} #define EMIT6_PCREL_RILC(op, mask, target) \ -({ \ - EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \ -}) + emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target)) + +#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr) \ + emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr)) #define _EMIT6_IMM(op, imm) \ ({ \ @@ -503,7 +525,7 @@ static void bpf_skip(struct bpf_jit *jit, int size) { if (size >= 6 && !is_valid_rel(size)) { /* brcl 0xf,size */ - EMIT6_PCREL_RIL(0xc0f4000000, size); + EMIT6_PCREL_RILC(0xc0040000, 0xf, size); size -= 6; } else if (size >= 4 && is_valid_rel(size)) { /* brc 0xf,size */ @@ -605,43 +627,30 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, } /* Setup stack and backchain */ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) { - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - /* lgr %w1,%r15 (backchain) */ - EMIT4(0xb9040000, REG_W1, REG_15); + /* lgr %w1,%r15 (backchain) */ + EMIT4(0xb9040000, REG_W1, REG_15); /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth)); - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - /* stg %w1,152(%r15) (backchain) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, - REG_15, 152); + /* stg %w1,152(%r15) (backchain) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, + REG_15, 152); } } /* - * Emit an expoline for a jump that follows + * Jump using a register either directly or via an expoline thunk */ -static void emit_expoline(struct bpf_jit *jit) -{ - /* exrl %r0,.+10 */ - EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); - /* j . */ - EMIT4_PCREL(0xa7f40000, 0); -} - -/* - * Emit __s390_indirect_jump_r1 thunk if necessary - */ -static void emit_r1_thunk(struct bpf_jit *jit) -{ - if (nospec_uses_trampoline()) { - jit->r1_thunk_ip = jit->prg; - emit_expoline(jit); - /* br %r1 */ - _EMIT2(0x07f1); - } -} +#define EMIT_JUMP_REG(reg) do { \ + if (nospec_uses_trampoline()) \ + /* brcl 0xf,__s390_indirect_jump_rN */ \ + EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f, \ + __s390_indirect_jump_r ## reg); \ + else \ + /* br %rN */ \ + _EMIT2(0x07f0 | reg); \ +} while (0) /* * Call r1 either directly or via __s390_indirect_jump_r1 thunk @@ -650,7 +659,8 @@ static void call_r1(struct bpf_jit *jit) { if (nospec_uses_trampoline()) /* brasl %r14,__s390_indirect_jump_r1 */ - EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); + EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, + __s390_indirect_jump_r1); else /* basr %r14,%r1 */ EMIT2(0x0d00, REG_14, REG_1); @@ -666,16 +676,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ save_restore_regs(jit, REGS_RESTORE, stack_depth, 0); - if (nospec_uses_trampoline()) { - jit->r14_thunk_ip = jit->prg; - /* Generate __s390_indirect_jump_r14 thunk */ - emit_expoline(jit); - } - /* br %r14 */ - _EMIT2(0x07fe); - - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - emit_r1_thunk(jit); + EMIT_JUMP_REG(14); jit->prg = ALIGN(jit->prg, 8); jit->prologue_plt = jit->prg; @@ -1877,7 +1878,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* aghi %r1,tail_call_start */ EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start); /* brcl 0xf,__s390_indirect_jump_r1 */ - EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip); + EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf, + __s390_indirect_jump_r1); } else { /* bc 0xf,tail_call_start(%r1) */ _EMIT4(0x47f01000 + jit->tail_call_start); @@ -2585,9 +2587,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (nr_stack_args > MAX_NR_STACK_ARGS) return -ENOTSUPP; - /* Return to %r14, since func_addr and %r0 are not available. */ - if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) || - (flags & BPF_TRAMP_F_INDIRECT)) + /* Return to %r14 in the struct_ops case. */ + if (flags & BPF_TRAMP_F_INDIRECT) flags |= BPF_TRAMP_F_SKIP_FRAME; /* @@ -2847,17 +2848,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, 0xf000 | tjit->tccnt_off); /* aghi %r15,stack_size */ EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size); - /* Emit an expoline for the following indirect jump. */ - if (nospec_uses_trampoline()) - emit_expoline(jit); if (flags & BPF_TRAMP_F_SKIP_FRAME) - /* br %r14 */ - _EMIT2(0x07fe); + EMIT_JUMP_REG(14); else - /* br %r1 */ - _EMIT2(0x07f1); - - emit_r1_thunk(jit); + EMIT_JUMP_REG(1); return 0; } @@ -2919,10 +2913,16 @@ bool bpf_jit_supports_arena(void) bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) { - /* - * Currently the verifier uses this function only to check which - * atomic stores to arena are supported, and they all are. - */ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_B: + case BPF_STX | BPF_ATOMIC | BPF_H: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (bpf_atomic_is_load_store(insn)) + return false; + } return true; } diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 2c21f0394c9a..1810e0944a4e 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o pci_kvm_hook.o + pci_bus.o pci_kvm_hook.o pci_report.o pci_fixup.o obj-$(CONFIG_PCI_IOV) += pci_iov.o obj-$(CONFIG_SYSFS) += pci_sysfs.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 88f72745fa59..cd6676c2d602 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -31,6 +31,7 @@ #include <linux/lockdep.h> #include <linux/list_sort.h> +#include <asm/machine.h> #include <asm/isc.h> #include <asm/airq.h> #include <asm/facility.h> @@ -44,6 +45,7 @@ /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); +static DEFINE_MUTEX(zpci_add_remove_lock); static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); @@ -69,6 +71,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb); struct airq_iv *zpci_aif_sbv; EXPORT_SYMBOL_GPL(zpci_aif_sbv); +void zpci_zdev_put(struct zpci_dev *zdev) +{ + if (!zdev) + return; + mutex_lock(&zpci_add_remove_lock); + kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); +} + struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; @@ -124,14 +135,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, struct zpci_fib fib = {0}; u8 cc; - WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; /* Work around off by one in ISM virt device */ if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) fib.pal = limit + (1 << 12); else fib.pal = limit; - fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; + fib.iota = iota; fib.gd = zdev->gisa; cc = zpci_mod_fc(req, &fib, status); if (cc) @@ -255,7 +265,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, - unsigned long prot) + pgprot_t prot) { /* * When PCI MIO instructions are unavailable the "physical" address @@ -265,7 +275,7 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, if (!static_branch_unlikely(&have_mio)) return (void __iomem *)phys_addr; - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + return generic_ioremap_prot(phys_addr, size, prot); } EXPORT_SYMBOL(ioremap_prot); @@ -690,6 +700,23 @@ int zpci_enable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_enable_device); +int zpci_reenable_device(struct zpci_dev *zdev) +{ + u8 status; + int rc; + + rc = zpci_enable_device(zdev); + if (rc) + return rc; + + rc = zpci_iommu_register_ioat(zdev, &status); + if (rc) + zpci_disable_device(zdev); + + return rc; +} +EXPORT_SYMBOL_GPL(zpci_reenable_device); + int zpci_disable_device(struct zpci_dev *zdev) { u32 fh = zdev->fh; @@ -739,7 +766,6 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); */ int zpci_hot_reset_device(struct zpci_dev *zdev) { - u8 status; int rc; lockdep_assert_held(&zdev->state_lock); @@ -758,19 +784,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) return rc; } - rc = zpci_enable_device(zdev); - if (rc) - return rc; + rc = zpci_reenable_device(zdev); - if (zdev->dma_table) - rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); - if (rc) { - zpci_disable_device(zdev); - return rc; - } - - return 0; + return rc; } /** @@ -831,6 +847,7 @@ int zpci_add_device(struct zpci_dev *zdev) { int rc; + mutex_lock(&zpci_add_remove_lock); zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state); rc = zpci_init_iommu(zdev); if (rc) @@ -844,12 +861,14 @@ int zpci_add_device(struct zpci_dev *zdev) spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); spin_unlock(&zpci_list_lock); + mutex_unlock(&zpci_add_remove_lock); return 0; error_destroy_iommu: zpci_destroy_iommu(zdev); error: zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc); + mutex_unlock(&zpci_add_remove_lock); return rc; } @@ -919,21 +938,20 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) * @zdev: the zpci_dev that was reserved * * Handle the case that a given zPCI function was reserved by another system. - * After a call to this function the zpci_dev can not be found via - * get_zdev_by_fid() anymore but may still be accessible via existing - * references though it will not be functional anymore. */ void zpci_device_reserved(struct zpci_dev *zdev) { - /* - * Remove device from zpci_list as it is going away. This also - * makes sure we ignore subsequent zPCI events for this device. - */ - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); + lockdep_assert_held(&zdev->state_lock); + /* We may declare the device reserved multiple times */ + if (zdev->state == ZPCI_FN_STATE_RESERVED) + return; zdev->state = ZPCI_FN_STATE_RESERVED; zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + /* + * The underlying device is gone. Allow the zdev to be freed + * as soon as all other references are gone by accounting for + * the removal as a dropped reference. + */ zpci_zdev_put(zdev); } @@ -941,13 +959,14 @@ void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + lockdep_assert_held(&zpci_add_remove_lock); WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED); - - if (zdev->zbus->bus) - zpci_bus_remove_device(zdev, false); - - if (zdev_enabled(zdev)) - zpci_disable_device(zdev); + /* + * We already hold zpci_list_lock thanks to kref_put_lock(). + * This makes sure no new reference can be taken from the list. + */ + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); if (zdev->has_hp_slot) zpci_exit_slot(zdev); @@ -1073,7 +1092,7 @@ char * __init pcibios_setup(char *str) return NULL; } if (!strcmp(str, "nomio")) { - get_lowcore()->machine_flags &= ~MACHINE_FLAG_PCI_MIO; + clear_machine_feature(MFEATURE_PCI_MIO); return NULL; } if (!strcmp(str, "force_floating")) { @@ -1148,7 +1167,7 @@ static int __init pci_base_init(void) return 0; } - if (MACHINE_HAS_PCI_MIO) { + if (test_machine_feature(MFEATURE_PCI_MIO)) { static_branch_enable(&have_mio); system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT); } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index d5ace00d10f0..81bdb54ad5e3 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -19,6 +19,7 @@ #include <linux/jump_label.h> #include <linux/pci.h> #include <linux/printk.h> +#include <linux/dma-direct.h> #include <asm/pci_clp.h> #include <asm/pci_dma.h> @@ -171,7 +172,6 @@ void zpci_bus_scan_busses(void) static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev) { return !s390_pci_no_rid && zdev->rid_available && - zpci_is_device_configured(zdev) && !zdev->vfn; } @@ -284,10 +284,32 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid) return zbus; } +static void pci_dma_range_setup(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + u64 aligned_end, size; + dma_addr_t dma_start; + int ret; + + dma_start = PAGE_ALIGN(zdev->start_dma); + aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1); + if (aligned_end >= dma_start) + size = aligned_end - dma_start; + else + size = 0; + WARN_ON_ONCE(size == 0); + + ret = dma_direct_set_offset(&pdev->dev, 0, dma_start, size); + if (ret) + pr_err("Failed to allocate DMA range map for %s\n", pci_name(pdev)); +} + void pcibios_bus_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); + pci_dma_range_setup(pdev); + /* * With pdev->no_vf_scan the common PCI probing code does not * perform PF/VF linking. @@ -332,6 +354,20 @@ error: return rc; } +static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + struct pci_dev *pdev; + + if (!zdev->vfn) + return false; + + pdev = zpci_iov_find_parent_pf(zbus, zdev); + if (!pdev) + return true; + pci_dev_put(pdev); + return false; +} + int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) { bool topo_is_tid = zdev->tid_avail; @@ -346,6 +382,15 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) topo = topo_is_tid ? zdev->tid : zdev->pchid; zbus = zpci_bus_get(topo, topo_is_tid); + /* + * An isolated VF gets its own domain/bus even if there exists + * a matching domain/bus already + */ + if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) { + zpci_bus_put(zbus); + zbus = NULL; + } + if (!zbus) { zbus = zpci_bus_alloc(topo, topo_is_tid); if (!zbus) diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index e86a9419d233..ae3d7a9159bd 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -21,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev); void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); void zpci_release_device(struct kref *kref); -static inline void zpci_zdev_put(struct zpci_dev *zdev) -{ - if (zdev) - kref_put(&zdev->kref, zpci_release_device); -} + +void zpci_zdev_put(struct zpci_dev *zdev); static inline void zpci_zdev_get(struct zpci_dev *zdev) { diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 14bf7e8d06b7..241f7251c873 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -56,7 +56,7 @@ static inline int clp_get_ilp(unsigned long *ilp) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n" "0: lhi %[exc],0\n" "1:\n" @@ -79,7 +79,7 @@ static __always_inline int clp_req(void *data, unsigned int lps) u64 ignored; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n" "0: lhi %[exc],0\n" "1:\n" @@ -112,6 +112,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, zdev->version = response->version; zdev->maxstbl = response->maxstbl; zdev->dtsm = response->dtsm; + zdev->rtr_avail = response->rtr; switch (response->version) { case 1: @@ -427,6 +428,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 7f7b732b3f3e..d930416d4c90 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -16,6 +16,7 @@ #include <asm/sclp.h> #include "pci_bus.h" +#include "pci_report.h" /* Content Code Description for PCI Function Error */ struct zpci_ccdf_err { @@ -53,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) case PCI_ERS_RESULT_CAN_RECOVER: case PCI_ERS_RESULT_RECOVERED: case PCI_ERS_RESULT_NEED_RESET: + case PCI_ERS_RESULT_NONE: return false; default: return true; @@ -77,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver) return false; if (!driver->err_handler->error_detected) return false; - if (!driver->err_handler->slot_reset) - return false; - if (!driver->err_handler->resume) - return false; return true; } @@ -105,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, struct zpci_dev *zdev = to_zpci(pdev); int rc; + /* The underlying device may have been disabled by the event */ + if (!zdev_enabled(zdev)) + return PCI_ERS_RESULT_NEED_RESET; + pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); rc = zpci_reset_load_store_blocked(zdev); if (rc) { @@ -113,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } - if (driver->err_handler->mmio_enabled) { + if (driver->err_handler->mmio_enabled) ers_res = driver->err_handler->mmio_enabled(pdev); - if (ers_result_indicates_abort(ers_res)) { - pr_info("%s: Automatic recovery failed after MMIO re-enable\n", - pci_name(pdev)); - return ers_res; - } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { - pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); - return ers_res; - } + else + ers_res = PCI_ERS_RESULT_NONE; + + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after MMIO re-enable\n", + pci_name(pdev)); + return ers_res; + } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { + pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); + return ers_res; } pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); @@ -149,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, return ers_res; } pdev->error_state = pci_channel_io_normal; - ers_res = driver->err_handler->slot_reset(pdev); + + if (driver->err_handler->slot_reset) + ers_res = driver->err_handler->slot_reset(pdev); + else + ers_res = PCI_ERS_RESULT_NONE; + if (ers_result_indicates_abort(ers_res)) { pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); return ers_res; @@ -169,6 +178,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) { pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; + struct zpci_dev *zdev = to_zpci(pdev); + char *status_str = "success"; struct pci_driver *driver; /* @@ -186,37 +197,56 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (is_passed_through(pdev)) { pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", pci_name(pdev)); + status_str = "failed (pass-through)"; goto out_unlock; } driver = to_pci_driver(pdev->dev.driver); if (!is_driver_supported(driver)) { - if (!driver) + if (!driver) { pr_info("%s: Cannot be recovered because no driver is bound to the device\n", pci_name(pdev)); - else + status_str = "failed (no driver)"; + } else { pr_info("%s: The %s driver bound to the device does not support error recovery\n", pci_name(pdev), driver->name); + status_str = "failed (no driver support)"; + } goto out_unlock; } ers_res = zpci_event_notify_error_detected(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on detection)"; goto out_unlock; + } - if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { + if (ers_res != PCI_ERS_RESULT_NEED_RESET) { ers_res = zpci_event_do_error_state_clear(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on MMIO enable)"; goto out_unlock; + } } if (ers_res == PCI_ERS_RESULT_NEED_RESET) ers_res = zpci_event_do_reset(pdev, driver); + /* + * ers_res can be PCI_ERS_RESULT_NONE either because the driver + * decided to return it, indicating that it abstains from voting + * on how to recover, or because it didn't implement the callback. + * Both cases assume, that if there is nothing else causing a + * disconnect, we recovered successfully. + */ + if (ers_res == PCI_ERS_RESULT_NONE) + ers_res = PCI_ERS_RESULT_RECOVERED; + if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); + status_str = "failed (driver can't recover)"; goto out_unlock; } @@ -225,6 +255,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) driver->err_handler->resume(pdev); out_unlock: pci_dev_unlock(pdev); + zpci_report_status(zdev, "recovery", status_str); return ers_res; } @@ -260,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; pci_ers_result_t ers_res; + u32 fh = 0; + int rc; zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); @@ -268,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) if (zdev) { mutex_lock(&zdev->state_lock); + rc = clp_refresh_fh(zdev->fid, &fh); + if (rc) + goto no_pdev; + if (!fh || ccdf->fh != fh) { + /* Ignore events with stale handles */ + zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n", + ccdf->fid, fh, ccdf->fh); + goto no_pdev; + } zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); @@ -322,6 +364,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) zdev->state = ZPCI_FN_STATE_STANDBY; } +static void zpci_event_reappear(struct zpci_dev *zdev) +{ + lockdep_assert_held(&zdev->state_lock); + /* + * The zdev is in the reserved state. This means that it was presumed to + * go away but there are still undropped references. Now, the platform + * announced its availability again. Bring back the lingering zdev + * to standby. This is safe because we hold a temporary reference + * now so that it won't go away. Account for the re-appearance of the + * underlying device by incrementing the reference count. + */ + zdev->state = ZPCI_FN_STATE_STANDBY; + zpci_zdev_get(zdev); + zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh); +} + static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); @@ -345,8 +403,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); /* the configuration request may be stale */ - if (zdev->state != ZPCI_FN_STATE_STANDBY) + else if (zdev->state != ZPCI_FN_STATE_STANDBY) break; zdev->state = ZPCI_FN_STATE_CONFIGURED; } @@ -362,6 +422,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; } } else { + if (zdev->state == ZPCI_FN_STATE_RESERVED) + zpci_event_reappear(zdev); zpci_update_fh(zdev, ccdf->fh); } break; diff --git a/arch/s390/pci/pci_fixup.c b/arch/s390/pci/pci_fixup.c new file mode 100644 index 000000000000..35688b645098 --- /dev/null +++ b/arch/s390/pci/pci_fixup.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Exceptions for specific devices, + * + * Copyright IBM Corp. 2025 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + */ +#include <linux/pci.h> + +static void zpci_ism_bar_no_mmap(struct pci_dev *pdev) +{ + /* + * ISM's BAR is special. Drivers written for ISM know + * how to handle this but others need to be aware of their + * special nature e.g. to prevent attempts to mmap() it. + */ + pdev->non_mappable_bars = 1; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, + PCI_DEVICE_ID_IBM_ISM, + zpci_ism_bar_no_mmap); diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index f5a75ea7629a..eb978c8012be 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -160,7 +160,7 @@ static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status) u64 __data; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d20000,%[data],%[req_off]\n" "0: lhi %[exc],0\n" "1:\n" @@ -229,7 +229,7 @@ static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status) u64 __data; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d60000,%[data],%[ioaddr_len]\n" "0: lhi %[exc],0\n" "1:\n" @@ -267,7 +267,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d00000,%[data],%[req_off]\n" "0: lhi %[exc],0\n" "1:\n" @@ -321,7 +321,7 @@ static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rre,0xb9d40000,%[data],%[ioaddr_len]\n" "0: lhi %[exc],0\n" "1:\n" @@ -356,7 +356,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n" "0: lhi %[exc],0\n" "1:\n" @@ -410,7 +410,7 @@ static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status) int cc, exception; exception = 1; - asm volatile ( + asm_inline volatile ( " .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n" "0: lhi %[exc],0\n" "1:\n" diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c index ead062bf2b41..191e56a623f6 100644 --- a/arch/s390/pci/pci_iov.c +++ b/arch/s390/pci/pci_iov.c @@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in return 0; } -int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +/** + * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function + * @zbus: The bus that the PCI function is on, or would be added on + * @zdev: The PCI function + * + * Finds the parent PF, if it exists and is configured, of the given PCI function + * and increments its refcount. Th PF is searched for on the provided bus so the + * caller has to ensure that this is the correct bus to search. This function may + * be used before adding the PCI function to a zbus. + * + * Return: Pointer to the struct pci_dev of the parent PF or NULL if it not + * found. If the function is not a VF or has no RequesterID information, + * NULL is returned as well. + */ +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) { - int i, cand_devfn; - struct zpci_dev *zdev; + int i, vfid, devfn, cand_devfn; struct pci_dev *pdev; - int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ - int rc = 0; if (!zbus->multifunction) - return 0; - - /* If the parent PF for the given VF is also configured in the + return NULL; + /* Non-VFs and VFs without RID available don't have a parent */ + if (!zdev->vfn || !zdev->rid_available) + return NULL; + /* Linux vfid starts at 0 vfn at 1 */ + vfid = zdev->vfn - 1; + devfn = zdev->rid & ZPCI_RID_MASK_DEVFN; + /* + * If the parent PF for the given VF is also configured in the * instance, it must be on the same zbus. * We can then identify the parent PF by checking what * devfn the VF would have if it belonged to that PF using the PF's @@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn if (!pdev) continue; cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); - if (cand_devfn == virtfn->devfn) { - rc = zpci_iov_link_virtfn(pdev, virtfn, vfid); - /* balance pci_get_slot() */ - pci_dev_put(pdev); - break; - } + if (cand_devfn == devfn) + return pdev; /* balance pci_get_slot() */ pci_dev_put(pdev); } } + return NULL; +} + +int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +{ + struct zpci_dev *zdev = to_zpci(virtfn); + struct pci_dev *pdev_pf; + int rc = 0; + + pdev_pf = zpci_iov_find_parent_pf(zbus, zdev); + if (pdev_pf) { + /* Linux' vfids start at 0 while zdev->vfn starts at 1 */ + rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1); + pci_dev_put(pdev_pf); + } return rc; } diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h index e3fa4e77fc86..d2c2793eb0f3 100644 --- a/arch/s390/pci/pci_iov.h +++ b/arch/s390/pci/pci_iov.h @@ -19,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev); int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn); +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev); + #else /* CONFIG_PCI_IOV */ static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {} @@ -28,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v { return 0; } + +static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + return NULL; +} #endif /* CONFIG_PCI_IOV */ #endif /* __S390_PCI_IOV_h */ diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 46f99dc164ad..51e7a28af899 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -32,9 +32,11 @@ static inline int __pcistb_mio_inuser( u64 len, u8 *status) { int cc, exception; + bool sacf_flag; exception = 1; - asm volatile ( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile ( " sacf 256\n" "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n" "1: lhi %[exc],0\n" @@ -44,6 +46,7 @@ static inline int __pcistb_mio_inuser( : CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception) : [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src)) : CC_CLOBBER_LIST("memory")); + disable_sacf_uaccess(sacf_flag); *status = len >> 24 & 0xff; return exception ? -ENXIO : CC_TRANSFORM(cc); } @@ -54,6 +57,7 @@ static inline int __pcistg_mio_inuser( { union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen}; int cc, exception; + bool sacf_flag; u64 val = 0; u64 cnt = ulen; u8 tmp; @@ -64,7 +68,8 @@ static inline int __pcistg_mio_inuser( * address space. pcistg then uses the user mappings. */ exception = 1; - asm volatile ( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile ( " sacf 256\n" "0: llgc %[tmp],0(%[src])\n" "4: sllg %[val],%[val],8\n" @@ -81,6 +86,7 @@ static inline int __pcistg_mio_inuser( CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair) : : CC_CLOBBER_LIST("memory")); + disable_sacf_uaccess(sacf_flag); *status = ioaddr_len.odd >> 24 & 0xff; cc = exception ? -ENXIO : CC_TRANSFORM(cc); @@ -175,8 +181,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, args.address = mmio_addr; args.vma = vma; ret = follow_pfnmap_start(&args); - if (ret) - goto out_unlock_mmap; + if (ret) { + fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL); + ret = follow_pfnmap_start(&args); + if (ret) + goto out_unlock_mmap; + } io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); @@ -200,6 +210,7 @@ static inline int __pcilg_mio_inuser( u64 ulen, u8 *status) { union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen}; + bool sacf_flag; u64 cnt = ulen; int shift = ulen * 8; int cc, exception; @@ -211,7 +222,8 @@ static inline int __pcilg_mio_inuser( * user address @dst */ exception = 1; - asm volatile ( + sacf_flag = enable_sacf_uaccess(); + asm_inline volatile ( " sacf 256\n" "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n" "1: lhi %[exc],0\n" @@ -232,10 +244,10 @@ static inline int __pcilg_mio_inuser( : [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception), CC_OUT(cc, cc), [val] "=d" (val), [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), - [shift] "+d" (shift) + [shift] "+a" (shift) : : CC_CLOBBER_LIST("memory")); - + disable_sacf_uaccess(sacf_flag); cc = exception ? -ENXIO : CC_TRANSFORM(cc); /* did we write everything to the user space buffer? */ if (!cc && cnt != 0) @@ -315,14 +327,18 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) goto out_unlock_mmap; ret = -EACCES; - if (!(vma->vm_flags & VM_WRITE)) + if (!(vma->vm_flags & VM_READ)) goto out_unlock_mmap; args.vma = vma; args.address = mmio_addr; ret = follow_pfnmap_start(&args); - if (ret) - goto out_unlock_mmap; + if (ret) { + fixup_user_fault(current->mm, mmio_addr, 0, NULL); + ret = follow_pfnmap_start(&args); + if (ret) + goto out_unlock_mmap; + } io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c new file mode 100644 index 000000000000..1b494e5ecc4d --- /dev/null +++ b/arch/s390/pci/pci_report.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2024 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/sprintf.h> +#include <linux/pci.h> + +#include <asm/sclp.h> +#include <asm/debug.h> +#include <asm/pci_debug.h> + +#include "pci_report.h" + +#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714 + +struct zpci_report_error_data { + u64 timestamp; + u64 err_log_id; + char log_data[]; +} __packed; + +#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb)) +#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data)) + +struct zpci_report_error { + struct zpci_report_error_header header; + struct zpci_report_error_data data; +} __packed; + +static const char *zpci_state_str(pci_channel_state_t state) +{ + switch (state) { + case pci_channel_io_normal: + return "normal"; + case pci_channel_io_frozen: + return "frozen"; + case pci_channel_io_perm_failure: + return "permanent-failure"; + default: + return "invalid"; + }; +} + +static int debug_log_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) +{ + unsigned long sec, usec; + unsigned int level; + char *except_str; + int rc = 0; + + level = entry->level; + sec = entry->clock; + usec = do_div(sec, USEC_PER_SEC); + + if (entry->exception) + except_str = "*"; + else + except_str = "-"; + rc += scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ", + sec, usec, level, except_str, + entry->cpu); + return rc; +} + +static int debug_prolog_header(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size) +{ + return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n"); +} + +static struct debug_view debug_log_view = { + "pci_msg_log", + &debug_prolog_header, + &debug_log_header_fn, + &debug_sprintf_format_fn, + NULL, + NULL +}; + +/** + * zpci_report_status - Report the status of operations on a PCI device + * @zdev: The PCI device for which to report status + * @operation: A string representing the operation reported + * @status: A string representing the status of the operation + * + * This function creates a human readable report about an operation such as + * PCI device recovery and forwards this to the platform using the SCLP Write + * Event Data mechanism. Besides the operation and status strings the report + * also contains additional information about the device deemed useful for + * debug such as the currently bound device driver, if any, and error state. + * Additionally a string representation of pci_debug_msg_id, or as much as fits, + * is also included. + * + * Return: 0 on success an error code < 0 otherwise. + */ +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status) +{ + struct zpci_report_error *report; + struct pci_driver *driver = NULL; + struct pci_dev *pdev = NULL; + char *buf, *end; + int ret; + + if (!zdev || !zdev->zbus) + return -ENODEV; + + /* Protected virtualization hosts get nothing from us */ + if (prot_virt_guest) + return -ENODATA; + + report = (void *)get_zeroed_page(GFP_KERNEL); + if (!report) + return -ENOMEM; + if (zdev->zbus->bus) + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev) + driver = to_pci_driver(pdev->dev.driver); + + buf = report->data.log_data; + end = report->data.log_data + ZPCI_REPORT_DATA_SIZE; + buf += scnprintf(buf, end - buf, "report: %s\n", operation); + buf += scnprintf(buf, end - buf, "status: %s\n", status); + buf += scnprintf(buf, end - buf, "state: %s\n", + (pdev) ? zpci_state_str(pdev->error_state) : "n/a"); + buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a"); + ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true); + if (ret < 0) + pr_err("Reading PCI debug messages failed with code %d\n", ret); + else + buf += ret; + + report->header.version = 1; + report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG; + report->header.length = buf - (char *)&report->data; + report->data.timestamp = ktime_get_clocktai_seconds(); + report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT; + + ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid); + if (ret) + pr_err("Reporting PCI status failed with code %d\n", ret); + else + pr_info("Reported PCI device status\n"); + + free_page((unsigned long)report); + + return ret; +} diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h new file mode 100644 index 000000000000..e08003d51a97 --- /dev/null +++ b/arch/s390/pci/pci_report.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2024 + * + * Author(s): + * Niklas Schnelle <schnelle@linux.ibm.com> + * + */ +#ifndef __S390_PCI_REPORT_H +#define __S390_PCI_REPORT_H + +struct zpci_dev; + +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status); + +#endif /* __S390_PCI_REPORT_H */ diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 5f46ad58dcd1..0ecad08e1b1e 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -52,7 +52,6 @@ static DEVICE_ATTR_RO(mio_enabled); static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) { - u8 status; int ret; pci_stop_and_remove_bus_device(pdev); @@ -70,16 +69,8 @@ static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev) return ret; } - ret = zpci_enable_device(zdev); - if (ret) - return ret; + ret = zpci_reenable_device(zdev); - if (zdev->dma_table) { - ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status); - if (ret) - zpci_disable_device(zdev); - } return ret; } @@ -135,7 +126,7 @@ out: static DEVICE_ATTR_WO(recover); static ssize_t util_string_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -145,10 +136,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, zdev->util_str, sizeof(zdev->util_str)); } -static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); +static const BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); static ssize_t report_error_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct zpci_report_error_header *report = (void *) buf; @@ -164,7 +155,7 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj, return ret ? ret : count; } -static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE); +static const BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE); static ssize_t uid_is_unique_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -203,7 +194,7 @@ const struct attribute_group zpci_ident_attr_group = { .is_visible = zpci_index_is_visible, }; -static struct bin_attribute *zpci_bin_attrs[] = { +static const struct bin_attribute *const zpci_bin_attrs[] = { &bin_attr_util_string, &bin_attr_report_error, NULL, @@ -227,7 +218,7 @@ static struct attribute *zpci_dev_attrs[] = { const struct attribute_group zpci_attr_group = { .attrs = zpci_dev_attrs, - .bin_attrs = zpci_bin_attrs, + .bin_attrs_new = zpci_bin_attrs, }; static struct attribute *pfip_attrs[] = { diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 24eccaa29337..bd39b36e7bd6 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -8,20 +8,22 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) -CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY +CFLAGS_sha256.o := -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes +KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += -D__DISABLE_EXPORTS KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS += -D__DISABLE_EXPORTS # Since we link purgatory with -r unresolved symbols are not checked, so we # also link a purgatory.chk binary without -r to check for unresolved symbols. diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 855f818deb98..d5c68ade71ab 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -54,6 +54,9 @@ static struct facility_def facility_defs[] = { #ifdef CONFIG_HAVE_MARCH_Z15_FEATURES 61, /* miscellaneous-instruction-extension 3 */ #endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES + 84, /* miscellaneous-instruction-extension 4 */ +#endif -1 /* END */ } }, diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index a1bc02b29c81..7d76c417f83f 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -201,6 +201,17 @@ static int cmp_long_insn(const void *a, const void *b) return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name); } +static void print_insn_name(const char *name) +{ + size_t i, len; + + len = strlen(name); + printf("{"); + for (i = 0; i < len; i++) + printf(" \'%c\',", name[i]); + printf(" }"); +} + static void print_long_insn(struct gen_opcode *desc) { struct insn *insn; @@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc) insn = &desc->insn[i]; if (insn->name_len < 6) continue; - printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name); + printf("\t[LONG_INSN_%s] = ", insn->upper); + print_insn_name(insn->name); + printf(", \\\n"); } printf("}\n\n"); } @@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr) if (insn->type->byte != 0) opcode += 2; printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format); - if (insn->name_len < 6) - printf(".name = \"%s\" ", insn->name); - else - printf(".offset = LONG_INSN_%s ", insn->upper); - printf("}, \\\n"); + if (insn->name_len < 6) { + printf(".name = "); + print_insn_name(insn->name); + } else { + printf(".offset = LONG_INSN_%s", insn->upper); + } + printf(" }, \\\n"); } static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) |