Diffstat (limited to 'arch/s390')
100 files changed, 1107 insertions, 1227 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4a2a12be04c9..c72874f09741 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -35,9 +35,6 @@ config GENERIC_LOCKBREAK config PGSTE def_bool y if KVM -config ARCH_SUPPORTS_DEBUG_PAGEALLOC - def_bool y - config AUDIT_ARCH def_bool y @@ -53,22 +50,27 @@ config ARCH_SUPPORTS_UPROBES config KASAN_SHADOW_OFFSET hex depends on KASAN - default 0x18000000000000 if KASAN_S390_4_LEVEL_PAGING - default 0x30000000000 + default 0x18000000000000 config S390 def_bool y + # + # Note: keep this list sorted alphabetically + # + imply IMA_SECURE_AND_OR_TRUSTED_BOOT select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FORCE_DMA_UNENCRYPTED select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX @@ -106,6 +108,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF @@ -114,9 +117,10 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 + select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER - select GENERIC_CLOCKEVENTS + select GENERIC_ALLOCATOR select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT @@ -130,22 +134,21 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC - select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK select HAVE_ASM_MODVERSIONS - select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_FAST_GUP + select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_FAST_GUP select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ERROR_INJECTION @@ -154,6 +157,7 @@ config S390 select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO + select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -166,23 +170,25 @@ config S390 select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH - select HAVE_PERF_REGS - select HAVE_PERF_USER_STACK_DUMP select HAVE_MEMBLOCK_PHYS_MAP - select MMU_GATHER_NO_GATHER select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI select HAVE_NOP_MCOUNT select HAVE_OPROFILE select HAVE_PCI select HAVE_PERF_EVENTS - select MMU_GATHER_RCU_TABLE_FREE + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_VIRT_CPU_ACCOUNTING_IDLE select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select MMU_GATHER_NO_GATHER + 
select MMU_GATHER_RCU_TABLE_FREE select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI select NEED_SG_DMA_LENGTH if PCI @@ -191,19 +197,13 @@ config S390 select PCI_DOMAINS if PCI select PCI_MSI if PCI select PCI_MSI_ARCH_FALLBACKS if PCI_MSI - select SET_FS select SPARSE_IRQ + select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TTY select VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME - select HAVE_NMI - select ARCH_HAS_FORCE_DMA_UNENCRYPTED - select SWIOTLB - select GENERIC_ALLOCATOR - imply IMA_SECURE_AND_OR_TRUSTED_BOOT - + # Note: keep the above list sorted alphabetically config SCHED_OMIT_FRAME_POINTER def_bool y @@ -714,7 +714,7 @@ if PCI config PCI_NR_FUNCTIONS int "Maximum number of PCI functions (1-4096)" range 1 4096 - default "128" + default "512" help This allows you to specify the maximum number of PCI functions which this kernel will support. diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index ab48b694ade8..6bfaceebbbc0 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -5,3 +5,11 @@ config TRACE_IRQFLAGS_SUPPORT config EARLY_PRINTK def_bool y + +config DEBUG_USER_ASCE + bool "Debug User ASCE" + help + Check on exit to user space that address space control + elements are set up correctly. + + If unsure, say N. diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ba94b03c8b2f..8db267d2a543 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -25,7 +25,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY -KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float +KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 2ea603f70c3b..8b50967f5804 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -2,20 +2,32 @@ #ifndef BOOT_BOOT_H #define BOOT_BOOT_H +#include <linux/types.h> + +#define BOOT_STACK_OFFSET 0x8000 + +#ifndef __ASSEMBLY__ + +#include <linux/compiler.h> + void startup_kernel(void); -void detect_memory(void); +unsigned long detect_memory(void); +bool is_ipl_block_dump(void); void store_ipl_parmblock(void); void setup_boot_command_line(void); void parse_boot_command_line(void); -void setup_memory_end(void); void verify_facilities(void); void print_missing_facilities(void); void print_pgm_check_info(void); unsigned long get_random_base(unsigned long safe_addr); +void __printf(1, 2) decompressor_printk(const char *fmt, ...); -extern int kaslr_enabled; extern const char kernel_version[]; +extern unsigned long memory_limit; +extern int vmalloc_size_set; +extern int kaslr_enabled; unsigned long read_ipl_report(unsigned long safe_offset); +#endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index 765a08f1bd77..01d93832cf4a 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only vmlinux vmlinux.lds +vmlinux.syms diff --git a/arch/s390/boot/compressed/Makefile
b/arch/s390/boot/compressed/Makefile index b235ed95a3d8..de18dab518bb 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -10,21 +10,39 @@ GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n -obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) piggy.o info.o +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += info.bin $(obj-y) +targets += info.bin syms.bin vmlinux.syms $(obj-all) KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) OBJCOPYFLAGS := OBJECTS := $(addprefix $(obj)/,$(obj-y)) +OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all)) -LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE +LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T +$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS_ALL) FORCE $(call if_changed,ld) +LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T +$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE + $(call if_changed,ld) + +quiet_cmd_dumpsyms = DUMPSYMS $< +define cmd_dumpsyms + $(NM) -n -S --format=bsd "$<" | $(PERL) -ne '/(\w+)\s+(\w+)\s+[tT]\s+(\w+)/ and printf "%x %x %s\0",hex $$1,hex $$2,$$3' > "$@" +endef + +$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE + $(call if_changed,dumpsyms) + +OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms +$(obj)/syms.o: $(obj)/syms.bin FORCE + $(call if_changed,objcopy) + OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load $(obj)/info.bin: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h index c15eb7114d83..41f0ad97a4db 100644 --- a/arch/s390/boot/compressed/decompressor.h +++ b/arch/s390/boot/compressed/decompressor.h @@ -2,8 +2,10 @@ #ifndef BOOT_COMPRESSED_DECOMPRESSOR_H #define BOOT_COMPRESSED_DECOMPRESSOR_H +#include <linux/stddef.h> + #ifdef CONFIG_KERNEL_UNCOMPRESSED -static inline void *decompress_kernel(void) {} +static inline void *decompress_kernel(void) { return NULL; } #else void *decompress_kernel(void); #endif diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 9427e2cd0c15..27a09c1c78f6 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -27,6 +27,7 @@ SECTIONS *(.rodata.*) _erodata = . ; } + NOTES .data : { _data = . ; *(.data) @@ -82,6 +83,14 @@ SECTIONS *(.vmlinux.info) } + .decompressor.syms : { + . += 1; /* make sure we have \0 before the first entry */ + . = ALIGN(2); + _decompressor_syms_start = .; + *(.decompressor.syms) + _decompressor_syms_end = .; + } + #ifdef CONFIG_KERNEL_UNCOMPRESSED . 
= 0x100000; #else diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 1a2c2b1ed964..dacb7813f982 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -28,6 +28,7 @@ #include <asm/thread_info.h> #include <asm/page.h> #include <asm/ptrace.h> +#include "boot.h" #define ARCH_OFFSET 4 @@ -62,8 +63,12 @@ __HEAD .org __LC_RST_NEW_PSW # 0x1a0 .quad 0,iplstart + .org __LC_EXT_NEW_PSW # 0x1b0 + .quad 0x0002000180000000,0x1b0 # disabled wait .org __LC_PGM_NEW_PSW # 0x1d0 .quad 0x0000000180000000,startup_pgm_check_handler + .org __LC_IO_NEW_PSW # 0x1f0 + .quad 0x0002000180000000,0x1f0 # disabled wait .org 0x200 @@ -275,8 +280,8 @@ iplstart: # or linload or SALIPL # .org 0x10000 -ENTRY(startup) - j .Lep_startup_normal +SYM_CODE_START(startup) + j startup_normal .org EP_OFFSET # # This is a list of s390 kernel entry points. At address 0x1000f the number of @@ -290,9 +295,9 @@ ENTRY(startup) # kdump startup-code at 0x10010, running in 64 bit absolute addressing mode # .org 0x10010 -ENTRY(startup_kdump) - j .Lep_startup_kdump -.Lep_startup_normal: + j startup_kdump +SYM_CODE_END(startup) +SYM_CODE_START_LOCAL(startup_normal) mvi __LC_AR_MODE_ID,1 # set esame flag slr %r0,%r0 # set cpuid to zero lhi %r1,2 # mode 2 = esame (dump) @@ -303,6 +308,9 @@ ENTRY(startup_kdump) sam64 # switch to 64 bit addressing mode basr %r13,0 # get base .LPG0: + mvc __LC_EXT_NEW_PSW(16),.Lext_new_psw-.LPG0(%r13) + mvc __LC_PGM_NEW_PSW(16),.Lpgm_new_psw-.LPG0(%r13) + mvc __LC_IO_NEW_PSW(16),.Lio_new_psw-.LPG0(%r13) xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 xc 0xe00(256),0xe00 @@ -315,12 +323,18 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) brasl %r14,verify_facilities brasl %r14,startup_kernel +SYM_CODE_END(startup_normal) .Lstack: - .long 0x8000 + (1<<(PAGE_SHIFT+BOOT_STACK_ORDER)) - STACK_FRAME_OVERHEAD + .long BOOT_STACK_OFFSET + BOOT_STACK_SIZE - STACK_FRAME_OVERHEAD .align 8 6: .long 0x7fffffff,0xffffffff - +.Lext_new_psw: + .quad 0x0002000180000000,0x1b0 # disabled wait +.Lpgm_new_psw: + .quad 0x0000000180000000,startup_pgm_check_handler +.Lio_new_psw: + .quad 0x0002000180000000,0x1f0 # disabled wait .Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table @@ -359,7 +373,7 @@ ENTRY(startup_kdump) # It simply saves general/control registers and psw in # the save area and does disabled wait with a faulty address. # -ENTRY(startup_pgm_check_handler) +SYM_CODE_START_LOCAL(startup_pgm_check_handler) stmg %r8,%r15,__LC_SAVE_AREA_SYNC la %r8,4095 stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) @@ -378,9 +392,9 @@ ENTRY(startup_pgm_check_handler) la %r8,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) lpswe __LC_RETURN_PSW # disabled wait +SYM_CODE_END(startup_pgm_check_handler) .Ldump_info_stack: .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD -ENDPROC(startup_pgm_check_handler) # # params at 10400 (setup.h) diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S index 174d6959bf5b..f015469e7db9 100644 --- a/arch/s390/boot/head_kdump.S +++ b/arch/s390/boot/head_kdump.S @@ -19,8 +19,7 @@ # Note: This code has to be position independent # -.align 2 -.Lep_startup_kdump: +SYM_CODE_START_LOCAL(startup_kdump) lhi %r1,2 # mode 2 = esame (dump) sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode sam64 # Switch to 64 bit addressing @@ -87,14 +86,15 @@ startup_kdump_relocated: basr %r13,0 0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... 
+SYM_CODE_END(startup_kdump) .align 8 .Lrestart_psw: .quad 0x0000000080000000,0x0000000000000000 + startup #else -.align 2 -.Lep_startup_kdump: +SYM_CODE_START_LOCAL(startup_kdump) larl %r13,startup_kdump_crash lpswe 0(%r13) +SYM_CODE_END(startup_kdump) .align 8 startup_kdump_crash: .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index f94b91d72620..d372a45fe10e 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -17,10 +17,10 @@ int __bootdata_preserved(ipl_block_valid); unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE; -unsigned long __bootdata(memory_end); -int __bootdata(memory_end_set); int __bootdata(noexec_disabled); +unsigned long memory_limit; +int vmalloc_size_set; int kaslr_enabled; static inline int __diag308(unsigned long subcode, void *addr) @@ -57,6 +57,17 @@ void store_ipl_parmblock(void) ipl_block_valid = 1; } +bool is_ipl_block_dump(void) +{ + if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && + ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) + return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && + ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return true; + return false; +} + static size_t scpdata_length(const u8 *buf, size_t count) { while (count) { @@ -237,13 +248,13 @@ void parse_boot_command_line(void) while (*args) { args = next_arg(args, ¶m, &val); - if (!strcmp(param, "mem") && val) { - memory_end = round_down(memparse(val, NULL), PAGE_SIZE); - memory_end_set = 1; - } + if (!strcmp(param, "mem") && val) + memory_limit = round_down(memparse(val, NULL), PAGE_SIZE); - if (!strcmp(param, "vmalloc") && val) + if (!strcmp(param, "vmalloc") && val) { vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); + vmalloc_size_set = 1; + } if (!strcmp(param, "dfltcc") && val) { if (!strcmp(val, "off")) @@ -279,27 +290,3 @@ void parse_boot_command_line(void) #endif } } - -static inline bool is_ipl_block_dump(void) -{ - if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && - ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) - return true; - if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && - ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) - return true; - return false; -} - -void setup_memory_end(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) { - kaslr_enabled = 0; - } else if (ipl_block_valid && is_ipl_block_dump()) { - kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&memory_end) && memory_end) - memory_end_set = 1; - } -#endif -} diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index d844a5ef9089..0dd48fbdbaa4 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -7,6 +7,7 @@ #include <asm/cpacf.h> #include <asm/timex.h> #include <asm/sclp.h> +#include <asm/kasan.h> #include "compressed/decompressor.h" #include "boot.h" @@ -176,8 +177,14 @@ unsigned long get_random_base(unsigned long safe_addr) unsigned long kasan_needs; int i; - if (memory_end_set) - memory_limit = min(memory_limit, memory_end); + memory_limit = min(memory_limit, ident_map_size); + + /* + * Avoid putting kernel in the end of physical memory + * which kasan will use for shadow memory and early pgtable + * mapping allocations. 
+ */ + memory_limit -= kasan_estimate_memory_needs(memory_limit); if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) { if (safe_addr < INITRD_START + INITRD_SIZE) @@ -185,28 +192,6 @@ unsigned long get_random_base(unsigned long safe_addr) } safe_addr = ALIGN(safe_addr, THREAD_SIZE); - if ((IS_ENABLED(CONFIG_KASAN))) { - /* - * Estimate kasan memory requirements, which it will reserve - * at the very end of available physical memory. To estimate - * that, we take into account that kasan would require - * 1/8 of available physical memory (for shadow memory) + - * creating page tables for the whole memory + shadow memory - * region (1 + 1/8). To keep page tables estimates simple take - * the double of combined ptes size. - */ - memory_limit = get_mem_detect_end(); - if (memory_end_set && memory_limit > memory_end) - memory_limit = memory_end; - - /* for shadow memory */ - kasan_needs = memory_limit / 8; - /* for paging structures */ - kasan_needs += (memory_limit + kasan_needs) / PAGE_SIZE / - _PAGE_ENTRIES * _PAGE_TABLE_SIZE * 2; - memory_limit -= kasan_needs; - } - kernel_size = vmlinux.image_size + vmlinux.bss_size; if (safe_addr + kernel_size > memory_limit) return 0; diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 62e7c13ce85c..40168e59abd3 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -8,7 +8,6 @@ #include "compressed/decompressor.h" #include "boot.h" -unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); /* up to 256 storage elements, 1020 subincrements each */ @@ -149,27 +148,29 @@ static void search_mem_end(void) add_mem_detect_block(0, (offset + 1) << 20); } -void detect_memory(void) +unsigned long detect_memory(void) { + unsigned long max_physmem_end; + sclp_early_get_memsize(&max_physmem_end); if (!sclp_early_read_storage_info()) { mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; - return; + return max_physmem_end; } if (!diag260()) { mem_detect.info_source = MEM_DETECT_DIAG260; - return; + return max_physmem_end; } if (max_physmem_end) { add_mem_detect_block(0, max_physmem_end); mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; - return; + return max_physmem_end; } search_mem_end(); mem_detect.info_source = MEM_DETECT_BIN_SEARCH; - max_physmem_end = get_mem_detect_end(); + return get_mem_detect_end(); } diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index a3c9862bcede..3a46abed2549 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -1,99 +1,181 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/string.h> +#include <linux/ctype.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> #include <asm/lowcore.h> #include <asm/setup.h> #include <asm/sclp.h> +#include <asm/uv.h> +#include <stdarg.h> #include "boot.h" const char hex_asc[] = "0123456789abcdef"; -#define add_val_as_hex(dst, val) \ - __add_val_as_hex(dst, (const unsigned char *)&val, sizeof(val)) +static char *as_hex(char *dst, unsigned long val, int pad) +{ + char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + + for (*p-- = 0; p >= dst; val >>= 4) + *p-- = hex_asc[val & 0x0f]; + return end; +} -static char *__add_val_as_hex(char *dst, const unsigned char *src, size_t count) +static char *symstart(char *p) { - while (count--) - dst = hex_byte_pack(dst, *src++); - return dst; + while (*p) + p--; + return p + 1; } -static char *add_str(char *dst, char *src) +extern char 
_decompressor_syms_start[], _decompressor_syms_end[]; +static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len) { - strcpy(dst, src); - return dst + strlen(dst); + /* symbol entries are in a form "10000 c4 startup\0" */ + char *a = _decompressor_syms_start; + char *b = _decompressor_syms_end; + unsigned long start; + unsigned long size; + char *pivot; + char *endp; + + while (a < b) { + pivot = symstart(a + (b - a) / 2); + start = simple_strtoull(pivot, &endp, 16); + size = simple_strtoull(endp + 1, &endp, 16); + if (ip < start) { + b = pivot; + continue; + } + if (ip > start + size) { + a = pivot + strlen(pivot) + 1; + continue; + } + *off = ip - start; + *len = size; + return endp + 1; + } + return NULL; } -void print_pgm_check_info(void) +static noinline char *strsym(void *ip) { - struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); - unsigned short ilc = S390_lowcore.pgm_ilc >> 1; - char buf[256]; - int row, col; + static char buf[64]; + unsigned short off; + unsigned short len; char *p; - add_str(buf, "Linux version "); - strlcat(buf, kernel_version, sizeof(buf) - 1); - strlcat(buf, "\n", sizeof(buf)); - sclp_early_printk(buf); + p = findsym((unsigned long)ip, &off, &len); + if (p) { + strncpy(buf, p, sizeof(buf)); + /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ + p = buf + strnlen(buf, sizeof(buf) - 15); + strcpy(p, "+0x"); + p = as_hex(p + 3, off, 0); + strcpy(p, "/0x"); + as_hex(p + 3, len, 0); + } else { + as_hex(buf, (unsigned long)ip, 16); + } + return buf; +} - p = add_str(buf, "Kernel fault: interruption code "); - p = add_val_as_hex(buf + strlen(buf), S390_lowcore.pgm_code); - p = add_str(p, " ilc:"); - *p++ = hex_asc_lo(ilc); - add_str(p, "\n"); - sclp_early_printk(buf); +void decompressor_printk(const char *fmt, ...) +{ + char buf[1024] = { 0 }; + char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ + unsigned long pad; + char *p = buf; + va_list args; - if (kaslr_enabled) { - p = add_str(buf, "Kernel random base: "); - p = add_val_as_hex(p, __kaslr_offset); - add_str(p, "\n"); - sclp_early_printk(buf); + va_start(args, fmt); + for (; p < end && *fmt; fmt++) { + if (*fmt != '%') { + *p++ = *fmt; + continue; + } + pad = isdigit(*++fmt) ? 
simple_strtol(fmt, (char **)&fmt, 10) : 0; + switch (*fmt) { + case 's': + p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); + break; + case 'p': + if (*++fmt != 'S') + goto out; + p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); + break; + case 'l': + if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) + goto out; + p = as_hex(p, va_arg(args, unsigned long), pad); + break; + case 'x': + if (end - p <= max(sizeof(int) * 2, pad)) + goto out; + p = as_hex(p, va_arg(args, unsigned int), pad); + break; + default: + goto out; + } } - - p = add_str(buf, "PSW : "); - p = add_val_as_hex(p, S390_lowcore.psw_save_area.mask); - p = add_str(p, " "); - p = add_val_as_hex(p, S390_lowcore.psw_save_area.addr); - add_str(p, "\n"); +out: + va_end(args); sclp_early_printk(buf); +} - p = add_str(buf, " R:"); - *p++ = hex_asc_lo(psw->per); - p = add_str(p, " T:"); - *p++ = hex_asc_lo(psw->dat); - p = add_str(p, " IO:"); - *p++ = hex_asc_lo(psw->io); - p = add_str(p, " EX:"); - *p++ = hex_asc_lo(psw->ext); - p = add_str(p, " Key:"); - *p++ = hex_asc_lo(psw->key); - p = add_str(p, " M:"); - *p++ = hex_asc_lo(psw->mcheck); - p = add_str(p, " W:"); - *p++ = hex_asc_lo(psw->wait); - p = add_str(p, " P:"); - *p++ = hex_asc_lo(psw->pstate); - p = add_str(p, " AS:"); - *p++ = hex_asc_lo(psw->as); - p = add_str(p, " CC:"); - *p++ = hex_asc_lo(psw->cc); - p = add_str(p, " PM:"); - *p++ = hex_asc_lo(psw->pm); - p = add_str(p, " RI:"); - *p++ = hex_asc_lo(psw->ri); - p = add_str(p, " EA:"); - *p++ = hex_asc_lo(psw->eaba); - add_str(p, "\n"); - sclp_early_printk(buf); +static noinline void print_stacktrace(void) +{ + struct stack_info boot_stack = { STACK_TYPE_TASK, BOOT_STACK_OFFSET, + BOOT_STACK_OFFSET + BOOT_STACK_SIZE }; + unsigned long sp = S390_lowcore.gpregs_save_area[15]; + bool first = true; - for (row = 0; row < 4; row++) { - p = add_str(buf, row == 0 ? "GPRS:" : " "); - for (col = 0; col < 4; col++) { - p = add_str(p, " "); - p = add_val_as_hex(p, S390_lowcore.gpregs_save_area[row * 4 + col]); - } - add_str(p, "\n"); - sclp_early_printk(buf); + decompressor_printk("Call Trace:\n"); + while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { + struct stack_frame *sf = (struct stack_frame *)sp; + + decompressor_printk(first ? 
"(sp:%016lx [<%016lx>] %pS)\n" : + " sp:%016lx [<%016lx>] %pS\n", + sp, sf->gprs[8], (void *)sf->gprs[8]); + if (sf->back_chain <= sp) + break; + sp = sf->back_chain; + first = false; } } + +void print_pgm_check_info(void) +{ + unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area; + struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); + + decompressor_printk("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + decompressor_printk("Kernel command line: %s\n", early_command_line); + decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n", + S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1); + if (kaslr_enabled) + decompressor_printk("Kernel random base: %lx\n", __kaslr_offset); + decompressor_printk("PSW : %016lx %016lx (%pS)\n", + S390_lowcore.psw_save_area.mask, + S390_lowcore.psw_save_area.addr, + (void *)S390_lowcore.psw_save_area.addr); + decompressor_printk( + " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, + psw->eaba); + decompressor_printk("GPRS: %016lx %016lx %016lx %016lx\n", + gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + decompressor_printk(" %016lx %016lx %016lx %016lx\n", + gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + print_stacktrace(); + decompressor_printk("Last Breaking-Event-Address:\n"); + decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.breaking_event_addr, + (void *)S390_lowcore.breaking_event_addr); +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index cc96b04cc0ba..05f8eefa3dcf 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/string.h> #include <linux/elf.h> +#include <asm/boot_data.h> #include <asm/sections.h> +#include <asm/cpu_mf.h> #include <asm/setup.h> #include <asm/kexec.h> #include <asm/sclp.h> @@ -13,6 +15,7 @@ extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata(ident_map_size); /* * Some code and data needs to stay below 2 GB, even when the kernel would be @@ -58,6 +61,14 @@ void error(char *x) disabled_wait(); } +static void setup_lpp(void) +{ + S390_lowcore.current_pid = 0; + S390_lowcore.lpp = LPP_MAGIC; + if (test_facility(40)) + lpp(&S390_lowcore.lpp); +} + #ifdef CONFIG_KERNEL_UNCOMPRESSED unsigned long mem_safe_offset(void) { @@ -119,6 +130,46 @@ static void handle_relocs(unsigned long offset) } /* + * Merge information from several sources into a single ident_map_size value. + * "ident_map_size" represents the upper limit of physical memory we may ever + * reach. It might not be all online memory, but also include standby (offline) + * memory. "ident_map_size" could be lower then actual standby or even online + * memory present, due to limiting factors. We should never go above this limit. + * It is the size of our identity mapping. + * + * Consider the following factors: + * 1. max_physmem_end - end of physical memory online or standby. + * Always <= end of the last online memory block (get_mem_detect_end()). + * 2. 
CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the + * kernel is able to support. + * 3. "mem=" kernel command line option which limits physical memory usage. + * 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as + * crash kernel. + * 5. "hsa" size which is a memory limit when the kernel is executed during + * zfcp/nvme dump. + */ +static void setup_ident_map_size(unsigned long max_physmem_end) +{ + unsigned long hsa_size; + + ident_map_size = max_physmem_end; + if (memory_limit) + ident_map_size = min(ident_map_size, memory_limit); + ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS); + +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) { + kaslr_enabled = 0; + ident_map_size = min(ident_map_size, OLDMEM_SIZE); + } else if (ipl_block_valid && is_ipl_block_dump()) { + kaslr_enabled = 0; + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + ident_map_size = min(ident_map_size, hsa_size); + } +#endif +} + +/* * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. */ static void clear_bss_section(void) @@ -126,12 +177,27 @@ static void clear_bss_section(void) memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size); } +/* + * Set vmalloc area size to an 8th of (potential) physical memory + * size, unless size has been set by kernel command line parameter. + */ +static void setup_vmalloc_size(void) +{ + unsigned long size; + + if (vmalloc_size_set) + return; + size = round_up(ident_map_size / 8, _SEGMENT_SIZE); + vmalloc_size = max(size, vmalloc_size); +} + void startup_kernel(void) { unsigned long random_lma; unsigned long safe_addr; void *img; + setup_lpp(); store_ipl_parmblock(); safe_addr = mem_safe_offset(); safe_addr = read_ipl_report(safe_addr); @@ -140,8 +206,8 @@ void startup_kernel(void) sclp_early_read_info(); setup_boot_command_line(); parse_boot_command_line(); - setup_memory_end(); - detect_memory(); + setup_ident_map_size(detect_memory()); + setup_vmalloc_size(); random_lma = __kaslr_offset = 0; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c index b11e8108773a..faccb33b462c 100644 --- a/arch/s390/boot/string.c +++ b/arch/s390/boot/string.c @@ -3,6 +3,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #undef CONFIG_KASAN +#undef CONFIG_KASAN_GENERIC #include "../lib/string.c" int strncmp(const char *cs, const char *ct, size_t count) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index fe6f529ac82c..c4f6ff98a612 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -61,7 +61,9 @@ CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SECCOMP_CACHE_DEBUG=y CONFIG_LOCK_EVENT_COUNTS=y +# CONFIG_GCC_PLUGINS is not set CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -102,7 +104,7 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y -CONFIG_GUP_BENCHMARK=y +CONFIG_GUP_TEST=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -410,12 +412,12 @@ CONFIG_SCSI_ENCLOSURE=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m CONFIG_SCSI_SRP_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y +CONFIG_ZFCP=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m @@ -444,6 +446,7 @@ CONFIG_DM_MULTIPATH=m 
CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m +CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -542,7 +545,6 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_NULL_TTY=m CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m @@ -574,6 +576,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -655,6 +658,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG is not set CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_SWN_UPCALL=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m @@ -826,6 +830,9 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y +CONFIG_FTRACE_STARTUP_TEST=y +# CONFIG_EVENT_TRACE_STARTUP_TEST is not set +CONFIG_DEBUG_USER_ASCE=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 17d5df2c1eff..51135893cffe 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -58,6 +58,7 @@ CONFIG_S390_UNWIND_SELFTEST=m CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y +# CONFIG_GCC_PLUGINS is not set CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -95,7 +96,6 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y -CONFIG_GUP_BENCHMARK=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -403,12 +403,12 @@ CONFIG_SCSI_ENCLOSURE=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m CONFIG_SCSI_SRP_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y +CONFIG_ZFCP=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m @@ -437,6 +437,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m +CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -536,7 +537,6 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_NULL_TTY=m CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m @@ -568,6 +568,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -645,6 +646,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG is not set CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_SWN_UPCALL=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m @@ -778,6 +780,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y +CONFIG_DEBUG_USER_ASCE=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index a302630341ef..1ef211dae77a 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -22,6 +22,7 @@ CONFIG_CRASH_DUMP=y # CONFIG_VIRTUALIZATION is not set # CONFIG_S390_GUEST is not set # CONFIG_SECCOMP is not set +# CONFIG_GCC_PLUGINS is not set CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y # 
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set @@ -58,6 +59,7 @@ CONFIG_RAW_DRIVER=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set +# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index dd95cdbd22ce..7b947728d57e 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -2,7 +2,7 @@ /* * s390 arch random implementation. * - * Copyright IBM Corp. 2017, 2018 + * Copyright IBM Corp. 2017, 2020 * Author(s): Harald Freudenberger * * The s390_arch_random_generate() function may be called from random.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/static_key.h> #include <linux/workqueue.h> +#include <linux/moduleparam.h> #include <asm/cpacf.h> DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); @@ -99,6 +100,113 @@ static void arch_rng_refill_buffer(struct work_struct *unused) queue_delayed_work(system_long_wq, &arch_rng_work, delay); } +/* + * Here follows the implementation of s390_arch_get_random_long(). + * + * The random longs to be pulled by arch_get_random_long() are + * prepared in a 4K buffer which is filled from the NIST 800-90 + * compliant s390 drbg. By default the random long buffer is refilled + * 256 times before the drbg itself needs a reseed. The reseed of the + * drbg is done with 32 bytes fetched from the high quality (but slow) + * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256 + * bits of entropy are spread over 256 * 4KB = 1MB, serving 131072 + * arch_get_random_long() invocations before a reseed is needed. + * + * How often the 4K random long buffer is refilled with the drbg + * before the drbg is reseeded can be adjusted. There is a module + * parameter 's390_arch_rnd_long_drbg_reseed' accessible via + * /sys/module/arch_random/parameters/rndlong_drbg_reseed + * or as kernel command line parameter + * arch_random.rndlong_drbg_reseed=<value> + * This parameter tells how often the drbg fills the 4K buffer before + * it is re-seeded by fresh entropy from the trng. + * A value of 16 results in reseeding the drbg every 16 * 4 KB = 64 + * KB with 32 bytes of fresh entropy pulled from the trng. So a value + * of 16 would result in 256 bits of entropy per 64 KB. + * A value of 256 results in 1MB of drbg output before a reseed of the + * drbg is done. So this would spread the 256 bits of entropy among 1MB. + * Setting this parameter to 0 forces the reseed to take place every + * time the 4K buffer is depleted, so the entropy rises to 256 bits + * per 4K, or 0.5 bits of entropy per arch_get_random_long(). Setting + * this parameter to a negative value disables all this effort: + * arch_get_random_long() returns false, indicating that the + * arch_get_random_long() feature is disabled altogether. 
+ */ + +static unsigned long rndlong_buf[512]; +static DEFINE_SPINLOCK(rndlong_lock); +static int rndlong_buf_index; + +static int rndlong_drbg_reseed = 256; +module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600); +MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed"); + +static inline void refill_rndlong_buf(void) +{ + static u8 prng_ws[240]; + static int drbg_counter; + + if (--drbg_counter < 0) { + /* need to re-seed the drbg */ + u8 seed[32]; + + /* fetch seed from trng */ + cpacf_trng(NULL, 0, seed, sizeof(seed)); + /* seed drbg */ + memset(prng_ws, 0, sizeof(prng_ws)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_ws, NULL, 0, seed, sizeof(seed)); + /* re-init counter for drbg */ + drbg_counter = rndlong_drbg_reseed; + } + + /* fill the arch_get_random_long buffer from drbg */ + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws, + (u8 *) rndlong_buf, sizeof(rndlong_buf), + NULL, 0); +} + +bool s390_arch_get_random_long(unsigned long *v) +{ + bool rc = false; + unsigned long flags; + + /* arch_get_random_long() disabled ? */ + if (rndlong_drbg_reseed < 0) + return false; + + /* try to lock the random long lock */ + if (!spin_trylock_irqsave(&rndlong_lock, flags)) + return false; + + if (--rndlong_buf_index >= 0) { + /* deliver next long value from the buffer */ + *v = rndlong_buf[rndlong_buf_index]; + rc = true; + goto out; + } + + /* buffer is depleted and needs refill */ + if (in_interrupt()) { + /* delay refill in interrupt context to next caller */ + rndlong_buf_index = 0; + goto out; + } + + /* refill random long buffer */ + refill_rndlong_buf(); + rndlong_buf_index = ARRAY_SIZE(rndlong_buf); + + /* and provide one random long */ + *v = rndlong_buf[--rndlong_buf_index]; + rc = true; + +out: + spin_unlock_irqrestore(&rndlong_lock, flags); + return rc; +} +EXPORT_SYMBOL(s390_arch_get_random_long); + static int __init s390_arch_random_init(void) { /* all the needed PRNO subfunctions available ? 
*/ diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 5057773f82e9..b2f219ec379c 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -674,20 +674,6 @@ static const struct file_operations prng_tdes_fops = { .llseek = noop_llseek, }; -static struct miscdevice prng_sha512_dev = { - .name = "prandom", - .minor = MISC_DYNAMIC_MINOR, - .mode = 0644, - .fops = &prng_sha512_fops, -}; -static struct miscdevice prng_tdes_dev = { - .name = "prandom", - .minor = MISC_DYNAMIC_MINOR, - .mode = 0644, - .fops = &prng_tdes_fops, -}; - - /* chunksize attribute (ro) */ static ssize_t prng_chunksize_show(struct device *dev, struct device_attribute *attr, @@ -801,18 +787,30 @@ static struct attribute *prng_sha512_dev_attrs[] = { &dev_attr_strength.attr, NULL }; +ATTRIBUTE_GROUPS(prng_sha512_dev); + static struct attribute *prng_tdes_dev_attrs[] = { &dev_attr_chunksize.attr, &dev_attr_byte_counter.attr, &dev_attr_mode.attr, NULL }; +ATTRIBUTE_GROUPS(prng_tdes_dev); -static struct attribute_group prng_sha512_dev_attr_group = { - .attrs = prng_sha512_dev_attrs +static struct miscdevice prng_sha512_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .mode = 0644, + .fops = &prng_sha512_fops, + .groups = prng_sha512_dev_groups, }; -static struct attribute_group prng_tdes_dev_attr_group = { - .attrs = prng_tdes_dev_attrs + +static struct miscdevice prng_tdes_dev = { + .name = "prandom", + .minor = MISC_DYNAMIC_MINOR, + .mode = 0644, + .fops = &prng_tdes_fops, + .groups = prng_tdes_dev_groups, }; @@ -867,13 +865,6 @@ static int __init prng_init(void) prng_sha512_deinstantiate(); goto out; } - ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj, - &prng_sha512_dev_attr_group); - if (ret) { - misc_deregister(&prng_sha512_dev); - prng_sha512_deinstantiate(); - goto out; - } } else { @@ -898,14 +889,6 @@ static int __init prng_init(void) prng_tdes_deinstantiate(); goto out; } - ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj, - &prng_tdes_dev_attr_group); - if (ret) { - misc_deregister(&prng_tdes_dev); - prng_tdes_deinstantiate(); - goto out; - } - } out: @@ -916,13 +899,9 @@ out: static void __exit prng_exit(void) { if (prng_mode == PRNG_MODE_SHA512) { - sysfs_remove_group(&prng_sha512_dev.this_device->kobj, - &prng_sha512_dev_attr_group); misc_deregister(&prng_sha512_dev); prng_sha512_deinstantiate(); } else { - sysfs_remove_group(&prng_tdes_dev.this_device->kobj, - &prng_tdes_dev_attr_group); misc_deregister(&prng_tdes_dev); prng_tdes_deinstantiate(); } diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index ada2f98c27b7..65ea12fc87a1 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -11,7 +11,8 @@ #define _CRYPTO_ARCH_S390_SHA_H #include <linux/crypto.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> #include <crypto/sha3.h> /* must be big enough for the largest SHA variant */ diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 698b1e6d3c14..a3fabf310a38 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -22,7 +22,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/cpacf.h> #include "sha.h" diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index b52c87e44939..24983f175676 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -12,7 +12,7 @@ #include <linux/init.h> #include <linux/module.h> 
#include <linux/cpufeature.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/cpacf.h> #include "sha.h" diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c index 460cbbbaa44a..30ac49b635bf 100644 --- a/arch/s390/crypto/sha3_256_s390.c +++ b/arch/s390/crypto/sha3_256_s390.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> #include <crypto/sha3.h> #include <asm/cpacf.h> diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c index 72cf460a53e5..e70d50f7620f 100644 --- a/arch/s390/crypto/sha3_512_s390.c +++ b/arch/s390/crypto/sha3_512_s390.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> #include <crypto/sha3.h> #include <asm/cpacf.h> diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index ad29db085a18..29a6bd404c59 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -8,7 +8,7 @@ * Author(s): Jan Glauber (jang@de.ibm.com) */ #include <crypto/internal/hash.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 319efa0e6d02..1a18d7b82f86 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -7,5 +7,4 @@ generated-y += unistd_nr.h generic-y += asm-offsets.h generic-y += export.h generic-y += kvm_types.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index de61ce562052..5dc712fde3c7 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -2,7 +2,7 @@ /* * Kernel interface for the s390 arch_random_* functions * - * Copyright IBM Corp. 2017 + * Copyright IBM Corp. 2017, 2020 * * Author: Harald Freudenberger <freude@de.ibm.com> * @@ -19,10 +19,13 @@ DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; +bool s390_arch_get_random_long(unsigned long *v); bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); static inline bool __must_check arch_get_random_long(unsigned long *v) { + if (static_branch_likely(&s390_arch_random_available)) + return s390_arch_get_random_long(v); return false; } diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index c0be5fe1ddba..778247bb1d61 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -104,6 +104,8 @@ struct ccw_device { was successfully verified. */ #define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had to be established again. */ +#define PE_PATH_FCES_EVENT 0x8 /* The FCES Status of a path has + * changed. */ /* * Possible CIO actions triggered by the unit check handler. 
@@ -115,7 +117,7 @@ enum uc_todo { }; /** - * struct ccw driver - device driver for channel attached devices + * struct ccw_driver - device driver for channel attached devices * @ids: ids supported by this driver * @probe: function called on probe * @remove: function called on remove @@ -124,11 +126,6 @@ enum uc_todo { * @notify: notify driver of device state changes * @path_event: notify driver of channel path events * @shutdown: called at device shutdown - * @prepare: prepare for pm state transition - * @complete: undo work done in @prepare - * @freeze: callback for freezing during hibernation snapshotting - * @thaw: undo work done in @freeze - * @restore: callback for restoring after hibernation * @uc_handler: callback for unit check handler * @driver: embedded device driver structure * @int_class: interruption class to use for accounting interrupts @@ -142,11 +139,6 @@ struct ccw_driver { int (*notify) (struct ccw_device *, int); void (*path_event) (struct ccw_device *, int *); void (*shutdown) (struct ccw_device *); - int (*prepare) (struct ccw_device *); - void (*complete) (struct ccw_device *); - int (*freeze)(struct ccw_device *); - int (*thaw) (struct ccw_device *); - int (*restore)(struct ccw_device *); enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); struct device_driver driver; enum interruption_class int_class; diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 5c58756d6476..ac02df906cae 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -329,7 +329,7 @@ struct ccw_dev_id { }; /** - * ccw_device_id_is_equal() - compare two ccw_dev_ids + * ccw_dev_id_is_equal() - compare two ccw_dev_ids * @dev_id1: a ccw_dev_id * @dev_id2: another ccw_dev_id * Returns: @@ -373,5 +373,6 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); int chsc_stzi(void *page, void *result, size_t size); int chsc_sgib(u32 origin); +int chsc_scud(u16 cu, u64 *esm, u8 *esm_valid); #endif diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h index 898323fd93d2..21a8fe18fe66 100644 --- a/arch/s390/include/asm/delay.h +++ b/arch/s390/include/asm/delay.h @@ -13,13 +13,12 @@ #ifndef _S390_DELAY_H #define _S390_DELAY_H -void __ndelay(unsigned long long nsecs); -void __udelay(unsigned long long usecs); -void udelay_simple(unsigned long long usecs); +void __ndelay(unsigned long nsecs); +void __udelay(unsigned long usecs); void __delay(unsigned long loops); -#define ndelay(n) __ndelay((unsigned long long) (n)) -#define udelay(n) __udelay((unsigned long long) (n)) -#define mdelay(n) __udelay((unsigned long long) (n) * 1000) +#define ndelay(n) __ndelay((unsigned long)(n)) +#define udelay(n) __udelay((unsigned long)(n)) +#define mdelay(n) __udelay((unsigned long)(n) * 1000) #endif /* defined(_S390_DELAY_H) */ diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 68d362f8d6c1..695c61989f97 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -2,16 +2,9 @@ #ifndef _ASM_S390_FTRACE_H #define _ASM_S390_FTRACE_H +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ARCH_SUPPORTS_FTRACE_OPS 1 - -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) #define MCOUNT_INSN_SIZE 6 -#else -#define MCOUNT_INSN_SIZE 24 -#define MCOUNT_RETURN_FIXUP 18 -#endif - -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #ifndef __ASSEMBLY__ @@ -22,7 +15,6 @@ #define ftrace_return_address(n) __builtin_return_address(n) #endif 
-void _mcount(void); void ftrace_caller(void); extern char ftrace_graph_caller_end; @@ -30,12 +22,20 @@ extern unsigned long ftrace_plt; struct dyn_arch_ftrace { }; -#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_ADDR 0 #define FTRACE_ADDR ((unsigned long)ftrace_caller) #define KPROBE_ON_FTRACE_NOP 0 #define KPROBE_ON_FTRACE_CALL 1 +struct module; +struct dyn_ftrace; +/* + * Either -mhotpatch or -mnop-mcount is used - no explicit init is required + */ +static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { return 0; } +#define ftrace_init_nop ftrace_init_nop + static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; @@ -49,28 +49,17 @@ struct ftrace_insn { static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) /* brcl 0,0 */ insn->opc = 0xc004; insn->disp = 0; -#else - /* jg .+24 */ - insn->opc = 0xc0f4; - insn->disp = MCOUNT_INSN_SIZE / 2; -#endif #endif } static inline int is_ftrace_nop(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) if (insn->disp == 0) return 1; -#else - if (insn->disp == MCOUNT_INSN_SIZE / 2) - return 1; -#endif #endif return 0; } diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 26f9144562c9..c22debfcebf1 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -26,9 +26,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { int oldval = 0, newval, ret; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); switch (op) { case FUTEX_OP_SET: __futex_atomic_op("lr %2,%5\n", @@ -53,7 +51,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, default: ret = -ENOSYS; } - disable_sacf_uaccess(old_fs); if (!ret) *oval = oldval; @@ -64,10 +61,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - mm_segment_t old_fs; int ret; - old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" "0: cs %1,%4,0(%5)\n" @@ -77,7 +72,6 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory"); - disable_sacf_uaccess(old_fs); *uval = oldval; return ret; } diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index e9bf486de136..76f351bd6645 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -2,28 +2,51 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H +#include <asm/pgtable.h> + #ifdef CONFIG_KASAN #define KASAN_SHADOW_SCALE_SHIFT 3 -#ifdef CONFIG_KASAN_S390_4_LEVEL_PAGING #define KASAN_SHADOW_SIZE \ (_AC(1, UL) << (_REGION1_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) -#else -#define KASAN_SHADOW_SIZE \ - (_AC(1, UL) << (_REGION2_SHIFT - KASAN_SHADOW_SCALE_SHIFT)) -#endif #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) #define KASAN_SHADOW_START KASAN_SHADOW_OFFSET #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) extern void kasan_early_init(void); -extern void kasan_copy_shadow(pgd_t *dst); +extern void kasan_copy_shadow_mapping(void); extern void kasan_free_early_identity(void); extern unsigned long kasan_vmax; + +/* + * Estimate kasan memory requirements, which it will 
reserve + * at the very end of available physical memory. To estimate + * that, we take into account that kasan would require + * 1/8 of available physical memory (for shadow memory) + + * creating page tables for the whole memory + shadow memory + * region (1 + 1/8). To keep the page table estimates simple, take + * double the combined pte size. + * + * The physmem parameter has to be adjusted beforehand if not the entire + * physical memory is to be used (e.g. due to the effect of the "mem=" option). + */ +static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) +{ + unsigned long kasan_needs; + unsigned long pages; + /* for shadow memory */ + kasan_needs = round_up(physmem / 8, PAGE_SIZE); + /* for paging structures */ + pages = DIV_ROUND_UP(physmem + kasan_needs, PAGE_SIZE); + kasan_needs += DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; + + return kasan_needs; +} #else static inline void kasan_early_init(void) { } -static inline void kasan_copy_shadow(pgd_t *dst) { } +static inline void kasan_copy_shadow_mapping(void) { } static inline void kasan_free_early_identity(void) { } +static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; } #endif #endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 463c24e26000..74f9a036bab2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -459,6 +459,7 @@ struct kvm_vcpu_stat { u64 diagnose_308; u64 diagnose_500; u64 diagnose_other; + u64 pfault_sync; }; #define PGM_OPERATION 0x01 diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 818612b784cd..d578a8c76676 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -11,10 +11,13 @@ #ifndef ASM_LIVEPATCH_H #define ASM_LIVEPATCH_H +#include <linux/ftrace.h> #include <asm/ptrace.h> -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { + struct pt_regs *regs = ftrace_get_regs(fregs); + regs->psw.addr = ip; } diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 612ed3c6d581..69ce9191eaf1 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -116,7 +116,7 @@ struct lowcore { /* Address space pointer. 
*/ __u64 kernel_asce; /* 0x0380 */ __u64 user_asce; /* 0x0388 */ - __u64 vdso_asce; /* 0x0390 */ + __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */ /* * The lpp and current_pid fields form a @@ -134,7 +134,7 @@ struct lowcore { __u32 spinlock_index; /* 0x03b0 */ __u32 fpu_flags; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b8 */ - __u64 vdso_per_cpu_data; /* 0x03c0 */ + __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */ __u64 machine_flags; /* 0x03c8 */ __u64 gmap; /* 0x03d0 */ __u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index c9f3d8a52756..e7cffc7b5c2f 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -15,6 +15,7 @@ #include <asm/ctl_reg.h> #include <asm-generic/mm_hooks.h> +#define init_new_context init_new_context static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -69,41 +70,18 @@ static inline int init_new_context(struct task_struct *tsk, return 0; } -#define destroy_context(mm) do { } while (0) - -static inline void set_user_asce(struct mm_struct *mm) -{ - S390_lowcore.user_asce = mm->context.asce; - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); -} - -static inline void clear_user_asce(void) -{ - S390_lowcore.user_asce = S390_lowcore.kernel_asce; - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); -} - -mm_segment_t enable_sacf_uaccess(void); -void disable_sacf_uaccess(mm_segment_t old_fs); - static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { int cpu = smp_processor_id(); - S390_lowcore.user_asce = next->context.asce; + if (next == &init_mm) + S390_lowcore.user_asce = s390_invalid_asce; + else + S390_lowcore.user_asce = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - /* Clear previous user-ASCE from CR1 and CR7 */ - if (!test_cpu_flag(CIF_ASCE_PRIMARY)) { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - if (test_cpu_flag(CIF_ASCE_SECONDARY)) { - __ctl_load(S390_lowcore.vdso_asce, 7, 7); - clear_cpu_flag(CIF_ASCE_SECONDARY); - } + /* Clear previous user-ASCE from CR7 */ + __ctl_load(s390_invalid_asce, 7, 7); if (prev != next) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } @@ -122,18 +100,18 @@ static inline void finish_arch_post_lock_switch(void) __tlb_flush_mm_lazy(mm); preempt_enable(); } - set_fs(current->thread.mm_segment); + __ctl_load(S390_lowcore.user_asce, 7, 7); } -#define enter_lazy_tlb(mm,tsk) do { } while (0) -#define deactivate_mm(tsk,mm) do { } while (0) - +#define activate_mm activate_mm static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); - set_user_asce(next); + __ctl_load(S390_lowcore.user_asce, 7, 7); } +#include <asm-generic/mmu_context.h> + #endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b5dbae78969b..794746a32806 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -23,6 +23,7 @@ extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +extern unsigned long s390_invalid_asce; enum { PG_DIRECT_MAP_4K = 0, @@ -79,15 +80,15 @@ extern unsigned long zero_page_mask; /* * The vmalloc and module area will always be on the topmost area of the - * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. 
- * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where - * modules will reside. That makes sure that inter module branches always - * happen without trampolines and in addition the placement within a 2GB frame - * is branch prediction unit friendly. + * kernel mapping. 512GB are reserved for vmalloc by default. + * At the top of the vmalloc area a 2GB area is reserved where modules + * will reside. That makes sure that inter module branches always + * happen without trampolines and in addition the placement within a + * 2GB frame is branch prediction unit friendly. */ extern unsigned long VMALLOC_START; extern unsigned long VMALLOC_END; -#define VMALLOC_DEFAULT_SIZE ((128UL << 30) - MODULES_LEN) +#define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN) extern struct page *vmemmap; extern unsigned long vmemmap_size; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 962da04234af..2058a435add4 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,20 +14,14 @@ #include <linux/bits.h> -#define CIF_ASCE_PRIMARY 0 /* primary asce needs fixup / uaccess */ -#define CIF_ASCE_SECONDARY 1 /* secondary asce needs fixup / uaccess */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_FPU 3 /* restore FPU registers */ -#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */ #define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ -#define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY) -#define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) #define _CIF_FPU BIT(CIF_FPU) -#define _CIF_IGNORE_IRQ BIT(CIF_IGNORE_IRQ) #define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT) #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) #define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU) @@ -102,8 +96,6 @@ extern void __bpon(void); #define HAVE_ARCH_PICK_MMAP_LAYOUT -typedef unsigned int mm_segment_t; - /* * Thread structure */ @@ -116,7 +108,6 @@ struct thread_struct { unsigned long hardirq_timer; /* task cputime in hardirq context */ unsigned long softirq_timer; /* task cputime in softirq context */ unsigned long sys_call_table; /* system call table address */ - mm_segment_t mm_segment; unsigned long gmap_addr; /* address of last gmap fault. */ unsigned int gmap_write_flag; /* gmap fault write indication */ unsigned int gmap_int_code; /* int code of last gmap fault */ @@ -300,11 +291,6 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) } /* - * Function to stop a processor until the next interrupt occurs - */ -void enabled_wait(void); - -/* * Function to drop a processor into disabled wait state */ static __always_inline void __noreturn disabled_wait(void) @@ -318,14 +304,10 @@ static __always_inline void __noreturn disabled_wait(void) } /* - * Basic Machine Check/Program Check Handler. + * Basic Program Check Handler. 
*/ - extern void s390_base_pgm_handler(void); -extern void s390_base_ext_handler(void); - extern void (*s390_base_pgm_handler_fn)(void); -extern void (*s390_base_ext_handler_fn)(void); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 16b3e4396312..73ca7f7cac33 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -87,6 +87,7 @@ struct pt_regs unsigned int int_parm; unsigned long int_parm_long; unsigned long flags; + unsigned long cr1; }; /* diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index a7bdd128d85b..5763769a39b6 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -12,7 +12,12 @@ #include <asm/cpu.h> #define SCLP_CHP_INFO_MASK_SIZE 32 -#define SCLP_MAX_CORES 256 +#define EARLY_SCCB_SIZE PAGE_SIZE +#define SCLP_MAX_CORES 512 +/* 144 + 16 * SCLP_MAX_CORES + 2 * (SCLP_MAX_CORES - 1) */ +#define EXT_SCCB_READ_SCP (3 * PAGE_SIZE) +/* 24 + 16 * SCLP_MAX_CORES */ +#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) struct sclp_chp_info { u8 recognized[SCLP_CHP_INFO_MASK_SIZE]; diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h index 795bbe0d7ca6..71d46f0ba97b 100644 --- a/arch/s390/include/asm/seccomp.h +++ b/arch/s390/include/asm/seccomp.h @@ -16,4 +16,13 @@ #include <asm-generic/seccomp.h> +#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_S390X +#define SECCOMP_ARCH_NATIVE_NR NR_syscalls +#define SECCOMP_ARCH_NATIVE_NAME "s390x" +#ifdef CONFIG_COMPAT +# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_S390 +# define SECCOMP_ARCH_COMPAT_NR NR_syscalls +# define SECCOMP_ARCH_COMPAT_NAME "s390" +#endif + #endif /* _ASM_S390_SECCOMP_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index bdb242a1544e..3e388fa208d4 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -16,8 +16,6 @@ #define EARLY_SCCB_OFFSET 0x11000 #define HEAD_END 0x12000 -#define EARLY_SCCB_SIZE PAGE_SIZE - /* * Machine features detected in early.c */ @@ -88,10 +86,8 @@ extern unsigned int zlib_dfltcc_support; #define ZLIB_DFLTCC_FULL_DEBUG 4 extern int noexec_disabled; -extern int memory_end_set; -extern unsigned long memory_end; +extern unsigned long ident_map_size; extern unsigned long vmalloc_size; -extern unsigned long max_physmem_end; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 13a04fcf7762..3c5b1f909b6d 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -18,7 +18,7 @@ #else #define THREAD_SIZE_ORDER 2 #endif -#define BOOT_STACK_ORDER 2 +#define BOOT_STACK_SIZE (PAGE_SIZE << 2) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ @@ -65,6 +65,7 @@ void arch_setup_new_exec(void); #define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ #define TIF_PGSTE 6 /* New mm's will use 4K page tables */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_ISOLATE_BP 8 /* Run process with isolated BP */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ @@ -82,6 +83,7 @@ void arch_setup_new_exec(void); #define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ #define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) +#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) 
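/*
 * Aside (editorial note, not part of the patch): TIF_NOTIFY_SIGNAL lets
 * task_work wake a task through the signal machinery without queueing a
 * real signal. The entry.S exit paths further below therefore test
 * _TIF_SIGPENDING and _TIF_NOTIFY_SIGNAL together, while do_signal()
 * calls get_signal() only when TIF_SIGPENDING is actually set.
 */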
#define _TIF_SIGPENDING BIT(TIF_SIGPENDING) #define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) #define _TIF_UPROBE BIT(TIF_UPROBE) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 289aaff4d365..c8e244ecdfde 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -49,6 +49,13 @@ static inline void set_clock_comparator(__u64 time) asm volatile("sckc %0" : : "Q" (time)); } +static inline void set_tod_programmable_field(u16 val) +{ + register unsigned long reg0 asm("0") = val; + + asm volatile("sckpf" : : "d" (reg0)); +} + void clock_comparator_work(void); void __init time_early_init(void); diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index c868e7ee49b3..c6707885e7c2 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -18,23 +18,7 @@ #include <asm/extable.h> #include <asm/facility.h> -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define KERNEL_DS (0) -#define KERNEL_DS_SACF (1) -#define USER_DS (2) -#define USER_DS_SACF (3) - -#define get_fs() (current->thread.mm_segment) -#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS) - -void set_fs(mm_segment_t fs); +void debug_user_asce(void); static inline int __range_ok(unsigned long addr, unsigned long size) { @@ -88,7 +72,7 @@ int __get_user_bad(void) __attribute__((noreturn)); static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { - unsigned long spec = 0x010000UL; + unsigned long spec = 0x810000UL; int rc; switch (size) { @@ -121,7 +105,7 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { - unsigned long spec = 0x01UL; + unsigned long spec = 0x81UL; int rc; switch (size) { diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 29b44a930e71..f65590889054 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -12,32 +12,9 @@ #ifndef __ASSEMBLY__ -/* - * Note about the vdso_data and vdso_per_cpu_data structures: - * - * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the - * structure is supposed to be known only to the function in the vdso - * itself and may change without notice. - */ - -struct vdso_per_cpu_data { - /* - * Note: node_id and cpu_nr must be at adjacent memory locations. - * VDSO userspace must read both values with a single instruction. 
- */ - union { - __u64 getcpu_val; - struct { - __u32 node_id; - __u32 cpu_nr; - }; - }; -}; - extern struct vdso_data *vdso_data; -int vdso_alloc_per_cpu(struct lowcore *lowcore); -void vdso_free_per_cpu(struct lowcore *lowcore); +void vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h index 3622d4ebc73a..fac6a67988eb 100644 --- a/arch/s390/include/asm/vtime.h +++ b/arch/s390/include/asm/vtime.h @@ -2,7 +2,6 @@ #ifndef _S390_VTIME_H #define _S390_VTIME_H -#define __ARCH_HAS_VTIME_ACCOUNT #define __ARCH_HAS_VTIME_TASK_SWITCH #endif /* _S390_VTIME_H */ diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h index 9a14a611ed82..0189f326aac5 100644 --- a/arch/s390/include/uapi/asm/signal.h +++ b/arch/s390/include/uapi/asm/signal.h @@ -65,30 +65,6 @@ typedef unsigned long sigset_t; #define SIGRTMIN 32 #define SIGRTMAX _NSIG -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. - */ -#define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 -#define SA_SIGINFO 0x00000004 -#define SA_ONSTACK 0x08000000 -#define SA_RESTART 0x10000000 -#define SA_NODEFER 0x40000000 -#define SA_RESETHAND 0x80000000 - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND - #define SA_RESTORER 0x04000000 #define MINSIGSTKSZ 2048 diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 483051e10db3..79724d861dc9 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -13,7 +13,6 @@ #include <linux/purgatory.h> #include <linux/pgtable.h> #include <asm/idle.h> -#include <asm/vdso.h> #include <asm/gmap.h> #include <asm/nmi.h> #include <asm/stacktrace.h> @@ -48,6 +47,7 @@ int main(void) OFFSET(__PT_INT_PARM, pt_regs, int_parm); OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long); OFFSET(__PT_FLAGS, pt_regs, flags); + OFFSET(__PT_CR1, pt_regs, cr1); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); /* stack_frame offsets */ @@ -59,8 +59,6 @@ int main(void) OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]); OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]); BLANK(); - OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); - BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); @@ -138,12 +136,11 @@ int main(void) OFFSET(__LC_RESTART_FN, lowcore, restart_fn); OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce); OFFSET(__LC_USER_ASCE, lowcore, user_asce); - OFFSET(__LC_VDSO_ASCE, lowcore, vdso_asce); OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); - OFFSET(__LC_VDSO_PER_CPU, lowcore, vdso_per_cpu_data); OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); diff 
--git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index b79e0fd571f8..d255c69c1779 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -11,32 +11,10 @@ #include <asm/asm-offsets.h> #include <asm/nospec-insn.h> #include <asm/ptrace.h> -#include <asm/sigp.h> GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 -ENTRY(s390_base_ext_handler) - stmg %r0,%r15,__LC_SAVE_AREA_ASYNC - basr %r13,0 -0: aghi %r15,-STACK_FRAME_OVERHEAD - larl %r1,s390_base_ext_handler_fn - lg %r9,0(%r1) - ltgr %r9,%r9 - jz 1f - BASR_EX %r14,%r9 -1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC - ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit - lpswe __LC_EXT_OLD_PSW -ENDPROC(s390_base_ext_handler) - - .section .bss - .align 8 - .globl s390_base_ext_handler_fn -s390_base_ext_handler_fn: - .quad 0 - .previous - ENTRY(s390_base_pgm_handler) stmg %r0,%r15,__LC_SAVE_AREA_SYNC basr %r13,0 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 705844f73934..cc89763a4d3c 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -169,12 +169,10 @@ static noinline __init void setup_lowcore_early(void) { psw_t psw; + psw.addr = (unsigned long)s390_base_pgm_handler; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; if (IS_ENABLED(CONFIG_KASAN)) psw.mask |= PSW_MASK_DAT; - psw.addr = (unsigned long) s390_base_ext_handler; - S390_lowcore.external_new_psw = psw; - psw.addr = (unsigned long) s390_base_pgm_handler; S390_lowcore.program_new_psw = psw; s390_base_pgm_handler_fn = early_pgm_check_handler; S390_lowcore.preempt_count = INIT_PREEMPT_COUNT; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 92beb1444644..f1ba197b10c0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -52,10 +52,11 @@ STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING) + _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING | \ + _TIF_NOTIFY_SIGNAL) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_CIF_WORK = (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU) +_CIF_WORK = (_CIF_FPU) _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) _LPP_OFFSET = __LC_LPP @@ -90,6 +91,12 @@ _LPP_OFFSET = __LC_LPP #endif .endm + .macro DEBUG_USER_ASCE +#ifdef CONFIG_DEBUG_USER_ASCE + brasl %r14,debug_user_asce +#endif + .endm + .macro CHECK_VMAP_STACK savearea,oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 @@ -110,9 +117,9 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro SWITCH_ASYNC savearea,timer + .macro SWITCH_ASYNC savearea,timer,clock tmhh %r8,0x0001 # interrupting from user ? 
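# Aside (editorial note, not part of the patch): SWITCH_ASYNC gains a
# third argument, \clock, so that each caller names the lowcore clock
# snapshot used for the idle-exit bookkeeping below -- __LC_INT_CLOCK
# for I/O and external interrupts, __LC_MCCK_CLOCK for machine checks --
# instead of hard-coding __LC_INT_CLOCK.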
- jnz 2f + jnz 4f #if IS_ENABLED(CONFIG_KVM) lgr %r14,%r9 larl %r13,.Lsie_gmap @@ -125,10 +132,26 @@ _LPP_OFFSET = __LC_LPP #endif 0: larl %r13,.Lpsw_idle_exit cgr %r13,%r9 - jne 1f + jne 3f - mvc __CLOCK_IDLE_EXIT(8,%r2), __LC_INT_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2), __LC_ASYNC_ENTER_TIMER + larl %r1,smp_cpu_mtid + llgf %r1,0(%r1) + ltgr %r1,%r1 + jz 2f # no SMT, skip mt_cycles calculation + .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) + larl %r3,mt_cycles + ag %r3,__LC_PERCPU_OFFSET + la %r4,__SF_EMPTY+16(%r15) +1: lg %r0,0(%r3) + slg %r0,0(%r4) + alg %r0,64(%r4) + stg %r0,0(%r3) + la %r3,8(%r3) + la %r4,8(%r4) + brct %r1,1b + +2: mvc __CLOCK_IDLE_EXIT(8,%r2), \clock + mvc __TIMER_IDLE_EXIT(8,%r2), \timer # account system time going idle ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT @@ -146,17 +169,17 @@ _LPP_OFFSET = __LC_LPP mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) nihh %r8,0xfcfd # clear wait state and irq bits -1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? +3: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT - jnz 3f + jnz 5f CHECK_STACK \savearea aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 4f -2: UPDATE_VTIME %r14,%r15,\timer + j 6f +4: UPDATE_VTIME %r14,%r15,\timer BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -3: lg %r15,__LC_ASYNC_STACK # load async stack -4: la %r11,STACK_FRAME_OVERHEAD(%r15) +5: lg %r15,__LC_ASYNC_STACK # load async stack +6: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME w1,w2,enter_timer @@ -327,7 +350,7 @@ ENTRY(sie64a) BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce .Lsie_done: # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There @@ -380,6 +403,7 @@ ENTRY(system_call) lg %r12,__LC_CURRENT lghi %r14,_PIF_SYSCALL .Lsysc_per: + lctlg %c1,%c1,__LC_KERNEL_ASCE lghi %r13,__TASK_thread lg %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs @@ -390,6 +414,7 @@ ENTRY(system_call) mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC stg %r14,__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) ENABLE_INTS .Lsysc_do_svc: # clear user controlled register to prevent speculative use @@ -406,7 +431,6 @@ ENTRY(system_call) jnl .Lsysc_nr_ok slag %r8,%r1,3 .Lsysc_nr_ok: - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stg %r2,__PT_ORIG_GPR2(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r9,0(%r8,%r10) # get system call add. 
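# Aside (editorial sketch, inferred from the lctlg sites in this patch):
# with CIF_ASCE_PRIMARY/CIF_ASCE_SECONDARY gone, the address-space rules
# become static:
#   - any entry from user space loads CR1 with __LC_KERNEL_ASCE
#   - returning to user space reloads CR1 from __LC_USER_ASCE
#   - CR7 carries the user ASCE for uaccess, managed by switch_mm(),
#     finish_arch_post_lock_switch() and activate_mm()
# CONFIG_DEBUG_USER_ASCE additionally calls debug_user_asce() right
# before the CR1 reload to catch paths that would return to user space
# with the wrong ASCE still active.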
@@ -427,11 +451,9 @@ ENTRY(system_call) jnz .Lsysc_work TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lsysc_work # check for work - TSTMSK __LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU) - jnz .Lsysc_work + DEBUG_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP -.Lsysc_restore: - DISABLE_INTS TSTMSK __LC_CPU_FLAGS, _CIF_FPU jz .Lsysc_skip_fpu brasl %r14,load_fpu_regs @@ -465,12 +487,10 @@ ENTRY(system_call) #endif TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART jo .Lsysc_syscall_restart - TSTMSK __TI_flags(%r12),_TIF_SIGPENDING - jo .Lsysc_sigpending + TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) + jnz .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lsysc_notify_resume - TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) - jnz .Lsysc_asce j .Lsysc_return # @@ -481,26 +501,6 @@ ENTRY(system_call) jg schedule # -# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce -# -.Lsysc_asce: - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY - lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY - jz .Lsysc_return -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES - tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? - jnz .Lsysc_set_fs_fixup - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - j .Lsysc_return -.Lsysc_set_fs_fixup: -#endif - larl %r14,.Lsysc_return - jg set_fs_fixup - - -# # _TIF_SIGPENDING is set, call do_signal # .Lsysc_sigpending: @@ -636,8 +636,11 @@ ENTRY(pgm_check_handler) 0: lg %r12,__LC_CURRENT lghi %r11,0 lmg %r8,%r9,__LC_PGM_OLD_PSW - tmhh %r8,0x0001 # test problem state bit - jnz 3f # -> fault in user space + tmhh %r8,0x0001 # coming from user space? + jno .Lpgm_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE + j 3f +.Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a lgr %r14,%r9 @@ -648,7 +651,7 @@ ENTRY(pgm_check_handler) jhe 1f lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit lghi %r11,_PIF_GUEST_FAULT #endif @@ -696,8 +699,8 @@ ENTRY(pgm_check_handler) mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -6: RESTORE_SM_CLEAR_PER - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +6: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + RESTORE_SM_CLEAR_PER larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) nill %r10,0x007f @@ -709,17 +712,27 @@ ENTRY(pgm_check_handler) .Lpgm_return: LOCKDEP_SYS_EXIT tm __PT_PSW+1(%r11),0x01 # returning to user ? 
- jno .Lsysc_restore + jno .Lpgm_restore TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL jo .Lsysc_do_syscall j .Lsysc_tif +.Lpgm_restore: + DISABLE_INTS + TSTMSK __LC_CPU_FLAGS, _CIF_FPU + jz .Lpgm_skip_fpu + brasl %r14,load_fpu_regs +.Lpgm_skip_fpu: + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + stpt __LC_EXIT_TIMER + lmg %r0,%r15,__PT_R0(%r11) + b __LC_RETURN_LPSWE # # PER event in supervisor state, must be kprobes # .Lpgm_kprobe: - RESTORE_SM_CLEAR_PER xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + RESTORE_SM_CLEAR_PER lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_per_trap j .Lpgm_return @@ -745,7 +758,7 @@ ENTRY(io_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_IO_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 @@ -759,12 +772,14 @@ ENTRY(io_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + tm __PT_PSW+1(%r11),0x01 # coming from user space? + jno .Lio_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE +.Lio_skip_asce: mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ - jo .Lio_restore - TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + TRACE_IRQS_OFF .Lio_loop: lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,IO_INTERRUPT @@ -790,6 +805,8 @@ ENTRY(io_int_handler) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel + DEBUG_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER .Lio_exit_kernel: @@ -847,38 +864,17 @@ ENTRY(io_int_handler) TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING jo .Lio_patch_pending #endif - TSTMSK __TI_flags(%r12),_TIF_SIGPENDING - jo .Lio_sigpending + TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) + jnz .Lio_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE jo .Lio_guarded_storage TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs - TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) - jnz .Lio_asce j .Lio_return # -# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce -# -.Lio_asce: - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY - lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY - jz .Lio_return -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES - tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? - jnz .Lio_set_fs_fixup - ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - j .Lio_return -.Lio_set_fs_fixup: -#endif - larl %r14,.Lio_return - jg set_fs_fixup - -# # CIF_FPU is set, restore floating-point controls and floating-point registers. # .Lio_vxrs: @@ -945,7 +941,7 @@ ENTRY(ext_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT lmg %r8,%r9,__LC_EXT_OLD_PSW - SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER + SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 @@ -959,15 +955,17 @@ ENTRY(ext_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC stmg %r8,%r9,__PT_PSW(%r11) + tm __PT_PSW+1(%r11),0x01 # coming from user space? 
+ jno .Lext_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE +.Lext_skip_asce: lghi %r1,__LC_EXT_PARAMS2 mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ - jo .Lio_restore - TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + TRACE_IRQS_OFF lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,EXT_INTERRUPT brasl %r14,do_IRQ @@ -1167,7 +1165,7 @@ ENTRY(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic 4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER + SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK .Lmcck_skip: lghi %r14,__LC_GPREGS_SAVE_AREA+64 stmg %r0,%r7,__PT_R0(%r11) @@ -1183,6 +1181,9 @@ ENTRY(mcck_int_handler) xgr %r10,%r10 mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) + la %r14,4095 + mvc __PT_CR1(8,%r11),__LC_CREGS_SAVE_AREA-4095+8(%r14) + lctlg %c1,%c1,__LC_KERNEL_ASCE xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs @@ -1198,6 +1199,7 @@ ENTRY(mcck_int_handler) brasl %r14,s390_handle_mcck TRACE_IRQS_ON .Lmcck_return: + lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? @@ -1274,7 +1276,7 @@ ENDPROC(stack_overflow) 1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + lctlg %c1,%c1,__LC_KERNEL_ASCE larl %r9,sie_exit # skip forward to sie_exit BR_EX %r14,%r11 diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index d2ca3fe51f8e..a16c33b32ab0 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -83,7 +83,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); void gs_load_bc_cb(struct pt_regs *regs); -void set_fs_fixup(void); unsigned long stack_alloc(void); void stack_free(unsigned long stack); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index b388e87a08bf..c6ddeb5029b4 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -22,56 +22,26 @@ #include "entry.h" /* - * The mcount code looks like this: - * stg %r14,8(%r15) # offset 0 - * larl %r1,<&counter> # offset 6 - * brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. Only the first instruction will be patched - * by ftrace_make_call / ftrace_make_nop. - * The enabled ftrace code block looks like this: + * To generate function prologue either gcc's hotpatch feature (since gcc 4.8) + * or a combination of -pg -mrecord-mcount -mnop-mcount -mfentry flags + * (since gcc 9 / clang 10) is used. + * In both cases the original and also the disabled function prologue contains + * only a single six byte instruction and looks like this: + * > brcl 0,0 # offset 0 + * To enable ftrace the code gets patched like above and afterwards looks + * like this: * > brasl %r0,ftrace_caller # offset 0 - * larl %r1,<&counter> # offset 6 - * brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 + * + * The instruction will be patched by ftrace_make_call / ftrace_make_nop. 
* The ftrace function gets called with a non-standard C function call ABI * where r0 contains the return address. It is also expected that the called * function only clobbers r0 and r1, but restores r2-r15. * For module code we can't directly jump to ftrace caller, but need a * trampoline (ftrace_plt), which clobbers also r1. - * The return point of the ftrace function has offset 24, so execution - * continues behind the mcount block. - * The disabled ftrace code block looks like this: - * > jg .+24 # offset 0 - * larl %r1,<&counter> # offset 6 - * brasl %r14,_mcount # offset 12 - * lg %r14,8(%r15) # offset 18 - * The jg instruction branches to offset 24 to skip as many instructions - * as possible. - * In case we use gcc's hotpatch feature the original and also the disabled - * function prologue contains only a single six byte instruction and looks - * like this: - * > brcl 0,0 # offset 0 - * To enable ftrace the code gets patched like above and afterwards looks - * like this: - * > brasl %r0,ftrace_caller # offset 0 */ unsigned long ftrace_plt; -static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) -{ -#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) - /* brcl 0,0 */ - insn->opc = 0xc004; - insn->disp = 0; -#else - /* stg r14,8(r15) */ - insn->opc = 0xe3e0; - insn->disp = 0xf0080024; -#endif -} - int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -85,15 +55,10 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - if (addr == MCOUNT_ADDR) { - /* Initial code replacement */ - ftrace_generate_orig_insn(&orig); - ftrace_generate_nop_insn(&new); - } else { - /* Replace ftrace call with a nop. */ - ftrace_generate_call_insn(&orig, rec->ip); - ftrace_generate_nop_insn(&new); - } + /* Replace ftrace call with a nop. */ + ftrace_generate_call_insn(&orig, rec->ip); + ftrace_generate_nop_insn(&new); + /* Verify that the to be replaced code matches what we expect. */ if (memcmp(&orig, &old, sizeof(old))) return -EINVAL; @@ -198,17 +163,26 @@ int ftrace_disable_ftrace_graph_caller(void) #ifdef CONFIG_KPROBES_ON_FTRACE void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct kprobe_ctlblk *kcb; - struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip); + struct pt_regs *regs; + struct kprobe *p; + int bit; - if (unlikely(!p) || kprobe_disabled(p)) + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) return; + regs = ftrace_get_regs(fregs); + preempt_disable_notrace(); + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto out; + if (kprobe_running()) { kprobes_inc_nmissed_count(p); - return; + goto out; } __this_cpu_write(current_kprobe, p); @@ -228,6 +202,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } } __this_cpu_write(current_kprobe, NULL); +out: + preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 8b88dbbda7df..0c253886da78 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -18,12 +18,7 @@ __HEAD ENTRY(startup_continue) - tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ? 
- jz 0f - xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid - mvi __LC_LPP,0x80 # and set LPP_MAGIC - .insn s,0xb2800000,__LC_LPP # load program parameter -0: larl %r1,tod_clock_base + larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base # diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 2b85096964f8..a5d4d80d6ede 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include <linux/kernel_stat.h> -#include <linux/kprobes.h> #include <linux/notifier.h> #include <linux/init.h> #include <linux/cpu.h> @@ -21,22 +20,19 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); -void enabled_wait(void) +void arch_cpu_idle(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); unsigned long long idle_time; - unsigned long psw_mask, flags; - + unsigned long psw_mask; /* Wait for external, I/O or machine check interrupt. */ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); - raw_local_irq_save(flags); - /* Call the assembler magic in entry.S */ + /* psw_idle() returns with interrupts disabled. */ psw_idle(idle, psw_mask); - raw_local_irq_restore(flags); /* Account time spent with enabled wait psw loaded as idle time. */ raw_write_seqcount_begin(&idle->seqcount); @@ -46,8 +42,8 @@ void enabled_wait(void) idle->idle_count++; account_idle_time(cputime_to_nsecs(idle_time)); raw_write_seqcount_end(&idle->seqcount); + raw_local_irq_enable(); } -NOKPROBE_SYMBOL(enabled_wait); static ssize_t show_idle_count(struct device *dev, struct device_attribute *attr, char *buf) @@ -120,12 +116,6 @@ void arch_cpu_idle_enter(void) { } -void arch_cpu_idle(void) -{ - enabled_wait(); - raw_local_irq_enable(); -} - void arch_cpu_idle_exit(void) { } diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 98b3aca1de8e..7a21eca498aa 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1512,7 +1512,7 @@ static void diag308_dump(void *dump_block) while (1) { if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302) break; - udelay_simple(USEC_PER_SEC); + udelay(USEC_PER_SEC); } } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 3514420f0259..f8a8b9428ae2 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -124,7 +124,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq) raw_spin_lock_irqsave(&desc->lock, flags); seq_printf(p, "%3d: ", irq); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu)); if (desc->irq_data.chip) seq_printf(p, " %8s", desc->irq_data.chip->name); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 7458dcfd6464..faf64c2f90f5 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -33,11 +33,6 @@ ENDPROC(ftrace_stub) #define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD #endif -ENTRY(_mcount) - BR_EX %r14 -ENDPROC(_mcount) -EXPORT_SYMBOL(_mcount) - ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller @@ -46,9 +41,6 @@ ENTRY(ftrace_caller) ipm %r14 # don't put any instructions sllg %r14,%r14,16 # clobbering CC before this point lgr %r1,%r15 -#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) - aghi %r0,MCOUNT_RETURN_FIXUP -#endif # allocate stack frame for ftrace_caller to contain traced function aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) diff --git 
a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index ec801d3bbb37..bc3ca54edfb4 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -94,7 +94,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, /* Save access registers to new thread structure. */ save_access_regs(&p->thread.acrs[0]); /* start new process with ar4 pointing to the correct address space */ - p->thread.mm_segment = get_fs(); /* Don't copy debug registers */ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); @@ -208,16 +207,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) ret = PAGE_ALIGN(mm->brk + brk_rnd()); return (ret > mm->brk) ? ret : mm->brk; } - -void set_fs_fixup(void) -{ - struct pt_regs *regs = current_pt_regs(); - static bool warned; - - set_fs(USER_DS); - if (warned) - return; - WARN(1, "Unbalanced set_fs - int code: 0x%x\n", regs->int_code); - show_registers(regs); - warned = true; -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4d843e64496f..1fbed91c73bc 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -49,6 +49,7 @@ #include <linux/memory.h> #include <linux/compat.h> #include <linux/start_kernel.h> +#include <linux/hugetlb.h> #include <asm/boot_data.h> #include <asm/ipl.h> @@ -94,10 +95,8 @@ char elf_platform[ELF_PLATFORM_SIZE]; unsigned long int_hwcap = 0; int __bootdata(noexec_disabled); -int __bootdata(memory_end_set); -unsigned long __bootdata(memory_end); +unsigned long __bootdata(ident_map_size); unsigned long __bootdata(vmalloc_size); -unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); @@ -556,24 +555,25 @@ static void __init setup_resources(void) #endif } -static void __init setup_memory_end(void) +static void __init setup_ident_map_size(void) { unsigned long vmax, tmp; /* Choose kernel address space layout: 3 or 4 levels. */ - tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; + tmp = ident_map_size / PAGE_SIZE; tmp = tmp * (sizeof(struct page) + PAGE_SIZE); if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) vmax = _REGION2_SIZE; /* 3-level kernel page table */ else vmax = _REGION1_SIZE; /* 4-level kernel page table */ + /* module area is at the end of the kernel address space. */ + MODULES_END = vmax; if (is_prot_virt_host()) - adjust_to_uv_max(&vmax); + adjust_to_uv_max(&MODULES_END); #ifdef CONFIG_KASAN - vmax = kasan_vmax; + vmax = _REGION1_SIZE; + MODULES_END = kasan_vmax; #endif - /* module area is at the end of the kernel address space. 
*/ - MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; VMALLOC_START = VMALLOC_END - vmalloc_size; @@ -587,22 +587,22 @@ static void __init setup_memory_end(void) tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); vmemmap = (struct page *) tmp; - /* Take care that memory_end is set and <= vmemmap */ - memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap); + /* Take care that ident_map_size <= vmemmap */ + ident_map_size = min(ident_map_size, (unsigned long)vmemmap); #ifdef CONFIG_KASAN - memory_end = min(memory_end, KASAN_SHADOW_START); + ident_map_size = min(ident_map_size, KASAN_SHADOW_START); #endif - vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page); + vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); #ifdef CONFIG_KASAN /* move vmemmap above the kasan shadow only if it stands in the way */ if (KASAN_SHADOW_END > (unsigned long)vmemmap && (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START) vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); #endif - max_pfn = max_low_pfn = PFN_DOWN(memory_end); - memblock_remove(memory_end, ULONG_MAX); + max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); + memblock_remove(ident_map_size, ULONG_MAX); - pr_notice("The maximum memory size is %luMB\n", memory_end >> 20); + pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); } #ifdef CONFIG_CRASH_DUMP @@ -632,12 +632,11 @@ static struct notifier_block kdump_mem_nb = { #endif /* - * Make sure that the area behind memory_end is protected + * Make sure that the area above the identity mapping is protected */ -static void __init reserve_memory_end(void) +static void __init reserve_above_ident_map(void) { - if (memory_end_set) - memblock_reserve(memory_end, ULONG_MAX); + memblock_reserve(ident_map_size, ULONG_MAX); } /* @@ -674,7 +673,7 @@ static void __init reserve_crashkernel(void) phys_addr_t low, high; int rc; - rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size, &crash_base); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); @@ -1128,7 +1127,7 @@ void __init setup_arch(char **cmdline_p) setup_control_program_code(); /* Do some memory reservations *before* memory is added to memblock */ - reserve_memory_end(); + reserve_above_ident_map(); reserve_oldmem(); reserve_kernel(); reserve_initrd(); @@ -1143,10 +1142,12 @@ void __init setup_arch(char **cmdline_p) remove_oldmem(); setup_uv(); - setup_memory_end(); + setup_ident_map_size(); setup_memory(); - dma_contiguous_reserve(memory_end); + dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); + if (MACHINE_HAS_EDAT2) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); check_initrd(); reserve_crashkernel(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9e900a8977bd..b27b6c1f058d 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -472,7 +472,7 @@ void do_signal(struct pt_regs *regs) current->thread.system_call = test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; - if (get_signal(&ksig)) { + if (test_thread_flag(TIF_SIGPENDING) && get_signal(&ksig)) { /* Whee! Actually deliver the signal.
*/ if (current->thread.system_call) { regs->int_code = current->thread.system_call; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 390d97daa2b3..27c763014114 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -47,7 +47,6 @@ #include <asm/vtimer.h> #include <asm/lowcore.h> #include <asm/sclp.h> -#include <asm/vdso.h> #include <asm/debug.h> #include <asm/os_info.h> #include <asm/sigp.h> @@ -55,6 +54,7 @@ #include <asm/nmi.h> #include <asm/stacktrace.h> #include <asm/topology.h> +#include <asm/vdso.h> #include "entry.h" enum { @@ -217,14 +217,10 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); if (nmi_alloc_per_cpu(lc)) goto out_async; - if (vdso_alloc_per_cpu(lc)) - goto out_mcesa; lowcore_ptr[cpu] = lc; pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); return 0; -out_mcesa: - nmi_free_per_cpu(lc); out_async: stack_free(async_stack); out: @@ -245,7 +241,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; - vdso_free_per_cpu(pcpu->lowcore); nmi_free_per_cpu(pcpu->lowcore); stack_free(async_stack); if (pcpu == &pcpu_devices[0]) @@ -265,13 +260,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; - lc->user_asce = S390_lowcore.kernel_asce; + lc->user_asce = s390_invalid_asce; lc->machine_flags = S390_lowcore.machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); lc->cregs_save_area[1] = lc->kernel_asce; - lc->cregs_save_area[7] = lc->vdso_asce; + lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, sizeof(lc->stfle_fac_list)); @@ -859,13 +854,12 @@ static void smp_init_secondary(void) S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); - set_cpu_flag(CIF_ASCE_PRIMARY); - set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); rcu_cpu_starting(cpu); preempt_disable(); init_cpu_timer(); vtime_init(); + vdso_getcpu_init(); pfault_init(); notify_cpu_starting(cpu); if (topology_cpu_dedicated(cpu)) @@ -896,24 +890,12 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid) /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu; - int base, i, rc; + struct pcpu *pcpu = pcpu_devices + cpu; + int rc; - pcpu = pcpu_devices + cpu; if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - base = smp_get_base_cpu(cpu); - for (i = 0; i <= smp_cpu_mtid; i++) { - if (base + i < nr_cpu_ids) - if (cpu_online(base + i)) - break; - } - /* - * If this is the first CPU of the core to get online - * do an initial CPU reset. 
- */ - if (i > smp_cpu_mtid && - pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) != + if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 28c168000483..d443423495e5 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -443,3 +443,4 @@ 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 0ac30ee2c633..c59cb44fbb7d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -927,41 +927,25 @@ static ssize_t online_store(struct device *dev, */ static DEVICE_ATTR_RW(online); -static struct device_attribute *stp_attributes[] = { - &dev_attr_ctn_id, - &dev_attr_ctn_type, - &dev_attr_dst_offset, - &dev_attr_leap_seconds, - &dev_attr_online, - &dev_attr_leap_seconds_scheduled, - &dev_attr_stratum, - &dev_attr_time_offset, - &dev_attr_time_zone_offset, - &dev_attr_timing_mode, - &dev_attr_timing_state, +static struct attribute *stp_dev_attrs[] = { + &dev_attr_ctn_id.attr, + &dev_attr_ctn_type.attr, + &dev_attr_dst_offset.attr, + &dev_attr_leap_seconds.attr, + &dev_attr_online.attr, + &dev_attr_leap_seconds_scheduled.attr, + &dev_attr_stratum.attr, + &dev_attr_time_offset.attr, + &dev_attr_time_zone_offset.attr, + &dev_attr_timing_mode.attr, + &dev_attr_timing_state.attr, NULL }; +ATTRIBUTE_GROUPS(stp_dev); static int __init stp_init_sysfs(void) { - struct device_attribute **attr; - int rc; - - rc = subsys_system_register(&stp_subsys, NULL); - if (rc) - goto out; - for (attr = stp_attributes; *attr; attr++) { - rc = device_create_file(stp_subsys.dev_root, *attr); - if (rc) - goto out_unreg; - } - return 0; -out_unreg: - for (; attr >= stp_attributes; attr--) - device_remove_file(stp_subsys.dev_root, *attr); - bus_unregister(&stp_subsys); -out: - return rc; + return subsys_system_register(&stp_subsys, stp_dev_groups); } device_initcall(stp_init_sysfs); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f9da5b149141..8bc269c55fd3 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -29,6 +29,7 @@ #include <asm/sections.h> #include <asm/vdso.h> #include <asm/facility.h> +#include <asm/timex.h> extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; @@ -61,17 +62,8 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { - unsigned long vdso_pages; - - vdso_pages = vdso64_pages; - - if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start) - return -EINVAL; - - if (WARN_ON_ONCE(current->mm != vma->vm_mm)) - return -EFAULT; - current->mm->context.vdso_base = vma->vm_start; + return 0; } @@ -99,60 +91,10 @@ static union { u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data; -/* - * Allocate/free per cpu vdso data. 
- */ -#define SEGMENT_ORDER 2 - -int vdso_alloc_per_cpu(struct lowcore *lowcore) -{ - unsigned long segment_table, page_table, page_frame; - struct vdso_per_cpu_data *vd; - segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); - page_table = get_zeroed_page(GFP_KERNEL); - page_frame = get_zeroed_page(GFP_KERNEL); - if (!segment_table || !page_table || !page_frame) - goto out; - arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER); - arch_set_page_dat(virt_to_page(page_table), 0); - - /* Initialize per-cpu vdso data page */ - vd = (struct vdso_per_cpu_data *) page_frame; - vd->cpu_nr = lowcore->cpu_nr; - vd->node_id = cpu_to_node(vd->cpu_nr); - - /* Set up page table for the vdso address space */ - memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES); - memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE); - - *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; - *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; - - lowcore->vdso_asce = segment_table + - _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; - lowcore->vdso_per_cpu_data = page_frame; - - return 0; - -out: - free_page(page_frame); - free_page(page_table); - free_pages(segment_table, SEGMENT_ORDER); - return -ENOMEM; -} - -void vdso_free_per_cpu(struct lowcore *lowcore) +void vdso_getcpu_init(void) { - unsigned long segment_table, page_table, page_frame; - - segment_table = lowcore->vdso_asce & PAGE_MASK; - page_table = *(unsigned long *) segment_table; - page_frame = *(unsigned long *) page_table; - - free_page(page_frame); - free_page(page_table); - free_pages(segment_table, SEGMENT_ORDER); + set_tod_programmable_field(smp_processor_id()); } /* @@ -225,6 +167,7 @@ static int __init vdso_init(void) { int i; + vdso_getcpu_init(); /* Calculate the size of the 64 bit vDSO */ vdso64_pages = ((&vdso64_end - &vdso64_start + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; @@ -240,8 +183,6 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; - if (vdso_alloc_per_cpu(&S390_lowcore)) - BUG(); get_page(virt_to_page(vdso_data)); diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 13cc5a3f9abf..a6e0fb6b91d6 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -6,8 +6,9 @@ ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT include $(srctree)/lib/vdso/Makefile -obj-vdso64 = vdso_user_wrapper.o note.o getcpu.o -obj-cvdso64 = vdso64_generic.o +obj-vdso64 = vdso_user_wrapper.o note.o +obj-cvdso64 = vdso64_generic.o getcpu.o +CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) # Build rules diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S deleted file mode 100644 index 3c04f7328500..000000000000 --- a/arch/s390/kernel/vdso64/getcpu.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of getcpu() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 
2016 - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/dwarf.h> - - .text - .align 4 - .globl __kernel_getcpu - .type __kernel_getcpu,@function -__kernel_getcpu: - CFI_STARTPROC - sacf 256 - lm %r4,%r5,__VDSO_GETCPU_VAL(%r0) - sacf 0 - ltgr %r2,%r2 - jz 2f - st %r5,0(%r2) -2: ltgr %r3,%r3 - jz 3f - st %r4,0(%r3) -3: lghi %r2,0 - br %r14 - CFI_ENDPROC - .size __kernel_getcpu,.-__kernel_getcpu diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso64/getcpu.c new file mode 100644 index 000000000000..5b2bc7494d5b --- /dev/null +++ b/arch/s390/kernel/vdso64/getcpu.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright IBM Corp. 2020 */ + +#include <linux/compiler.h> +#include <linux/getcpu.h> +#include <asm/timex.h> +#include "vdso.h" + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +{ + __u16 todval[8]; + + /* CPU number is stored in the programmable field of the TOD clock */ + get_tod_clock_ext((char *)todval); + if (cpu) + *cpu = todval[7]; + /* NUMA node is always zero */ + if (node) + *node = 0; + return 0; +} diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h new file mode 100644 index 000000000000..34c7a2312f9d --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_S390_KERNEL_VDSO64_VDSO_H +#define __ARCH_S390_KERNEL_VDSO64_VDSO_H + +#include <vdso/datapage.h> + +struct getcpu_cache; + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); +int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts); + +#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */ diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 7ddb116b5e2e..7bde3909290f 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -136,7 +136,6 @@ VERSION __kernel_clock_gettime; __kernel_clock_getres; __kernel_getcpu; - local: *; }; } diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c index a8cef7e4d137..a9aa75643c08 100644 --- a/arch/s390/kernel/vdso64/vdso64_generic.c +++ b/arch/s390/kernel/vdso64/vdso64_generic.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "../../../../lib/vdso/gettimeofday.c" +#include "vdso.h" int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index a775d7e52872..f773505c7e63 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -36,3 +36,4 @@ __kernel_\func: vdso_func gettimeofday vdso_func clock_getres vdso_func clock_gettime +vdso_func getcpu diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 177ccfbda40a..4c0e19145cc6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -10,7 +10,8 @@ * Put .bss..swapper_pg_dir as the first thing in .bss. This will * make sure it has 16k alignment. */ -#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \ + *(.bss..invalid_pg_dir) /* Handle ro_after_init data on our own. 
*/ #define RO_AFTER_INIT_DATA diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 8df10d3c8f6c..5aaa2ca6a928 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -222,35 +222,50 @@ void vtime_flush(struct task_struct *tsk) S390_lowcore.avg_steal_timer = avg_steal; } +static u64 vtime_delta(void) +{ + u64 timer = S390_lowcore.last_update_timer; + + S390_lowcore.last_update_timer = get_vtimer(); + + return timer - S390_lowcore.last_update_timer; +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void vtime_account_irq_enter(struct task_struct *tsk) +void vtime_account_kernel(struct task_struct *tsk) { - u64 timer; - - timer = S390_lowcore.last_update_timer; - S390_lowcore.last_update_timer = get_vtimer(); - timer -= S390_lowcore.last_update_timer; + u64 delta = vtime_delta(); - if ((tsk->flags & PF_VCPU) && (irq_count() == 0)) - S390_lowcore.guest_timer += timer; - else if (hardirq_count()) - S390_lowcore.hardirq_timer += timer; - else if (in_serving_softirq()) - S390_lowcore.softirq_timer += timer; + if (tsk->flags & PF_VCPU) + S390_lowcore.guest_timer += delta; else - S390_lowcore.system_timer += timer; + S390_lowcore.system_timer += delta; - virt_timer_forward(timer); + virt_timer_forward(delta); } -EXPORT_SYMBOL_GPL(vtime_account_irq_enter); - -void vtime_account_kernel(struct task_struct *tsk) -__attribute__((alias("vtime_account_irq_enter"))); EXPORT_SYMBOL_GPL(vtime_account_kernel); +void vtime_account_softirq(struct task_struct *tsk) +{ + u64 delta = vtime_delta(); + + S390_lowcore.softirq_timer += delta; + + virt_timer_forward(delta); +} + +void vtime_account_hardirq(struct task_struct *tsk) +{ + u64 delta = vtime_delta(); + + S390_lowcore.hardirq_timer += delta; + + virt_timer_forward(delta); +} + /* * Sorted add to a list. List is linear searched until first bigger * element is found. 
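The vtime rework above splits the old vtime_account_irq_enter() into per-context helpers built on a shared vtime_delta(); the delta is "old minus new" because the s390 CPU timer counts down while the CPU runs. A minimal sketch of a dispatcher that picks the right helper (the predicates mirror the branching removed above; the function name is hypothetical, and the real call sites are in the generic cputime code):

#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/vtime.h>

static void vtime_account_irq_sketch(struct task_struct *tsk)
{
	if (hardirq_count())		/* serving a hard interrupt */
		vtime_account_hardirq(tsk);
	else if (in_serving_softirq())	/* serving a softirq */
		vtime_account_softirq(tsk);
	else				/* plain kernel (or guest) time */
		vtime_account_kernel(tsk);
}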
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 394a5f53805b..3765c4223bf9 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -184,7 +184,7 @@ static int __import_wp_info(struct kvm_vcpu *vcpu, if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE) return -EINVAL; - wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL); + wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL_ACCOUNT); if (!wp_info->old_data) return -ENOMEM; /* try to backup the original value */ @@ -234,7 +234,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, if (nr_wp > 0) { wp_info = kmalloc_array(nr_wp, sizeof(*wp_info), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!wp_info) { ret = -ENOMEM; goto error; @@ -243,7 +243,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, if (nr_bp > 0) { bp_info = kmalloc_array(nr_bp, sizeof(*bp_info), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!bp_info) { ret = -ENOMEM; goto error; @@ -349,7 +349,7 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu) if (!wp_info || !wp_info->old_data || wp_info->len <= 0) continue; - temp = kmalloc(wp_info->len, GFP_KERNEL); + temp = kmalloc(wp_info->len, GFP_KERNEL_ACCOUNT); if (!temp) continue; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index e7a7c499a73f..72b25b7cc6ae 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -398,7 +398,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - sctns = (void *)get_zeroed_page(GFP_KERNEL); + sctns = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!sctns) return -ENOMEM; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2f177298c663..e3183bd05910 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1792,7 +1792,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, goto out; } gisa_out: - tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL); + tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (tmp_inti) { tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0); tmp_inti->io.io_int_word = isc_to_int_word(isc); @@ -2015,7 +2015,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti; int rc; - inti = kzalloc(sizeof(*inti), GFP_KERNEL); + inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (!inti) return -ENOMEM; @@ -2414,7 +2414,7 @@ static int enqueue_floating_irq(struct kvm_device *dev, return -EINVAL; while (len >= sizeof(struct kvm_s390_irq)) { - inti = kzalloc(sizeof(*inti), GFP_KERNEL); + inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (!inti) return -ENOMEM; @@ -2462,7 +2462,7 @@ static int register_io_adapter(struct kvm_device *dev, if (dev->kvm->arch.adapters[adapter_info.id] != NULL) return -EINVAL; - adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL_ACCOUNT); if (!adapter) return -ENOMEM; @@ -3290,7 +3290,7 @@ int kvm_s390_gib_init(u8 nisc) goto out; } - gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); if (!gib) { rc = -ENOMEM; goto out; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 425d3d75320b..dbafd057ca6a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -60,6 +60,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("userspace_handled", exit_userspace), VCPU_STAT("exit_null", 
exit_null), + VCPU_STAT("pfault_sync", pfault_sync), VCPU_STAT("exit_validity", exit_validity), VCPU_STAT("exit_stop_request", exit_stop_request), VCPU_STAT("exit_external_request", exit_external_request), @@ -1254,7 +1255,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) ret = -EBUSY; goto out; } - proc = kzalloc(sizeof(*proc), GFP_KERNEL); + proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); if (!proc) { ret = -ENOMEM; goto out; @@ -1416,7 +1417,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_vm_cpu_processor *proc; int ret = 0; - proc = kzalloc(sizeof(*proc), GFP_KERNEL); + proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT); if (!proc) { ret = -ENOMEM; goto out; @@ -1444,7 +1445,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_vm_cpu_machine *mach; int ret = 0; - mach = kzalloc(sizeof(*mach), GFP_KERNEL); + mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT); if (!mach) { ret = -ENOMEM; goto out; @@ -1812,7 +1813,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); if (!keys) return -ENOMEM; @@ -1857,7 +1858,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) return -EINVAL; - keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL); + keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT); if (!keys) return -ENOMEM; @@ -2625,7 +2626,7 @@ static void sca_dispose(struct kvm *kvm) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - gfp_t alloc_flags = GFP_KERNEL; + gfp_t alloc_flags = GFP_KERNEL_ACCOUNT; int i, rc; char debug_name[16]; static unsigned long sca_offset; @@ -2670,7 +2671,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) BUILD_BUG_ON(sizeof(struct sie_page2) != 4096); kvm->arch.sie_page2 = - (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA); if (!kvm->arch.sie_page2) goto out_err; @@ -2900,7 +2901,7 @@ static int sca_switch_to_extended(struct kvm *kvm) if (kvm->arch.use_esca) return 0; - new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!new_sca) return -ENOMEM; @@ -3133,7 +3134,7 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) { - vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); + vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!vcpu->arch.sie_block->cbrlo) return -ENOMEM; return 0; @@ -3243,7 +3244,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) int rc; BUILD_BUG_ON(sizeof(struct sie_page) != 4096); - sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL); + sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!sie_page) return -ENOMEM; @@ -4109,6 +4110,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) current->thread.gmap_pfault = 0; if (kvm_arch_setup_async_pf(vcpu)) return 0; + vcpu->stat.pfault_sync++; return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } return vcpu_post_run_fault_in_sie(vcpu); diff --git 
a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index cd74989ce0b0..9928f785c677 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -879,7 +879,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) switch (fc) { case 1: /* same handling for 1 and 2 */ case 2: - mem = get_zeroed_page(GFP_KERNEL); + mem = get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!mem) goto out_no_data; if (stsi((void *) mem, fc, sel1, sel2)) @@ -888,7 +888,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) case 3: if (sel1 != 2 || sel2 != 2) goto out_no_data; - mem = get_zeroed_page(GFP_KERNEL); + mem = get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!mem) goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index f5847f9dec7c..813b6e93dc83 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -60,7 +60,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) if (kvm_s390_pv_cpu_get_handle(vcpu)) return -EINVAL; - vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, + vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(uv_info.guest_cpu_stor_len)); if (!vcpu->arch.pv.stor_base) return -ENOMEM; @@ -72,7 +72,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; /* Alloc Secure Instruction Data Area Designation */ - vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO); + vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!vcpu->arch.sie_block->sidad) { free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); @@ -120,7 +120,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm) struct kvm_memory_slot *memslot; kvm->arch.pv.stor_var = NULL; - kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base)); + kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base)); if (!kvm->arch.pv.stor_base) return -ENOMEM; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4f3cbf6003a9..c5d0a58b2c29 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1234,7 +1234,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_lock(&kvm->arch.vsie.mutex); if (kvm->arch.vsie.page_count < nr_vcpus) { - page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA); + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); if (!page) { mutex_unlock(&kvm->arch.vsie.mutex); return ERR_PTR(-ENOMEM); @@ -1336,7 +1336,7 @@ out_put: void kvm_s390_vsie_init(struct kvm *kvm) { mutex_init(&kvm->arch.vsie.mutex); - INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL); + INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT); } /* Destroy the vsie data structures. To be called when a vm is destroyed. 
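That completes the KVM side of the sweep: every allocation a guest can trigger (breakpoint arrays, floating interrupts, SCA/SIE pages, protected-VM storage, vsie pages) now uses GFP_KERNEL_ACCOUNT, so the memory is charged to the VM owner's memory cgroup. GFP_KERNEL_ACCOUNT is simply GFP_KERNEL with __GFP_ACCOUNT or'ed in, which is also why the atomic radix trees in the mm/gmap.c hunks further down have to spell out "GFP_ATOMIC | __GFP_ACCOUNT" by hand: there is no GFP_ATOMIC_ACCOUNT shorthand. A toy illustration of the composition (bit values invented for the demo; the real ones live in include/linux/gfp.h):

#include <stdio.h>

/* Invented bit values, for illustration only. */
#define __GFP_RECLAIM	0x10u
#define __GFP_IO	0x20u
#define __GFP_FS	0x40u
#define __GFP_ACCOUNT	0x80u	/* "charge this to the current memcg" */

#define GFP_KERNEL		(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
/* The composition below matches the real definition in gfp.h. */
#define GFP_KERNEL_ACCOUNT	(GFP_KERNEL | __GFP_ACCOUNT)

int main(void)
{
	printf("GFP_KERNEL         = %#x\n", GFP_KERNEL);
	printf("GFP_KERNEL_ACCOUNT = %#x\n", GFP_KERNEL_ACCOUNT);
	return 0;
}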
*/ diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 8c0c68e7770e..f289afeb3f31 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -13,6 +13,7 @@ #include <linux/export.h> #include <linux/irqflags.h> #include <linux/interrupt.h> +#include <linux/jump_label.h> #include <linux/irq.h> #include <asm/vtimer.h> #include <asm/div64.h> @@ -31,100 +32,25 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -static void __udelay_disabled(unsigned long long usecs) +static void delay_loop(unsigned long delta) { - unsigned long cr0, cr0_new, psw_mask; - struct s390_idle_data idle; - u64 end; + unsigned long end; - end = get_tod_clock() + (usecs << 12); - __ctl_store(cr0, 0, 0); - cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK; - cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */ - __ctl_load(cr0_new, 0, 0); - psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; - set_clock_comparator(end); - set_cpu_flag(CIF_IGNORE_IRQ); - psw_idle(&idle, psw_mask); - trace_hardirqs_off(); - clear_cpu_flag(CIF_IGNORE_IRQ); - set_clock_comparator(S390_lowcore.clock_comparator); - __ctl_load(cr0, 0, 0); -} - -static void __udelay_enabled(unsigned long long usecs) -{ - u64 clock_saved, end; - - end = get_tod_clock_fast() + (usecs << 12); - do { - clock_saved = 0; - if (tod_after(S390_lowcore.clock_comparator, end)) { - clock_saved = local_tick_disable(); - set_clock_comparator(end); - } - enabled_wait(); - if (clock_saved) - local_tick_enable(clock_saved); - } while (get_tod_clock_fast() < end); + end = get_tod_clock_monotonic() + delta; + while (!tod_after(get_tod_clock_monotonic(), end)) + cpu_relax(); } -/* - * Waits for 'usecs' microseconds using the TOD clock comparator. - */ -void __udelay(unsigned long long usecs) +void __udelay(unsigned long usecs) { - unsigned long flags; - - preempt_disable(); - local_irq_save(flags); - if (in_irq()) { - __udelay_disabled(usecs); - goto out; - } - if (in_softirq()) { - if (raw_irqs_disabled_flags(flags)) - __udelay_disabled(usecs); - else - __udelay_enabled(usecs); - goto out; - } - if (raw_irqs_disabled_flags(flags)) { - local_bh_disable(); - __udelay_disabled(usecs); - _local_bh_enable(); - goto out; - } - __udelay_enabled(usecs); -out: - local_irq_restore(flags); - preempt_enable(); + delay_loop(usecs << 12); } EXPORT_SYMBOL(__udelay); -/* - * Simple udelay variant. To be used on startup and reboot - * when the interrupt handler isn't working. - */ -void udelay_simple(unsigned long long usecs) -{ - u64 end; - - end = get_tod_clock_fast() + (usecs << 12); - while (get_tod_clock_fast() < end) - cpu_relax(); -} - -void __ndelay(unsigned long long nsecs) +void __ndelay(unsigned long nsecs) { - u64 end; - nsecs <<= 9; do_div(nsecs, 125); - end = get_tod_clock_fast() + nsecs; - if (nsecs & ~0xfffUL) - __udelay(nsecs >> 12); - while (get_tod_clock_fast() < end) - barrier(); + delay_loop(nsecs); } EXPORT_SYMBOL(__ndelay); diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 7c988994931f..dcd8946255be 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -9,12 +9,12 @@ #include <linux/kallsyms.h> #include <linux/kthread.h> #include <linux/module.h> +#include <linux/timer.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/kprobes.h> #include <linux/wait.h> #include <asm/irq.h> -#include <asm/delay.h> #define BT_BUF_SIZE (PAGE_SIZE * 4) @@ -205,12 +205,15 @@ static noinline int unwindme_func3(struct unwindme *u) /* This function must appear in the backtrace. 
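Before the test_unwind.c changes continue, the lib/delay.c rewrite above deserves a note: all of the clock-comparator and PSW juggling collapses into a single busy loop on the monotonic TOD clock, which also makes the separate udelay_simple() redundant. The unit conversions rely on the TOD clock format, where bit 51 ticks once per microsecond, i.e. 1 us == 4096 TOD units; __ndelay's (nsecs << 9) / 125 is just nsecs * 4096 / 1000 with the fraction reduced. A user-space model of delay_loop() with the clock stubbed (tod_after() simplified to a wrap-safe signed compare):

#include <stdio.h>
#include <stdint.h>

static uint64_t fake_tod;	/* stub for get_tod_clock_monotonic() */

static uint64_t get_tod(void)
{
	return fake_tod += 512;	/* pretend each read costs 1/8 us */
}

static int tod_after(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) > 0;
}

static void delay_loop(uint64_t delta)	/* delta in TOD units */
{
	uint64_t end = get_tod() + delta;
	unsigned long spins = 0;

	while (!tod_after(get_tod(), end))
		spins++;	/* cpu_relax() in the kernel version */
	printf("waited %llu TOD units in %lu spins\n",
	       (unsigned long long)delta, spins);
}

int main(void)
{
	delay_loop(10ULL << 12);		/* __udelay(10): us -> units */
	delay_loop((250ULL << 9) / 125);	/* __ndelay(250): ns * 4096/1000 */
	return 0;
}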
*/ static noinline int unwindme_func2(struct unwindme *u) { + unsigned long flags; int rc; if (u->flags & UWM_SWITCH_STACK) { - preempt_disable(); + local_irq_save(flags); + local_mcck_disable(); rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u); - preempt_enable(); + local_mcck_enable(); + local_irq_restore(flags); return rc; } else { return unwindme_func3(u); @@ -223,31 +226,27 @@ static noinline int unwindme_func1(void *u) return unwindme_func2((struct unwindme *)u); } -static void unwindme_irq_handler(struct ext_code ext_code, - unsigned int param32, - unsigned long param64) +static void unwindme_timer_fn(struct timer_list *unused) { struct unwindme *u = READ_ONCE(unwindme); - if (u && u->task == current) { + if (u) { unwindme = NULL; u->task = NULL; u->ret = unwindme_func1(u); + complete(&u->task_ready); } } +static struct timer_list unwind_timer; + static int test_unwind_irq(struct unwindme *u) { - preempt_disable(); - if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) { - pr_info("Couldn't register external interrupt handler"); - return -1; - } - u->task = current; unwindme = u; - udelay(1); - unregister_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler); - preempt_enable(); + init_completion(&u->task_ready); + timer_setup(&unwind_timer, unwindme_timer_fn, 0); + mod_timer(&unwind_timer, jiffies + 1); + wait_for_completion(&u->task_ready); return u->ret; } diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 0267405ab7c6..e8f642446fed 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -16,6 +16,22 @@ #include <asm/mmu_context.h> #include <asm/facility.h> +#ifdef CONFIG_DEBUG_USER_ASCE +void debug_user_asce(void) +{ + unsigned long cr1, cr7; + + __ctl_store(cr1, 1, 1); + __ctl_store(cr7, 7, 7); + if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce) + return; + panic("incorrect ASCE on kernel exit\n" + "cr1: %016lx cr7: %016lx\n" + "kernel: %016llx user: %016llx\n", + cr1, cr7, S390_lowcore.kernel_asce, S390_lowcore.user_asce); +} +#endif /*CONFIG_DEBUG_USER_ASCE */ + #ifndef CONFIG_HAVE_MARCH_Z10_FEATURES static DEFINE_STATIC_KEY_FALSE(have_mvcos); @@ -40,71 +56,10 @@ static inline int copy_with_mvcos(void) } #endif -void set_fs(mm_segment_t fs) -{ - current->thread.mm_segment = fs; - if (fs == USER_DS) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); - } else { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - if (fs & 1) { - if (fs == USER_DS_SACF) - __ctl_load(S390_lowcore.user_asce, 7, 7); - else - __ctl_load(S390_lowcore.kernel_asce, 7, 7); - set_cpu_flag(CIF_ASCE_SECONDARY); - } -} -EXPORT_SYMBOL(set_fs); - -mm_segment_t enable_sacf_uaccess(void) -{ - mm_segment_t old_fs; - unsigned long asce, cr; - unsigned long flags; - - old_fs = current->thread.mm_segment; - if (old_fs & 1) - return old_fs; - /* protect against a concurrent page table upgrade */ - local_irq_save(flags); - current->thread.mm_segment |= 1; - asce = S390_lowcore.kernel_asce; - if (likely(old_fs == USER_DS)) { - __ctl_store(cr, 1, 1); - if (cr != S390_lowcore.kernel_asce) { - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - set_cpu_flag(CIF_ASCE_PRIMARY); - } - asce = S390_lowcore.user_asce; - } - __ctl_store(cr, 7, 7); - if (cr != asce) { - __ctl_load(asce, 7, 7); - set_cpu_flag(CIF_ASCE_SECONDARY); - } - local_irq_restore(flags); - return old_fs; -} -EXPORT_SYMBOL(enable_sacf_uaccess); - -void disable_sacf_uaccess(mm_segment_t old_fs) -{ - 
current->thread.mm_segment = old_fs; - if (old_fs == USER_DS && test_facility(27)) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - clear_cpu_flag(CIF_ASCE_PRIMARY); - } -} -EXPORT_SYMBOL(disable_sacf_uaccess); - static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, unsigned long size) { - register unsigned long reg0 asm("0") = 0x01UL; + register unsigned long reg0 asm("0") = 0x81UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -135,9 +90,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, unsigned long size) { unsigned long tmp1, tmp2; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -164,7 +117,6 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -179,7 +131,7 @@ EXPORT_SYMBOL(raw_copy_from_user); static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010000UL; + register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -210,9 +162,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, unsigned long size) { unsigned long tmp1, tmp2; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -239,7 +189,6 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -254,7 +203,7 @@ EXPORT_SYMBOL(raw_copy_to_user); static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010001UL; + register unsigned long reg0 asm("0") = 0x810081UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -277,10 +226,8 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, unsigned long size) { - mm_segment_t old_fs; unsigned long tmp1; - old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -304,7 +251,6 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) : : "cc", "memory"); - disable_sacf_uaccess(old_fs); return size; } @@ -318,7 +264,7 @@ EXPORT_SYMBOL(raw_copy_in_user); static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010000UL; + register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -346,10 +292,8 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size static inline unsigned long clear_user_xc(void __user *to, unsigned long size) { - mm_segment_t old_fs; unsigned long tmp1, tmp2; - old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -378,7 +322,6 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size) EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) : : "cc", "memory"); - 
disable_sacf_uaccess(old_fs); return size; } @@ -414,15 +357,9 @@ static inline unsigned long strnlen_user_srst(const char __user *src, unsigned long __strnlen_user(const char __user *src, unsigned long size) { - mm_segment_t old_fs; - unsigned long len; - if (unlikely(!size)) return 0; - old_fs = enable_sacf_uaccess(); - len = strnlen_user_srst(src, size); - disable_sacf_uaccess(old_fs); - return len; + return strnlen_user_srst(src, size); } EXPORT_SYMBOL(__strnlen_user); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 8f9ff7e7187d..e40a30647d99 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -255,7 +255,7 @@ static int pt_dump_init(void) */ max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); - address_markers[IDENTITY_AFTER_END_NR].start_address = memory_end; + address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[MODULES_END_NR].start_address = MODULES_END; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 996884dcc9fd..b8210103de14 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -53,7 +53,6 @@ enum fault_type { KERNEL_FAULT, USER_FAULT, - VDSO_FAULT, GMAP_FAULT, }; @@ -77,22 +76,16 @@ static enum fault_type get_fault_type(struct pt_regs *regs) trans_exc_code = regs->int_parm_long & 3; if (likely(trans_exc_code == 0)) { /* primary space exception */ - if (IS_ENABLED(CONFIG_PGSTE) && - test_pt_regs_flag(regs, PIF_GUEST_FAULT)) - return GMAP_FAULT; - if (current->thread.mm_segment == USER_DS) + if (user_mode(regs)) return USER_FAULT; - return KERNEL_FAULT; - } - if (trans_exc_code == 2) { - /* secondary space exception */ - if (current->thread.mm_segment & 1) { - if (current->thread.mm_segment == USER_DS_SACF) - return USER_FAULT; + if (!IS_ENABLED(CONFIG_PGSTE)) return KERNEL_FAULT; - } - return VDSO_FAULT; + if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) + return GMAP_FAULT; + return KERNEL_FAULT; } + if (trans_exc_code == 2) + return USER_FAULT; if (trans_exc_code == 1) { /* access register mode, not used in the kernel */ return USER_FAULT; @@ -188,10 +181,6 @@ static void dump_fault_info(struct pt_regs *regs) asce = S390_lowcore.user_asce; pr_cont("user "); break; - case VDSO_FAULT: - asce = S390_lowcore.vdso_asce; - pr_cont("vdso "); - break; case GMAP_FAULT: asce = ((struct gmap *) S390_lowcore.gmap)->asce; pr_cont("gmap "); @@ -414,9 +403,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) switch (type) { case KERNEL_FAULT: goto out; - case VDSO_FAULT: - fault = VM_FAULT_BADMAP; - goto out; case USER_FAULT: case GMAP_FAULT: if (faulthandler_disabled() || !mm) @@ -834,7 +820,6 @@ void do_secure_storage_access(struct pt_regs *regs) if (rc) BUG(); break; - case VDSO_FAULT: case GMAP_FAULT: default: do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 64795d034926..9bb2c7512cd5 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2,7 +2,7 @@ /* * KVM guest address space mapping code * - * Copyright IBM Corp. 2007, 2016, 2018 + * Copyright IBM Corp. 
2007, 2020 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * David Hildenbrand <david@redhat.com> * Janosch Frank <frankja@linux.vnet.ibm.com> @@ -56,19 +56,19 @@ static struct gmap *gmap_alloc(unsigned long limit) atype = _ASCE_TYPE_REGION1; etype = _REGION1_ENTRY_EMPTY; } - gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); if (!gmap) goto out; INIT_LIST_HEAD(&gmap->crst_list); INIT_LIST_HEAD(&gmap->children); INIT_LIST_HEAD(&gmap->pt_list); - INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); - INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); - INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); + INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); spin_lock_init(&gmap->guest_table_lock); spin_lock_init(&gmap->shadow_lock); refcount_set(&gmap->ref_count, 1); - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) goto out_free; page->index = 0; @@ -309,7 +309,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); @@ -594,7 +594,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) return -EFAULT; /* Link gmap segment table entry location to page table. */ - rc = radix_tree_preload(GFP_KERNEL); + rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); if (rc) return rc; ptl = pmd_lock(mm, pmd); @@ -1218,11 +1218,11 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, vmaddr = __gmap_translate(parent, paddr); if (IS_ERR_VALUE(vmaddr)) return vmaddr; - rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); + rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT); if (!rmap) return -ENOMEM; rmap->raddr = raddr; - rc = radix_tree_preload(GFP_KERNEL); + rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); if (rc) { kfree(rmap); return rc; @@ -1741,7 +1741,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r2t & _REGION_ENTRY_ORIGIN; @@ -1825,7 +1825,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = r3t & _REGION_ENTRY_ORIGIN; @@ -1909,7 +1909,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); /* Allocate a shadow segment table */ - page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; page->index = sgt & _REGION_ENTRY_ORIGIN; @@ -2116,7 +2116,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) parent = sg->parent; prot = (pte_val(pte) & _PAGE_PROTECT) ? 
PROT_READ : PROT_WRITE; - rmap = kzalloc(sizeof(*rmap), GFP_KERNEL); + rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT); if (!rmap) return -ENOMEM; rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE; @@ -2128,7 +2128,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) rc = vmaddr; break; } - rc = radix_tree_preload(GFP_KERNEL); + rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); if (rc) break; rc = -EAGAIN; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 77767850d0d0..73a163065b95 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -49,6 +49,9 @@ #include <linux/virtio_config.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); +static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); + +unsigned long s390_invalid_asce; unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); @@ -92,6 +95,9 @@ void __init paging_init(void) unsigned long pgd_type, asce_bits; psw_t psw; + s390_invalid_asce = (unsigned long)invalid_pg_dir; + s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); init_mm.pgd = swapper_pg_dir; if (VMALLOC_END > _REGION2_SIZE) { asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; @@ -102,14 +108,14 @@ void __init paging_init(void) } init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; S390_lowcore.kernel_asce = init_mm.context.asce; - S390_lowcore.user_asce = S390_lowcore.kernel_asce; + S390_lowcore.user_asce = s390_invalid_asce; crst_table_init((unsigned long *) init_mm.pgd, pgd_type); vmem_map_init(); - kasan_copy_shadow(init_mm.pgd); + kasan_copy_shadow_mapping(); /* enable virtual mapping in kernel mode */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); - __ctl_load(S390_lowcore.kernel_asce, 7, 7); + __ctl_load(S390_lowcore.user_asce, 7, 7); __ctl_load(S390_lowcore.kernel_asce, 13, 13); psw.mask = __extract_psw(); psw_bits(psw).dat = 1; diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 5646b39c728a..db4d303aaaa9 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -87,7 +87,7 @@ enum populate_mode { POPULATE_ZERO_SHADOW, POPULATE_SHALLOW }; -static void __init kasan_early_vmemmap_populate(unsigned long address, +static void __init kasan_early_pgtable_populate(unsigned long address, unsigned long end, enum populate_mode mode) { @@ -123,8 +123,7 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgd_populate(&init_mm, pg_dir, p4_dir); } - if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && - mode == POPULATE_SHALLOW) { + if (mode == POPULATE_SHALLOW) { address = (address + P4D_SIZE) & P4D_MASK; continue; } @@ -143,12 +142,6 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, p4d_populate(&init_mm, p4_dir, pu_dir); } - if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && - mode == POPULATE_SHALLOW) { - address = (address + PUD_SIZE) & PUD_MASK; - continue; - } - pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { if (mode == POPULATE_ZERO_SHADOW && @@ -281,7 +274,6 @@ void __init kasan_early_init(void) unsigned long shadow_alloc_size; unsigned long vmax_unlimited; unsigned long initrd_end; - unsigned long asce_type; unsigned long memsize; unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO); pte_t pte_z; @@ -297,32 +289,26 @@ void __init kasan_early_init(void) memsize = get_mem_detect_end(); if (!memsize) kasan_early_panic("cannot detect physical memory size\n"); - /* respect mem= 
cmdline parameter */ - if (memory_end_set && memsize > memory_end) - memsize = memory_end; - if (IS_ENABLED(CONFIG_CRASH_DUMP) && OLDMEM_BASE) - memsize = min(memsize, OLDMEM_SIZE); - memsize = min(memsize, KASAN_SHADOW_START); - - if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)) { - /* 4 level paging */ - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); - crst_table_init((unsigned long *)early_pg_dir, - _REGION2_ENTRY_EMPTY); - untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE; - if (has_uv_sec_stor_limit()) - kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr); - asce_type = _ASCE_TYPE_REGION2; - } else { - /* 3 level paging */ - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PUD_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)); - crst_table_init((unsigned long *)early_pg_dir, - _REGION3_ENTRY_EMPTY); - untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION2_SIZE; - asce_type = _ASCE_TYPE_REGION3; - } + /* + * Kasan currently supports standby memory but only if it follows + * online memory (default allocation), i.e. no memory holes. + * - memsize represents end of online memory + * - ident_map_size represents online + standby and memory limits + * accounted. + * Kasan maps "memsize" right away. + * [0, memsize] - as identity mapping + * [__sha(0), __sha(memsize)] - shadow memory for identity mapping + * The rest [memsize, ident_map_size] if memsize < ident_map_size + * could be mapped/unmapped dynamically later during memory hotplug. + */ + memsize = min(memsize, ident_map_size); + + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); + crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY); + untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE; + if (has_uv_sec_stor_limit()) + kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr); /* init kasan zero shadow */ crst_table_init((unsigned long *)kasan_early_shadow_p4d, @@ -388,27 +374,25 @@ void __init kasan_early_init(void) * +-----------------+ +- shadow end ---+ */ /* populate kasan shadow (for identity mapping and zero page mapping) */ - kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); + kasan_early_pgtable_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) untracked_mem_end = kasan_vmax - MODULES_LEN; if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_mem_end = kasan_vmax - vmalloc_size - MODULES_LEN; /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_early_vmemmap_populate(__sha(untracked_mem_end), - __sha(kasan_vmax), POPULATE_SHALLOW); + kasan_early_pgtable_populate(__sha(untracked_mem_end), __sha(kasan_vmax), + POPULATE_SHALLOW); } /* populate kasan shadow for untracked memory */ - kasan_early_vmemmap_populate(__sha(max_physmem_end), - __sha(untracked_mem_end), + kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); - kasan_early_vmemmap_populate(__sha(kasan_vmax), - __sha(vmax_unlimited), + kasan_early_pgtable_populate(__sha(kasan_vmax), __sha(vmax_unlimited), POPULATE_ZERO_SHADOW); /* memory allocated for identity mapping structs will be freed later */ pgalloc_freeable = pgalloc_pos; /* populate identity mapping */ - kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE); - kasan_set_pgd(early_pg_dir, asce_type); + kasan_early_pgtable_populate(0, memsize, POPULATE_ONE2ONE); + kasan_set_pgd(early_pg_dir, 
_ASCE_TYPE_REGION2); kasan_enable_dat(); /* enable kasan */ init_task.kasan_depth = 0; @@ -416,7 +400,7 @@ void __init kasan_early_init(void) sclp_early_printk("KernelAddressSanitizer initialized\n"); } -void __init kasan_copy_shadow(pgd_t *pg_dir) +void __init kasan_copy_shadow_mapping(void) { /* * At this point we are still running on early pages setup early_pg_dir, @@ -428,24 +412,13 @@ void __init kasan_copy_shadow(pgd_t *pg_dir) pgd_t *pg_dir_dst; p4d_t *p4_dir_src; p4d_t *p4_dir_dst; - pud_t *pu_dir_src; - pud_t *pu_dir_dst; pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START); - pg_dir_dst = pgd_offset_raw(pg_dir, KASAN_SHADOW_START); + pg_dir_dst = pgd_offset_raw(init_mm.pgd, KASAN_SHADOW_START); p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START); p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START); - if (!p4d_folded(*p4_dir_src)) { - /* 4 level paging */ - memcpy(p4_dir_dst, p4_dir_src, - (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t)); - return; - } - /* 3 level paging */ - pu_dir_src = pud_offset(p4_dir_src, KASAN_SHADOW_START); - pu_dir_dst = pud_offset(p4_dir_dst, KASAN_SHADOW_START); - memcpy(pu_dir_dst, pu_dir_src, - (KASAN_SHADOW_SIZE >> PUD_SHIFT) * sizeof(pud_t)); + memcpy(p4_dir_dst, p4_dir_src, + (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t)); } void __init kasan_free_early_identity(void) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 11d2c8395e2a..4e87c819ddea 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -70,19 +70,10 @@ static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; - /* we must change all active ASCEs to avoid the creation of new TLBs */ + /* change all active ASCEs to avoid the creation of new TLBs */ if (current->active_mm == mm) { S390_lowcore.user_asce = mm->context.asce; - if (current->thread.mm_segment == USER_DS) { - __ctl_load(S390_lowcore.user_asce, 1, 1); - /* Mark user-ASCE present in CR1 */ - clear_cpu_flag(CIF_ASCE_PRIMARY); - } - if (current->thread.mm_segment == USER_DS_SACF) { - __ctl_load(S390_lowcore.user_asce, 7, 7); - /* enable_sacf_uaccess does all or nothing */ - WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY)); - } + __ctl_load(S390_lowcore.user_asce, 7, 7); } __tlb_flush_local(); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b239f2ba93b0..01f3a5f58e64 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -76,20 +76,20 @@ static void vmem_pte_free(unsigned long *table) /* * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges - * from unused_pmd_start to next PMD_SIZE boundary. + * from unused_sub_pmd_start to next PMD_SIZE boundary. 
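Stepping back to the pgalloc.c hunk above: with set_fs() gone the register convention is fixed; while the kernel runs, CR1 holds the kernel ASCE and CR7 the user ASCE (exactly what the new debug_user_asce() in lib/uaccess.c asserts on kernel exit), so a page-table upgrade only has to refresh CR7 instead of reasoning about mm_segment state. An ASCE itself is just the table origin or'ed with designation bits, as in the new s390_invalid_asce from the mm/init.c hunk, which parks CR7 on a table that can translate nothing until a real mm is attached. A sketch of the composition; the _ASCE_* values below are quoted from memory of arch/s390/include/asm/pgtable.h and should be treated as illustrative:

#include <stdio.h>
#include <stdint.h>

#define _ASCE_TYPE_REGION3	0x04ULL	/* region-third-table designation */
#define _ASCE_TABLE_LENGTH	0x03ULL	/* full-length table */

int main(void)
{
	/* Pretend page-aligned address of invalid_pg_dir. */
	uint64_t invalid_pg_dir = 0x12345000ULL;
	uint64_t s390_invalid_asce;

	s390_invalid_asce  = invalid_pg_dir;
	s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	printf("s390_invalid_asce = %#llx\n",
	       (unsigned long long)s390_invalid_asce);
	return 0;
}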
*/ -static unsigned long unused_pmd_start; +static unsigned long unused_sub_pmd_start; -static void vmemmap_flush_unused_pmd(void) +static void vmemmap_flush_unused_sub_pmd(void) { - if (!unused_pmd_start) + if (!unused_sub_pmd_start) return; - memset(__va(unused_pmd_start), PAGE_UNUSED, - ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start); - unused_pmd_start = 0; + memset(__va(unused_sub_pmd_start), PAGE_UNUSED, + ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start); + unused_sub_pmd_start = 0; } -static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end) { /* * As we expect to add in the same granularity as we remove, it's @@ -106,24 +106,24 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) * We only optimize if the new used range directly follows the * previously unused range (esp., when populating consecutive sections). */ - if (unused_pmd_start == start) { - unused_pmd_start = end; - if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE))) - unused_pmd_start = 0; + if (unused_sub_pmd_start == start) { + unused_sub_pmd_start = end; + if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE))) + unused_sub_pmd_start = 0; return; } - vmemmap_flush_unused_pmd(); - __vmemmap_use_sub_pmd(start, end); + vmemmap_flush_unused_sub_pmd(); + vmemmap_mark_sub_pmd_used(start, end); } static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); - vmemmap_flush_unused_pmd(); + vmemmap_flush_unused_sub_pmd(); /* Could be our memmap page is filled with PAGE_UNUSED already ... */ - __vmemmap_use_sub_pmd(start, end); + vmemmap_mark_sub_pmd_used(start, end); /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ if (!IS_ALIGNED(start, PMD_SIZE)) @@ -134,7 +134,7 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) * unused range in the populated PMD. */ if (!IS_ALIGNED(end, PMD_SIZE)) - unused_pmd_start = end; + unused_sub_pmd_start = end; } /* Returns true if the PMD is completely unused and can be freed. 
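The renamed unused_sub_pmd helpers above (vmemmap_unuse_sub_pmd() follows just below) implement the bookkeeping that lets a 2 MB memmap block backing only part of a PMD be freed once every subsection is gone: unused byte ranges are stamped with the PAGE_UNUSED marker, and a block is freeable when no byte differs from that marker. A user-space miniature, with the block size shrunk and the marker value chosen arbitrarily:

#include <stdio.h>
#include <string.h>

#define PAGE_UNUSED	0xFD	/* marker byte; value is illustrative */
#define PMD_SIZE	64	/* shrunk from 2 MB for the demo */

static unsigned char block[PMD_SIZE];

/* vmemmap_use_sub_pmd() in miniature: a used range is simply real data. */
static void use_range(int start, int end)
{
	memset(block + start, 0, end - start);
}

/* vmemmap_unuse_sub_pmd() in miniature: stamp the range, then scan the
 * whole block the way memchr_inv(page, PAGE_UNUSED, PMD_SIZE) does. */
static int unuse_range(int start, int end)
{
	memset(block + start, PAGE_UNUSED, end - start);
	for (int i = 0; i < PMD_SIZE; i++)
		if (block[i] != PAGE_UNUSED)
			return 0;
	return 1;	/* completely unused: the PMD can be freed */
}

int main(void)
{
	memset(block, PAGE_UNUSED, sizeof(block));
	use_range(16, 32);
	use_range(32, 48);
	printf("free after unuse(16,32)? %d\n", unuse_range(16, 32));
	printf("free after unuse(32,48)? %d\n", unuse_range(32, 48));
	return 0;
}

The unused_sub_pmd_start cache on top of this avoids re-memset()ing when sections arrive in address order; the rename just makes it clear the variable tracks sub-PMD ranges, not PMDs.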
*/ @@ -142,7 +142,7 @@ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); - vmemmap_flush_unused_pmd(); + vmemmap_flush_unused_sub_pmd(); memset(__va(start), PAGE_UNUSED, end - start); return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); } @@ -223,7 +223,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, if (!add) { if (pmd_none(*pmd)) continue; - if (pmd_large(*pmd) && !add) { + if (pmd_large(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 570016ae8bcd..41df8fcfddde 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -851,8 +851,10 @@ static int __init pci_base_init(void) if (!s390_pci_probe) return 0; - if (!test_facility(69) || !test_facility(71)) + if (!test_facility(69) || !test_facility(71)) { + pr_info("PCI is not supported because CPU facilities 69 or 71 are not available\n"); return 0; + } if (test_facility(153) && !s390_pci_no_mio) { static_branch_enable(&have_mio); diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 75217fb63d7b..9dd5ad1b553d 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -179,9 +179,7 @@ static void zpci_handle_fallback_irq(void) if (atomic_inc_return(&cpu_data->scheduled) > 1) continue; - cpu_data->csd.func = zpci_handle_remote_irq; - cpu_data->csd.info = &cpu_data->scheduled; - cpu_data->csd.flags = 0; + INIT_CSD(&cpu_data->csd, zpci_handle_remote_irq, &cpu_data->scheduled); smp_call_function_single_async(cpu, &cpu_data->csd); } } diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 401cf670a243..18f2d10c3176 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -93,12 +93,10 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, { int size, rc = 0; u8 status = 0; - mm_segment_t old_fs; if (!src) return -EINVAL; - old_fs = enable_sacf_uaccess(); while (n > 0) { size = zpci_get_max_write_size((u64 __force) dst, (u64 __force) src, n, @@ -113,39 +111,20 @@ static inline int __memcpy_toio_inuser(void __iomem *dst, dst += size; n -= size; } - disable_sacf_uaccess(old_fs); if (rc) zpci_err_mmio(rc, status, (__force u64) dst); return rc; } -static long get_pfn(unsigned long user_addr, unsigned long access, - unsigned long *pfn) -{ - struct vm_area_struct *vma; - long ret; - - mmap_read_lock(current->mm); - ret = -EINVAL; - vma = find_vma(current->mm, user_addr); - if (!vma) - goto out; - ret = -EACCES; - if (!(vma->vm_flags & access)) - goto out; - ret = follow_pfn(vma, user_addr, pfn); -out: - mmap_read_unlock(current->mm); - return ret; -} - SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, const void __user *, user_buffer, size_t, length) { u8 local_buf[64]; void __iomem *io_addr; void *buf; - unsigned long pfn; + struct vm_area_struct *vma; + pte_t *ptep; + spinlock_t *ptl; long ret; if (!zpci_is_enabled()) @@ -158,7 +137,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, * We only support write access to MIO capable devices if we are on * a MIO enabled system. Otherwise we would have to check for every * address if it is a special ZPCI_ADDR and would have to do - * a get_pfn() which we don't need for MIO capable devices. Currently + * a pfn lookup which we don't need for MIO capable devices. Currently * ISM devices are the only devices without MIO support and there is no * known need for accessing these from userspace. 
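Two quick notes on the hunks above before the mmio_write body continues. The pci.c change finally says out loud why PCI quietly vanished on machines lacking CPU facilities 69 or 71. And in pci_irq.c the three open-coded assignments become INIT_CSD(); besides being shorter, an initializer of that style resets every member, so nothing like the old csd.flags = 0 can be forgotten. The idiom in isolation (struct layout and macro reconstructed from memory of include/linux/smp.h, so treat the details as a sketch):

#include <stdio.h>

struct call_single_data {
	void (*func)(void *info);
	void *info;
	unsigned int flags;
};

/* Compound-literal assignment: unnamed members become zero. */
#define INIT_CSD(csd, _func, _info)			\
do {							\
	*(csd) = (struct call_single_data){		\
		.func = (_func),			\
		.info = (_info),			\
	};						\
} while (0)

static void handler(void *info)
{
	printf("handler(%p)\n", info);
}

int main(void)
{
	struct call_single_data csd = { .flags = 0xdead };	/* stale */
	int cookie;

	INIT_CSD(&csd, handler, &cookie);
	printf("flags after INIT_CSD: %#x\n", csd.flags);	/* 0 */
	csd.func(csd.info);
	return 0;
}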
*/ @@ -176,21 +155,37 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, } else buf = local_buf; - ret = get_pfn(mmio_addr, VM_WRITE, &pfn); + ret = -EFAULT; + if (copy_from_user(buf, user_buffer, length)) + goto out_free; + + mmap_read_lock(current->mm); + ret = -EINVAL; + vma = find_vma(current->mm, mmio_addr); + if (!vma) + goto out_unlock_mmap; + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + goto out_unlock_mmap; + ret = -EACCES; + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock_mmap; + + ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); if (ret) - goto out; - io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | + goto out_unlock_mmap; + + io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); - ret = -EFAULT; if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) - goto out; - - if (copy_from_user(buf, user_buffer, length)) - goto out; + goto out_unlock_pt; ret = zpci_memcpy_toio(io_addr, buf, length); -out: +out_unlock_pt: + pte_unmap_unlock(ptep, ptl); +out_unlock_mmap: + mmap_read_unlock(current->mm); +out_free: if (buf != local_buf) kfree(buf); return ret; @@ -248,9 +243,7 @@ static inline int __memcpy_fromio_inuser(void __user *dst, { int size, rc = 0; u8 status; - mm_segment_t old_fs; - old_fs = enable_sacf_uaccess(); while (n > 0) { size = zpci_get_max_write_size((u64 __force) src, (u64 __force) dst, n, @@ -262,7 +255,6 @@ static inline int __memcpy_fromio_inuser(void __user *dst, dst += size; n -= size; } - disable_sacf_uaccess(old_fs); if (rc) zpci_err_mmio(rc, status, (__force u64) dst); return rc; @@ -274,7 +266,9 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, u8 local_buf[64]; void __iomem *io_addr; void *buf; - unsigned long pfn; + struct vm_area_struct *vma; + pte_t *ptep; + spinlock_t *ptl; long ret; if (!zpci_is_enabled()) @@ -287,7 +281,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, * We only support read access to MIO capable devices if we are on * a MIO enabled system. Otherwise we would have to check for every * address if it is a special ZPCI_ADDR and would have to do - * a get_pfn() which we don't need for MIO capable devices. Currently + * a pfn lookup which we don't need for MIO capable devices. Currently * ISM devices are the only devices without MIO support and there is no * known need for accessing these from userspace. 
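The rewritten s390_pci_mmio_write above no longer trusts a PFN handed back after the lookup has ended: it requires a VM_IO | VM_PFNMAP mapping, resolves the PTE under its page-table lock with follow_pte(), and rebuilds the I/O address from the page frame plus the sub-page offset while that lock is still held. The address arithmetic on its own (PAGE_SHIFT assumed to be 12, values invented):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	uint64_t mmio_addr = 0x3fff8000abcULL;	/* user virtual address */
	uint64_t pfn = 0xdeadbULL;		/* from pte_pfn(*ptep) */
	uint64_t io_addr;

	/* Exactly the recombination in the hunk:
	 * frame base | offset within the page. */
	io_addr = (pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK);

	printf("offset  = %#llx\n",
	       (unsigned long long)(mmio_addr & ~PAGE_MASK));
	printf("io_addr = %#llx\n", (unsigned long long)io_addr);
	return 0;
}

Holding the PTE lock until after the zpci_memcpy_toio() call closes the window in which the mapping could be torn down or repointed between lookup and access, a race the old get_pfn() scheme could not prevent.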
 */
@@ -306,22 +300,38 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
 		buf = local_buf;
 	}
 
-	ret = get_pfn(mmio_addr, VM_READ, &pfn);
+	mmap_read_lock(current->mm);
+	ret = -EINVAL;
+	vma = find_vma(current->mm, mmio_addr);
+	if (!vma)
+		goto out_unlock_mmap;
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out_unlock_mmap;
+	ret = -EACCES;
+	if (!(vma->vm_flags & VM_WRITE))
+		goto out_unlock_mmap;
+
+	ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl);
 	if (ret)
-		goto out;
-	io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+		goto out_unlock_mmap;
+
+	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+			(mmio_addr & ~PAGE_MASK));
 
 	if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
 		ret = -EFAULT;
-		goto out;
+		goto out_unlock_pt;
 	}
 	ret = zpci_memcpy_fromio(buf, io_addr, length);
-	if (ret)
-		goto out;
-	if (copy_to_user(user_buffer, buf, length))
+
+out_unlock_pt:
+	pte_unmap_unlock(ptep, ptl);
+out_unlock_mmap:
+	mmap_read_unlock(current->mm);
+
+	if (!ret && copy_to_user(user_buffer, buf, length))
 		ret = -EFAULT;
-out:
 	if (buf != local_buf)
 		kfree(buf);
 	return ret;

diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S
index 5a10ce34b95d..3d1c31e0cf3d 100644
--- a/arch/s390/purgatory/head.S
+++ b/arch/s390/purgatory/head.S
@@ -62,14 +62,15 @@
 	jh	10b
 .endm
 
-.macro START_NEXT_KERNEL base
+.macro START_NEXT_KERNEL base subcode
 	lg	%r4,kernel_entry-\base(%r13)
 	lg	%r5,load_psw_mask-\base(%r13)
 	ogr	%r4,%r5
 	stg	%r4,0(%r0)
 
 	xgr	%r0,%r0
-	diag	%r0,%r0,0x308
+	lghi	%r1,\subcode
+	diag	%r0,%r1,0x308
 .endm
 
 .text
@@ -123,7 +124,7 @@ ENTRY(purgatory_start)
 	je	.start_crash_kernel
 
 	/* start normal kernel */
-	START_NEXT_KERNEL .base_crash
+	START_NEXT_KERNEL .base_crash 0
 
 .return_old_kernel:
 	lmg	%r6,%r15,gprregs-.base_crash(%r13)
@@ -227,7 +228,7 @@ ENTRY(purgatory_start)
 	MEMCPY	%r9,%r10,%r11
 
 	/* start crash kernel */
-	START_NEXT_KERNEL .base_dst
+	START_NEXT_KERNEL .base_dst 1
 
 load_psw_mask:

diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
index 0a423bcf6746..030efda05dbe 100644
--- a/arch/s390/purgatory/purgatory.c
+++ b/arch/s390/purgatory/purgatory.c
@@ -9,7 +9,7 @@
 
 #include <linux/kexec.h>
 #include <linux/string.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
 #include <asm/purgatory.h>
 
 int verify_sha256_digest(void)
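The head.S change above parameterizes START_NEXT_KERNEL with the diag 0x308 subcode (0 for the normal kernel, 1 for the crash kernel) instead of hard-coding it, and the purgatory.c hunk only tracks the split of crypto/sha.h into sha1.h and sha2.h. Still, it is worth recalling what verify_sha256_digest() guards: purgatory re-hashes the kexec segments that were loaded earlier and refuses to start them if the digest no longer matches the one recorded at kexec_file_load() time. A stand-alone model of that control flow, with a toy rolling checksum standing in for SHA-256 (the region struct and 8-byte digest are invented for the demo):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct sha_region {
	const uint8_t *start;
	size_t len;
};

/* Toy incremental "digest" in place of sha256_update()/sha256_final(). */
static void toy_update(uint8_t state[8], const uint8_t *buf, size_t len)
{
	for (size_t i = 0; i < len; i++)
		state[i % 8] ^= (uint8_t)(buf[i] + i);
}

/* Same control flow as verify_sha256_digest(): hash every region into
 * one running state, compare against the expected digest, and return
 * non-zero (refuse to boot) on mismatch. */
static int verify_digest(const struct sha_region *r, int nr,
			 const uint8_t expected[8])
{
	uint8_t digest[8] = { 0 };

	for (int i = 0; i < nr; i++)
		toy_update(digest, r[i].start, r[i].len);
	return memcmp(digest, expected, 8) ? 1 : 0;
}

int main(void)
{
	uint8_t kernel_blob[] = "pretend kernel image";
	struct sha_region regions[] = {
		{ kernel_blob, sizeof(kernel_blob) },
	};
	uint8_t expected[8] = { 0 };

	toy_update(expected, kernel_blob, sizeof(kernel_blob));
	printf("pristine: %d (0 == boot)\n",
	       verify_digest(regions, 1, expected));

	kernel_blob[0] ^= 0xff;		/* simulate corruption */
	printf("tampered: %d (non-zero == refuse)\n",
	       verify_digest(regions, 1, expected));
	return 0;
}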