Diffstat (limited to 'arch/s390')
-rw-r--r--  arch/s390/Kconfig | 43
-rw-r--r--  arch/s390/Makefile | 11
-rw-r--r--  arch/s390/Makefile.postlink | 6
-rw-r--r--  arch/s390/appldata/appldata_base.c | 2
-rw-r--r--  arch/s390/boot/als.c | 10
-rw-r--r--  arch/s390/boot/boot.h | 27
-rw-r--r--  arch/s390/boot/decompressor.c | 12
-rw-r--r--  arch/s390/boot/ipl_parm.c | 20
-rw-r--r--  arch/s390/boot/ipl_report.c | 3
-rw-r--r--  arch/s390/boot/kaslr.c | 4
-rw-r--r--  arch/s390/boot/pgm_check_info.c | 53
-rw-r--r--  arch/s390/boot/physmem_info.c | 103
-rw-r--r--  arch/s390/boot/printk.c | 224
-rw-r--r--  arch/s390/boot/startup.c | 64
-rw-r--r--  arch/s390/boot/vmem.c | 161
-rw-r--r--  arch/s390/configs/debug_defconfig | 7
-rw-r--r--  arch/s390/configs/defconfig | 6
-rw-r--r--  arch/s390/configs/kasan.config | 2
-rw-r--r--  arch/s390/configs/zfcpdump_defconfig | 1
-rw-r--r--  arch/s390/crypto/Kconfig | 12
-rw-r--r--  arch/s390/crypto/Makefile | 2
-rw-r--r--  arch/s390/crypto/crc32-vx.c | 306
-rw-r--r--  arch/s390/include/asm/abs_lowcore.h | 4
-rw-r--r--  arch/s390/include/asm/asm-extable.h | 14
-rw-r--r--  arch/s390/include/asm/asm.h | 2
-rw-r--r--  arch/s390/include/asm/atomic.h | 68
-rw-r--r--  arch/s390/include/asm/atomic_ops.h | 121
-rw-r--r--  arch/s390/include/asm/bitops.h | 213
-rw-r--r--  arch/s390/include/asm/boot_data.h | 51
-rw-r--r--  arch/s390/include/asm/checksum.h | 2
-rw-r--r--  arch/s390/include/asm/css_chars.h | 2
-rw-r--r--  arch/s390/include/asm/debug.h | 7
-rw-r--r--  arch/s390/include/asm/diag.h | 2
-rw-r--r--  arch/s390/include/asm/ebcdic.h | 16
-rw-r--r--  arch/s390/include/asm/fprobe.h | 10
-rw-r--r--  arch/s390/include/asm/fpu-insn.h | 189
-rw-r--r--  arch/s390/include/asm/ftrace.h | 38
-rw-r--r--  arch/s390/include/asm/futex.h | 107
-rw-r--r--  arch/s390/include/asm/gmap.h | 20
-rw-r--r--  arch/s390/include/asm/hugetlb.h | 23
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 6
-rw-r--r--  arch/s390/include/asm/page-states.h | 3
-rw-r--r--  arch/s390/include/asm/page.h | 4
-rw-r--r--  arch/s390/include/asm/pgalloc.h | 40
-rw-r--r--  arch/s390/include/asm/pgtable.h | 147
-rw-r--r--  arch/s390/include/asm/physmem_info.h | 4
-rw-r--r--  arch/s390/include/asm/preempt.h | 83
-rw-r--r--  arch/s390/include/asm/processor.h | 3
-rw-r--r--  arch/s390/include/asm/sclp.h | 36
-rw-r--r--  arch/s390/include/asm/tlb.h | 12
-rw-r--r--  arch/s390/include/asm/uaccess.h | 548
-rw-r--r--  arch/s390/include/asm/uv.h | 6
-rw-r--r--  arch/s390/include/uapi/asm/diag.h | 32
-rw-r--r--  arch/s390/kernel/Makefile | 3
-rw-r--r--  arch/s390/kernel/abs_lowcore.c | 1
-rw-r--r--  arch/s390/kernel/asm-offsets.c | 6
-rw-r--r--  arch/s390/kernel/cpacf.c | 36
-rw-r--r--  arch/s390/kernel/crash_dump.c | 43
-rw-r--r--  arch/s390/kernel/debug.c | 243
-rw-r--r--  arch/s390/kernel/diag/Makefile | 1
-rw-r--r--  arch/s390/kernel/diag/diag.c (renamed from arch/s390/kernel/diag.c) | 4
-rw-r--r--  arch/s390/kernel/diag/diag310.c | 276
-rw-r--r--  arch/s390/kernel/diag/diag324.c | 224
-rw-r--r--  arch/s390/kernel/diag/diag_ioctl.h | 14
-rw-r--r--  arch/s390/kernel/diag/diag_misc.c | 63
-rw-r--r--  arch/s390/kernel/early.c | 3
-rw-r--r--  arch/s390/kernel/entry.S | 20
-rw-r--r--  arch/s390/kernel/entry.h | 1
-rw-r--r--  arch/s390/kernel/ftrace.c | 43
-rw-r--r--  arch/s390/kernel/hiperdispatch.c | 2
-rw-r--r--  arch/s390/kernel/ipl.c | 144
-rw-r--r--  arch/s390/kernel/lgr.c | 2
-rw-r--r--  arch/s390/kernel/mcount.S | 23
-rw-r--r--  arch/s390/kernel/numa.c | 8
-rw-r--r--  arch/s390/kernel/os_info.c | 1
-rw-r--r--  arch/s390/kernel/perf_cpum_cf.c | 2
-rw-r--r--  arch/s390/kernel/perf_cpum_sf.c | 6
-rw-r--r--  arch/s390/kernel/perf_pai_crypto.c | 2
-rw-r--r--  arch/s390/kernel/perf_pai_ext.c | 2
-rw-r--r--  arch/s390/kernel/setup.c | 90
-rw-r--r--  arch/s390/kernel/smp.c | 11
-rw-r--r--  arch/s390/kernel/text_amode31.S | 3
-rw-r--r--  arch/s390/kernel/time.c | 4
-rw-r--r--  arch/s390/kernel/topology.c | 25
-rw-r--r--  arch/s390/kernel/uv.c | 292
-rw-r--r--  arch/s390/kernel/vdso64/Makefile | 2
-rw-r--r--  arch/s390/kernel/vmcore_info.c | 3
-rw-r--r--  arch/s390/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/s390/kvm/Makefile | 2
-rw-r--r--  arch/s390/kvm/gaccess.c | 44
-rw-r--r--  arch/s390/kvm/gmap-vsie.c | 142
-rw-r--r--  arch/s390/kvm/gmap.c | 212
-rw-r--r--  arch/s390/kvm/gmap.h | 39
-rw-r--r--  arch/s390/kvm/intercept.c | 7
-rw-r--r--  arch/s390/kvm/interrupt.c | 25
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 237
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 19
-rw-r--r--  arch/s390/kvm/pv.c | 21
-rw-r--r--  arch/s390/kvm/vsie.c | 108
-rw-r--r--  arch/s390/lib/Makefile | 3
-rw-r--r--  arch/s390/lib/crc32-glue.c | 92
-rw-r--r--  arch/s390/lib/crc32-vx.h (renamed from arch/s390/crypto/crc32-vx.h) | 0
-rw-r--r--  arch/s390/lib/crc32be-vx.c (renamed from arch/s390/crypto/crc32be-vx.c) | 0
-rw-r--r--  arch/s390/lib/crc32le-vx.c (renamed from arch/s390/crypto/crc32le-vx.c) | 0
-rw-r--r--  arch/s390/lib/mem.S | 15
-rw-r--r--  arch/s390/lib/uaccess.c | 90
-rw-r--r--  arch/s390/lib/xor.c | 61
-rw-r--r--  arch/s390/mm/cmm.c | 4
-rw-r--r--  arch/s390/mm/extable.c | 30
-rw-r--r--  arch/s390/mm/gmap.c | 681
-rw-r--r--  arch/s390/mm/init.c | 9
-rw-r--r--  arch/s390/mm/maccess.c | 1
-rw-r--r--  arch/s390/mm/mmap.c | 42
-rw-r--r--  arch/s390/mm/pageattr.c | 6
-rw-r--r--  arch/s390/mm/pgalloc.c | 30
-rw-r--r--  arch/s390/mm/pgtable.c | 2
-rw-r--r--  arch/s390/mm/vmem.c | 10
-rw-r--r--  arch/s390/pci/Makefile | 2
-rw-r--r--  arch/s390/pci/pci_bus.c | 21
-rw-r--r--  arch/s390/pci/pci_event.c | 21
-rw-r--r--  arch/s390/pci/pci_iov.c | 56
-rw-r--r--  arch/s390/pci/pci_iov.h | 7
-rw-r--r--  arch/s390/pci/pci_report.c | 158
-rw-r--r--  arch/s390/pci/pci_report.h | 16
-rw-r--r--  arch/s390/pci/pci_sysfs.c | 12
-rw-r--r--  arch/s390/purgatory/Makefile | 6
-rw-r--r--  arch/s390/tools/gen_opcode_table.c | 27
127 files changed, 3983 insertions, 2771 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 0077969170e8..9c9ec08d78c7 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -52,13 +52,19 @@ config KASAN_SHADOW_OFFSET
depends on KASAN
default 0x1C000000000000
-config GCC_ASM_FLAG_OUTPUT_BROKEN
+config CC_ASM_FLAG_OUTPUT_BROKEN
def_bool CC_IS_GCC && GCC_VERSION < 140200
help
GCC versions before 14.2.0 may die with an internal
compiler error in some configurations if flag output
operands are used within inline assemblies.
+config CC_HAS_ASM_AOR_FORMAT_FLAGS
+ def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100)
+ help
+ Clang versions before 19.1.0 do not support A,
+ O, and R inline assembly format flags.
+
config S390
def_bool y
#
@@ -72,6 +78,8 @@ config S390
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+ select ARCH_HAS_CPU_FINALIZE_INIT
+ select ARCH_HAS_CRC32
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
@@ -183,16 +191,18 @@ config S390
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+ select HAVE_FTRACE_REGS_HAVING_PT_REGS
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_GUP_FAST
select HAVE_FENTRY
+ select HAVE_FTRACE_GRAPH_FUNC
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUNCTION_GRAPH_RETVAL
+ select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
@@ -233,6 +243,7 @@ config S390
select HAVE_VIRT_CPU_ACCOUNTING_IDLE
select IOMMU_HELPER if PCI
select IOMMU_SUPPORT if PCI
+ select KASAN_VMALLOC if KASAN
select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_MERGE_VMAS
select MMU_GATHER_NO_GATHER
@@ -240,6 +251,7 @@ config S390
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PCI
select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PROC_VMCORE_DEVICE_RAM if PROC_VMCORE
select NEED_SG_DMA_LENGTH if PCI
select OLD_SIGACTION
select OLD_SIGSUSPEND3
@@ -255,6 +267,7 @@ config S390
select USER_STACKTRACE_SUPPORT
select VDSO_GETRANDOM
select VIRT_CPU_ACCOUNTING
+ select VMAP_STACK
select ZONE_DMA
# Note: keep the above list sorted alphabetically
@@ -688,32 +701,6 @@ config MAX_PHYSMEM_BITS
Increasing the number of bits also increases the kernel image size.
By default 46 bits (64TB) are supported.
-config CHECK_STACK
- def_bool y
- depends on !VMAP_STACK
- prompt "Detect kernel stack overflow"
- help
- This option enables the compiler option -mstack-guard and
- -mstack-size if they are available. If the compiler supports them
- it will emit additional code to each function prolog to trigger
- an illegal operation if the kernel stack is about to overflow.
-
- Say N if you are unsure.
-
-config STACK_GUARD
- int "Size of the guard area (128-1024)"
- range 128 1024
- depends on CHECK_STACK
- default "256"
- help
- This allows you to specify the size of the guard area at the lower
- end of the kernel stack. If the kernel stack points into the guard
- area on function entry an illegal operation is triggered. The size
- needs to be a power of 2. Please keep in mind that the size of an
- interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit.
- The minimum size for the stack guard should be 256 for 31 bit and
- 512 for 64 bit.
-
endmenu
menu "I/O subsystem"
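
Regarding the CC_ASM_FLAG_OUTPUT_BROKEN and CC_HAS_ASM_AOR_FORMAT_FLAGS options at the top of this file's changes: the flag-output machinery they gate looks roughly like the following (a hypothetical sketch, not code from this patch; "=@cc" is the s390 flag-output constraint, and the internal compiler error described in the help text can trigger on asm like this with affected GCC versions):

	/* Hypothetical: return the condition code (0..3) of a compare
	 * directly via a flag-output operand, instead of extracting it
	 * with an extra ipm/srl sequence. */
	static inline int cmp_cc(unsigned long a, unsigned long b)
	{
		int cc;

		asm volatile("clgr %[a],%[b]"
			     : "=@cc" (cc)
			     : [a] "d" (a), [b] "d" (b));
		return cc;	/* 0: equal, 1: a < b, 2: a > b */
	}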
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 7fd57398221e..5fae311203c2 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -22,7 +22,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
ifndef CONFIG_AS_IS_LLVM
KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
endif
-KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
+KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
@@ -72,15 +72,6 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
-ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
- CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
- ifeq ($(call cc-option,-mstack-size=8192),)
- CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
- endif
- export CC_FLAGS_CHECK_STACK
- cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
-endif
-
ifdef CONFIG_EXPOLINE
ifdef CONFIG_EXPOLINE_EXTERN
CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern
diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink
index df82f5410769..1ae5478cd6ac 100644
--- a/arch/s390/Makefile.postlink
+++ b/arch/s390/Makefile.postlink
@@ -11,6 +11,7 @@ __archpost:
-include include/config/auto.conf
include $(srctree)/scripts/Kbuild.include
+include $(srctree)/scripts/Makefile.lib
CMD_RELOCS=arch/s390/tools/relocs
OUT_RELOCS = arch/s390/boot
@@ -19,11 +20,6 @@ quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/relocs.S
mkdir -p $(OUT_RELOCS); \
$(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S
-quiet_cmd_strip_relocs = RSTRIP $@
- cmd_strip_relocs = \
- $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \
- --remove-section='.rela.*' --remove-section='.rela__*' $@
-
vmlinux: FORCE
$(call cmd,relocs)
$(call cmd,strip_relocs)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 91a30e017d65..dd7ba7587dd5 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -52,7 +52,7 @@ static int appldata_interval_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table_header *appldata_sysctl_header;
-static struct ctl_table appldata_table[] = {
+static const struct ctl_table appldata_table[] = {
{
.procname = "timer",
.mode = S_IRUGO | S_IWUSR,
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
index 11e0c3d5dbc8..79afb5fa7f1f 100644
--- a/arch/s390/boot/als.c
+++ b/arch/s390/boot/als.c
@@ -46,7 +46,7 @@ void print_missing_facilities(void)
* z/VM adds a four character prefix.
*/
if (strlen(als_str) > 70) {
- boot_printk("%s\n", als_str);
+ boot_emerg("%s\n", als_str);
*als_str = '\0';
}
u16_to_decimal(val_str, i * BITS_PER_LONG + j);
@@ -54,7 +54,7 @@ void print_missing_facilities(void)
first = 0;
}
}
- boot_printk("%s\n", als_str);
+ boot_emerg("%s\n", als_str);
}
static void facility_mismatch(void)
@@ -62,10 +62,10 @@ static void facility_mismatch(void)
struct cpuid id;
get_cpu_id(&id);
- boot_printk("The Linux kernel requires more recent processor hardware\n");
- boot_printk("Detected machine-type number: %4x\n", id.machine);
+ boot_emerg("The Linux kernel requires more recent processor hardware\n");
+ boot_emerg("Detected machine-type number: %4x\n", id.machine);
print_missing_facilities();
- boot_printk("See Principles of Operations for facility bits\n");
+ boot_emerg("See Principles of Operations for facility bits\n");
disabled_wait();
}
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 7521a9d75fa2..69f261566a64 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -8,12 +8,12 @@
#ifndef __ASSEMBLY__
+#include <linux/printk.h>
#include <asm/physmem_info.h>
struct machine_info {
unsigned char has_edat1 : 1;
unsigned char has_edat2 : 1;
- unsigned char has_nx : 1;
};
struct vmlinux_info {
@@ -48,13 +48,16 @@ void physmem_set_usable_limit(unsigned long limit);
void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
void physmem_free(enum reserved_range_type type);
/* for continuous/multiple allocations per type */
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align);
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align);
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom);
/* for single allocations, 1 per type */
unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
unsigned long align, unsigned long min, unsigned long max,
bool die_on_oom);
unsigned long get_physmem_alloc_pos(void);
+void dump_physmem_reserved(void);
bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
unsigned long *intersection_start);
bool is_ipl_block_dump(void);
@@ -70,12 +73,28 @@ void print_pgm_check_info(void);
unsigned long randomize_within_range(unsigned long size, unsigned long align,
unsigned long min, unsigned long max);
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit);
-void __printf(1, 2) boot_printk(const char *fmt, ...);
+int __printf(1, 2) boot_printk(const char *fmt, ...);
void print_stacktrace(unsigned long sp);
void error(char *m);
int get_random(unsigned long limit, unsigned long *value);
+void boot_rb_dump(void);
+
+#ifndef boot_fmt
+#define boot_fmt(fmt) fmt
+#endif
+
+#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__)
extern struct machine_info machine;
+extern int boot_console_loglevel;
+extern bool boot_ignore_loglevel;
/* Symbols defined by linker scripts */
extern const char kernel_version[];
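
The intended usage pattern for these macros, as the later hunks in this series show (physmem_info.c, startup.c and vmem.c below all do this): a boot source file defines boot_fmt() before its includes, and every boot_*() message then carries both a printk loglevel and a subsystem prefix. A minimal sketch with a hypothetical prefix:

	#define boot_fmt(fmt) "example: " fmt	/* must precede the include */
	#include "boot.h"

	static void report_range(unsigned long start, unsigned long end)
	{
		/* expands to boot_printk(KERN_DEBUG "example: range ...\n", ...) */
		boot_debug("range 0x%lx-0x%lx\n", start, end);
	}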
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index f478e8e9cbda..03500b9d9fb9 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
+#include <asm/boot_data.h>
#include <asm/page.h>
#include "decompressor.h"
#include "boot.h"
@@ -63,6 +64,15 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
#include "../../../../lib/decompress_unzstd.c"
#endif
+static void decompress_error(char *m)
+{
+ if (bootdebug)
+ boot_rb_dump();
+ boot_emerg("Decompression error: %s\n", m);
+ boot_emerg(" -- System halted\n");
+ disabled_wait();
+}
+
unsigned long mem_safe_offset(void)
{
return ALIGN(free_mem_end_ptr, PAGE_SIZE);
@@ -71,5 +81,5 @@ unsigned long mem_safe_offset(void)
void deploy_kernel(void *output)
{
__decompress(_compressed_start, _compressed_end - _compressed_start,
- NULL, NULL, output, vmlinux.image_size, NULL, error);
+ NULL, NULL, output, vmlinux.image_size, NULL, decompress_error);
}
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 557462e62cd7..d3731f2983b7 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -215,7 +215,7 @@ static void check_cleared_facilities(void)
for (i = 0; i < ARRAY_SIZE(als); i++) {
if ((stfle_fac_list[i] & als[i]) != als[i]) {
- boot_printk("Warning: The Linux kernel requires facilities cleared via command line option\n");
+ boot_emerg("The Linux kernel requires facilities cleared via command line option\n");
print_missing_facilities();
break;
}
@@ -313,5 +313,23 @@ void parse_boot_command_line(void)
#endif
if (!strcmp(param, "relocate_lowcore") && test_facility(193))
relocate_lowcore = 1;
+ if (!strcmp(param, "earlyprintk"))
+ boot_earlyprintk = true;
+ if (!strcmp(param, "debug"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
+ if (!strcmp(param, "bootdebug")) {
+ bootdebug = true;
+ if (val)
+ strncpy(bootdebug_filter, val, sizeof(bootdebug_filter) - 1);
+ }
+ if (!strcmp(param, "quiet"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET;
+ if (!strcmp(param, "ignore_loglevel"))
+ boot_ignore_loglevel = true;
+ if (!strcmp(param, "loglevel")) {
+ boot_console_loglevel = simple_strtoull(val, NULL, 10);
+ if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN)
+ boot_console_loglevel = CONSOLE_LOGLEVEL_MIN;
+ }
}
}
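
Taken together, a hypothetical kernel parameter line exercising the new options could look like:

	earlyprintk bootdebug=vmem,startup ignore_loglevel

which enables the early boot console, restricts debug messages to those matching the bootdebug filter (matched against the message text, which begins with the boot_fmt() prefix), and prints regardless of the console loglevel. The exact combination is illustrative; only the option names come from the code above.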
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index d00898852a88..f73cd757a5f7 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void)
{
struct ipl_rb_certificate_entry *cert;
struct ipl_rb_component_entry *comp;
- size_t size;
/*
* Find the length for the IPL report boot data
@@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void)
return;
size = get_cert_comp_list_size();
- early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
+ early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int));
ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
copy_components_bootdata();
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index f864d2bff775..941f4c9e27cc 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -32,7 +32,7 @@ struct prng_parm {
static int check_prng(void)
{
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) {
- boot_printk("KASLR disabled: CPU has no PRNG\n");
+ boot_warn("KASLR disabled: CPU has no PRNG\n");
return 0;
}
if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
@@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a
* cannot have chains.
*
* On the other hand, "dynamic" or "repetitive" allocations are done via
- * physmem_alloc_top_down(). These allocations are tightly packed together
+ * physmem_alloc_or_die(). These allocations are tightly packed together
* top down from the end of online memory. physmem_alloc_pos represents
* current position where those allocations start.
*
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index 5abe59fb3bc0..633f11600aab 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -17,13 +17,14 @@ void print_stacktrace(unsigned long sp)
(unsigned long)_stack_end };
bool first = true;
- boot_printk("Call Trace:\n");
+ boot_emerg("Call Trace:\n");
while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
struct stack_frame *sf = (struct stack_frame *)sp;
- boot_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" :
- " sp:%016lx [<%016lx>] %pS\n",
- sp, sf->gprs[8], (void *)sf->gprs[8]);
+ if (first)
+ boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
+ else
+ boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
if (sf->back_chain <= sp)
break;
sp = sf->back_chain;
@@ -36,30 +37,30 @@ void print_pgm_check_info(void)
unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area;
struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area);
- boot_printk("Linux version %s\n", kernel_version);
+ if (bootdebug)
+ boot_rb_dump();
+ boot_emerg("Linux version %s\n", kernel_version);
if (!is_prot_virt_guest() && early_command_line[0])
- boot_printk("Kernel command line: %s\n", early_command_line);
- boot_printk("Kernel fault: interruption code %04x ilc:%x\n",
- get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1);
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Kernel fault: interruption code %04x ilc:%d\n",
+ get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1);
if (kaslr_enabled()) {
- boot_printk("Kernel random base: %lx\n", __kaslr_offset);
- boot_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys);
+ boot_emerg("Kernel random base: %lx\n", __kaslr_offset);
+ boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys);
}
- boot_printk("PSW : %016lx %016lx (%pS)\n",
- get_lowcore()->psw_save_area.mask,
- get_lowcore()->psw_save_area.addr,
- (void *)get_lowcore()->psw_save_area.addr);
- boot_printk(
- " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
- psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
- psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri,
- psw->eaba);
- boot_printk("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
- boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
- boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
- boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
+ boot_emerg("PSW : %016lx %016lx (%pS)\n",
+ get_lowcore()->psw_save_area.mask,
+ get_lowcore()->psw_save_area.addr,
+ (void *)get_lowcore()->psw_save_area.addr);
+ boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
+ psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
+ psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba);
+ boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
print_stacktrace(get_lowcore()->gpregs_save_area[15]);
- boot_printk("Last Breaking-Event-Address:\n");
- boot_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break,
- (void *)get_lowcore()->pgm_last_break);
+ boot_emerg("Last Breaking-Event-Address:\n");
+ boot_emerg(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break,
+ (void *)get_lowcore()->pgm_last_break);
}
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
index 7617aa2d2f7e..aa096ef68e8c 100644
--- a/arch/s390/boot/physmem_info.c
+++ b/arch/s390/boot/physmem_info.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "physmem: " fmt
#include <linux/processor.h>
#include <linux/errno.h>
#include <linux/init.h>
@@ -28,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n)
return &physmem_info.online[n];
if (unlikely(!physmem_info.online_extended)) {
physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
- RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
+ RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
physmem_alloc_pos, true);
}
return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
@@ -207,11 +208,16 @@ unsigned long detect_max_physmem_end(void)
max_physmem_end = search_mem_end();
physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
}
+ boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end,
+ get_physmem_info_source());
return max_physmem_end;
}
void detect_physmem_online_ranges(unsigned long max_physmem_end)
{
+ unsigned long start, end;
+ int i;
+
if (!sclp_early_read_storage_info()) {
physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
} else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) {
@@ -226,12 +232,16 @@ void detect_physmem_online_ranges(unsigned long max_physmem_end)
} else if (max_physmem_end) {
add_physmem_online_range(0, max_physmem_end);
}
+ boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source());
+ for_each_physmem_online_range(i, &start, &end)
+ boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end);
}
void physmem_set_usable_limit(unsigned long limit)
{
physmem_info.usable = limit;
physmem_alloc_pos = limit;
+ boot_debug("Usable memory limit: 0x%016lx\n", limit);
}
static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
@@ -241,38 +251,47 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min,
enum reserved_range_type t;
int i;
- boot_printk("Linux version %s\n", kernel_version);
+ boot_emerg("Linux version %s\n", kernel_version);
if (!is_prot_virt_guest() && early_command_line[0])
- boot_printk("Kernel command line: %s\n", early_command_line);
- boot_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
- size, align, min, max);
- boot_printk("Reserved memory ranges:\n");
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n",
+ size, align, min, max);
+ boot_emerg("Reserved memory ranges:\n");
for_each_physmem_reserved_range(t, range, &start, &end) {
- boot_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
+ boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
total_reserved_mem += end - start;
}
- boot_printk("Usable online memory ranges (info source: %s [%x]):\n",
- get_physmem_info_source(), physmem_info.info_source);
+ boot_emerg("Usable online memory ranges (info source: %s [%d]):\n",
+ get_physmem_info_source(), physmem_info.info_source);
for_each_physmem_usable_range(i, &start, &end) {
- boot_printk("%016lx %016lx\n", start, end);
+ boot_emerg("%016lx %016lx\n", start, end);
total_mem += end - start;
}
- boot_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
- total_mem, total_reserved_mem,
- total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
+ boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n",
+ total_mem, total_reserved_mem,
+ total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
print_stacktrace(current_frame_address());
- boot_printk("\n\n -- System halted\n");
+ boot_emerg(" -- System halted\n");
disabled_wait();
}
-void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
{
physmem_info.reserved[type].start = addr;
physmem_info.reserved[type].end = addr + size;
}
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size,
+ get_rr_type_name(type));
+}
+
void physmem_free(enum reserved_range_type type)
{
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start,
+ physmem_info.reserved[type].end, get_rr_type_name(type));
physmem_info.reserved[type].start = 0;
physmem_info.reserved[type].end = 0;
}
@@ -339,41 +358,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s
max = min(max, physmem_alloc_pos);
addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
if (addr)
- physmem_reserve(type, addr, size);
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size,
+ get_rr_type_name(type));
return addr;
}
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align)
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom)
{
struct reserved_range *range = &physmem_info.reserved[type];
- struct reserved_range *new_range;
+ struct reserved_range *new_range = NULL;
unsigned int ranges_left;
unsigned long addr;
addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
- &ranges_left, true);
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
/* if not a consecutive allocation of the same type or first allocation */
if (range->start != addr + size) {
if (range->end) {
- physmem_alloc_pos = __physmem_alloc_range(
- sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
- physmem_alloc_ranges, &ranges_left, true);
- new_range = (struct reserved_range *)physmem_alloc_pos;
+ addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0,
+ physmem_alloc_pos, physmem_alloc_ranges,
+ &ranges_left, true);
+ new_range = (struct reserved_range *)addr;
+ addr = __physmem_alloc_range(size, align, 0, addr, ranges_left,
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
*new_range = *range;
range->chain = new_range;
- addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
- ranges_left, &ranges_left, true);
}
range->end = addr + size;
}
+ if (type != RR_VMEM) {
+ boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:",
+ addr, addr + size, get_rr_type_name(type), align, !!new_range);
+ }
range->start = addr;
physmem_alloc_pos = addr;
physmem_alloc_ranges = ranges_left;
return addr;
}
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align)
+{
+ return physmem_alloc(type, size, align, true);
+}
+
unsigned long get_physmem_alloc_pos(void)
{
return physmem_alloc_pos;
}
+
+void dump_physmem_reserved(void)
+{
+ struct reserved_range *range;
+ enum reserved_range_type t;
+ unsigned long start, end;
+
+ boot_debug("Reserved memory ranges:\n");
+ for_each_physmem_reserved_range(t, range, &start, &end) {
+ if (end) {
+ boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n",
+ get_rr_type_name(t), start, end, (unsigned long)range,
+ (unsigned long)range->chain);
+ }
+ }
+}
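
A condensed user-space model of the range chaining that physmem_alloc() implements above (hypothetical; the real code allocates the spill node from physmem itself, top down):

	#include <stdio.h>
	#include <stdlib.h>

	struct reserved_range {
		unsigned long start, end;
		struct reserved_range *chain;
	};

	/* Record a new top-down allocation [addr, addr + size) of one type. */
	static void record_alloc(struct reserved_range *range,
				 unsigned long addr, unsigned long size)
	{
		if (range->start != addr + size) {	/* not adjacent */
			if (range->end) {		/* spill the old range */
				struct reserved_range *old = malloc(sizeof(*old));

				*old = *range;
				range->chain = old;
			}
			range->end = addr + size;
		}
		range->start = addr;			/* range grows downwards */
	}

	int main(void)
	{
		struct reserved_range r = { 0, 0, NULL };

		record_alloc(&r, 0x9000, 0x1000);	/* first allocation */
		record_alloc(&r, 0x8000, 0x1000);	/* adjacent: extends range */
		record_alloc(&r, 0x5000, 0x1000);	/* gap: old range is chained */
		printf("live 0x%lx-0x%lx, chained 0x%lx-0x%lx\n",
		       r.start, r.end, r.chain->start, r.chain->end);
		return 0;
	}

This is the structure that dump_physmem_reserved() walks: one live range per type plus the chain of spilled ranges.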
diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c
index 35f18f2b936e..b4c66fa667d5 100644
--- a/arch/s390/boot/printk.c
+++ b/arch/s390/boot/printk.c
@@ -5,21 +5,111 @@
#include <linux/ctype.h>
#include <asm/stacktrace.h>
#include <asm/boot_data.h>
+#include <asm/sections.h>
#include <asm/lowcore.h>
#include <asm/setup.h>
#include <asm/sclp.h>
#include <asm/uv.h>
#include "boot.h"
+int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT;
+bool boot_ignore_loglevel;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+static void boot_rb_add(const char *str, size_t len)
+{
+ /* leave double '\0' in the end */
+ size_t avail = sizeof(boot_rb) - boot_rb_off - 1;
+
+ /* store strings separated by '\0' */
+ if (len + 1 > avail)
+ boot_rb_off = 0;
+ strcpy(boot_rb + boot_rb_off, str);
+ boot_rb_off += len + 1;
+}
+
+static void print_rb_entry(const char *str)
+{
+ sclp_early_printk(printk_skip_level(str));
+}
+
+static bool debug_messages_printed(void)
+{
+ return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG);
+}
+
+void boot_rb_dump(void)
+{
+ if (debug_messages_printed())
+ return;
+ sclp_early_printk("Boot messages ring buffer:\n");
+ boot_rb_foreach(print_rb_entry);
+}
+
const char hex_asc[] = "0123456789abcdef";
static char *as_hex(char *dst, unsigned long val, int pad)
{
- char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
+ char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
- for (*p-- = 0; p >= dst; val >>= 4)
+ for (*p-- = '\0'; p >= dst; val >>= 4)
*p-- = hex_asc[val & 0x0f];
- return end;
+ return dst;
+}
+
+#define MAX_NUMLEN 21
+static char *as_dec(char *buf, unsigned long val, bool is_signed)
+{
+ bool negative = false;
+ char *p = buf + MAX_NUMLEN;
+
+ if (is_signed && (long)val < 0) {
+ val = (val == LONG_MIN ? LONG_MIN : -(long)val);
+ negative = true;
+ }
+
+ *--p = '\0';
+ do {
+ *--p = '0' + (val % 10);
+ val /= 10;
+ } while (val);
+
+ if (negative)
+ *--p = '-';
+ return p;
+}
+
+static ssize_t strpad(char *dst, size_t dst_size, const char *src,
+ int _pad, bool zero_pad, bool decimal)
+{
+ ssize_t len = strlen(src), pad = _pad;
+ char *p = dst;
+
+ if (max(len, abs(pad)) >= dst_size)
+ return -E2BIG;
+
+ if (pad > len) {
+ if (decimal && zero_pad && *src == '-') {
+ *p++ = '-';
+ src++;
+ len--;
+ pad--;
+ }
+ memset(p, zero_pad ? '0' : ' ', pad - len);
+ p += pad - len;
+ }
+ memcpy(p, src, len);
+ p += len;
+ if (pad < 0 && -pad > len) {
+ memset(p, ' ', -pad - len);
+ p += -pad - len;
+ }
+ *p = '\0';
+ return p - dst;
}
static char *symstart(char *p)
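
The ring buffer added in the hunk above stores messages back to back as '\0'-terminated strings and simply restarts at the buffer start when a message does not fit, so the oldest messages are overwritten first; the reserved final byte keeps a terminating double '\0' for the reader. A standalone user-space model (hypothetical):

	#include <string.h>

	static char rb[4096 * 2];
	static size_t rb_off;

	static void rb_add(const char *str)
	{
		size_t len = strlen(str);
		size_t avail = sizeof(rb) - rb_off - 1;	/* keep a trailing '\0' */

		if (len + 1 > avail)
			rb_off = 0;		/* wrap: overwrite oldest */
		strcpy(rb + rb_off, str);
		rb_off += len + 1;		/* strings stay '\0'-separated */
	}

A boot_rb_foreach()-style reader can then scan forward from rb_off to the end and wrap around to recover the messages in order.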
@@ -58,35 +148,94 @@ static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned sh
return NULL;
}
-static noinline char *strsym(void *ip)
+#define MAX_SYMLEN 64
+static noinline char *strsym(char *buf, void *ip)
{
- static char buf[64];
unsigned short off;
unsigned short len;
char *p;
p = findsym((unsigned long)ip, &off, &len);
if (p) {
- strncpy(buf, p, sizeof(buf));
+ strncpy(buf, p, MAX_SYMLEN);
/* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
- p = buf + strnlen(buf, sizeof(buf) - 15);
+ p = buf + strnlen(buf, MAX_SYMLEN - 15);
strcpy(p, "+0x");
- p = as_hex(p + 3, off, 0);
- strcpy(p, "/0x");
- as_hex(p + 3, len, 0);
+ as_hex(p + 3, off, 0);
+ strcat(p, "/0x");
+ as_hex(p + strlen(p), len, 0);
} else {
as_hex(buf, (unsigned long)ip, 16);
}
return buf;
}
-void boot_printk(const char *fmt, ...)
+static inline int printk_loglevel(const char *buf)
+{
+ if (buf[0] == KERN_SOH_ASCII && buf[1]) {
+ switch (buf[1]) {
+ case '0' ... '7':
+ return buf[1] - '0';
+ }
+ }
+ return MESSAGE_LOGLEVEL_DEFAULT;
+}
+
+static void boot_console_earlyprintk(const char *buf)
+{
+ int level = printk_loglevel(buf);
+
+ /* always print emergency messages */
+ if (level > LOGLEVEL_EMERG && !boot_earlyprintk)
+ return;
+ buf = printk_skip_level(buf);
+ /* print debug messages only when bootdebug is enabled */
+ if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf))))
+ return;
+ if (boot_ignore_loglevel || level < boot_console_loglevel)
+ sclp_early_printk(buf);
+}
+
+static char *add_timestamp(char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+ union tod_clock *boot_clock = (union tod_clock *)&get_lowcore()->boot_clock;
+ unsigned long ns = tod_to_ns(get_tod_clock() - boot_clock->tod);
+ char ts[MAX_NUMLEN];
+
+ *buf++ = '[';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0);
+ *buf++ = '.';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0);
+ *buf++ = ']';
+ *buf++ = ' ';
+#endif
+ return buf;
+}
+
+#define va_arg_len_type(args, lenmod, typemod) \
+ ((lenmod == 'l') ? va_arg(args, typemod long) : \
+ (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \
+ (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \
+ (lenmod == 'z') ? va_arg(args, typemod long) : \
+ va_arg(args, typemod int))
+
+int boot_printk(const char *fmt, ...)
{
char buf[1024] = { 0 };
char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
- unsigned long pad;
- char *p = buf;
+ bool zero_pad, decimal;
+ char *strval, *p = buf;
+ char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)];
va_list args;
+ char lenmod;
+ ssize_t len;
+ int pad;
+
+ *p++ = KERN_SOH_ASCII;
+ *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT;
+ p = add_timestamp(p);
+ fmt = printk_skip_level(fmt);
va_start(args, fmt);
for (; p < end && *fmt; fmt++) {
@@ -94,31 +243,56 @@ void boot_printk(const char *fmt, ...)
*p++ = *fmt;
continue;
}
- pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0;
+ if (*++fmt == '%') {
+ *p++ = '%';
+ continue;
+ }
+ zero_pad = (*fmt == '0');
+ pad = simple_strtol(fmt, (char **)&fmt, 10);
+ lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0;
+ if (lenmod == 'h' && *fmt == 'h') {
+ lenmod = 'H';
+ fmt++;
+ }
+ decimal = false;
switch (*fmt) {
case 's':
- p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf));
+ if (lenmod)
+ goto out;
+ strval = va_arg(args, char *);
+ zero_pad = false;
break;
case 'p':
- if (*++fmt != 'S')
+ if (*++fmt != 'S' || lenmod)
goto out;
- p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf));
+ strval = strsym(valbuf, va_arg(args, void *));
+ zero_pad = false;
break;
- case 'l':
- if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned long), pad);
+ case 'd':
+ case 'i':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1);
+ decimal = true;
+ break;
+ case 'u':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
break;
case 'x':
- if (end - p <= max(sizeof(int) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned int), pad);
+ strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
break;
default:
goto out;
}
+ len = strpad(p, end - p, strval, pad, zero_pad, decimal);
+ if (len == -E2BIG)
+ break;
+ p += len;
}
out:
va_end(args);
- sclp_early_printk(buf);
+ len = strlen(buf);
+ if (len) {
+ boot_rb_add(buf, len);
+ boot_console_earlyprintk(buf);
+ }
+ return len;
}
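
The as_dec() helper above writes digits backwards from the end of a fixed MAX_NUMLEN buffer, so no reversal pass is needed. A standalone user-space sketch of the technique (hypothetical; the overflow handling here negates on the unsigned type, which is equivalent to the LONG_MIN special case above):

	#include <stdio.h>
	#include <limits.h>

	#define MAX_NUMLEN 21	/* "-9223372036854775808" plus '\0' */

	static char *as_dec(char *buf, unsigned long val, int is_signed)
	{
		char *p = buf + MAX_NUMLEN;
		int negative = 0;

		if (is_signed && (long)val < 0) {
			val = -val;	/* unsigned negate: safe even for LONG_MIN */
			negative = 1;
		}
		*--p = '\0';
		do {
			*--p = '0' + (val % 10);
			val /= 10;
		} while (val);
		if (negative)
			*--p = '-';
		return p;	/* points into buf, not necessarily at buf[0] */
	}

	int main(void)
	{
		char buf[MAX_NUMLEN];

		printf("%s\n", as_dec(buf, (unsigned long)LONG_MIN, 1));
		return 0;
	}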
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index abe6e6c0ab98..9276e0576d0a 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "startup: " fmt
#include <linux/string.h>
#include <linux/elf.h>
#include <asm/page-states.h>
@@ -30,6 +31,9 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata_preserved(max_mappable);
+unsigned long __bootdata_preserved(page_noexec_mask);
+unsigned long __bootdata_preserved(segment_noexec_mask);
+unsigned long __bootdata_preserved(region_noexec_mask);
int __bootdata_preserved(relocate_lowcore);
u64 __bootdata_preserved(stfle_fac_list[16]);
@@ -39,7 +43,8 @@ struct machine_info machine;
void error(char *x)
{
- boot_printk("\n\n%s\n\n -- System halted", x);
+ boot_emerg("%s\n", x);
+ boot_emerg(" -- System halted\n");
disabled_wait();
}
@@ -51,8 +56,14 @@ static void detect_facilities(void)
}
if (test_facility(78))
machine.has_edat2 = 1;
- if (test_facility(130))
- machine.has_nx = 1;
+ page_noexec_mask = -1UL;
+ segment_noexec_mask = -1UL;
+ region_noexec_mask = -1UL;
+ if (!test_facility(130)) {
+ page_noexec_mask &= ~_PAGE_NOEXEC;
+ segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC;
+ region_noexec_mask &= ~_REGION_ENTRY_NOEXEC;
+ }
}
static int cmma_test_essa(void)
@@ -75,7 +86,7 @@ static int cmma_test_essa(void)
: [reg1] "=&d" (reg1),
[reg2] "=&a" (reg2),
[rc] "+&d" (rc),
- [tmp] "=&d" (tmp),
+ [tmp] "+&d" (tmp),
"+Q" (get_lowcore()->program_new_psw),
"=Q" (old)
: [psw_old] "a" (&old),
@@ -134,7 +145,7 @@ static void rescue_initrd(unsigned long min, unsigned long max)
return;
old_addr = addr;
physmem_free(RR_INITRD);
- addr = physmem_alloc_top_down(RR_INITRD, size, 0);
+ addr = physmem_alloc_or_die(RR_INITRD, size, 0);
memmove((void *)addr, (void *)old_addr, size);
}
@@ -213,12 +224,16 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
if (oldmem_data.start) {
__kaslr_enabled = 0;
ident_map_size = min(ident_map_size, oldmem_data.size);
+ boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size);
} else if (ipl_block_valid && is_ipl_block_dump()) {
__kaslr_enabled = 0;
- if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
+ if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) {
ident_map_size = min(ident_map_size, hsa_size);
+ boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size);
+ }
}
#endif
+ boot_debug("Identity map size: 0x%016lx\n", ident_map_size);
}
#define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore))
@@ -234,6 +249,8 @@ static unsigned long get_vmem_size(unsigned long identity_size,
vsize = round_up(SZ_2G + max_mappable, rte_size) +
round_up(vmemmap_size, rte_size) +
FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
+ if (IS_ENABLED(CONFIG_KMSAN))
+ vsize += MODULES_LEN * 2;
return size_add(vsize, vmalloc_size);
}
@@ -256,6 +273,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
+ boot_debug("vmem size estimated: 0x%016lx\n", vsize);
if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE ||
(vsize > _REGION2_SIZE && kaslr_enabled())) {
asce_limit = _REGION1_SIZE;
@@ -279,8 +297,10 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
* otherwise asce_limit and rte_size would have been adjusted.
*/
vmax = adjust_to_uv_max(asce_limit);
+ boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax);
#ifdef CONFIG_KASAN
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START);
+ boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END);
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
#endif
@@ -294,19 +314,27 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
pos = 0;
kernel_end = vmax - pos * THREAD_SIZE;
kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE);
+ boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start,
+		   kernel_start + kernel_size);
} else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) {
kernel_start = round_down(vmax - kernel_size, THREAD_SIZE);
- boot_printk("The kernel base address is forced to %lx\n", kernel_start);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start,
+ kernel_start + kernel_size);
} else {
kernel_start = __NO_KASLR_START_KERNEL;
+ boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start,
+ kernel_start + kernel_size);
}
__kaslr_offset = kernel_start;
+ boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset);
MODULES_END = round_down(kernel_start, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
if (IS_ENABLED(CONFIG_KMSAN))
VMALLOC_END -= MODULES_LEN * 2;
+ boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END);
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
vsize = (VMALLOC_END - FIXMAP_SIZE) / 2;
@@ -318,10 +346,15 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
VMALLOC_END -= vmalloc_size * 2;
}
VMALLOC_START = VMALLOC_END - vmalloc_size;
+ boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END);
__memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE);
+ boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area,
+ __memcpy_real_area + MEMCPY_REAL_SIZE);
__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
sizeof(struct lowcore));
+ boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore,
+ __abs_lowcore + ABS_LOWCORE_MAP_SIZE);
/* split remaining virtual space between 1:1 mapping & vmemmap array */
pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page));
@@ -341,8 +374,11 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
max_mappable = min(max_mappable, vmemmap_start);
- if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE))
- __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
+ __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+#endif
+ boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base,
+ __identity_base + ident_map_size);
return asce_limit;
}
@@ -401,6 +437,10 @@ void startup_kernel(void)
psw_t psw;
setup_lpp();
+ store_ipl_parmblock();
+ uv_query_info();
+ setup_boot_command_line();
+ parse_boot_command_line();
/*
* Non-randomized kernel physical start address must be _SEGMENT_SIZE
@@ -420,12 +460,8 @@ void startup_kernel(void)
oldmem_data.start = parmarea.oldmem_base;
oldmem_data.size = parmarea.oldmem_size;
- store_ipl_parmblock();
read_ipl_report();
- uv_query_info();
sclp_early_read_info();
- setup_boot_command_line();
- parse_boot_command_line();
detect_facilities();
cmma_init();
sanitize_prot_virt_host();
@@ -515,6 +551,7 @@ void startup_kernel(void)
__kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset);
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
+ dump_physmem_reserved();
copy_bootdata();
__apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions,
(struct alt_instr *)_vmlinux_info.alt_instructions_end,
@@ -531,5 +568,6 @@ void startup_kernel(void)
*/
psw.addr = __kaslr_offset + vmlinux.entry;
psw.mask = PSW_KERNEL_BITS;
+ boot_debug("Starting kernel at: 0x%016lx\n", psw.addr);
__load_psw(psw);
}
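
The three *_noexec_mask variables introduced above replace the old machine.has_nx flag: the masks start as all ones and the NOEXEC bits are cleared once at boot when facility 130 (instruction-execution protection) is absent, so page-table code can apply the mask unconditionally instead of testing the flag at every entry (see the clear_pte_bit()/clear_pmd_bit()/clear_pud_bit() removals in the vmem.c hunks below). A hypothetical consumer, not taken from this patch:

	extern unsigned long page_noexec_mask;	/* -1UL, or ~_PAGE_NOEXEC without facility 130 */

	/* Hypothetical helper: build a pte protection value with
	 * _PAGE_NOEXEC always set, then mask it away on machines
	 * that do not support it. */
	static inline unsigned long finalize_pte_prot(unsigned long prot)
	{
		return prot & page_noexec_mask;
	}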
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 145035f84a0e..cfca94a8eac4 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "vmem: " fmt
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
@@ -13,6 +14,7 @@
#include "decompressor.h"
#include "boot.h"
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
struct ctlreg __bootdata_preserved(s390_invalid_asce);
#ifdef CONFIG_PROC_FS
@@ -31,12 +33,42 @@ enum populate_mode {
POPULATE_IDENTITY,
POPULATE_KERNEL,
#ifdef CONFIG_KASAN
+ /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
POPULATE_KASAN_MAP_SHADOW,
POPULATE_KASAN_ZERO_SHADOW,
POPULATE_KASAN_SHALLOW
#endif
};
+#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
+static inline const char *get_populate_mode_name(enum populate_mode t)
+{
+ switch (t) {
+ POPULATE_MODE_NAME(NONE);
+ POPULATE_MODE_NAME(DIRECT);
+ POPULATE_MODE_NAME(LOWCORE);
+ POPULATE_MODE_NAME(ABS_LOWCORE);
+ POPULATE_MODE_NAME(IDENTITY);
+ POPULATE_MODE_NAME(KERNEL);
+#ifdef CONFIG_KASAN
+ POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
+ POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
+ POPULATE_MODE_NAME(KASAN_SHALLOW);
+#endif
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static bool is_kasan_populate_mode(enum populate_mode mode)
+{
+#ifdef CONFIG_KASAN
+ return mode >= POPULATE_KASAN_MAP_SHADOW;
+#else
+ return false;
+#endif
+}
+
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);
#ifdef CONFIG_KASAN
@@ -52,9 +84,12 @@ static pte_t pte_z;
static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
- start = PAGE_ALIGN_DOWN(__sha(start));
- end = PAGE_ALIGN(__sha(end));
- pgtable_populate(start, end, mode);
+ unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
+ unsigned long sha_end = PAGE_ALIGN(__sha(end));
+
+ boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
+ start, end, sha_start, sha_end);
+ pgtable_populate(sha_start, sha_end, mode);
}
static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
@@ -63,13 +98,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
unsigned long memgap_start = 0;
- unsigned long untracked_end;
unsigned long start, end;
int i;
pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
- if (!machine.has_nx)
- pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
@@ -93,15 +125,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern
kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
- if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
- untracked_end = VMALLOC_START;
- /* shallowly populate kasan shadow for vmalloc and modules */
- kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
- } else {
- untracked_end = MODULES_VADDR;
- }
+ /* shallowly populate kasan shadow for vmalloc and modules */
+ kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
/* populate kasan shadow for untracked memory */
- kasan_populate((unsigned long)__identity_va(ident_map_size), untracked_end,
+ kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
POPULATE_KASAN_ZERO_SHADOW);
kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}
@@ -208,7 +235,7 @@ static void *boot_crst_alloc(unsigned long val)
unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
unsigned long *table;
- table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
+ table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
crst_table_init(table, val);
__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
@@ -224,7 +251,7 @@ static pte_t *boot_pte_alloc(void)
* during POPULATE_KASAN_MAP_SHADOW when EDAT is off
*/
if (!pte_leftover) {
- pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
+ pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
__arch_set_page_dat(pte, 1);
} else {
@@ -236,11 +263,12 @@ static pte_t *boot_pte_alloc(void)
return pte;
}
-static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode)
+static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
+ enum populate_mode mode)
{
switch (mode) {
case POPULATE_NONE:
- return -1;
+ return INVALID_PHYS_ADDR;
case POPULATE_DIRECT:
return addr;
case POPULATE_LOWCORE:
@@ -253,38 +281,64 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
return __identity_pa(addr);
#ifdef CONFIG_KASAN
case POPULATE_KASAN_MAP_SHADOW:
- addr = physmem_alloc_top_down(RR_VMEM, size, size);
- memset((void *)addr, 0, size);
- return addr;
+ /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */
+ addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
+ if (addr) {
+ memset((void *)addr, 0, size);
+ return addr;
+ }
+ return INVALID_PHYS_ADDR;
#endif
default:
- return -1;
+ return INVALID_PHYS_ADDR;
}
}
-static bool large_allowed(enum populate_mode mode)
+static bool large_page_mapping_allowed(enum populate_mode mode)
{
- return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL);
+ switch (mode) {
+ case POPULATE_DIRECT:
+ case POPULATE_IDENTITY:
+ case POPULATE_KERNEL:
+#ifdef CONFIG_KASAN
+ case POPULATE_KASAN_MAP_SHADOW:
+#endif
+ return true;
+ default:
+ return false;
+ }
}
-static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end,
- enum populate_mode mode)
+static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- unsigned long size = end - addr;
+ unsigned long pa, size = end - addr;
+
+ if (!machine.has_edat2 || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PUD_SIZE))
+ return INVALID_PHYS_ADDR;
- return machine.has_edat2 && large_allowed(mode) &&
- IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) &&
- IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE);
+ return pa;
}
-static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end,
- enum populate_mode mode)
+static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- unsigned long size = end - addr;
+ unsigned long pa, size = end - addr;
- return machine.has_edat1 && large_allowed(mode) &&
- IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) &&
- IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE);
+ if (!machine.has_edat1 || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PMD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ return pa;
}
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
@@ -298,22 +352,20 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
if (pte_none(*pte)) {
if (kasan_pte_populate_zero_shadow(pte, mode))
continue;
- entry = __pte(_pa(addr, PAGE_SIZE, mode));
+ entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
entry = set_pte_bit(entry, PAGE_KERNEL);
- if (!machine.has_nx)
- entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
set_pte(pte, entry);
pages++;
}
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_4K, pages);
}
static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pmd_t *pmd, entry;
pte_t *pte;
@@ -323,11 +375,10 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
if (pmd_none(*pmd)) {
if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
continue;
- if (can_large_pmd(pmd, addr, next, mode)) {
- entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
+ pa = try_get_large_pmd_pa(pmd, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pmd(pa);
entry = set_pmd_bit(entry, SEGMENT_KERNEL);
- if (!machine.has_nx)
- entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmd, entry);
pages++;
continue;
@@ -339,14 +390,14 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
}
pgtable_pte_populate(pmd, addr, next, mode);
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_1M, pages);
}
static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pud_t *pud, entry;
pmd_t *pmd;
@@ -356,11 +407,10 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
if (pud_none(*pud)) {
if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
continue;
- if (can_large_pud(pud, addr, next, mode)) {
- entry = __pud(_pa(addr, _REGION3_SIZE, mode));
+ pa = try_get_large_pud_pa(pud, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pud(pa);
entry = set_pud_bit(entry, REGION3_KERNEL);
- if (!machine.has_nx)
- entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
set_pud(pud, entry);
pages++;
continue;
@@ -372,7 +422,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
}
pgtable_pmd_populate(pud, addr, next, mode);
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_2G, pages);
}
@@ -402,6 +452,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
pgd_t *pgd;
p4d_t *p4d;
+ if (!is_kasan_populate_mode(mode)) {
+ boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
+ get_populate_mode_name(mode), addr, end,
+ resolve_pa_may_alloc(addr, 0, mode),
+ resolve_pa_may_alloc(end - 1, 0, mode) + 1);
+ }
+
pgd = pgd_offset(&init_mm, addr);
for (; addr < end; addr = next, pgd++) {
next = pgd_addr_end(addr, end);
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index d8d227ab82de..80bdfbae6e5b 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -469,6 +469,7 @@ CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_BITMAP_FILE is not set
+CONFIG_MD_LINEAR=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -740,7 +741,6 @@ CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
-CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
@@ -770,7 +770,6 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_HCTR2=m
-CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_AEGIS128=m
@@ -782,7 +781,6 @@ CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SM3_GENERIC=m
-CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_CRC32=m
@@ -795,7 +793,6 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
@@ -818,7 +815,6 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y
CONFIG_CORDIC=m
CONFIG_CRYPTO_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
-CONFIG_CRC32_SELFTEST=y
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
@@ -879,6 +875,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300
CONFIG_LATENCYTOP=y
CONFIG_BOOTTIME_TRACING=y
CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FUNCTION_GRAPH_RETADDR=y
CONFIG_FPROBE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 6c2f2bb4fbf8..449a0e996b96 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -459,6 +459,7 @@ CONFIG_SCSI_DH_ALUA=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
# CONFIG_MD_BITMAP_FILE is not set
+CONFIG_MD_LINEAR=m
CONFIG_MD_CLUSTER=m
CONFIG_BCACHE=m
CONFIG_BLK_DEV_DM=y
@@ -725,7 +726,6 @@ CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
-CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_USER=m
@@ -756,7 +756,6 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_HCTR2=m
-CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_AEGIS128=m
@@ -768,7 +767,6 @@ CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_SM3_GENERIC=m
-CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_CRC32=m
@@ -782,7 +780,6 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
@@ -829,6 +826,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
CONFIG_BOOTTIME_TRACING=y
CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FUNCTION_GRAPH_RETADDR=y
CONFIG_FPROBE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config
index 84c2b551e992..cefbe2ba1228 100644
--- a/arch/s390/configs/kasan.config
+++ b/arch/s390/configs/kasan.config
@@ -1,4 +1,4 @@
# Help: Enable KASan for debugging
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
-CONFIG_KASAN_VMALLOC=y
+CONFIG_KERNEL_IMAGE_BASE=0x7FFFE0000000
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index bcbaa069de96..853b2326a171 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -62,7 +62,6 @@ CONFIG_ZFCP=y
# CONFIG_INOTIFY_USER is not set
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_LSM="yama,loadpin,safesetid,integrity"
# CONFIG_ZLIB_DFLTCC is not set
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
index d3eb3a233693..b760232537f1 100644
--- a/arch/s390/crypto/Kconfig
+++ b/arch/s390/crypto/Kconfig
@@ -2,18 +2,6 @@
menu "Accelerated Cryptographic Algorithms for CPU (s390)"
-config CRYPTO_CRC32_S390
- tristate "CRC32c and CRC32"
- depends on S390
- select CRYPTO_HASH
- select CRC32
- help
- CRC32c and CRC32 CRC algorithms
-
- Architecture: s390
-
- It is available with IBM z13 or later.
-
config CRYPTO_SHA512_S390
tristate "Hash functions: SHA-384 and SHA-512"
depends on S390
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index a0cb96937c3d..14dafadbcbed 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -14,9 +14,7 @@ obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
-obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o
obj-y += arch_random.o
-crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
deleted file mode 100644
index 89a10337e6ea..000000000000
--- a/arch/s390/crypto/crc32-vx.c
+++ /dev/null
@@ -1,306 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Crypto-API module for CRC-32 algorithms implemented with the
- * z/Architecture Vector Extension Facility.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-#define KMSG_COMPONENT "crc32-vx"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <crypto/internal/hash.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-#define CRC32_BLOCK_SIZE 1
-#define CRC32_DIGEST_SIZE 4
-
-#define VX_MIN_LEN 64
-#define VX_ALIGNMENT 16L
-#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
-
-struct crc_ctx {
- u32 key;
-};
-
-struct crc_desc_ctx {
- u32 crc;
-};
-
-/*
- * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
- *
- * Creates a function to perform a particular CRC-32 computation. Depending
- * on the message buffer, the hardware-accelerated or software implementation
- * is used. Note that the message buffer is aligned to improve fetch
- * operations of VECTOR LOAD MULTIPLE instructions.
- *
- */
-#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
- static u32 __pure ___fname(u32 crc, \
- unsigned char const *data, size_t datalen) \
- { \
- unsigned long prealign, aligned, remaining; \
- DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
- \
- if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
- return ___crc32_sw(crc, data, datalen); \
- \
- if ((unsigned long)data & VX_ALIGN_MASK) { \
- prealign = VX_ALIGNMENT - \
- ((unsigned long)data & VX_ALIGN_MASK); \
- datalen -= prealign; \
- crc = ___crc32_sw(crc, data, prealign); \
- data = (void *)((unsigned long)data + prealign); \
- } \
- \
- aligned = datalen & ~VX_ALIGN_MASK; \
- remaining = datalen & VX_ALIGN_MASK; \
- \
- kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \
- crc = ___crc32_vx(crc, data, aligned); \
- kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \
- \
- if (remaining) \
- crc = ___crc32_sw(crc, data + aligned, remaining); \
- \
- return crc; \
- }
-
-DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le)
-DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be)
-DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le)
-
-
-static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm)
-{
- struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = 0;
- return 0;
-}
-
-static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm)
-{
- struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = ~0;
- return 0;
-}
-
-static int crc32_vx_init(struct shash_desc *desc)
-{
- struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = mctx->key;
- return 0;
-}
-
-static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
- unsigned int newkeylen)
-{
- struct crc_ctx *mctx = crypto_shash_ctx(tfm);
-
- if (newkeylen != sizeof(mctx->key))
- return -EINVAL;
- mctx->key = le32_to_cpu(*(__le32 *)newkey);
- return 0;
-}
-
-static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
- unsigned int newkeylen)
-{
- struct crc_ctx *mctx = crypto_shash_ctx(tfm);
-
- if (newkeylen != sizeof(mctx->key))
- return -EINVAL;
- mctx->key = be32_to_cpu(*(__be32 *)newkey);
- return 0;
-}
-
-static int crc32le_vx_final(struct shash_desc *desc, u8 *out)
-{
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- *(__le32 *)out = cpu_to_le32p(&ctx->crc);
- return 0;
-}
-
-static int crc32be_vx_final(struct shash_desc *desc, u8 *out)
-{
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- *(__be32 *)out = cpu_to_be32p(&ctx->crc);
- return 0;
-}
-
-static int crc32c_vx_final(struct shash_desc *desc, u8 *out)
-{
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- /*
- * Perform a final XOR with 0xFFFFFFFF to be in sync
- * with the generic crc32c shash implementation.
- */
- *(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
- return 0;
-}
-
-static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len));
- return 0;
-}
-
-static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len));
- return 0;
-}
-
-static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len,
- u8 *out)
-{
- /*
- * Perform a final XOR with 0xFFFFFFFF to be in sync
- * with the generic crc32c shash implementation.
- */
- *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len));
- return 0;
-}
-
-
-#define CRC32_VX_FINUP(alg, func) \
- static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \
- unsigned int datalen, u8 *out) \
- { \
- return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \
- data, datalen, out); \
- }
-
-CRC32_VX_FINUP(crc32le, crc32_le_vx)
-CRC32_VX_FINUP(crc32be, crc32_be_vx)
-CRC32_VX_FINUP(crc32c, crc32c_le_vx)
-
-#define CRC32_VX_DIGEST(alg, func) \
- static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \
- unsigned int len, u8 *out) \
- { \
- return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \
- data, len, out); \
- }
-
-CRC32_VX_DIGEST(crc32le, crc32_le_vx)
-CRC32_VX_DIGEST(crc32be, crc32_be_vx)
-CRC32_VX_DIGEST(crc32c, crc32c_le_vx)
-
-#define CRC32_VX_UPDATE(alg, func) \
- static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \
- unsigned int datalen) \
- { \
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \
- ctx->crc = func(ctx->crc, data, datalen); \
- return 0; \
- }
-
-CRC32_VX_UPDATE(crc32le, crc32_le_vx)
-CRC32_VX_UPDATE(crc32be, crc32_be_vx)
-CRC32_VX_UPDATE(crc32c, crc32c_le_vx)
-
-
-static struct shash_alg crc32_vx_algs[] = {
- /* CRC-32 LE */
- {
- .init = crc32_vx_init,
- .setkey = crc32_vx_setkey,
- .update = crc32le_vx_update,
- .final = crc32le_vx_final,
- .finup = crc32le_vx_finup,
- .digest = crc32le_vx_digest,
- .descsize = sizeof(struct crc_desc_ctx),
- .digestsize = CRC32_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32",
- .cra_driver_name = "crc32-vx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CRC32_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crc_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_vx_cra_init_zero,
- },
- },
- /* CRC-32 BE */
- {
- .init = crc32_vx_init,
- .setkey = crc32be_vx_setkey,
- .update = crc32be_vx_update,
- .final = crc32be_vx_final,
- .finup = crc32be_vx_finup,
- .digest = crc32be_vx_digest,
- .descsize = sizeof(struct crc_desc_ctx),
- .digestsize = CRC32_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32be",
- .cra_driver_name = "crc32be-vx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CRC32_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crc_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_vx_cra_init_zero,
- },
- },
- /* CRC-32C LE */
- {
- .init = crc32_vx_init,
- .setkey = crc32_vx_setkey,
- .update = crc32c_vx_update,
- .final = crc32c_vx_final,
- .finup = crc32c_vx_finup,
- .digest = crc32c_vx_digest,
- .descsize = sizeof(struct crc_desc_ctx),
- .digestsize = CRC32_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-vx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CRC32_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crc_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_vx_cra_init_invert,
- },
- },
-};
-
-
-static int __init crc_vx_mod_init(void)
-{
- return crypto_register_shashes(crc32_vx_algs,
- ARRAY_SIZE(crc32_vx_algs));
-}
-
-static void __exit crc_vx_mod_exit(void)
-{
- crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init);
-module_exit(crc_vx_mod_exit);
-
-MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
-MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32");
-MODULE_ALIAS_CRYPTO("crc32-vx");
-MODULE_ALIAS_CRYPTO("crc32c");
-MODULE_ALIAS_CRYPTO("crc32c-vx");
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
index d20df8c923fc..004d17ea05cf 100644
--- a/arch/s390/include/asm/abs_lowcore.h
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -2,7 +2,7 @@
#ifndef _ASM_S390_ABS_LOWCORE_H
#define _ASM_S390_ABS_LOWCORE_H
-#include <asm/sections.h>
+#include <linux/smp.h>
#include <asm/lowcore.h>
#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
@@ -25,7 +25,7 @@ static inline void put_abs_lowcore(struct lowcore *lc)
put_cpu();
}
-extern int __bootdata_preserved(relocate_lowcore);
+extern int relocate_lowcore;
static inline int have_relocated_lowcore(void)
{
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index 4a6b0a8b6412..2e829c16fd8a 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -9,11 +9,11 @@
#define EX_TYPE_NONE 0
#define EX_TYPE_FIXUP 1
#define EX_TYPE_BPF 2
-#define EX_TYPE_UA_STORE 3
-#define EX_TYPE_UA_LOAD_MEM 4
+#define EX_TYPE_UA_FAULT 3
#define EX_TYPE_UA_LOAD_REG 5
#define EX_TYPE_UA_LOAD_REGPAIR 6
#define EX_TYPE_ZEROPAD 7
+#define EX_TYPE_FPC 8
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(3, 0)
@@ -69,11 +69,8 @@
#define EX_TABLE_AMODE31(_fault, _target) \
__EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0)
-#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \
- __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0)
-
-#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \
- __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len)
+#define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0)
#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
@@ -84,4 +81,7 @@
#define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \
__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0)
+#define EX_TABLE_FPC(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0)
+
#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h
index ec011b94af2a..e9062b01e2a2 100644
--- a/arch/s390/include/asm/asm.h
+++ b/arch/s390/include/asm/asm.h
@@ -28,7 +28,7 @@
* [var] also contains the program mask. CC_TRANSFORM() moves the condition
* code to the two least significant bits and sets all other bits to zero.
*/
-#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_GCC_ASM_FLAG_OUTPUT_BROKEN))
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN))
#define __HAVE_ASM_FLAG_OUTPUTS__
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 6723fca64018..b36dd6a1d652 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -17,13 +17,13 @@
static __always_inline int arch_atomic_read(const atomic_t *v)
{
- return __atomic_read(v);
+ return __atomic_read(&v->counter);
}
#define arch_atomic_read arch_atomic_read
static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
- __atomic_set(v, i);
+ __atomic_set(&v->counter, i);
}
#define arch_atomic_set arch_atomic_set
@@ -45,6 +45,36 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
}
#define arch_atomic_add arch_atomic_add
+static __always_inline void arch_atomic_inc(atomic_t *v)
+{
+ __atomic_add_const(1, &v->counter);
+}
+#define arch_atomic_inc arch_atomic_inc
+
+static __always_inline void arch_atomic_dec(atomic_t *v)
+{
+ __atomic_add_const(-1, &v->counter);
+}
+#define arch_atomic_dec arch_atomic_dec
+
+static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
+{
+ return __atomic_add_and_test_barrier(-i, &v->counter);
+}
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
+
+static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
+{
+ return __atomic_add_const_and_test_barrier(-1, &v->counter);
+}
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
+
+static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
+{
+ return __atomic_add_const_and_test_barrier(1, &v->counter);
+}
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
+
#define arch_atomic_sub(_i, _v) arch_atomic_add(-(int)(_i), _v)
#define arch_atomic_sub_return(_i, _v) arch_atomic_add_return(-(int)(_i), _v)
#define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v)
@@ -94,13 +124,13 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
- return __atomic64_read(v);
+ return __atomic64_read((long *)&v->counter);
}
#define arch_atomic64_read arch_atomic64_read
static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
- __atomic64_set(v, i);
+ __atomic64_set((long *)&v->counter, i);
}
#define arch_atomic64_set arch_atomic64_set
@@ -122,6 +152,36 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
}
#define arch_atomic64_add arch_atomic64_add
+static __always_inline void arch_atomic64_inc(atomic64_t *v)
+{
+ __atomic64_add_const(1, (long *)&v->counter);
+}
+#define arch_atomic64_inc arch_atomic64_inc
+
+static __always_inline void arch_atomic64_dec(atomic64_t *v)
+{
+ __atomic64_add_const(-1, (long *)&v->counter);
+}
+#define arch_atomic64_dec arch_atomic64_dec
+
+static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
+{
+ return __atomic64_add_and_test_barrier(-i, (long *)&v->counter);
+}
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
+
+static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v)
+{
+ return __atomic64_add_const_and_test_barrier(-1, (long *)&v->counter);
+}
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
+
+static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v)
+{
+ return __atomic64_add_const_and_test_barrier(1, (long *)&v->counter);
+}
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
+
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
{
return arch_xchg(&v->counter, new);
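[Editor's annotation, not part of the patch] A usage sketch for the and-test helpers added above: callers reach these arch_ entry points through the generic atomic_*() wrappers, so a typical reference-count drop like the one below compiles to a single interlocked add-immediate plus a condition-code branch on z196 or later, instead of an add followed by a separate compare.

/* Kernel-context sketch; atomic_t and the wrapper come from <linux/atomic.h>. */
static void put_object(atomic_t *refcount, void (*release)(void *), void *obj)
{
	if (atomic_dec_and_test(refcount))	/* resolves to arch_atomic_dec_and_test() */
		release(obj);
}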
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 1d6b2056fad8..585678bbcd7a 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -10,50 +10,51 @@
#include <linux/limits.h>
#include <asm/march.h>
+#include <asm/asm.h>
-static __always_inline int __atomic_read(const atomic_t *v)
+static __always_inline int __atomic_read(const int *ptr)
{
- int c;
+ int val;
asm volatile(
- " l %[c],%[counter]\n"
- : [c] "=d" (c) : [counter] "R" (v->counter));
- return c;
+ " l %[val],%[ptr]\n"
+ : [val] "=d" (val) : [ptr] "R" (*ptr));
+ return val;
}
-static __always_inline void __atomic_set(atomic_t *v, int i)
+static __always_inline void __atomic_set(int *ptr, int val)
{
- if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) {
+ if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
asm volatile(
- " mvhi %[counter], %[i]\n"
- : [counter] "=Q" (v->counter) : [i] "K" (i));
+ " mvhi %[ptr],%[val]\n"
+ : [ptr] "=Q" (*ptr) : [val] "K" (val));
} else {
asm volatile(
- " st %[i],%[counter]\n"
- : [counter] "=R" (v->counter) : [i] "d" (i));
+ " st %[val],%[ptr]\n"
+ : [ptr] "=R" (*ptr) : [val] "d" (val));
}
}
-static __always_inline s64 __atomic64_read(const atomic64_t *v)
+static __always_inline long __atomic64_read(const long *ptr)
{
- s64 c;
+ long val;
asm volatile(
- " lg %[c],%[counter]\n"
- : [c] "=d" (c) : [counter] "RT" (v->counter));
- return c;
+ " lg %[val],%[ptr]\n"
+ : [val] "=d" (val) : [ptr] "RT" (*ptr));
+ return val;
}
-static __always_inline void __atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void __atomic64_set(long *ptr, long val)
{
- if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) {
+ if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
asm volatile(
- " mvghi %[counter], %[i]\n"
- : [counter] "=Q" (v->counter) : [i] "K" (i));
+ " mvghi %[ptr],%[val]\n"
+ : [ptr] "=Q" (*ptr) : [val] "K" (val));
} else {
asm volatile(
- " stg %[i],%[counter]\n"
- : [counter] "=RT" (v->counter) : [i] "d" (i));
+ " stg %[val],%[ptr]\n"
+ : [ptr] "=RT" (*ptr) : [val] "d" (val));
}
}
@@ -73,7 +74,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \
} \
#define __ATOMIC_OPS(op_name, op_type, op_string) \
- __ATOMIC_OP(op_name, op_type, op_string, "\n") \
+ __ATOMIC_OP(op_name, op_type, op_string, "") \
__ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
__ATOMIC_OPS(__atomic_add, int, "laa")
@@ -99,7 +100,7 @@ static __always_inline void op_name(op_type val, op_type *ptr) \
}
#define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \
- __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \
+ __ATOMIC_CONST_OP(op_name, op_type, op_string, "") \
__ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
__ATOMIC_CONST_OPS(__atomic_add_const, int, "asi")
@@ -169,4 +170,76 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#endif /* MARCH_HAS_Z196_FEATURES */
+#if defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__)
+
+#define __ATOMIC_TEST_OP(op_name, op_type, op_string, op_barrier) \
+static __always_inline bool op_name(op_type val, op_type *ptr) \
+{ \
+ op_type tmp; \
+ int cc; \
+ \
+ asm volatile( \
+ op_string " %[tmp],%[val],%[ptr]\n" \
+ op_barrier \
+ : "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr) \
+ : [val] "d" (val) \
+ : "memory"); \
+ return (cc == 0) || (cc == 2); \
+} \
+
+#define __ATOMIC_TEST_OPS(op_name, op_type, op_string) \
+ __ATOMIC_TEST_OP(op_name, op_type, op_string, "") \
+ __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+
+__ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal")
+__ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg")
+
+#undef __ATOMIC_TEST_OPS
+#undef __ATOMIC_TEST_OP
+
+#define __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, op_barrier) \
+static __always_inline bool op_name(op_type val, op_type *ptr) \
+{ \
+ int cc; \
+ \
+ asm volatile( \
+ op_string " %[ptr],%[val]\n" \
+ op_barrier \
+ : "=@cc" (cc), [ptr] "+QS" (*ptr) \
+ : [val] "i" (val) \
+ : "memory"); \
+ return (cc == 0) || (cc == 2); \
+}
+
+#define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string) \
+ __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "") \
+ __ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+
+__ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi")
+__ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi")
+
+#undef __ATOMIC_CONST_TEST_OPS
+#undef __ATOMIC_CONST_TEST_OP
+
+#else /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */
+
+#define __ATOMIC_TEST_OP(op_name, op_func, op_type) \
+static __always_inline bool op_name(op_type val, op_type *ptr) \
+{ \
+ return op_func(val, ptr) == -val; \
+}
+
+__ATOMIC_TEST_OP(__atomic_add_and_test, __atomic_add, int)
+__ATOMIC_TEST_OP(__atomic_add_and_test_barrier, __atomic_add_barrier, int)
+__ATOMIC_TEST_OP(__atomic_add_const_and_test, __atomic_add, int)
+__ATOMIC_TEST_OP(__atomic_add_const_and_test_barrier, __atomic_add_barrier, int)
+__ATOMIC_TEST_OP(__atomic64_add_and_test, __atomic64_add, long)
+__ATOMIC_TEST_OP(__atomic64_add_and_test_barrier, __atomic64_add_barrier, long)
+__ATOMIC_TEST_OP(__atomic64_add_const_and_test, __atomic64_add, long)
+__ATOMIC_TEST_OP(__atomic64_add_const_and_test_barrier, __atomic64_add_barrier, long)
+
+#undef __ATOMIC_TEST_OP
+
+#endif /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */
+
#endif /* __ARCH_S390_ATOMIC_OPS__ */
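[Editor's annotation, not part of the patch] On the condition-code test added above: for ALSI/ALGSI and LAAL/LAALG, cc 0 and cc 2 both mean "result is zero" (without and with carry, respectively), so "(cc == 0) || (cc == 2)" is exactly a zero test on the updated value — which is also what the fallback path expresses as "op_func(val, ptr) == -val", since old == -val iff old + val == 0. A plain-C model:

#include <stdbool.h>

static bool add_and_test_model(long val, long *ptr)
{
	*ptr += val;		/* the interlocked update done by laalg/algsi */
	return *ptr == 0;	/* what (cc == 0) || (cc == 2) encodes */
}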
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 54a079cd39ed..a5ca0a947691 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -36,184 +36,45 @@
#include <linux/typecheck.h>
#include <linux/compiler.h>
#include <linux/types.h>
-#include <asm/atomic_ops.h>
-#include <asm/barrier.h>
-
-#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
-
-static inline unsigned long *
-__bitops_word(unsigned long nr, const volatile unsigned long *ptr)
-{
- unsigned long addr;
-
- addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
- return (unsigned long *)addr;
-}
-
-static inline unsigned long __bitops_mask(unsigned long nr)
-{
- return 1UL << (nr & (BITS_PER_LONG - 1));
-}
-
-static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
-
- __atomic64_or(mask, (long *)addr);
-}
-
-static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
-
- __atomic64_and(~mask, (long *)addr);
-}
-
-static __always_inline void arch_change_bit(unsigned long nr,
- volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
-
- __atomic64_xor(mask, (long *)addr);
-}
-
-static inline bool arch_test_and_set_bit(unsigned long nr,
- volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = __atomic64_or_barrier(mask, (long *)addr);
- return old & mask;
-}
-
-static inline bool arch_test_and_clear_bit(unsigned long nr,
- volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = __atomic64_and_barrier(~mask, (long *)addr);
- return old & mask;
-}
-
-static inline bool arch_test_and_change_bit(unsigned long nr,
- volatile unsigned long *ptr)
-{
- unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = __atomic64_xor_barrier(mask, (long *)addr);
- return old & mask;
-}
-
-static __always_inline void
-arch___set_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
-
- *p |= mask;
-}
-
-static __always_inline void
-arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
-
- *p &= ~mask;
-}
-
-static __always_inline void
-arch___change_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
-
- *p ^= mask;
-}
-
-static __always_inline bool
-arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = *p;
- *p |= mask;
- return old & mask;
-}
-
-static __always_inline bool
-arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = *p;
- *p &= ~mask;
- return old & mask;
-}
-
-static __always_inline bool
-arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
-{
- unsigned long *p = __bitops_word(nr, addr);
- unsigned long mask = __bitops_mask(nr);
- unsigned long old;
-
- old = *p;
- *p ^= mask;
- return old & mask;
-}
-
-#define arch_test_bit generic_test_bit
-#define arch_test_bit_acquire generic_test_bit_acquire
-
-static inline bool arch_test_and_set_bit_lock(unsigned long nr,
- volatile unsigned long *ptr)
-{
- if (arch_test_bit(nr, ptr))
- return true;
- return arch_test_and_set_bit(nr, ptr);
-}
-
-static inline void arch_clear_bit_unlock(unsigned long nr,
- volatile unsigned long *ptr)
-{
- smp_mb__before_atomic();
- arch_clear_bit(nr, ptr);
-}
-
-static inline void arch___clear_bit_unlock(unsigned long nr,
- volatile unsigned long *ptr)
-{
- smp_mb();
- arch___clear_bit(nr, ptr);
-}
-
-static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
- volatile unsigned long *ptr)
-{
- unsigned long old;
-
- old = __atomic64_xor_barrier(mask, (long *)ptr);
- return old & BIT(7);
+#include <asm/asm.h>
+
+#define arch___set_bit generic___set_bit
+#define arch___clear_bit generic___clear_bit
+#define arch___change_bit generic___change_bit
+#define arch___test_and_set_bit generic___test_and_set_bit
+#define arch___test_and_clear_bit generic___test_and_clear_bit
+#define arch___test_and_change_bit generic___test_and_change_bit
+#define arch_test_bit_acquire generic_test_bit_acquire
+
+static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsigned long *ptr)
+{
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
+ const volatile unsigned char *addr;
+ unsigned long mask;
+ int cc;
+
+ /*
+ * With CONFIG_PROFILE_ALL_BRANCHES enabled, gcc fails to
+ * handle __builtin_constant_p() in some cases.
+ */
+ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && __builtin_constant_p(nr)) {
+ addr = (const volatile unsigned char *)ptr;
+ addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE;
+ mask = 1UL << (nr & (BITS_PER_BYTE - 1));
+ asm volatile(
+ " tm %[addr],%[mask]\n"
+ : "=@cc" (cc)
+ : [addr] "Q" (*addr), [mask] "I" (mask)
+ );
+ return cc == 3;
+ }
+#endif
+ return generic_test_bit(nr, ptr);
}
-#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
-#include <asm-generic/bitops/instrumented-atomic.h>
-#include <asm-generic/bitops/instrumented-non-atomic.h>
-#include <asm-generic/bitops/instrumented-lock.h>
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-instrumented-non-atomic.h>
+#include <asm-generic/bitops/lock.h>
/*
* Functions which use MSB0 bit numbering.
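[Editor's annotation, not part of the patch] The constant-nr fast path in arch_test_bit() above deserves a gloss: TEST UNDER MASK (TM) addresses a single byte, so the bit number must be mapped onto the byte that holds it within a big-endian 64-bit word, and "nr ^ (BITS_PER_LONG - BITS_PER_BYTE)" computes that byte offset without a subtraction. A standalone model (assuming a little-endian host, hence the byte swap to emulate s390 storage):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define BITS_PER_LONG	64
#define BITS_PER_BYTE	8

static bool test_bit_model(unsigned long nr, unsigned long word)
{
	unsigned char bytes[sizeof(word)];
	unsigned long off = (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE;
	unsigned char mask = 1U << (nr & (BITS_PER_BYTE - 1));
	uint64_t be = __builtin_bswap64(word);	/* lay the word out big-endian */

	memcpy(bytes, &be, sizeof(be));
	return (bytes[off] & mask) != 0;	/* TM reports cc 3 when the bit is one */
}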
diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h
index f7eed27b3220..f55f8227058e 100644
--- a/arch/s390/include/asm/boot_data.h
+++ b/arch/s390/include/asm/boot_data.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_BOOT_DATA_H
+#include <linux/string.h>
#include <asm/setup.h>
#include <asm/ipl.h>
@@ -15,4 +16,54 @@ extern unsigned long ipl_cert_list_size;
extern unsigned long early_ipl_comp_list_addr;
extern unsigned long early_ipl_comp_list_size;
+extern char boot_rb[PAGE_SIZE * 2];
+extern bool boot_earlyprintk;
+extern size_t boot_rb_off;
+extern char bootdebug_filter[128];
+extern bool bootdebug;
+
+#define boot_rb_foreach(cb) \
+ do { \
+ size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1; \
+ size_t len; \
+ for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \
+ cb(boot_rb + off); \
+ for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \
+ cb(boot_rb + off); \
+ } while (0)
+
+/*
+ * bootdebug_filter is a comma-separated list of strings,
+ * where each string can be a prefix of the message.
+ */
+static inline bool bootdebug_filter_match(const char *buf)
+{
+ char *p = bootdebug_filter, *s;
+ char *end;
+
+ if (!*p)
+ return true;
+
+ end = p + strlen(p);
+ while (p < end) {
+ p = skip_spaces(p);
+ s = memscan(p, ',', end - p);
+ if (!strncmp(p, buf, s - p))
+ return true;
+ p = s + 1;
+ }
+ return false;
+}
+
+static inline const char *skip_timestamp(const char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+ const char *p = memchr(buf, ']', strlen(buf));
+
+ if (p && p[1] == ' ')
+ return p + 2;
+#endif
+ return buf;
+}
+
#endif /* _ASM_S390_BOOT_DATA_H */
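[Editor's annotation, not part of the patch] bootdebug_filter_match() above implements a simple prefix match over a comma-separated list; an empty filter accepts everything. A userspace model of the same logic, with isspace() standing in for skip_spaces() and memchr() plus an explicit end fallback standing in for memscan():

#include <ctype.h>
#include <stdbool.h>
#include <string.h>

static bool filter_match(const char *filter, const char *msg)
{
	const char *p = filter, *end = filter + strlen(filter), *s;

	if (!*p)
		return true;	/* empty filter matches every message */
	while (p < end) {
		while (p < end && isspace((unsigned char)*p))
			p++;
		s = memchr(p, ',', end - p);
		if (!s)
			s = end;	/* memscan() returns end when ',' is absent */
		if (s > p && !strncmp(p, msg, s - p))
			return true;
		p = s + 1;
	}
	return false;
}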
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 46f5c9660616..d86dea5900e7 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -25,7 +25,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum)
instrument_read(buff, len);
kmsan_check_memory(buff, len);
- asm volatile("\n"
+ asm volatile(
"0: cksm %[sum],%[rp]\n"
" jo 0b\n"
: [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory");
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 638137d46c85..a03f64033760 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -25,7 +25,7 @@ struct css_general_char {
u64 : 2;
u64 : 3;
- u64 aif_osa : 1; /* bit 67 */
+ u64 aif_qdio : 1; /* bit 67 */
u64 : 12;
u64 eadm_rf : 1; /* bit 80 */
u64 : 1;
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index a7f7bdc9e19c..6375276d94ea 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -85,6 +85,10 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
int area, debug_entry_t *entry,
char *out_buf, size_t out_buf_size);
+#define DEBUG_SPRINTF_MAX_ARGS 10
+int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size,
+ const char *inbuf);
struct debug_view {
char name[DEBUG_MAX_NAME_LEN];
debug_prolog_proc_t *prolog_proc;
@@ -114,6 +118,9 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas,
int buf_size, umode_t mode, uid_t uid,
gid_t gid);
+ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
+ char *buf, size_t buf_size, bool reverse);
+
void debug_unregister(debug_info_t *id);
void debug_set_level(debug_info_t *id, int new_level);
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index e1316e181230..5790630e31f0 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -36,8 +36,10 @@ enum diag_stat_enum {
DIAG_STAT_X2FC,
DIAG_STAT_X304,
DIAG_STAT_X308,
+ DIAG_STAT_X310,
DIAG_STAT_X318,
DIAG_STAT_X320,
+ DIAG_STAT_X324,
DIAG_STAT_X49C,
DIAG_STAT_X500,
NR_DIAG_STAT
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
index efb50fc6866c..7164cb658435 100644
--- a/arch/s390/include/asm/ebcdic.h
+++ b/arch/s390/include/asm/ebcdic.h
@@ -22,18 +22,18 @@ extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */
static inline void
codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr)
{
- if (nr-- <= 0)
+ if (!nr--)
return;
asm volatile(
- " bras 1,1f\n"
- " tr 0(1,%0),0(%2)\n"
- "0: tr 0(256,%0),0(%2)\n"
+ " j 2f\n"
+ "0: tr 0(1,%0),0(%2)\n"
+ "1: tr 0(256,%0),0(%2)\n"
" la %0,256(%0)\n"
- "1: ahi %1,-256\n"
- " jnm 0b\n"
- " ex %1,0(1)"
+ "2: aghi %1,-256\n"
+ " jnm 1b\n"
+ " exrl %1,0b"
: "+&a" (addr), "+&a" (nr)
- : "a" (codepage) : "cc", "memory", "1");
+ : "a" (codepage) : "cc", "memory");
}
#define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr)
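[Editor's annotation, not part of the patch] The codepage_convert() rework above replaces the old BRAS/EX idiom, which needed register 1 as a literal base, with a branch-relative layout and EXRL, dropping the register 1 clobber. Functionally the loop is still a byte-wise table translation: full 256-byte TR chunks, then one EXECUTE-d TR covering the 1..256-byte remainder. A plain-C equivalent:

#include <stddef.h>

static void codepage_convert_model(const unsigned char table[256],
				   unsigned char *buf, size_t nr)
{
	size_t i;

	for (i = 0; i < nr; i++)	/* what the chunked tr/exrl sequence performs */
		buf[i] = table[buf[i]];
}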
diff --git a/arch/s390/include/asm/fprobe.h b/arch/s390/include/asm/fprobe.h
new file mode 100644
index 000000000000..5ef600b372f4
--- /dev/null
+++ b/arch/s390/include/asm/fprobe.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_FPROBE_H
+#define _ASM_S390_FPROBE_H
+
+#include <asm-generic/fprobe.h>
+
+#undef FPROBE_HEADER_MSB_PATTERN
+#define FPROBE_HEADER_MSB_PATTERN 0
+
+#endif /* _ASM_S390_FPROBE_H */
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
index c1e2e521d9af..f668bffd6dd3 100644
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -100,19 +100,12 @@ static __always_inline void fpu_lfpc(unsigned int *fpc)
*/
static inline void fpu_lfpc_safe(unsigned int *fpc)
{
- u32 tmp;
-
instrument_read(fpc, sizeof(*fpc));
- asm volatile("\n"
- "0: lfpc %[fpc]\n"
- "1: nopr %%r7\n"
- ".pushsection .fixup, \"ax\"\n"
- "2: lghi %[tmp],0\n"
- " sfpc %[tmp]\n"
- " jg 1b\n"
- ".popsection\n"
- EX_TABLE(1b, 2b)
- : [tmp] "=d" (tmp)
+ asm_inline volatile(
+ " lfpc %[fpc]\n"
+ "0: nopr %%r7\n"
+ EX_TABLE_FPC(0b, 0b)
+ :
: [fpc] "Q" (*fpc)
: "memory");
}
@@ -183,33 +176,33 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3)
: "memory");
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
instrument_read(vxr, sizeof(__vector128));
- asm volatile("\n"
- " la 1,%[vxr]\n"
- " VL %[v1],0,,1\n"
- :
- : [vxr] "R" (*(__vector128 *)vxr),
- [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n"
+ :
+ : [vxr] "Q" (*(__vector128 *)vxr),
+ [v1] "I" (v1)
+ : "memory");
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
instrument_read(vxr, sizeof(__vector128));
- asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n"
- :
- : [vxr] "Q" (*(__vector128 *)vxr),
- [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VL %[v1],0,,1\n"
+ :
+ : [vxr] "R" (*(__vector128 *)vxr),
+ [v1] "I" (v1)
+ : "memory", "1");
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vleib(u8 v, s16 val, u8 index)
{
@@ -238,7 +231,7 @@ static __always_inline u64 fpu_vlgvf(u8 v, u16 index)
return val;
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
@@ -246,17 +239,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
- asm volatile("\n"
- " la 1,%[vxr]\n"
- " VLL %[v1],%[index],0,1\n"
- :
- : [vxr] "R" (*(u8 *)vxr),
- [index] "d" (index),
- [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ :
+ : [vxr] "Q" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory");
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
@@ -264,17 +255,19 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_read(vxr, size);
- asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n"
- :
- : [vxr] "Q" (*(u8 *)vxr),
- [index] "d" (index),
- [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VLL %[v1],%[index],0,1\n"
+ :
+ : [vxr] "R" (*(u8 *)vxr),
+ [index] "d" (index),
+ [v1] "I" (v1)
+ : "memory", "1");
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
#define fpu_vlm(_v1, _v3, _vxrs) \
({ \
@@ -284,17 +277,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
- asm volatile("\n" \
- " la 1,%[vxrs]\n" \
- " VLM %[v1],%[v3],0,1\n" \
- : \
- : [vxrs] "R" (*_v), \
- [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory", "1"); \
+ asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ : \
+ : [vxrs] "Q" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
(_v3) - (_v1) + 1; \
})
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
#define fpu_vlm(_v1, _v3, _vxrs) \
({ \
@@ -304,15 +295,17 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_read(_v, size); \
- asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
- : \
- : [vxrs] "Q" (*_v), \
- [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory"); \
+ asm volatile( \
+ " la 1,%[vxrs]\n" \
+ " VLM %[v1],%[v3],0,1\n" \
+ : \
+ : [vxrs] "R" (*_v), \
+ [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
(_v3) - (_v1) + 1; \
})
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vlr(u8 v1, u8 v2)
{
@@ -362,33 +355,33 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3)
: "memory");
}
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vst(u8 v1, const void *vxr)
{
instrument_write(vxr, sizeof(__vector128));
- asm volatile("\n"
- " la 1,%[vxr]\n"
- " VST %[v1],0,,1\n"
- : [vxr] "=R" (*(__vector128 *)vxr)
- : [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
+ : [vxr] "=Q" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory");
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vst(u8 v1, const void *vxr)
{
instrument_write(vxr, sizeof(__vector128));
- asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
- : [vxr] "=Q" (*(__vector128 *)vxr)
- : [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VST %[v1],0,,1\n"
+ : [vxr] "=R" (*(__vector128 *)vxr)
+ : [v1] "I" (v1)
+ : "memory", "1");
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
{
@@ -396,15 +389,13 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_write(vxr, size);
- asm volatile("\n"
- " la 1,%[vxr]\n"
- " VSTL %[v1],%[index],0,1\n"
- : [vxr] "=R" (*(u8 *)vxr)
- : [index] "d" (index), [v1] "I" (v1)
- : "memory", "1");
+ asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
+ : [vxr] "=Q" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
+ : "memory");
}
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
{
@@ -412,15 +403,17 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
size = min(index + 1, sizeof(__vector128));
instrument_write(vxr, size);
- asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
- : [vxr] "=Q" (*(u8 *)vxr)
- : [index] "d" (index), [v1] "I" (v1)
- : "memory");
+ asm volatile(
+ " la 1,%[vxr]\n"
+ " VSTL %[v1],%[index],0,1\n"
+ : [vxr] "=R" (*(u8 *)vxr)
+ : [index] "d" (index), [v1] "I" (v1)
+ : "memory", "1");
}
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#ifdef CONFIG_CC_IS_CLANG
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
#define fpu_vstm(_v1, _v3, _vxrs) \
({ \
@@ -430,16 +423,14 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
- asm volatile("\n" \
- " la 1,%[vxrs]\n" \
- " VSTM %[v1],%[v3],0,1\n" \
- : [vxrs] "=R" (*_v) \
- : [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory", "1"); \
+ asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
+ : [vxrs] "=Q" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory"); \
(_v3) - (_v1) + 1; \
})
-#else /* CONFIG_CC_IS_CLANG */
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
#define fpu_vstm(_v1, _v3, _vxrs) \
({ \
@@ -449,14 +440,16 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
} *_v = (void *)(_vxrs); \
\
instrument_write(_v, size); \
- asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \
- : [vxrs] "=Q" (*_v) \
- : [v1] "I" (_v1), [v3] "I" (_v3) \
- : "memory"); \
+ asm volatile( \
+ " la 1,%[vxrs]\n" \
+ " VSTM %[v1],%[v3],0,1\n" \
+ : [vxrs] "=R" (*_v) \
+ : [v1] "I" (_v1), [v3] "I" (_v3) \
+ : "memory", "1"); \
(_v3) - (_v1) + 1; \
})
-#endif /* CONFIG_CC_IS_CLANG */
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
static __always_inline void fpu_vupllf(u8 v1, u8 v2)
{
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index fc97d75dc752..185331e91f83 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -39,6 +39,7 @@ struct dyn_arch_ftrace { };
struct module;
struct dyn_ftrace;
+struct ftrace_ops;
bool ftrace_need_init_nop(void);
#define ftrace_need_init_nop ftrace_need_init_nop
@@ -50,6 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
}
+#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip))
#include <linux/ftrace_regs.h>
@@ -62,30 +64,32 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *
return NULL;
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-struct fgraph_ret_regs {
- unsigned long gpr2;
- unsigned long fp;
-};
-
-static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
+ unsigned long ip)
{
- return ret_regs->gpr2;
+ arch_ftrace_regs(fregs)->regs.psw.addr = ip;
}
-static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
+#undef ftrace_regs_get_frame_pointer
+static __always_inline unsigned long
+ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
{
- return ret_regs->fp;
+ return ftrace_regs_get_stack_pointer(fregs);
}
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-static __always_inline void
-ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
- unsigned long ip)
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
{
- arch_ftrace_regs(fregs)->regs.psw.addr = ip;
+ return arch_ftrace_regs(fregs)->regs.gprs[14];
}
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
+ (_regs)->psw.mask = 0; \
+ (_regs)->psw.addr = arch_ftrace_regs(fregs)->regs.psw.addr; \
+ (_regs)->gprs[15] = arch_ftrace_regs(fregs)->regs.gprs[15]; \
+ } while (0)
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
/*
* When an ftrace registered caller is tracing a function that is
@@ -126,6 +130,10 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
return !strcmp(sym + 7, name) || !strcmp(sym + 8, name);
}
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
+
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_FUNCTION_TRACER
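[Editor's annotation, not part of the patch] The new arch_ftrace_fill_perf_regs() hook above populates only what perf sampling actually consumes from a pt_regs: the PSW address and the stack pointer in general register 15, with the PSW mask zeroed. A minimal model of that projection (the struct names here are illustrative, not the kernel's):

struct psw_model { unsigned long mask, addr; };
struct regs_model { struct psw_model psw; unsigned long gprs[16]; };

static void fill_perf_regs_model(struct regs_model *dst, const struct regs_model *src)
{
	dst->psw.mask = 0;
	dst->psw.addr = src->psw.addr;
	dst->gprs[15] = src->gprs[15];
}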
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index eaeaeb3ff0be..f5781794356b 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -2,80 +2,95 @@
#ifndef _ASM_S390_FUTEX_H
#define _ASM_S390_FUTEX_H
+#include <linux/instrumented.h>
#include <linux/uaccess.h>
#include <linux/futex.h>
#include <asm/asm-extable.h>
#include <asm/mmu_context.h>
#include <asm/errno.h>
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
- asm volatile( \
- " sacf 256\n" \
- "0: l %1,0(%6)\n" \
- "1:"insn \
- "2: cs %1,%2,0(%6)\n" \
- "3: jl 1b\n" \
- " lhi %0,0\n" \
- "4: sacf 768\n" \
- EX_TABLE(0b,4b) EX_TABLE(1b,4b) \
- EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
- : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
- "=m" (*uaddr) \
- : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
- "m" (*uaddr) : "cc");
+#define FUTEX_OP_FUNC(name, insn) \
+static uaccess_kmsan_or_inline int \
+__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \
+{ \
+ int rc, new; \
+ \
+ instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \
+ asm_inline volatile( \
+ " sacf 256\n" \
+ "0: l %[old],%[uaddr]\n" \
+ "1:"insn \
+ "2: cs %[old],%[new],%[uaddr]\n" \
+ "3: jl 1b\n" \
+ " lhi %[rc],0\n" \
+ "4: sacf 768\n" \
+ EX_TABLE_UA_FAULT(0b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(2b, 4b, %[rc]) \
+ EX_TABLE_UA_FAULT(3b, 4b, %[rc]) \
+ : [rc] "=d" (rc), [old] "=&d" (*old), \
+ [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \
+ : [oparg] "d" (oparg) \
+ : "cc"); \
+ if (!rc) \
+ instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \
+ return rc; \
+}
+
+FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n")
+FUTEX_OP_FUNC(or, "lr %[new],%[old]\n or %[new],%[oparg]\n")
+FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n")
-static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
- u32 __user *uaddr)
+static inline
+int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
- int oldval = 0, newval, ret;
+ int old, rc;
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op("lr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_set(oparg, &old, uaddr);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op("lr %2,%1\nar %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_add(oparg, &old, uaddr);
break;
case FUTEX_OP_OR:
- __futex_atomic_op("lr %2,%1\nor %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_or(oparg, &old, uaddr);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_and(~oparg, &old, uaddr);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
- ret, oldval, newval, uaddr, oparg);
+ rc = __futex_atomic_xor(oparg, &old, uaddr);
break;
default:
- ret = -ENOSYS;
+ rc = -ENOSYS;
}
-
- if (!ret)
- *oval = oldval;
-
- return ret;
+ if (!rc)
+ *oval = old;
+ return rc;
}
-static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
+static uaccess_kmsan_or_inline
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval)
{
- int ret;
+ int rc;
- asm volatile(
- " sacf 256\n"
- "0: cs %1,%4,0(%5)\n"
- "1: la %0,0\n"
- "2: sacf 768\n"
- EX_TABLE(0b,2b) EX_TABLE(1b,2b)
- : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
- : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+ instrument_copy_from_user_before(uval, uaddr, sizeof(*uval));
+ asm_inline volatile(
+ " sacf 256\n"
+ "0: cs %[old],%[new],%[uaddr]\n"
+ "1: lhi %[rc],0\n"
+ "2: sacf 768\n"
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc])
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc])
+ : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr)
+ : [new] "d" (newval)
: "cc", "memory");
*uval = oldval;
- return ret;
+ instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0);
+ return rc;
}
#endif /* _ASM_S390_FUTEX_H */
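[Editor's annotation, not part of the patch] The FUTEX_OP_FUNC() generator above emits the classic load / compute / COMPARE AND SWAP retry loop, now with per-instruction EX_TABLE_UA_FAULT fixups writing -EFAULT into rc instead of the old shared fixup. Its core semantics in portable C (fault handling omitted; in the kernel it is supplied by the extable entries):

#include <stdatomic.h>

static int futex_add_model(int oparg, int *old, _Atomic int *uaddr)
{
	int cur = atomic_load(uaddr);

	/* "jl 1b": retry the op with the refreshed old value until CS succeeds. */
	while (!atomic_compare_exchange_weak(uaddr, &cur, cur + oparg))
		;
	*old = cur;
	return 0;	/* rc is 0 on success; -EFAULT arrives via the extable on fault */
}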
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 13f51a6a5bb1..4e73ef46d4b2 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -23,7 +23,6 @@
/**
* struct gmap_struct - guest address space
* @list: list head for the mm->context gmap list
- * @crst_list: list of all crst tables used in the guest address space
* @mm: pointer to the parent mm_struct
* @guest_to_host: radix tree with guest to host address translation
* @host_to_guest: radix tree with pointer to segment table entries
@@ -35,7 +34,6 @@
* @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
- * @pt_list: list of all page tables used in the shadow guest address space
* @shadow_lock: spinlock to protect the shadow gmap list
* @parent: pointer to the parent gmap for shadow guest address spaces
* @orig_asce: ASCE for which the shadow page table has been created
@@ -45,7 +43,6 @@
*/
struct gmap {
struct list_head list;
- struct list_head crst_list;
struct mm_struct *mm;
struct radix_tree_root guest_to_host;
struct radix_tree_root host_to_guest;
@@ -61,7 +58,6 @@ struct gmap {
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
- struct list_head pt_list;
spinlock_t shadow_lock;
struct gmap *parent;
unsigned long orig_asce;
@@ -106,23 +102,21 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
void gmap_remove(struct gmap *gmap);
struct gmap *gmap_get(struct gmap *gmap);
void gmap_put(struct gmap *gmap);
+void gmap_free(struct gmap *gmap);
+struct gmap *gmap_alloc(unsigned long limit);
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len);
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
-unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
void __gmap_zap(struct gmap *, unsigned long gaddr);
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
- int edat_level);
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+void gmap_unshadow(struct gmap *sg);
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
int fake);
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
@@ -131,24 +125,22 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
int fake);
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
int fake);
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
- unsigned long *pgt, int *dat_protection, int *fake);
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
void gmap_register_pte_notifier(struct gmap_notifier *);
void gmap_unregister_pte_notifier(struct gmap_notifier *);
-int gmap_mprotect_notify(struct gmap *, unsigned long start,
- unsigned long len, int prot);
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits);
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
int s390_disable_cow_sharing(void);
-void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
unsigned long end, bool interruptible);
+int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split);
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);
/**
* s390_uv_destroy_range - Destroy a range of pages in the given mm.
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index a40664b236e9..7c52acaf9f82 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -20,12 +20,13 @@
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz);
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte);
+ pte_t *ptep, pte_t pte);
+
#define __HAVE_ARCH_HUGE_PTEP_GET
-extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
-extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
@@ -56,6 +57,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
pte_t pte, int dirty)
{
int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte);
+
if (changed) {
huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
@@ -68,19 +70,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
- __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
-}
-#define __HAVE_ARCH_HUGE_PTE_NONE
-static inline int huge_pte_none(pte_t pte)
-{
- return pte_none(pte);
-}
-
-#define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY
-static inline int huge_pte_none_mostly(pte_t pte)
-{
- return huge_pte_none(pte) || is_pte_marker(pte);
+ __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
#define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 97c7c8127543..9a367866cab0 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -30,6 +30,8 @@
#define KVM_S390_ESCA_CPU_SLOTS 248
#define KVM_MAX_VCPUS 255
+#define KVM_INTERNAL_MEM_SLOTS 1
+
/*
* These seem to be used for allocating ->chip in the routing table, which we
* don't use. 1 is as small as we can get to reduce the needed memory. If we
@@ -931,12 +933,14 @@ struct sie_page2 {
u8 reserved928[0x1000 - 0x928]; /* 0x0928 */
};
+struct vsie_page;
+
struct kvm_s390_vsie {
struct mutex mutex;
struct radix_tree_root addr_to_page;
int page_count;
int next;
- struct page *pages[KVM_MAX_VCPUS];
+ struct vsie_page *pages[KVM_MAX_VCPUS];
};
struct kvm_s390_gisa_iam {
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index 08fcbd628120..794fdb21500a 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -7,7 +7,6 @@
#ifndef PAGE_STATES_H
#define PAGE_STATES_H
-#include <asm/sections.h>
#include <asm/page.h>
#define ESSA_GET_STATE 0
@@ -21,7 +20,7 @@
#define ESSA_MAX ESSA_SET_STABLE_NODAT
-extern int __bootdata_preserved(cmma_flag);
+extern int cmma_flag;
static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd)
{
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 4f43cdd9835b..1ff145f7b52b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -184,7 +184,11 @@ extern struct vm_layout vm_layout;
#define __kaslr_offset vm_layout.kaslr_offset
#define __kaslr_offset_phys vm_layout.kaslr_offset_phys
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
#define __identity_base vm_layout.identity_base
+#else
+#define __identity_base 0UL
+#endif
#define ident_map_size vm_layout.identity_size
static inline unsigned long kaslr_offset(void)
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 7b84ef6dc4b6..b19b6ed2ab53 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -53,29 +53,42 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long *table = crst_table_alloc(mm);
- if (table)
- crst_table_init(table, _REGION2_ENTRY_EMPTY);
+ if (!table)
+ return NULL;
+ crst_table_init(table, _REGION2_ENTRY_EMPTY);
+ pagetable_p4d_ctor(virt_to_ptdesc(table));
+
return (p4d_t *) table;
}
static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
{
- if (!mm_p4d_folded(mm))
- crst_table_free(mm, (unsigned long *) p4d);
+ if (mm_p4d_folded(mm))
+ return;
+
+ pagetable_dtor(virt_to_ptdesc(p4d));
+ crst_table_free(mm, (unsigned long *) p4d);
}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long *table = crst_table_alloc(mm);
- if (table)
- crst_table_init(table, _REGION3_ENTRY_EMPTY);
+
+ if (!table)
+ return NULL;
+ crst_table_init(table, _REGION3_ENTRY_EMPTY);
+ pagetable_pud_ctor(virt_to_ptdesc(table));
+
return (pud_t *) table;
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
- if (!mm_pud_folded(mm))
- crst_table_free(mm, (unsigned long *) pud);
+ if (mm_pud_folded(mm))
+ return;
+
+ pagetable_dtor(virt_to_ptdesc(pud));
+ crst_table_free(mm, (unsigned long *) pud);
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
@@ -96,7 +109,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
if (mm_pmd_folded(mm))
return;
- pagetable_pmd_dtor(virt_to_ptdesc(pmd));
+ pagetable_dtor(virt_to_ptdesc(pmd));
crst_table_free(mm, (unsigned long *) pmd);
}
@@ -117,11 +130,18 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return (pgd_t *) crst_table_alloc(mm);
+ unsigned long *table = crst_table_alloc(mm);
+
+ if (!table)
+ return NULL;
+ pagetable_pgd_ctor(virt_to_ptdesc(table));
+
+ return (pgd_t *) table;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
+ pagetable_dtor(virt_to_ptdesc(pgd));
crst_table_free(mm, (unsigned long *) pgd);
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 48268095b0a3..3ca5af4cfe43 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -17,7 +17,6 @@
#include <linux/page-flags.h>
#include <linux/radix-tree.h>
#include <linux/atomic.h>
-#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/bug.h>
#include <asm/page.h>
@@ -35,7 +34,7 @@ enum {
PG_DIRECT_MAP_MAX
};
-extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
static inline void update_page_count(int level, long count)
{
@@ -85,14 +84,14 @@ extern unsigned long zero_page_mask;
* happen without trampolines and in addition the placement within a
* 2GB frame is branch prediction unit friendly.
*/
-extern unsigned long __bootdata_preserved(VMALLOC_START);
-extern unsigned long __bootdata_preserved(VMALLOC_END);
+extern unsigned long VMALLOC_START;
+extern unsigned long VMALLOC_END;
#define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN)
-extern struct page *__bootdata_preserved(vmemmap);
-extern unsigned long __bootdata_preserved(vmemmap_size);
+extern struct page *vmemmap;
+extern unsigned long vmemmap_size;
-extern unsigned long __bootdata_preserved(MODULES_VADDR);
-extern unsigned long __bootdata_preserved(MODULES_END);
+extern unsigned long MODULES_VADDR;
+extern unsigned long MODULES_END;
#define MODULES_VADDR MODULES_VADDR
#define MODULES_END MODULES_END
#define MODULES_LEN (1UL << 31)
@@ -125,6 +124,8 @@ static inline int is_module_addr(void *addr)
#define KASLR_LEN 0UL
#endif
+void setup_protection_map(void);
+
/*
* A 64 bit pagetable entry of S390 has following format:
* | PFRA |0IPC| OS |
@@ -419,9 +420,10 @@ static inline int is_module_addr(void *addr)
#define PGSTE_HC_BIT 0x0020000000000000UL
#define PGSTE_GR_BIT 0x0004000000000000UL
#define PGSTE_GC_BIT 0x0002000000000000UL
-#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
-#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
-#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */
+#define PGSTE_ST2_MASK 0x0000ffff00000000UL
+#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */
+#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */
+#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */
/* Guest Page State used for virtualization */
#define _PGSTE_GPS_ZERO 0x0000000080000000UL
@@ -443,98 +445,107 @@ static inline int is_module_addr(void *addr)
/*
* Page protection definitions.
*/
-#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+#define __PAGE_NONE (_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
+#define __PAGE_RO (_PAGE_PRESENT | _PAGE_READ | \
_PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RX __pgprot(_PAGE_PRESENT | _PAGE_READ | \
+#define __PAGE_RX (_PAGE_PRESENT | _PAGE_READ | \
_PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RW __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_RW (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_RWX (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_INVALID | _PAGE_PROTECT)
-
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_SHARED (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
-#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_KERNEL (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
_PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+#define __PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
_PAGE_PROTECT | _PAGE_NOEXEC)
-#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
- _PAGE_YOUNG | _PAGE_DIRTY)
-/*
- * On s390 the page table entry has an invalid bit and a read-only bit.
- * Read permission implies execute permission and write permission
- * implies read permission.
- */
- /*xwr*/
+extern unsigned long page_noexec_mask;
+
+#define __pgprot_page_mask(x) __pgprot((x) & page_noexec_mask)
+
+#define PAGE_NONE __pgprot_page_mask(__PAGE_NONE)
+#define PAGE_RO __pgprot_page_mask(__PAGE_RO)
+#define PAGE_RX __pgprot_page_mask(__PAGE_RX)
+#define PAGE_RW __pgprot_page_mask(__PAGE_RW)
+#define PAGE_RWX __pgprot_page_mask(__PAGE_RWX)
+#define PAGE_SHARED __pgprot_page_mask(__PAGE_SHARED)
+#define PAGE_KERNEL __pgprot_page_mask(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot_page_mask(__PAGE_KERNEL_RO)
/*
* Segment entry (large page) protection definitions.
*/
-#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \
+#define __SEGMENT_NONE (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_INVALID | \
_SEGMENT_ENTRY_PROTECT)
-#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \
+#define __SEGMENT_RO (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \
+#define __SEGMENT_RX (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_READ)
-#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \
+#define __SEGMENT_RW (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE | \
_SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \
+#define __SEGMENT_RWX (_SEGMENT_ENTRY_PRESENT | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE)
-#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \
+#define __SEGMENT_KERNEL (_SEGMENT_ENTRY | \
_SEGMENT_ENTRY_LARGE | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_WRITE | \
_SEGMENT_ENTRY_YOUNG | \
_SEGMENT_ENTRY_DIRTY | \
_SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \
+#define __SEGMENT_KERNEL_RO (_SEGMENT_ENTRY | \
_SEGMENT_ENTRY_LARGE | \
_SEGMENT_ENTRY_READ | \
_SEGMENT_ENTRY_YOUNG | \
_SEGMENT_ENTRY_PROTECT | \
_SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \
- _SEGMENT_ENTRY_LARGE | \
- _SEGMENT_ENTRY_READ | \
- _SEGMENT_ENTRY_WRITE | \
- _SEGMENT_ENTRY_YOUNG | \
- _SEGMENT_ENTRY_DIRTY)
+
+extern unsigned long segment_noexec_mask;
+
+#define __pgprot_segment_mask(x) __pgprot((x) & segment_noexec_mask)
+
+#define SEGMENT_NONE __pgprot_segment_mask(__SEGMENT_NONE)
+#define SEGMENT_RO __pgprot_segment_mask(__SEGMENT_RO)
+#define SEGMENT_RX __pgprot_segment_mask(__SEGMENT_RX)
+#define SEGMENT_RW __pgprot_segment_mask(__SEGMENT_RW)
+#define SEGMENT_RWX __pgprot_segment_mask(__SEGMENT_RWX)
+#define SEGMENT_KERNEL __pgprot_segment_mask(__SEGMENT_KERNEL)
+#define SEGMENT_KERNEL_RO __pgprot_segment_mask(__SEGMENT_KERNEL_RO)
/*
* Region3 entry (large page) protection definitions.
*/
-#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \
+#define __REGION3_KERNEL (_REGION_ENTRY_TYPE_R3 | \
_REGION3_ENTRY_PRESENT | \
- _REGION3_ENTRY_LARGE | \
- _REGION3_ENTRY_READ | \
- _REGION3_ENTRY_WRITE | \
- _REGION3_ENTRY_YOUNG | \
+ _REGION3_ENTRY_LARGE | \
+ _REGION3_ENTRY_READ | \
+ _REGION3_ENTRY_WRITE | \
+ _REGION3_ENTRY_YOUNG | \
_REGION3_ENTRY_DIRTY | \
_REGION_ENTRY_NOEXEC)
-#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
- _REGION3_ENTRY_PRESENT | \
- _REGION3_ENTRY_LARGE | \
- _REGION3_ENTRY_READ | \
- _REGION3_ENTRY_YOUNG | \
- _REGION_ENTRY_PROTECT | \
- _REGION_ENTRY_NOEXEC)
-#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \
+#define __REGION3_KERNEL_RO (_REGION_ENTRY_TYPE_R3 | \
_REGION3_ENTRY_PRESENT | \
- _REGION3_ENTRY_LARGE | \
- _REGION3_ENTRY_READ | \
- _REGION3_ENTRY_WRITE | \
- _REGION3_ENTRY_YOUNG | \
- _REGION3_ENTRY_DIRTY)
+ _REGION3_ENTRY_LARGE | \
+ _REGION3_ENTRY_READ | \
+ _REGION3_ENTRY_YOUNG | \
+ _REGION_ENTRY_PROTECT | \
+ _REGION_ENTRY_NOEXEC)
+
+extern unsigned long region_noexec_mask;
+
+#define __pgprot_region_mask(x) __pgprot((x) & region_noexec_mask)
+
+#define REGION3_KERNEL __pgprot_region_mask(__REGION3_KERNEL)
+#define REGION3_KERNEL_RO __pgprot_region_mask(__REGION3_KERNEL_RO)
static inline bool mm_p4d_folded(struct mm_struct *mm)
{
@@ -1435,8 +1446,6 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
pte_t __pte;
__pte = __pte(physpage | pgprot_val(pgprot));
- if (!MACHINE_HAS_NX)
- __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC));
return pte_mkyoung(__pte);
}
@@ -1804,8 +1813,6 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
- if (!MACHINE_HAS_NX)
- entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmdp, entry);
}
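
The two removals above (mk_pte_phys() and set_pmd_at() no longer clear the NOEXEC bits at runtime) are compensated by the new page/segment/region noexec masks applied in the protection definitions earlier in this file. A minimal sketch of how those masks would be initialized, assuming a hypothetical machine_has_nx() facility check; the real setup presumably lives in setup_protection_map() or early boot code:

/*
 * Sketch only: with all-ones masks the __pgprot_*_mask() wrappers are
 * no-ops; without the NX facility the NOEXEC bits are stripped once,
 * where the pgprot is evaluated, instead of in every mk_pte_phys()
 * and set_pmd_at() call.
 */
unsigned long page_noexec_mask = -1UL;
unsigned long segment_noexec_mask = -1UL;
unsigned long region_noexec_mask = -1UL;

static void init_noexec_masks(void)		/* hypothetical helper */
{
	if (!machine_has_nx()) {		/* hypothetical facility check */
		page_noexec_mask = ~_PAGE_NOEXEC;
		segment_noexec_mask = ~_SEGMENT_ENTRY_NOEXEC;
		region_noexec_mask = ~_REGION_ENTRY_NOEXEC;
	}
}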
@@ -2001,4 +2008,18 @@ extern void s390_reset_cmma(struct mm_struct *mm);
#define pmd_pgtable(pmd) \
((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
+static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
+{
+ unsigned long *pgstes, res;
+
+ pgstes = pgt + _PAGE_ENTRIES;
+
+ res = (pgstes[0] & PGSTE_ST2_MASK) << 16;
+ res |= pgstes[1] & PGSTE_ST2_MASK;
+ res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16;
+ res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32;
+
+ return res;
+}
+
#endif /* _S390_PAGE_H */
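
gmap_pgste_get_pgt_addr() gathers a 64-bit origin that is scattered, 16 bits at a time, across the ST2 field (PGSTE_ST2_MASK) of four consecutive PGSTEs. A self-contained round-trip demo; the store side here is an assumption that merely mirrors the read side shown in the hunk above:

#include <assert.h>
#include <stdint.h>

#define PGSTE_ST2_MASK 0x0000ffff00000000ULL

/* Scatter addr into the ST2 fields of four PGSTEs, 16 bits each. */
static void put_pgt_addr(uint64_t pgstes[4], uint64_t addr)
{
	pgstes[0] = (addr >> 16) & PGSTE_ST2_MASK;	/* bits 63..48 */
	pgstes[1] = addr & PGSTE_ST2_MASK;		/* bits 47..32 */
	pgstes[2] = (addr << 16) & PGSTE_ST2_MASK;	/* bits 31..16 */
	pgstes[3] = (addr << 32) & PGSTE_ST2_MASK;	/* bits 15..0  */
}

/* Same reassembly as gmap_pgste_get_pgt_addr() above. */
static uint64_t get_pgt_addr(const uint64_t pgstes[4])
{
	uint64_t res;

	res = (pgstes[0] & PGSTE_ST2_MASK) << 16;
	res |= pgstes[1] & PGSTE_ST2_MASK;
	res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16;
	res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32;
	return res;
}

int main(void)
{
	uint64_t pgstes[4];

	put_pgt_addr(pgstes, 0x0123456789abcdefULL);
	assert(get_pgt_addr(pgstes) == 0x0123456789abcdefULL);
	return 0;
}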
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index 51b68a43e195..7ef3bbec98b0 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -26,7 +26,7 @@ enum reserved_range_type {
RR_AMODE31,
RR_IPLREPORT,
RR_CERT_COMP_LIST,
- RR_MEM_DETECT_EXTENDED,
+ RR_MEM_DETECT_EXT,
RR_VMEM,
RR_MAX
};
@@ -128,7 +128,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t)
RR_TYPE_NAME(AMODE31);
RR_TYPE_NAME(IPLREPORT);
RR_TYPE_NAME(CERT_COMP_LIST);
- RR_TYPE_NAME(MEM_DETECT_EXTENDED);
+ RR_TYPE_NAME(MEM_DETECT_EXT);
RR_TYPE_NAME(VMEM);
default:
return "UNKNOWN";
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index 2c29bdf12127..6ccd033acfe5 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -8,12 +8,19 @@
#include <asm/cmpxchg.h>
#include <asm/march.h>
-#ifdef MARCH_HAS_Z196_FEATURES
-
/* We use the MSB mostly because it's available */
#define PREEMPT_NEED_RESCHED 0x80000000
+
+/*
+ * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
+ * that a decrement hitting 0 means we can and should reschedule.
+ */
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
static __always_inline int preempt_count(void)
{
return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED;
@@ -29,6 +36,15 @@ static __always_inline void preempt_count_set(int pc)
} while (!arch_try_cmpxchg(&get_lowcore()->preempt_count, &old, new));
}
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * short instruction sequence.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
static __always_inline void set_preempt_need_resched(void)
{
__atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
@@ -64,67 +80,24 @@ static __always_inline void __preempt_count_sub(int val)
__preempt_count_add(-val);
}
+/*
+ * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule,
+ * a decrement which hits zero means we have no preempt_count and should
+ * reschedule.
+ */
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return __atomic_add(-1, &get_lowcore()->preempt_count) == 1;
-}
-
-static __always_inline bool should_resched(int preempt_offset)
-{
- return unlikely(READ_ONCE(get_lowcore()->preempt_count) ==
- preempt_offset);
-}
-
-#else /* MARCH_HAS_Z196_FEATURES */
-
-#define PREEMPT_ENABLED (0)
-
-static __always_inline int preempt_count(void)
-{
- return READ_ONCE(get_lowcore()->preempt_count);
-}
-
-static __always_inline void preempt_count_set(int pc)
-{
- get_lowcore()->preempt_count = pc;
-}
-
-static __always_inline void set_preempt_need_resched(void)
-{
-}
-
-static __always_inline void clear_preempt_need_resched(void)
-{
-}
-
-static __always_inline bool test_preempt_need_resched(void)
-{
- return false;
-}
-
-static __always_inline void __preempt_count_add(int val)
-{
- get_lowcore()->preempt_count += val;
-}
-
-static __always_inline void __preempt_count_sub(int val)
-{
- get_lowcore()->preempt_count -= val;
-}
-
-static __always_inline bool __preempt_count_dec_and_test(void)
-{
- return !--get_lowcore()->preempt_count && tif_need_resched();
+ return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count);
}
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(preempt_count() == preempt_offset &&
- tif_need_resched());
+ return unlikely(READ_ONCE(get_lowcore()->preempt_count) == preempt_offset);
}
-#endif /* MARCH_HAS_Z196_FEATURES */
-
#define init_task_preempt_count(p) do { } while (0)
/* Deferred to CPU bringup time */
#define init_idle_preempt_count(p, cpu) do { } while (0)
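
The inverted-bit folding described in the comments above is easy to model in isolation. This is an illustration of the arithmetic only, not kernel code:

#include <stdint.h>
#include <stdio.h>

#define PREEMPT_NEED_RESCHED 0x80000000u

int main(void)
{
	/* PREEMPT_ENABLED: count 0 with the inverted bit set (no resched). */
	uint32_t count = 0 + PREEMPT_NEED_RESCHED;

	count += 1;				/* preempt_disable() */
	count &= ~PREEMPT_NEED_RESCHED;		/* set_preempt_need_resched() */
	/* preempt_enable(): a single decrement tests both conditions. */
	if (--count == 0)
		printf("count is zero and resched is needed: reschedule\n");
	return 0;
}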
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 8761fd01a9f0..4f8d5592c298 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -163,8 +163,7 @@ static __always_inline void __stackleak_poison(unsigned long erase_low,
" la %[addr],256(%[addr])\n"
" brctg %[tmp],0b\n"
"1: stg %[poison],0(%[addr])\n"
- " larl %[tmp],3f\n"
- " ex %[count],0(%[tmp])\n"
+ " exrl %[count],3f\n"
" j 4f\n"
"2: stg %[poison],0(%[addr])\n"
" j 4f\n"
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index eb00fa1771da..18f37dff03c9 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -16,6 +16,11 @@
/* 24 + 16 * SCLP_MAX_CORES */
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
+#define SCLP_ERRNOTIFY_AQ_RESET 0
+#define SCLP_ERRNOTIFY_AQ_REPAIR 1
+#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2
+#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3
+
#ifndef __ASSEMBLY__
#include <linux/uio.h>
#include <asm/chpid.h>
@@ -87,8 +92,10 @@ struct sclp_info {
unsigned char has_kss : 1;
unsigned char has_diag204_bif : 1;
unsigned char has_gisaf : 1;
+ unsigned char has_diag310 : 1;
unsigned char has_diag318 : 1;
unsigned char has_diag320 : 1;
+ unsigned char has_diag324 : 1;
unsigned char has_sipl : 1;
unsigned char has_sipl_eckd : 1;
unsigned char has_dirq : 1;
@@ -111,6 +118,34 @@ struct sclp_info {
};
extern struct sclp_info sclp;
+struct sccb_header {
+ u16 length;
+ u8 function_code;
+ u8 control_mask[3];
+ u16 response_code;
+} __packed;
+
+struct evbuf_header {
+ u16 length;
+ u8 type;
+ u8 flags;
+ u16 _reserved;
+} __packed;
+
+struct err_notify_evbuf {
+ struct evbuf_header header;
+ u8 action;
+ u8 atype;
+ u32 fh;
+ u32 fid;
+ u8 data[];
+} __packed;
+
+struct err_notify_sccb {
+ struct sccb_header header;
+ struct err_notify_evbuf evbuf;
+} __packed;
+
struct zpci_report_error_header {
u8 version; /* Interface version byte */
u8 action; /* Action qualifier byte
@@ -137,6 +172,7 @@ void sclp_early_printk(const char *s);
void __sclp_early_printk(const char *s, unsigned int len);
void sclp_emergency_printk(const char *s);
+int sclp_init(void);
int sclp_early_get_memsize(unsigned long *mem);
int sclp_early_get_hsa_size(unsigned long *hsa_size);
int _sclp_get_core_info(struct sclp_core_info *info);
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index e95b2c8081eb..72655fd2d867 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -22,7 +22,6 @@
* Pages used for the page tables is a different story. FIXME: more
*/
-void __tlb_remove_table(void *_table);
static inline void tlb_flush(struct mmu_gather *tlb);
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, bool delay_rmap, int page_size);
@@ -87,7 +86,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
tlb->cleared_pmds = 1;
if (mm_alloc_pgste(tlb->mm))
gmap_unlink(tlb->mm, (unsigned long *)pte, address);
- tlb_remove_ptdesc(tlb, pte);
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte));
}
/*
@@ -102,12 +101,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
{
if (mm_pmd_folded(tlb->mm))
return;
- pagetable_pmd_dtor(virt_to_ptdesc(pmd));
__tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_puds = 1;
- tlb_remove_ptdesc(tlb, pmd);
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd));
}
/*
@@ -125,7 +123,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
__tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
- tlb_remove_ptdesc(tlb, p4d);
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
}
/*
@@ -140,11 +138,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
{
if (mm_pud_folded(tlb->mm))
return;
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_p4ds = 1;
- tlb_remove_ptdesc(tlb, pud);
+ tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud));
}
-
#endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index a81f897a81ce..f5920163ee97 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -22,16 +22,117 @@
void debug_user_asce(int exit);
-unsigned long __must_check
-raw_copy_from_user(void *to, const void __user *from, unsigned long n);
+union oac {
+ unsigned int val;
+ struct {
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac1;
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac2;
+ };
+};
-unsigned long __must_check
-raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+static __always_inline __must_check unsigned long
+raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key)
+{
+ unsigned long rem;
+ union oac spec = {
+ .oac2.key = key,
+ .oac2.as = PSW_BITS_AS_SECONDARY,
+ .oac2.k = 1,
+ .oac2.a = 1,
+ };
-#ifndef CONFIG_KASAN
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-#endif
+ asm_inline volatile(
+ " lr %%r0,%[spec]\n"
+ "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
+ "1: jz 5f\n"
+ " algr %[size],%[val]\n"
+ " slgr %[from],%[val]\n"
+ " slgr %[to],%[val]\n"
+ " j 0b\n"
+ "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */
+ " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */
+ " slgr %[rem],%[from]\n"
+ " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
+ " jnh 6f\n"
+ "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
+ "4: slgr %[size],%[rem]\n"
+ " j 6f\n"
+ "5: lghi %[size],0\n"
+ "6:\n"
+ EX_TABLE(0b, 2b)
+ EX_TABLE(1b, 2b)
+ EX_TABLE(3b, 6b)
+ EX_TABLE(4b, 6b)
+ : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem)
+ : [val] "a" (-4096UL), [spec] "d" (spec.val)
+ : "cc", "memory", "0");
+ return size;
+}
+
+static __always_inline __must_check unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ return raw_copy_from_user_key(to, from, n, 0);
+}
+
+static __always_inline __must_check unsigned long
+raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key)
+{
+ unsigned long rem;
+ union oac spec = {
+ .oac1.key = key,
+ .oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.k = 1,
+ .oac1.a = 1,
+ };
+
+ asm_inline volatile(
+ " lr %%r0,%[spec]\n"
+ "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
+ "1: jz 5f\n"
+ " algr %[size],%[val]\n"
+ " slgr %[to],%[val]\n"
+ " slgr %[from],%[val]\n"
+ " j 0b\n"
+ "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */
+ " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */
+ " slgr %[rem],%[to]\n"
+ " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
+ " jnh 6f\n"
+ "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
+ "4: slgr %[size],%[rem]\n"
+ " j 6f\n"
+ "5: lghi %[size],0\n"
+ "6:\n"
+ EX_TABLE(0b, 2b)
+ EX_TABLE(1b, 2b)
+ EX_TABLE(3b, 6b)
+ EX_TABLE(4b, 6b)
+ : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem)
+ : [val] "a" (-4096UL), [spec] "d" (spec.val)
+ : "cc", "memory", "0");
+ return size;
+}
+
+static __always_inline __must_check unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ return raw_copy_to_user_key(to, from, n, 0);
+}
unsigned long __must_check
_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key);
@@ -55,63 +156,71 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo
return n;
}
-union oac {
- unsigned int val;
- struct {
- struct {
- unsigned short key : 4;
- unsigned short : 4;
- unsigned short as : 2;
- unsigned short : 4;
- unsigned short k : 1;
- unsigned short a : 1;
- } oac1;
- struct {
- unsigned short key : 4;
- unsigned short : 4;
- unsigned short as : 2;
- unsigned short : 4;
- unsigned short k : 1;
- unsigned short a : 1;
- } oac2;
- };
-};
-
int __noreturn __put_user_bad(void);
#ifdef CONFIG_KMSAN
-#define get_put_user_noinstr_attributes \
- noinline __maybe_unused __no_sanitize_memory
+#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory
#else
-#define get_put_user_noinstr_attributes __always_inline
+#define uaccess_kmsan_or_inline __always_inline
#endif
-#define DEFINE_PUT_USER(type) \
-static get_put_user_noinstr_attributes int \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define DEFINE_PUT_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__put_user_##type##_noinstr(unsigned type __user *to, \
+ unsigned type *from, \
+ unsigned long size) \
+{ \
+ asm goto( \
+ " llilh %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[Efault]) \
+ EX_TABLE(1b, %l[Efault]) \
+ : [to] "+Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
+ : "cc", "0" \
+ : Efault \
+ ); \
+ return 0; \
+Efault: \
+ return -EFAULT; \
+}
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+#define DEFINE_PUT_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
__put_user_##type##_noinstr(unsigned type __user *to, \
unsigned type *from, \
unsigned long size) \
{ \
- union oac __oac_spec = { \
- .oac1.as = PSW_BITS_AS_SECONDARY, \
- .oac1.a = 1, \
- }; \
int rc; \
\
asm volatile( \
- " lr 0,%[spec]\n" \
- "0: mvcos %[_to],%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
+ " llilh %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: lhi %[rc],0\n" \
"2:\n" \
- EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
- EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
- : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [spec] "d" (__oac_spec.val) \
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \
+ : [rc] "=d" (rc), [to] "+Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
: "cc", "0"); \
return rc; \
-} \
- \
+}
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+DEFINE_PUT_USER_NOINSTR(char);
+DEFINE_PUT_USER_NOINSTR(short);
+DEFINE_PUT_USER_NOINSTR(int);
+DEFINE_PUT_USER_NOINSTR(long);
+
+#define DEFINE_PUT_USER(type) \
static __always_inline int \
__put_user_##type(unsigned type __user *to, unsigned type *from, \
unsigned long size) \
@@ -128,69 +237,111 @@ DEFINE_PUT_USER(short);
DEFINE_PUT_USER(int);
DEFINE_PUT_USER(long);
-static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
-{
- int rc;
+#define __put_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) __x = (x); \
+ int __prc; \
+ \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ __prc = __put_user_char((unsigned char __user *)(ptr), \
+ (unsigned char *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ case 2: \
+ __prc = __put_user_short((unsigned short __user *)(ptr),\
+ (unsigned short *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ case 4: \
+ __prc = __put_user_int((unsigned int __user *)(ptr), \
+ (unsigned int *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ case 8: \
+ __prc = __put_user_long((unsigned long __user *)(ptr), \
+ (unsigned long *)&__x, \
+ sizeof(*(ptr))); \
+ break; \
+ default: \
+ __prc = __put_user_bad(); \
+ break; \
+ } \
+ __builtin_expect(__prc, 0); \
+})
- switch (size) {
- case 1:
- rc = __put_user_char((unsigned char __user *)ptr,
- (unsigned char *)x,
- size);
- break;
- case 2:
- rc = __put_user_short((unsigned short __user *)ptr,
- (unsigned short *)x,
- size);
- break;
- case 4:
- rc = __put_user_int((unsigned int __user *)ptr,
- (unsigned int *)x,
- size);
- break;
- case 8:
- rc = __put_user_long((unsigned long __user *)ptr,
- (unsigned long *)x,
- size);
- break;
- default:
- __put_user_bad();
- break;
- }
- return rc;
-}
+#define put_user(x, ptr) \
+({ \
+ might_fault(); \
+ __put_user(x, ptr); \
+})
int __noreturn __get_user_bad(void);
-#define DEFINE_GET_USER(type) \
-static get_put_user_noinstr_attributes int \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define DEFINE_GET_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
__get_user_##type##_noinstr(unsigned type *to, \
- unsigned type __user *from, \
+ const unsigned type __user *from, \
+ unsigned long size) \
+{ \
+ asm goto( \
+ " lhi %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[Efault]) \
+ EX_TABLE(1b, %l[Efault]) \
+ : [to] "=Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
+ : "cc", "0" \
+ : Efault \
+ ); \
+ return 0; \
+Efault: \
+ *to = 0; \
+ return -EFAULT; \
+}
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+#define DEFINE_GET_USER_NOINSTR(type) \
+static uaccess_kmsan_or_inline int \
+__get_user_##type##_noinstr(unsigned type *to, \
+ const unsigned type __user *from, \
unsigned long size) \
{ \
- union oac __oac_spec = { \
- .oac2.as = PSW_BITS_AS_SECONDARY, \
- .oac2.a = 1, \
- }; \
int rc; \
\
asm volatile( \
- " lr 0,%[spec]\n" \
- "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
+ " lhi %%r0,%[spec]\n" \
+ "0: mvcos %[to],%[from],%[size]\n" \
+ "1: lhi %[rc],0\n" \
"2:\n" \
- EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \
- EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \
- : [rc] "=&d" (rc), "=Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [spec] "d" (__oac_spec.val), [_to] "a" (to), \
- [_ksize] "K" (size) \
+ EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \
+ : [rc] "=d" (rc), [to] "=Q" (*to) \
+ : [size] "d" (size), [from] "Q" (*from), \
+ [spec] "I" (0x81) \
: "cc", "0"); \
+ if (likely(!rc)) \
+ return 0; \
+ *to = 0; \
return rc; \
-} \
- \
+}
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+DEFINE_GET_USER_NOINSTR(char);
+DEFINE_GET_USER_NOINSTR(short);
+DEFINE_GET_USER_NOINSTR(int);
+DEFINE_GET_USER_NOINSTR(long);
+
+#define DEFINE_GET_USER(type) \
static __always_inline int \
-__get_user_##type(unsigned type *to, unsigned type __user *from, \
+__get_user_##type(unsigned type *to, const unsigned type __user *from, \
unsigned long size) \
{ \
int rc; \
@@ -205,107 +356,50 @@ DEFINE_GET_USER(short);
DEFINE_GET_USER(int);
DEFINE_GET_USER(long);
-static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
-{
- int rc;
-
- switch (size) {
- case 1:
- rc = __get_user_char((unsigned char *)x,
- (unsigned char __user *)ptr,
- size);
- break;
- case 2:
- rc = __get_user_short((unsigned short *)x,
- (unsigned short __user *)ptr,
- size);
- break;
- case 4:
- rc = __get_user_int((unsigned int *)x,
- (unsigned int __user *)ptr,
- size);
- break;
- case 8:
- rc = __get_user_long((unsigned long *)x,
- (unsigned long __user *)ptr,
- size);
- break;
- default:
- __get_user_bad();
- break;
- }
- return rc;
-}
-
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- */
-#define __put_user(x, ptr) \
-({ \
- __typeof__(*(ptr)) __x = (x); \
- int __pu_err = -EFAULT; \
- \
- __chk_user_ptr(ptr); \
- switch (sizeof(*(ptr))) { \
- case 1: \
- case 2: \
- case 4: \
- case 8: \
- __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \
- break; \
- default: \
- __put_user_bad(); \
- break; \
- } \
- __builtin_expect(__pu_err, 0); \
-})
-
-#define put_user(x, ptr) \
-({ \
- might_fault(); \
- __put_user(x, ptr); \
-})
-
#define __get_user(x, ptr) \
({ \
- int __gu_err = -EFAULT; \
+ const __user void *____guptr = (ptr); \
+ int __grc; \
\
__chk_user_ptr(ptr); \
switch (sizeof(*(ptr))) { \
case 1: { \
+ const unsigned char __user *__guptr = ____guptr; \
unsigned char __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_char(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 2: { \
+ const unsigned short __user *__guptr = ____guptr; \
unsigned short __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 4: { \
+ const unsigned int __user *__guptr = ____guptr; \
unsigned int __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_int(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
case 8: { \
+ const unsigned long __user *__guptr = ____guptr; \
unsigned long __x; \
\
- __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ __grc = __get_user_long(&__x, __guptr, sizeof(*(ptr))); \
(x) = *(__force __typeof__(*(ptr)) *)&__x; \
break; \
}; \
default: \
- __get_user_bad(); \
+ __grc = __get_user_bad(); \
break; \
} \
- __builtin_expect(__gu_err, 0); \
+ __builtin_expect(__grc, 0); \
})
#define get_user(x, ptr) \
@@ -341,109 +435,71 @@ static inline void *s390_kernel_write(void *dst, const void *src, size_t size)
return __s390_kernel_write(dst, src, size);
}
-int __noreturn __put_kernel_bad(void);
+void __noreturn __mvc_kernel_nofault_bad(void);
-#define __put_kernel_asm(val, to, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- "0: " insn " %[_val],%[_to]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
- EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
- : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \
- : [_val] "d" (val) \
- : "cc"); \
- __rc; \
-})
+#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS)
-#define __put_kernel_nofault(dst, src, type, err_label) \
+#define __mvc_kernel_nofault(dst, src, type, err_label) \
do { \
- unsigned long __x = (unsigned long)(*((type *)(src))); \
- int __pk_err; \
- \
switch (sizeof(type)) { \
case 1: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \
- break; \
case 2: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \
- break; \
case 4: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \
- break; \
case 8: \
- __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \
+ asm goto( \
+ "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \
+ "1: nopr %%r7\n" \
+ EX_TABLE(0b, %l[err_label]) \
+ EX_TABLE(1b, %l[err_label]) \
+ : [_dst] "=Q" (*(type *)dst) \
+ : [_src] "Q" (*(type *)(src)), \
+ [_len] "I" (sizeof(type)) \
+ : \
+ : err_label); \
break; \
default: \
- __pk_err = __put_kernel_bad(); \
+ __mvc_kernel_nofault_bad(); \
break; \
} \
- if (unlikely(__pk_err)) \
- goto err_label; \
} while (0)
-int __noreturn __get_kernel_bad(void);
-
-#define __get_kernel_asm(val, from, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- "0: " insn " %[_val],%[_from]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \
- EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \
- : [rc] "=d" (__rc), [_val] "=d" (val) \
- : [_from] "Q" (*(from)) \
- : "cc"); \
- __rc; \
-})
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
-#define __get_kernel_nofault(dst, src, type, err_label) \
+#define __mvc_kernel_nofault(dst, src, type, err_label) \
do { \
- int __gk_err; \
+ type *(__dst) = (type *)(dst); \
+ int __rc; \
\
switch (sizeof(type)) { \
- case 1: { \
- unsigned char __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 2: { \
- unsigned short __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 4: { \
- unsigned int __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \
- *((type *)(dst)) = (type)__x; \
- break; \
- }; \
- case 8: { \
- unsigned long __x; \
- \
- __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \
- *((type *)(dst)) = (type)__x; \
+ case 1: \
+ case 2: \
+ case 4: \
+ case 8: \
+ asm_inline volatile( \
+ "0: mvc 0(%[_len],%[_dst]),%[_src]\n" \
+ "1: lhi %[_rc],0\n" \
+ "2:\n" \
+ EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \
+ EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \
+ : [_rc] "=d" (__rc), \
+ "=m" (*__dst) \
+ : [_src] "Q" (*(type *)(src)), \
+ [_dst] "a" (__dst), \
+ [_len] "I" (sizeof(type))); \
+ if (__rc) \
+ goto err_label; \
break; \
- }; \
default: \
- __gk_err = __get_kernel_bad(); \
+ __mvc_kernel_nofault_bad(); \
break; \
} \
- if (unlikely(__gk_err)) \
- goto err_label; \
} while (0)
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+#define __get_kernel_nofault __mvc_kernel_nofault
+#define __put_kernel_nofault __mvc_kernel_nofault
+
void __cmpxchg_user_key_called_with_bad_pointer(void);
#define CMPXCHG_USER_KEY_MAX_LOOPS 128
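
The control flow of the inline MVCOS copy routines above can be sketched in plain C. mvcos_copy() is a hypothetical stand-in for one MVCOS execution (here a plain memcpy that never faults, so the sketch compiles); in the real code fault detection happens via the exception table entries in the asm:

#include <stdbool.h>
#include <string.h>

static bool mvcos_copy(void *to, const void *from, unsigned long len)
{
	memcpy(to, from, len);	/* stand-in; the real MVCOS may fault */
	return false;
}

static unsigned long copy_from_user_sketch(void *to, const void *from,
					   unsigned long size)
{
	while (size) {
		unsigned long chunk = size < 4096 ? size : 4096;

		if (mvcos_copy(to, from, chunk)) {
			/*
			 * Fault: retry up to the next source page boundary,
			 * but only if the copy actually crosses it.
			 */
			unsigned long rem = 4096 - ((unsigned long)from & 4095);

			if (rem < size && !mvcos_copy(to, from, rem))
				size -= rem;
			return size;	/* number of bytes NOT copied */
		}
		to = (char *)to + chunk;
		from = (const char *)from + chunk;
		size -= chunk;
	}
	return 0;
}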
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index dc332609f2c3..b11f5b6d0bd1 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void)
}
int uv_pin_shared(unsigned long paddr);
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_folio(struct folio *folio);
int uv_destroy_pte(pte_t pte);
int uv_convert_from_secure_pte(pte_t pte);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb);
+int uv_convert_from_secure(unsigned long paddr);
+int uv_convert_from_secure_folio(struct folio *folio);
void setup_uv(void);
diff --git a/arch/s390/include/uapi/asm/diag.h b/arch/s390/include/uapi/asm/diag.h
new file mode 100644
index 000000000000..b7e6ccb4ff6e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/diag.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Diag ioctl definitions and their associated structures.
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#ifndef __S390_UAPI_ASM_DIAG_H
+#define __S390_UAPI_ASM_DIAG_H
+
+#include <linux/types.h>
+
+#define DIAG_MAGIC_STR 'D'
+
+struct diag324_pib {
+ __u64 address;
+ __u64 sequence;
+};
+
+struct diag310_memtop {
+ __u64 address;
+ __u64 nesting_lvl;
+};
+
+/* Diag ioctl definitions */
+#define DIAG324_GET_PIBBUF _IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib)
+#define DIAG324_GET_PIBLEN _IOR(DIAG_MAGIC_STR, 0x78, size_t)
+#define DIAG310_GET_STRIDE _IOR(DIAG_MAGIC_STR, 0x79, size_t)
+#define DIAG310_GET_MEMTOPLEN _IOWR(DIAG_MAGIC_STR, 0x7a, size_t)
+#define DIAG310_GET_MEMTOPBUF _IOWR(DIAG_MAGIC_STR, 0x7b, struct diag310_memtop)
+
+#endif /* __S390_UAPI_ASM_DIAG_H */
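
A hedged userspace sketch of the new ioctl interface; the device node name /dev/diag is an assumption (the misc device is registered by diag_misc.c below), only the ioctl numbers come from this header:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/diag.h>

int main(void)
{
	size_t pib_len = 0;
	int fd = open("/dev/diag", O_RDONLY);	/* node name assumed */

	if (fd < 0) {
		perror("open /dev/diag");
		return 1;
	}
	if (ioctl(fd, DIAG324_GET_PIBLEN, &pib_len) == 0)
		printf("PIB buffer length: %zu bytes\n", pib_len);
	close(fd);
	return 0;
}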
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 48caae8c7e10..db5f3a3faefb 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -38,12 +38,13 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
+obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
+obj-y += diag/
extra-y += vmlinux.lds
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
index 09cd24cbe74e..88f0b91d7a73 100644
--- a/arch/s390/kernel/abs_lowcore.c
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -2,6 +2,7 @@
#include <linux/pgtable.h>
#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
unsigned long __bootdata_preserved(__abs_lowcore);
int __bootdata_preserved(relocate_lowcore);
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 862a9140528e..36709112ae7a 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -175,12 +175,6 @@ int main(void)
DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* function graph return value tracing */
- OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2);
- OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp);
- DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs));
-#endif
OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs);
DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs));
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
index c8575dbc890d..4b9b34f95d72 100644
--- a/arch/s390/kernel/cpacf.c
+++ b/arch/s390/kernel/cpacf.c
@@ -14,7 +14,7 @@
#define CPACF_QUERY(name, instruction) \
static ssize_t name##_query_raw_read(struct file *fp, \
struct kobject *kobj, \
- struct bin_attribute *attr, \
+ const struct bin_attribute *attr, \
char *buf, loff_t offs, \
size_t count) \
{ \
@@ -24,7 +24,7 @@ static ssize_t name##_query_raw_read(struct file *fp, \
return -EOPNOTSUPP; \
return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \
} \
-static BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t))
+static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t))
CPACF_QUERY(km, KM);
CPACF_QUERY(kmc, KMC);
@@ -40,20 +40,20 @@ CPACF_QUERY(prno, PRNO);
CPACF_QUERY(kma, KMA);
CPACF_QUERY(kdsa, KDSA);
-#define CPACF_QAI(name, instruction) \
-static ssize_t name##_query_auth_info_raw_read( \
- struct file *fp, struct kobject *kobj, \
- struct bin_attribute *attr, char *buf, loff_t offs, \
- size_t count) \
-{ \
- cpacf_qai_t qai; \
- \
- if (!cpacf_qai(CPACF_##instruction, &qai)) \
- return -EOPNOTSUPP; \
- return memory_read_from_buffer(buf, count, &offs, &qai, \
- sizeof(qai)); \
-} \
-static BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t))
+#define CPACF_QAI(name, instruction) \
+static ssize_t name##_query_auth_info_raw_read( \
+ struct file *fp, struct kobject *kobj, \
+ const struct bin_attribute *attr, char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_qai_t qai; \
+ \
+ if (!cpacf_qai(CPACF_##instruction, &qai)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &qai, \
+ sizeof(qai)); \
+} \
+static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t))
CPACF_QAI(km, KM);
CPACF_QAI(kmc, KMC);
@@ -69,7 +69,7 @@ CPACF_QAI(prno, PRNO);
CPACF_QAI(kma, KMA);
CPACF_QAI(kdsa, KDSA);
-static struct bin_attribute *cpacf_attrs[] = {
+static const struct bin_attribute *const cpacf_attrs[] = {
&bin_attr_km_query_raw,
&bin_attr_kmc_query_raw,
&bin_attr_kimd_query_raw,
@@ -101,7 +101,7 @@ static struct bin_attribute *cpacf_attrs[] = {
static const struct attribute_group cpacf_attr_grp = {
.name = "cpacf",
- .bin_attrs = cpacf_attrs,
+ .bin_attrs_new = cpacf_attrs,
};
static int __init cpacf_init(void)
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index cd0c93a8fb8b..276cb4c1e11b 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -63,9 +63,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
{
struct save_area *sa;
- sa = memblock_alloc(sizeof(*sa), 8);
- if (!sa)
- return NULL;
+ sa = memblock_alloc_or_panic(sizeof(*sa), 8);
if (is_boot_cpu)
list_add(&sa->list, &dump_save_areas);
@@ -508,6 +506,19 @@ static int get_mem_chunk_cnt(void)
return cnt;
}
+static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr,
+ unsigned long vaddr, unsigned long size)
+{
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = vaddr;
+ phdr->p_offset = paddr;
+ phdr->p_paddr = paddr;
+ phdr->p_filesz = size;
+ phdr->p_memsz = size;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+}
+
/*
* Initialize ELF loads (new kernel)
*/
@@ -520,14 +531,8 @@ static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
if (os_info_has_vm)
old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
for_each_physmem_range(idx, &oldmem_type, &start, &end) {
- phdr->p_type = PT_LOAD;
- phdr->p_vaddr = old_identity_base + start;
- phdr->p_offset = start;
- phdr->p_paddr = start;
- phdr->p_filesz = end - start;
- phdr->p_memsz = end - start;
- phdr->p_flags = PF_R | PF_W | PF_X;
- phdr->p_align = PAGE_SIZE;
+ fill_ptload(phdr, start, old_identity_base + start,
+ end - start);
phdr++;
}
}
@@ -537,6 +542,22 @@ static bool os_info_has_vm(void)
return os_info_old_value(OS_INFO_KASLR_OFFSET);
}
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+/*
+ * Fill PT_LOAD for a physical memory range owned by a device and detected by
+ * its device driver.
+ */
+void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+ unsigned long long paddr, unsigned long long size)
+{
+ unsigned long old_identity_base = 0;
+
+ if (os_info_has_vm())
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+ fill_ptload(phdr, paddr, old_identity_base + paddr, size);
+}
+#endif
+
/*
* Prepare PT_LOAD type program header for kernel image region
*/
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index de19fd8a6a95..ce038e9205f7 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/math.h>
#include <linux/minmax.h>
#include <linux/debugfs.h>
@@ -94,9 +95,6 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, size_t out_buf_size,
const char *in_buf);
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, size_t out_buf_size,
- const char *inbuf);
static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
static void debug_events_append(debug_info_t *dest, debug_info_t *src);
@@ -354,7 +352,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode)
for (i = 0; i < in->nr_areas; i++) {
for (j = 0; j < in->pages_per_area; j++)
memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE);
+ rc->active_pages[i] = in->active_pages[i];
+ rc->active_entries[i] = in->active_entries[i];
}
+ rc->active_area = in->active_area;
out:
spin_unlock_irqrestore(&in->lock, flags);
return rc;
@@ -422,11 +423,17 @@ out:
return len;
}
-/*
- * debug_next_entry:
- * - goto next entry in p_info
+/**
+ * debug_next_entry - Go to the next entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the next entry. If no further entry
+ * exists, the current position is set to one past the end and the return value
+ * indicates that no further entries exist.
+ *
+ * Return: True if there are more following entries, false otherwise
*/
-static inline int debug_next_entry(file_private_info_t *p_info)
+static inline bool debug_next_entry(file_private_info_t *p_info)
{
debug_info_t *id;
@@ -434,10 +441,10 @@ static inline int debug_next_entry(file_private_info_t *p_info)
if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
p_info->act_entry = 0;
p_info->act_page = 0;
- goto out;
+ return true;
}
if (!id->areas)
- return 1;
+ return false;
p_info->act_entry += id->entry_size;
/* switch to next page, if we reached the end of the page */
if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) {
@@ -450,10 +457,87 @@ static inline int debug_next_entry(file_private_info_t *p_info)
p_info->act_page = 0;
}
if (p_info->act_area >= id->nr_areas)
- return 1;
+ return false;
}
-out:
- return 0;
+ return true;
+}
+
+/**
+ * debug_to_act_entry - Go to the currently active entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the currently active
+ * entry of @p_info->debug_info_snap
+ */
+static void debug_to_act_entry(file_private_info_t *p_info)
+{
+ debug_info_t *snap_id;
+
+ snap_id = p_info->debug_info_snap;
+ p_info->act_area = snap_id->active_area;
+ p_info->act_page = snap_id->active_pages[snap_id->active_area];
+ p_info->act_entry = snap_id->active_entries[snap_id->active_area];
+}
+
+/**
+ * debug_prev_entry - Go to the previous entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the previous entry. If no previous entry
+ * exists, the current position is left at DEBUG_PROLOG_ENTRY and the return value
+ * indicates that no previous entries exist.
+ *
+ * Return: True if there are more previous entries, false otherwise
+ */
+static inline bool debug_prev_entry(file_private_info_t *p_info)
+{
+ debug_info_t *id;
+
+ id = p_info->debug_info_snap;
+ if (p_info->act_entry == DEBUG_PROLOG_ENTRY)
+ debug_to_act_entry(p_info);
+ if (!id->areas)
+ return false;
+ p_info->act_entry -= id->entry_size;
+ /* switch to prev page, if we reached the beginning of the page */
+ if (p_info->act_entry < 0) {
+ /* end of previous page */
+ p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size;
+ p_info->act_page--;
+ if (p_info->act_page < 0) {
+ /* previous area */
+ p_info->act_area--;
+ p_info->act_page = id->pages_per_area - 1;
+ }
+ if (p_info->act_area < 0)
+ p_info->act_area = (id->nr_areas - 1) % id->nr_areas;
+ }
+ /* check full circle */
+ if (id->active_area == p_info->act_area &&
+ id->active_pages[id->active_area] == p_info->act_page &&
+ id->active_entries[id->active_area] == p_info->act_entry)
+ return false;
+ return true;
+}
+
+/**
+ * debug_move_entry - Go to next entry in either the forward or backward direction
+ * @p_info: Private info that is manipulated
+ * @reverse: If true go to the next entry in reverse i.e. previous
+ *
+ * Sets the current position in @p_info to the next (@reverse == false) or
+ * previous (@reverse == true) entry.
+ *
+ * Return: True if there are further entries in that direction,
+ * false otherwise.
+ */
+static bool debug_move_entry(file_private_info_t *p_info, bool reverse)
+{
+ if (reverse)
+ return debug_prev_entry(p_info);
+ else
+ return debug_next_entry(p_info);
}
/*
@@ -495,7 +579,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */
}
if (copy_size == formatted_line_residue) {
entry_offset = 0;
- if (debug_next_entry(p_info))
+ if (!debug_next_entry(p_info))
goto out;
}
}
@@ -530,6 +614,42 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
return rc; /* number of input characters */
}
+static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info,
+ struct debug_view *view)
+{
+ debug_info_t *debug_info_snapshot;
+ file_private_info_t *p_info;
+
+ /*
+ * Make snapshot of current debug areas to get it consistent.
+ * To copy all the areas is only needed, if we have a view which
+ * formats the debug areas.
+ */
+ if (!view->format_proc && !view->header_proc)
+ debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+ else
+ debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+
+ if (!debug_info_snapshot)
+ return NULL;
+ p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ if (!p_info) {
+ debug_info_free(debug_info_snapshot);
+ return NULL;
+ }
+ p_info->offset = 0;
+ p_info->debug_info_snap = debug_info_snapshot;
+ p_info->debug_info_org = debug_info;
+ p_info->view = view;
+ p_info->act_area = 0;
+ p_info->act_page = 0;
+ p_info->act_entry = DEBUG_PROLOG_ENTRY;
+ p_info->act_entry_offset = 0;
+ debug_info_get(debug_info);
+
+ return p_info;
+}
+
/*
* debug_open:
* - called for user open()
@@ -538,7 +658,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
*/
static int debug_open(struct inode *inode, struct file *file)
{
- debug_info_t *debug_info, *debug_info_snapshot;
+ debug_info_t *debug_info;
file_private_info_t *p_info;
int i, rc = 0;
@@ -556,42 +676,26 @@ static int debug_open(struct inode *inode, struct file *file)
goto out;
found:
-
- /* Make snapshot of current debug areas to get it consistent. */
- /* To copy all the areas is only needed, if we have a view which */
- /* formats the debug areas. */
-
- if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc)
- debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
- else
- debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
-
- if (!debug_info_snapshot) {
- rc = -ENOMEM;
- goto out;
- }
- p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ p_info = debug_file_private_alloc(debug_info, debug_info->views[i]);
if (!p_info) {
- debug_info_free(debug_info_snapshot);
rc = -ENOMEM;
goto out;
}
- p_info->offset = 0;
- p_info->debug_info_snap = debug_info_snapshot;
- p_info->debug_info_org = debug_info;
- p_info->view = debug_info->views[i];
- p_info->act_area = 0;
- p_info->act_page = 0;
- p_info->act_entry = DEBUG_PROLOG_ENTRY;
- p_info->act_entry_offset = 0;
file->private_data = p_info;
- debug_info_get(debug_info);
nonseekable_open(inode, file);
out:
mutex_unlock(&debug_mutex);
return rc;
}
+static void debug_file_private_free(file_private_info_t *p_info)
+{
+ if (p_info->debug_info_snap)
+ debug_info_free(p_info->debug_info_snap);
+ debug_info_put(p_info->debug_info_org);
+ kfree(p_info);
+}
+
/*
* debug_close:
* - called for user close()
@@ -602,13 +706,59 @@ static int debug_close(struct inode *inode, struct file *file)
file_private_info_t *p_info;
p_info = (file_private_info_t *) file->private_data;
- if (p_info->debug_info_snap)
- debug_info_free(p_info->debug_info_snap);
- debug_info_put(p_info->debug_info_org);
- kfree(file->private_data);
+ debug_file_private_free(p_info);
+ file->private_data = NULL;
return 0; /* success */
}
+/**
+ * debug_dump - Get a textual representation of debug info, or as much as fits
+ * @id: Debug information to use
+ * @view: View with which to dump the debug information
+ * @buf: Buffer the textual debug data representation is written to
+ * @buf_size: Size of the buffer, including the trailing '\0' byte
+ * @reverse: Go backwards from the last written entry
+ *
+ * This function may be used whenever a textual representation of the debug
+ * information is required without using an s390dbf file.
+ *
+ * Note: It is the caller's responsibility to supply a view that is compatible
+ * with the debug information data.
+ *
+ * Return: On success returns the number of bytes written to the buffer, not
+ * including the trailing '\0' byte. If buf_size == 0 the function returns 0.
+ * On failure, an error code less than 0 is returned.
+ */
+ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
+ char *buf, size_t buf_size, bool reverse)
+{
+ file_private_info_t *p_info;
+ size_t size, offset = 0;
+
+ /* Need space for '\0' byte */
+ if (buf_size < 1)
+ return 0;
+ buf_size--;
+
+ p_info = debug_file_private_alloc(id, view);
+ if (!p_info)
+ return -ENOMEM;
+
+ /* There is always at least the DEBUG_PROLOG_ENTRY */
+ do {
+ size = debug_format_entry(p_info);
+ size = min(size, buf_size - offset);
+ memcpy(buf + offset, p_info->temp_buf, size);
+ offset += size;
+ if (offset >= buf_size)
+ break;
+ } while (debug_move_entry(p_info, reverse));
+ debug_file_private_free(p_info);
+ buf[offset] = '\0';
+
+ return offset;
+}
+
/* Create debugfs entries and add to internal list. */
static void _debug_register(debug_info_t *id)
{
@@ -972,7 +1122,7 @@ static int s390dbf_procactive(const struct ctl_table *table, int write,
return 0;
}
-static struct ctl_table s390dbf_table[] = {
+static const struct ctl_table s390dbf_table[] = {
{
.procname = "debug_stoppable",
.data = &debug_stoppable,
@@ -1532,8 +1682,8 @@ EXPORT_SYMBOL(debug_dflt_header_fn);
#define DEBUG_SPRINTF_MAX_ARGS 10
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, size_t out_buf_size, const char *inbuf)
+int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size, const char *inbuf)
{
debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf;
int num_longs, num_used_args = 0, i, rc = 0;
@@ -1570,6 +1720,7 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
out:
return rc;
}
+EXPORT_SYMBOL(debug_sprintf_format_fn);
/*
* debug_init:
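
As a usage illustration, a hedged sketch (not part of this patch) of calling
the new debug_dump() from a driver: "my_dbf" is a hypothetical area created
with debug_register() and filled via debug_sprintf_event(), and
debug_sprintf_view is the stock view declared in <asm/debug.h>.

	#include <linux/slab.h>
	#include <asm/debug.h>

	static void my_dbf_report(debug_info_t *my_dbf)
	{
		char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
		ssize_t n;

		if (!buf)
			return;
		/* reverse=true starts from the last written entry */
		n = debug_dump(my_dbf, &debug_sprintf_view, buf, PAGE_SIZE, true);
		if (n >= 0)
			pr_info("s390dbf dump (%zd bytes):\n%s\n", n, buf);
		kfree(buf);
	}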
diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile
new file mode 100644
index 000000000000..956aee6c4090
--- /dev/null
+++ b/arch/s390/kernel/diag/Makefile
@@ -0,0 +1 @@
+obj-y := diag_misc.o diag324.o diag.o diag310.o
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c
index cdd6e31344fa..e15b8dee3228 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag/diag.c
@@ -17,7 +17,7 @@
#include <asm/trace/diag.h>
#include <asm/sections.h>
#include <asm/asm.h>
-#include "entry.h"
+#include "../entry.h"
struct diag_stat {
unsigned int counter[NR_DIAG_STAT];
@@ -51,8 +51,10 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
[DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+ [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" },
[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
[DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
+ [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" },
[DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" },
[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
};
diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c
new file mode 100644
index 000000000000..d6a34454aa5a
--- /dev/null
+++ b/arch/s390/kernel/diag/diag310.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request memory topology information via diag0x310.
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+#define DIAG310_LEVELMIN 1
+#define DIAG310_LEVELMAX 6
+
+enum diag310_sc {
+ DIAG310_SUBC_0 = 0,
+ DIAG310_SUBC_1 = 1,
+ DIAG310_SUBC_4 = 4,
+ DIAG310_SUBC_5 = 5
+};
+
+enum diag310_retcode {
+ DIAG310_RET_SUCCESS = 0x0001,
+ DIAG310_RET_BUSY = 0x0101,
+ DIAG310_RET_OPNOTSUPP = 0x0102,
+ DIAG310_RET_SC4_INVAL = 0x0401,
+ DIAG310_RET_SC4_NODATA = 0x0402,
+ DIAG310_RET_SC5_INVAL = 0x0501,
+ DIAG310_RET_SC5_NODATA = 0x0502,
+ DIAG310_RET_SC5_ESIZE = 0x0503
+};
+
+union diag310_response {
+ u64 response;
+ struct {
+ u64 result : 32;
+ u64 : 16;
+ u64 rc : 16;
+ };
+};
+
+union diag310_req_subcode {
+ u64 subcode;
+ struct {
+ u64 : 48;
+ u64 st : 8;
+ u64 sc : 8;
+ };
+};
+
+union diag310_req_size {
+ u64 size;
+ struct {
+ u64 page_count : 32;
+ u64 : 32;
+ };
+};
+
+static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr, .odd = size };
+
+ diag_stat_inc(DIAG_STAT_X310);
+ asm volatile("diag %[rp],%[subcode],0x310\n"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static int diag310_result_to_errno(unsigned int result)
+{
+ switch (result) {
+ case DIAG310_RET_BUSY:
+ return -EBUSY;
+ case DIAG310_RET_OPNOTSUPP:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int diag310_get_subcode_mask(unsigned long *mask)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_0, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *mask = res.response;
+ return 0;
+}
+
+static int diag310_get_memtop_stride(unsigned long *stride)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_1, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *stride = res.result;
+ return 0;
+}
+
+static int diag310_get_memtop_size(unsigned long *pages, unsigned long level)
+{
+ union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level };
+ union diag310_response res;
+
+ res.response = diag310(req.subcode, 0, NULL);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ *pages = res.result;
+ return 0;
+ case DIAG310_RET_SC4_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC4_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level)
+{
+ union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level };
+ union diag310_req_size req_size = { .page_count = pages };
+ union diag310_response res;
+
+ res.response = diag310(req_sc.subcode, req_size.size, buf);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ return 0;
+ case DIAG310_RET_SC5_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC5_ESIZE:
+ return -EOVERFLOW;
+ case DIAG310_RET_SC5_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_check_features(void)
+{
+ static int features_available;
+ unsigned long mask;
+ int rc;
+
+ if (READ_ONCE(features_available))
+ return 0;
+ if (!sclp.has_diag310)
+ return -EOPNOTSUPP;
+ rc = diag310_get_subcode_mask(&mask);
+ if (rc)
+ return rc;
+ if (!test_bit_inv(DIAG310_SUBC_1, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_4, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_5, &mask))
+ return -EOPNOTSUPP;
+ WRITE_ONCE(features_available, 1);
+ return 0;
+}
+
+static int memtop_get_stride_len(unsigned long *res)
+{
+ static unsigned long memtop_stride;
+ unsigned long stride;
+ int rc;
+
+ stride = READ_ONCE(memtop_stride);
+ if (!stride) {
+ rc = diag310_get_memtop_stride(&stride);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_stride, stride);
+ }
+ *res = stride;
+ return 0;
+}
+
+static int memtop_get_page_count(unsigned long *res, unsigned long level)
+{
+ static unsigned long memtop_pages[DIAG310_LEVELMAX];
+ unsigned long pages;
+ int rc;
+
+ if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN)
+ return -EINVAL;
+ pages = READ_ONCE(memtop_pages[level - 1]);
+ if (!pages) {
+ rc = diag310_get_memtop_size(&pages, level);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_pages[level - 1], pages);
+ }
+ *res = pages;
+ return 0;
+}
+
+long diag310_memtop_stride(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long stride;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ rc = memtop_get_stride_len(&stride);
+ if (rc)
+ return rc;
+ if (put_user(stride, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_len(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long pages, level;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, argp))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ if (put_user(pages * PAGE_SIZE, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_buf(unsigned long arg)
+{
+ struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg;
+ unsigned long level, pages, data_size;
+ u64 address;
+ void *buf;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, &udata->nesting_lvl))
+ return -EFAULT;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ data_size = pages * PAGE_SIZE;
+ buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ if (!buf)
+ return -ENOMEM;
+ rc = diag310_store_topology_map(buf, pages, level);
+ if (rc)
+ goto out;
+ if (copy_to_user((void __user *)address, buf, data_size))
+ rc = -EFAULT;
+out:
+ vfree(buf);
+ return rc;
+}
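
A hedged userspace sketch of the resulting /dev/diag interface (the ioctl
dispatch lives in diag_misc.c below): DIAG310_GET_MEMTOPLEN takes the nesting
level as input and returns the required buffer size; DIAG310_GET_MEMTOPBUF
stores the topology map at the supplied address. Ioctl and field names follow
the new uapi <asm/diag.h> as used by diag310_memtop_buf() above; error
handling is trimmed for brevity.

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <asm/diag.h>

	int main(void)
	{
		size_t len = 2;	/* in: nesting level, out: map size in bytes */
		int fd = open("/dev/diag", O_RDONLY);

		if (fd < 0 || ioctl(fd, DIAG310_GET_MEMTOPLEN, &len))
			return 1;
		void *map = malloc(len);
		struct diag310_memtop req = {
			.nesting_lvl = 2,
			.address = (uint64_t)(uintptr_t)map,
		};
		if (map && !ioctl(fd, DIAG310_GET_MEMTOPBUF, &req))
			printf("read %zu bytes of topology data\n", len);
		return 0;
	}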
diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c
new file mode 100644
index 000000000000..7fa4c0b7eb6c
--- /dev/null
+++ b/arch/s390/kernel/diag/diag324.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request power readings for resources in a computing environment via
+ * diag 0x324, which stores the readings in the power information
+ * block (pib).
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt) "diag324: " fmt
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/ioctl.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <asm/timex.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+enum subcode {
+ DIAG324_SUBC_0 = 0,
+ DIAG324_SUBC_1 = 1,
+ DIAG324_SUBC_2 = 2,
+};
+
+enum retcode {
+ DIAG324_RET_SUCCESS = 0x0001,
+ DIAG324_RET_SUBC_NOTAVAIL = 0x0103,
+ DIAG324_RET_INSUFFICIENT_SIZE = 0x0104,
+ DIAG324_RET_READING_UNAVAILABLE = 0x0105,
+};
+
+union diag324_response {
+ u64 response;
+ struct {
+ u64 installed : 32;
+ u64 : 16;
+ u64 rc : 16;
+ } sc0;
+ struct {
+ u64 format : 16;
+ u64 : 16;
+ u64 pib_len : 16;
+ u64 rc : 16;
+ } sc1;
+ struct {
+ u64 : 48;
+ u64 rc : 16;
+ } sc2;
+};
+
+union diag324_request {
+ u64 request;
+ struct {
+ u64 : 32;
+ u64 allocated : 16;
+ u64 : 12;
+ u64 sc : 4;
+ } sc2;
+};
+
+struct pib {
+ u32 : 8;
+ u32 num : 8;
+ u32 len : 16;
+ u32 : 24;
+ u32 hlen : 8;
+ u64 : 64;
+ u64 intv;
+ u8 r[];
+} __packed;
+
+struct pibdata {
+ struct pib *pib;
+ ktime_t expire;
+ u64 sequence;
+ size_t len;
+ int rc;
+};
+
+static DEFINE_MUTEX(pibmutex);
+static struct pibdata pibdata;
+
+#define PIBWORK_DELAY (5 * NSEC_PER_SEC)
+
+static void pibwork_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(pibwork, pibwork_handler);
+
+static unsigned long diag324(unsigned long subcode, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr };
+
+ diag_stat_inc(DIAG_STAT_X324);
+ asm volatile("diag %[rp],%[subcode],0x324\n"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static void pibwork_handler(struct work_struct *work)
+{
+ struct pibdata *data = &pibdata;
+ ktime_t timedout;
+
+ mutex_lock(&pibmutex);
+ timedout = ktime_add_ns(data->expire, PIBWORK_DELAY);
+ if (ktime_before(ktime_get(), timedout)) {
+ mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ goto out;
+ }
+ vfree(data->pib);
+ data->pib = NULL;
+out:
+ mutex_unlock(&pibmutex);
+}
+
+static void pib_update(struct pibdata *data)
+{
+ union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len };
+ union diag324_response res;
+ int rc;
+
+ memset(data->pib, 0, data->len);
+ res.response = diag324(req.request, data->pib);
+ switch (res.sc2.rc) {
+ case DIAG324_RET_SUCCESS:
+ rc = 0;
+ break;
+ case DIAG324_RET_SUBC_NOTAVAIL:
+ rc = -ENOENT;
+ break;
+ case DIAG324_RET_INSUFFICIENT_SIZE:
+ rc = -EMSGSIZE;
+ break;
+ case DIAG324_RET_READING_UNAVAILABLE:
+ rc = -EBUSY;
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ data->rc = rc;
+}
+
+long diag324_pibbuf(unsigned long arg)
+{
+ struct diag324_pib __user *udata = (struct diag324_pib __user *)arg;
+ struct pibdata *data = &pibdata;
+ static bool first = true;
+ u64 address;
+ int rc;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ mutex_lock(&pibmutex);
+ rc = -ENOMEM;
+ if (!data->pib)
+ data->pib = vmalloc(data->len);
+ if (!data->pib)
+ goto out;
+ if (first || ktime_after(ktime_get(), data->expire)) {
+ pib_update(data);
+ data->sequence++;
+ data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv));
+ mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ first = false;
+ }
+ rc = data->rc;
+ if (rc != 0 && rc != -EBUSY)
+ goto out;
+ rc = copy_to_user((void __user *)address, data->pib, data->pib->len);
+ rc |= put_user(data->sequence, &udata->sequence);
+ if (rc)
+ rc = -EFAULT;
+out:
+ mutex_unlock(&pibmutex);
+ return rc;
+}
+
+long diag324_piblen(unsigned long arg)
+{
+ struct pibdata *data = &pibdata;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (put_user(data->len, (size_t __user *)arg))
+ return -EFAULT;
+ return 0;
+}
+
+static int __init diag324_init(void)
+{
+ union diag324_response res;
+ unsigned long installed;
+
+ if (!sclp.has_diag324)
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_0, NULL);
+ if (res.sc0.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ installed = res.response;
+ if (!test_bit_inv(DIAG324_SUBC_1, &installed))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG324_SUBC_2, &installed))
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_1, NULL);
+ if (res.sc1.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ pibdata.len = res.sc1.pib_len;
+ return 0;
+}
+device_initcall(diag324_init);
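
A hedged userspace sketch for the power information block: DIAG324_GET_PIBLEN
reports the pib size, and DIAG324_GET_PIBBUF copies the (possibly cached) pib
to the supplied address and returns the current sequence counter. Field names
follow the new uapi <asm/diag.h> as used by diag324_pibbuf() above.

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <asm/diag.h>

	int main(void)
	{
		size_t len;
		int fd = open("/dev/diag", O_RDONLY);

		if (fd < 0 || ioctl(fd, DIAG324_GET_PIBLEN, &len))
			return 1;
		void *pib = malloc(len);
		struct diag324_pib req = { .address = (uint64_t)(uintptr_t)pib };
		if (pib && !ioctl(fd, DIAG324_GET_PIBBUF, &req))
			printf("pib sequence %llu\n", (unsigned long long)req.sequence);
		return 0;
	}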
diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h
new file mode 100644
index 000000000000..7080be946785
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_ioctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _DIAG_IOCTL_H
+#define _DIAG_IOCTL_H
+
+#include <linux/types.h>
+
+long diag324_pibbuf(unsigned long arg);
+long diag324_piblen(unsigned long arg);
+
+long diag310_memtop_stride(unsigned long arg);
+long diag310_memtop_len(unsigned long arg);
+long diag310_memtop_buf(unsigned long arg);
+
+#endif /* _DIAG_IOCTL_H */
diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c
new file mode 100644
index 000000000000..efffe02ea02e
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_misc.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Provide diagnose information via misc device /dev/diag.
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/types.h>
+
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ long rc;
+
+ switch (cmd) {
+ case DIAG324_GET_PIBLEN:
+ rc = diag324_piblen(arg);
+ break;
+ case DIAG324_GET_PIBBUF:
+ rc = diag324_pibbuf(arg);
+ break;
+ case DIAG310_GET_STRIDE:
+ rc = diag310_memtop_stride(arg);
+ break;
+ case DIAG310_GET_MEMTOPLEN:
+ rc = diag310_memtop_len(arg);
+ break;
+ case DIAG310_GET_MEMTOPBUF:
+ rc = diag310_memtop_buf(arg);
+ break;
+ default:
+ rc = -ENOIOCTLCMD;
+ break;
+ }
+ return rc;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .unlocked_ioctl = diag_ioctl,
+};
+
+static struct miscdevice diagdev = {
+ .name = "diag",
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &fops,
+ .mode = 0444,
+};
+
+static int diag_init(void)
+{
+ return misc_register(&diagdev);
+}
+
+device_initcall(diag_init);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 62f8f5a750a3..2fa25164df7d 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -50,6 +50,7 @@ decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
decompressor_handled_param(cmma);
decompressor_handled_param(relocate_lowcore);
+decompressor_handled_param(bootdebug);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
@@ -58,7 +59,7 @@ static void __init kasan_early_init(void)
{
#ifdef CONFIG_KASAN
init_task.kasan_depth = 0;
- sclp_early_printk("KernelAddressSanitizer initialized\n");
+ pr_info("KernelAddressSanitizer initialized\n");
#endif
}
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 960c08700cf6..4cc3408c4dac 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -52,16 +52,7 @@ _LPP_OFFSET = __LC_LPP
ALT_FACILITY(193)
.endm
- .macro CHECK_STACK savearea, lowcore
-#ifdef CONFIG_CHECK_STACK
- tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD
- la %r14,\savearea(\lowcore)
- jz stack_overflow
-#endif
- .endm
-
.macro CHECK_VMAP_STACK savearea, lowcore, oklabel
-#ifdef CONFIG_VMAP_STACK
lgr %r14,%r15
nill %r14,0x10000 - THREAD_SIZE
oill %r14,STACK_INIT_OFFSET
@@ -77,9 +68,6 @@ _LPP_OFFSET = __LC_LPP
je \oklabel
la %r14,\savearea(\lowcore)
j stack_overflow
-#else
- j \oklabel
-#endif
.endm
/*
@@ -326,8 +314,7 @@ SYM_CODE_START(pgm_check_handler)
jnz 2f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-2: CHECK_STACK __LC_SAVE_AREA,%r13
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
# CHECK_VMAP_STACK branches to stack_overflow or 4f
CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
3: lg %r15,__LC_KERNEL_STACK(%r13)
@@ -394,8 +381,7 @@ SYM_CODE_START(\name)
BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
-0: CHECK_STACK __LC_SAVE_AREA,%r13
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
lg %r15,__LC_KERNEL_STACK(%r13)
@@ -603,7 +589,6 @@ SYM_CODE_END(early_pgm_check_handler)
.section .kprobes.text, "ax"
-#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
/*
* The synchronous or the asynchronous stack overflowed. We are dead.
* No need to properly save the registers, we are going to panic anyway.
@@ -621,7 +606,6 @@ SYM_CODE_START(stack_overflow)
lgr %r2,%r11 # pass pointer to pt_regs
jg kernel_stack_overflow
SYM_CODE_END(stack_overflow)
-#endif
.section .data, "aw"
.balign 4
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 21969520f947..a1f28879c87e 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -41,7 +41,6 @@ void do_restart(void *arg);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
struct fadvise64_64_args;
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 51439a71e392..63ba6306632e 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -261,43 +261,18 @@ void ftrace_arch_code_modify_post_process(void)
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-/*
- * Hook the return address and push it in the stack of return addresses
- * in current thread info.
- */
-unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp,
- unsigned long ip)
-{
- if (unlikely(ftrace_graph_is_dead()))
- goto out;
- if (unlikely(atomic_read(&current->tracing_graph_pause)))
- goto out;
- ip -= MCOUNT_INSN_SIZE;
- if (!function_graph_enter(ra, ip, 0, (void *) sp))
- ra = (unsigned long) return_to_handler;
-out:
- return ra;
-}
-NOKPROBE_SYMBOL(prepare_ftrace_return);
-/*
- * Patch the kernel code at ftrace_graph_caller location. The instruction
- * there is branch relative on condition. To enable the ftrace graph code
- * block, we simply patch the mask field of the instruction to zero and
- * turn the instruction into a nop.
- * To disable the ftrace graph code the mask field will be patched to
- * all ones, which turns the instruction into an unconditional branch.
- */
-int ftrace_enable_ftrace_graph_caller(void)
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- /* Expect brc 0xf,... */
- return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
-}
+ unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
-int ftrace_disable_ftrace_graph_caller(void)
-{
- /* Expect brc 0x0,... */
- return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
+ if (unlikely(ftrace_graph_is_dead()))
+ return;
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+ if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs))
+ *parent = (unsigned long)&return_to_handler;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c
index 2a99a216ab62..7857a7e8e56c 100644
--- a/arch/s390/kernel/hiperdispatch.c
+++ b/arch/s390/kernel/hiperdispatch.c
@@ -292,7 +292,7 @@ static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write,
return 0;
}
-static struct ctl_table hiperdispatch_ctl_table[] = {
+static const struct ctl_table hiperdispatch_ctl_table[] = {
{
.procname = "hiperdispatch",
.mode = 0644,
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index edbb52ce3f1e..69be2309cde0 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -270,7 +270,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
if (len >= sizeof(_value)) \
return -E2BIG; \
len = strscpy(_value, buf, sizeof(_value)); \
- if (len < 0) \
+ if ((ssize_t)len < 0) \
return len; \
strim(_value); \
return len; \
@@ -280,58 +280,58 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store)
-#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
-static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \
- struct kobject *kobj, \
- struct bin_attribute *attr, \
- char *buf, loff_t off, \
- size_t count) \
-{ \
- size_t size = _ipl_block.scp_data_len; \
- void *scp_data = _ipl_block.scp_data; \
- \
- return memory_read_from_buffer(buf, count, &off, \
- scp_data, size); \
+#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t size = _ipl_block.scp_data_len; \
+ void *scp_data = _ipl_block.scp_data; \
+ \
+ return memory_read_from_buffer(buf, count, &off, \
+ scp_data, size); \
}
#define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
-static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \
- struct kobject *kobj, \
- struct bin_attribute *attr, \
- char *buf, loff_t off, \
- size_t count) \
-{ \
- size_t scpdata_len = count; \
- size_t padding; \
- \
- if (off) \
- return -EINVAL; \
- \
- memcpy(_ipl_block.scp_data, buf, count); \
- if (scpdata_len % 8) { \
- padding = 8 - (scpdata_len % 8); \
- memset(_ipl_block.scp_data + scpdata_len, \
- 0, padding); \
- scpdata_len += padding; \
- } \
- \
- _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \
- _ipl_block.len = _ipl_bp0_len + scpdata_len; \
- _ipl_block.scp_data_len = scpdata_len; \
- \
- return count; \
+static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t scpdata_len = count; \
+ size_t padding; \
+ \
+ if (off) \
+ return -EINVAL; \
+ \
+ memcpy(_ipl_block.scp_data, buf, count); \
+ if (scpdata_len % 8) { \
+ padding = 8 - (scpdata_len % 8); \
+ memset(_ipl_block.scp_data + scpdata_len, \
+ 0, padding); \
+ scpdata_len += padding; \
+ } \
+ \
+ _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \
+ _ipl_block.len = _ipl_bp0_len + scpdata_len; \
+ _ipl_block.scp_data_len = scpdata_len; \
+ \
+ return count; \
}
#define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \
IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
-static struct bin_attribute sys_##_prefix##_scp_data_attr = \
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
__BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \
NULL, _size)
#define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\
IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
-static struct bin_attribute sys_##_prefix##_scp_data_attr = \
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
__BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \
sys_##_prefix##_scp_data_store, _size)
@@ -434,19 +434,19 @@ static struct kobj_attribute sys_ipl_device_attr =
__ATTR(device, 0444, sys_ipl_device_show, NULL);
static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
return memory_read_from_buffer(buf, count, &off, &ipl_block,
ipl_block.hdr.len);
}
-static struct bin_attribute sys_ipl_parameter_attr =
+static const struct bin_attribute sys_ipl_parameter_attr =
__BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL,
PAGE_SIZE);
DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE);
-static struct bin_attribute *ipl_fcp_bin_attrs[] = {
+static const struct bin_attribute *const ipl_fcp_bin_attrs[] = {
&sys_ipl_parameter_attr,
&sys_ipl_fcp_scp_data_attr,
NULL,
@@ -454,7 +454,7 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = {
DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE);
-static struct bin_attribute *ipl_nvme_bin_attrs[] = {
+static const struct bin_attribute *const ipl_nvme_bin_attrs[] = {
&sys_ipl_parameter_attr,
&sys_ipl_nvme_scp_data_attr,
NULL,
@@ -462,7 +462,7 @@ static struct bin_attribute *ipl_nvme_bin_attrs[] = {
DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE);
-static struct bin_attribute *ipl_eckd_bin_attrs[] = {
+static const struct bin_attribute *const ipl_eckd_bin_attrs[] = {
&sys_ipl_parameter_attr,
&sys_ipl_eckd_scp_data_attr,
NULL,
@@ -593,9 +593,9 @@ static struct attribute *ipl_fcp_attrs[] = {
NULL,
};
-static struct attribute_group ipl_fcp_attr_group = {
+static const struct attribute_group ipl_fcp_attr_group = {
.attrs = ipl_fcp_attrs,
- .bin_attrs = ipl_fcp_bin_attrs,
+ .bin_attrs_new = ipl_fcp_bin_attrs,
};
static struct attribute *ipl_nvme_attrs[] = {
@@ -607,9 +607,9 @@ static struct attribute *ipl_nvme_attrs[] = {
NULL,
};
-static struct attribute_group ipl_nvme_attr_group = {
+static const struct attribute_group ipl_nvme_attr_group = {
.attrs = ipl_nvme_attrs,
- .bin_attrs = ipl_nvme_bin_attrs,
+ .bin_attrs_new = ipl_nvme_bin_attrs,
};
static struct attribute *ipl_eckd_attrs[] = {
@@ -620,9 +620,9 @@ static struct attribute *ipl_eckd_attrs[] = {
NULL,
};
-static struct attribute_group ipl_eckd_attr_group = {
+static const struct attribute_group ipl_eckd_attr_group = {
.attrs = ipl_eckd_attrs,
- .bin_attrs = ipl_eckd_bin_attrs,
+ .bin_attrs_new = ipl_eckd_bin_attrs,
};
/* CCW ipl device attributes */
@@ -640,11 +640,11 @@ static struct attribute *ipl_ccw_attrs_lpar[] = {
NULL,
};
-static struct attribute_group ipl_ccw_attr_group_vm = {
+static const struct attribute_group ipl_ccw_attr_group_vm = {
.attrs = ipl_ccw_attrs_vm,
};
-static struct attribute_group ipl_ccw_attr_group_lpar = {
+static const struct attribute_group ipl_ccw_attr_group_lpar = {
.attrs = ipl_ccw_attrs_lpar
};
@@ -655,7 +655,7 @@ static struct attribute *ipl_common_attrs[] = {
NULL,
};
-static struct attribute_group ipl_common_attr_group = {
+static const struct attribute_group ipl_common_attr_group = {
.attrs = ipl_common_attrs,
};
@@ -808,7 +808,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr,
IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
DIAG308_SCPDATA_SIZE);
-static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+static const struct bin_attribute *const reipl_fcp_bin_attrs[] = {
&sys_reipl_fcp_scp_data_attr,
NULL,
};
@@ -917,9 +917,9 @@ static struct attribute *reipl_fcp_attrs[] = {
NULL,
};
-static struct attribute_group reipl_fcp_attr_group = {
+static const struct attribute_group reipl_fcp_attr_group = {
.attrs = reipl_fcp_attrs,
- .bin_attrs = reipl_fcp_bin_attrs,
+ .bin_attrs_new = reipl_fcp_bin_attrs,
};
static struct kobj_attribute sys_reipl_fcp_clear_attr =
@@ -932,7 +932,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr,
IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
DIAG308_SCPDATA_SIZE);
-static struct bin_attribute *reipl_nvme_bin_attrs[] = {
+static const struct bin_attribute *const reipl_nvme_bin_attrs[] = {
&sys_reipl_nvme_scp_data_attr,
NULL,
};
@@ -955,9 +955,9 @@ static struct attribute *reipl_nvme_attrs[] = {
NULL,
};
-static struct attribute_group reipl_nvme_attr_group = {
+static const struct attribute_group reipl_nvme_attr_group = {
.attrs = reipl_nvme_attrs,
- .bin_attrs = reipl_nvme_bin_attrs
+ .bin_attrs_new = reipl_nvme_bin_attrs
};
static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
@@ -1031,7 +1031,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr,
IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
DIAG308_SCPDATA_SIZE);
-static struct bin_attribute *reipl_eckd_bin_attrs[] = {
+static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
&sys_reipl_eckd_scp_data_attr,
NULL,
};
@@ -1048,9 +1048,9 @@ static struct attribute *reipl_eckd_attrs[] = {
NULL,
};
-static struct attribute_group reipl_eckd_attr_group = {
+static const struct attribute_group reipl_eckd_attr_group = {
.attrs = reipl_eckd_attrs,
- .bin_attrs = reipl_eckd_bin_attrs
+ .bin_attrs_new = reipl_eckd_bin_attrs
};
static ssize_t reipl_eckd_clear_show(struct kobject *kobj,
@@ -1587,15 +1587,15 @@ static struct attribute *dump_fcp_attrs[] = {
NULL,
};
-static struct bin_attribute *dump_fcp_bin_attrs[] = {
+static const struct bin_attribute *const dump_fcp_bin_attrs[] = {
&sys_dump_fcp_scp_data_attr,
NULL,
};
-static struct attribute_group dump_fcp_attr_group = {
+static const struct attribute_group dump_fcp_attr_group = {
.name = IPL_FCP_STR,
.attrs = dump_fcp_attrs,
- .bin_attrs = dump_fcp_bin_attrs,
+ .bin_attrs_new = dump_fcp_bin_attrs,
};
/* NVME dump device attributes */
@@ -1621,15 +1621,15 @@ static struct attribute *dump_nvme_attrs[] = {
NULL,
};
-static struct bin_attribute *dump_nvme_bin_attrs[] = {
+static const struct bin_attribute *const dump_nvme_bin_attrs[] = {
&sys_dump_nvme_scp_data_attr,
NULL,
};
-static struct attribute_group dump_nvme_attr_group = {
+static const struct attribute_group dump_nvme_attr_group = {
.name = IPL_NVME_STR,
.attrs = dump_nvme_attrs,
- .bin_attrs = dump_nvme_bin_attrs,
+ .bin_attrs_new = dump_nvme_bin_attrs,
};
/* ECKD dump device attributes */
@@ -1655,15 +1655,15 @@ static struct attribute *dump_eckd_attrs[] = {
NULL,
};
-static struct bin_attribute *dump_eckd_bin_attrs[] = {
+static const struct bin_attribute *const dump_eckd_bin_attrs[] = {
&sys_dump_eckd_scp_data_attr,
NULL,
};
-static struct attribute_group dump_eckd_attr_group = {
+static const struct attribute_group dump_eckd_attr_group = {
.name = IPL_ECKD_STR,
.attrs = dump_eckd_attrs,
- .bin_attrs = dump_eckd_bin_attrs,
+ .bin_attrs_new = dump_eckd_bin_attrs,
};
/* CCW dump device attributes */
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 6652e54cf3db..6d1ffca5f798 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -166,7 +166,7 @@ static struct timer_list lgr_timer;
*/
static void lgr_timer_set(void)
{
- mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
+ mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS));
}
/*
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 7e267ef63a7f..1fec370fecf4 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -104,17 +104,6 @@ SYM_CODE_START(ftrace_common)
lgr %r3,%r14
la %r5,STACK_FREGS(%r15)
BASR_EX %r14,%r1
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-# The j instruction gets runtime patched to a nop instruction.
-# See ftrace_enable_ftrace_graph_caller.
-SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
- j .Lftrace_graph_caller_end
- lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
- lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15)
- brasl %r14,prepare_ftrace_return
- stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
-.Lftrace_graph_caller_end:
-#endif
lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
#ifdef MARCH_HAS_Z196_FEATURES
ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
@@ -134,14 +123,14 @@ SYM_CODE_END(ftrace_common)
SYM_FUNC_START(return_to_handler)
stmg %r2,%r5,32(%r15)
lgr %r1,%r15
- aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE)
+ # allocate ftrace_regs and stack frame for ftrace_return_to_handler
+ aghi %r15,-STACK_FRAME_SIZE_FREGS
stg %r1,__SF_BACKCHAIN(%r15)
- la %r3,STACK_FRAME_OVERHEAD(%r15)
- stg %r1,__FGRAPH_RET_FP(%r3)
- stg %r2,__FGRAPH_RET_GPR2(%r3)
- lgr %r2,%r3
+ stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+ stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+ la %r2,STACK_FRAME_OVERHEAD(%r15)
brasl %r14,ftrace_return_to_handler
- aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE
+ aghi %r15,STACK_FRAME_SIZE_FREGS
lgr %r14,%r2
lmg %r2,%r5,32(%r15)
BR_EX %r14
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index ddc1448ea2e1..2fc40f97c0ad 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -21,12 +21,8 @@ void __init numa_setup(void)
nodes_clear(node_possible_map);
node_set(0, node_possible_map);
node_set_online(0);
- for (nid = 0; nid < MAX_NUMNODES; nid++) {
- NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
- if (!NODE_DATA(nid))
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(pg_data_t), 8);
- }
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8);
NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
NODE_DATA(0)->node_id = 0;
}
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 29080d6d5d8d..c2a468986212 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -18,6 +18,7 @@
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/asm-offsets.h>
+#include <asm/sections.h>
#include <asm/ipl.h>
/*
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index b0bc68da6a11..33205dd410e4 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -981,7 +981,7 @@ static int cfdiag_push_sample(struct perf_event *event,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = cpuhw->usedss;
raw.frag.data = cpuhw->stop;
- perf_sample_save_raw_data(&data, &raw);
+ perf_sample_save_raw_data(&data, event, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 1e99514fb7ae..5f60248cb468 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -981,7 +981,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
cpuhw->flags &= ~PMU_F_ENABLED;
}
-/* perf_exclude_event() - Filter event
+/* perf_event_exclude() - Filter event
* @event: The perf event
* @regs: pt_regs structure
* @sde_regs: Sample-data-entry (sde) regs structure
@@ -990,7 +990,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
*
* Return non-zero if the event shall be excluded.
*/
-static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
struct perf_sf_sde_regs *sde_regs)
{
if (event->attr.exclude_user && user_mode(regs))
@@ -1073,7 +1073,7 @@ static int perf_push_sample(struct perf_event *event,
data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
overflow = 0;
- if (perf_exclude_event(event, &regs, sde_regs))
+ if (perf_event_exclude(event, &regs, sde_regs))
goto out;
if (perf_event_overflow(event, &data, &regs)) {
overflow = 1;
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index fa7325454266..10725f5a6f0f 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -478,7 +478,7 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
- perf_sample_save_raw_data(&data, &raw);
+ perf_sample_save_raw_data(&data, event, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 7f462bef1fc0..a8f0bad99cf0 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -503,7 +503,7 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
- perf_sample_save_raw_data(&data, &raw);
+ perf_sample_save_raw_data(&data, event, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index a3fea683b227..d78bcfe707b5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -157,18 +157,24 @@ u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
struct oldmem_data __bootdata_preserved(oldmem_data);
-unsigned long VMALLOC_START;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);
-unsigned long VMALLOC_END;
+unsigned long __bootdata_preserved(VMALLOC_END);
EXPORT_SYMBOL(VMALLOC_END);
-struct page *vmemmap;
+struct page *__bootdata_preserved(vmemmap);
EXPORT_SYMBOL(vmemmap);
-unsigned long vmemmap_size;
+unsigned long __bootdata_preserved(vmemmap_size);
-unsigned long MODULES_VADDR;
-unsigned long MODULES_END;
+unsigned long __bootdata_preserved(MODULES_VADDR);
+unsigned long __bootdata_preserved(MODULES_END);
/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
@@ -359,36 +365,24 @@ void *restart_stack;
unsigned long stack_alloc(void)
{
-#ifdef CONFIG_VMAP_STACK
- void *ret;
+ void *stack;
- ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
- NUMA_NO_NODE, __builtin_return_address(0));
- kmemleak_not_leak(ret);
- return (unsigned long)ret;
-#else
- return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
-#endif
+ stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ kmemleak_not_leak(stack);
+ return (unsigned long)stack;
}
void stack_free(unsigned long stack)
{
-#ifdef CONFIG_VMAP_STACK
- vfree((void *) stack);
-#else
- free_pages(stack, THREAD_SIZE_ORDER);
-#endif
+ vfree((void *)stack);
}
static unsigned long __init stack_alloc_early(void)
{
unsigned long stack;
- stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- if (!stack) {
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, THREAD_SIZE, THREAD_SIZE);
- }
+ stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE);
return stack;
}
@@ -512,10 +506,7 @@ static void __init setup_resources(void)
bss_resource.end = __pa_symbol(__bss_stop) - 1;
for_each_mem_range(i, &start, &end) {
- res = memblock_alloc(sizeof(*res), 8);
- if (!res)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*res), 8);
+ res = memblock_alloc_or_panic(sizeof(*res), 8);
res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
@@ -534,10 +525,7 @@ static void __init setup_resources(void)
std_res->start > res->end)
continue;
if (std_res->end > res->end) {
- sub_res = memblock_alloc(sizeof(*sub_res), 8);
- if (!sub_res)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*sub_res), 8);
+ sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8);
*sub_res = *std_res;
sub_res->end = res->end;
std_res->start = res->end + 1;
@@ -704,7 +692,7 @@ static void __init reserve_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_reserve(addr, size);
}
@@ -712,7 +700,7 @@ static void __init free_physmem_info(void)
{
unsigned long addr, size;
- if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
memblock_phys_free(addr, size);
}
@@ -742,7 +730,7 @@ static void __init reserve_lowcore(void)
void *lowcore_end = lowcore_start + sizeof(struct lowcore);
void *start, *end;
- if ((void *)__identity_base < lowcore_end) {
+ if (absolute_pointer(__identity_base) < lowcore_end) {
start = max(lowcore_start, (void *)__identity_base);
end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
memblock_reserve(__pa(start), __pa(end));
@@ -824,9 +812,7 @@ static void __init setup_randomness(void)
{
struct sysinfo_3_2_2 *vmms;
- vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!vmms)
- panic("Failed to allocate memory for sysinfo structure\n");
+ vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
memblock_free(vmms, PAGE_SIZE);
@@ -886,6 +872,23 @@ static void __init log_component_list(void)
}
/*
+ * Print a boot ring buffer entry without interpreting '%' characters in buf,
+ * taking the bootdebug option and filter into consideration.
+ */
+static void __init print_rb_entry(const char *buf)
+{
+ char fmt[] = KERN_SOH "0boot: %s";
+ int level = printk_get_level(buf);
+
+ buf = skip_timestamp(printk_skip_level(buf));
+ if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
+ return;
+
+ fmt[1] = level;
+ printk(fmt, buf);
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -904,6 +907,9 @@ void __init setup_arch(char **cmdline_p)
pr_info("Linux is running natively in 64-bit mode\n");
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ /* Print decompressor messages if not already printed */
+ if (!boot_earlyprintk)
+ boot_rb_foreach(print_rb_entry);
if (have_relocated_lowcore())
pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
@@ -979,6 +985,7 @@ void __init setup_arch(char **cmdline_p)
if (test_facility(193))
static_branch_enable(&cpu_has_bear);
+ setup_protection_map();
/*
* Create kernel page tables.
*/
@@ -1006,3 +1013,8 @@ void __init setup_arch(char **cmdline_p)
/* Add system specific data to the random pool */
setup_randomness();
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ sclp_init();
+}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 822d8e6f8717..7b08399b0846 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -611,9 +611,7 @@ void __init smp_save_dump_ipl_cpu(void)
if (!dump_available())
return;
sa = save_area_alloc(true);
- regs = memblock_alloc(512, 8);
- if (!sa || !regs)
- panic("could not allocate memory for boot CPU save area\n");
+ regs = memblock_alloc_or_panic(512, 8);
copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
save_area_add_regs(sa, regs);
memblock_free(regs, 512);
@@ -646,8 +644,6 @@ void __init smp_save_dump_secondary_cpus(void)
SIGP_CC_NOT_OPERATIONAL)
continue;
sa = save_area_alloc(false);
- if (!sa)
- panic("could not allocate memory for save area\n");
__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
save_area_add_regs(sa, page);
if (cpu_has_vx()) {
@@ -792,10 +788,7 @@ void __init smp_detect_cpus(void)
u16 address;
/* Get CPU information */
- info = memblock_alloc(sizeof(*info), 8);
- if (!info)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*info), 8);
+ info = memblock_alloc_or_panic(sizeof(*info), 8);
smp_get_core_info(info, 1);
/* Find boot CPU type */
if (sclp.has_core_type) {
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index c0a70efa2426..26f2981aa09e 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -18,8 +18,7 @@
* affects a few functions that are not performance-relevant.
*/
.macro BR_EX_AMODE31_r14
- larl %r1,0f
- ex 0,0(%r1)
+ exrl 0,0f
j .
0: br %r14
.endm
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 34a65c141ea0..e9f47c3a6197 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -662,12 +662,12 @@ static void stp_check_leap(void)
if (ret < 0)
pr_err("failed to set leap second flags\n");
/* arm Timer to clear leap second flags */
- mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC));
+ mod_timer(&stp_timer, jiffies + secs_to_jiffies(14400));
} else {
/* The day the leap second is scheduled for hasn't been reached. Retry
* in one hour.
*/
- mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC));
+ mod_timer(&stp_timer, jiffies + secs_to_jiffies(3600));
}
}
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 4f9c301a705b..211cc8382e4a 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -371,7 +371,7 @@ static void set_topology_timer(void)
if (atomic_add_unless(&topology_poll, -1, 0))
mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
else
- mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
+ mod_timer(&topology_timer, jiffies + secs_to_jiffies(60));
}
void topology_expect_change(void)
@@ -548,14 +548,21 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info,
nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
- mask->next = memblock_alloc(sizeof(*mask->next), 8);
- if (!mask->next)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*mask->next), 8);
+ mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8);
mask = mask->next;
}
}
+static int __init detect_polarization(union topology_entry *tle)
+{
+ struct topology_core *tl_core;
+
+ while (tle->nl)
+ tle = next_tle(tle);
+ tl_core = (struct topology_core *)tle;
+ return tl_core->pp != POLARIZATION_HRZ;
+}
+
void __init topology_init_early(void)
{
struct sysinfo_15_1_x *info;
@@ -569,12 +576,10 @@ void __init topology_init_early(void)
}
if (!MACHINE_HAS_TOPOLOGY)
goto out;
- tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!tl_info)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE);
+ tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
info = tl_info;
store_topology(info);
+ cpu_management = detect_polarization(info->tle);
pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
info->mag[0], info->mag[1], info->mag[2], info->mag[3],
info->mag[4], info->mag[5], info->mnest);
@@ -662,7 +667,7 @@ static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
return set_polarization(polarization);
}
-static struct ctl_table topology_ctl_table[] = {
+static const struct ctl_table topology_ctl_table[] = {
{
.procname = "topology",
.mode = 0644,
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 6f9654a191ad..9f05df2da2f7 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -19,19 +19,6 @@
#include <asm/sections.h>
#include <asm/uv.h>
-#if !IS_ENABLED(CONFIG_KVM)
-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
- return 0;
-}
-
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
- unsigned int fault_flags)
-{
- return 0;
-}
-#endif
-
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
@@ -159,6 +146,7 @@ int uv_destroy_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL(uv_destroy_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -175,7 +163,7 @@ int uv_destroy_pte(pte_t pte)
*
* @paddr: Absolute host address of page to be exported
*/
-static int uv_convert_from_secure(unsigned long paddr)
+int uv_convert_from_secure(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
@@ -187,11 +175,12 @@ static int uv_convert_from_secure(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
/*
* The caller must already hold a reference to the folio.
*/
-static int uv_convert_from_secure_folio(struct folio *folio)
+int uv_convert_from_secure_folio(struct folio *folio)
{
int rc;
@@ -206,6 +195,7 @@ static int uv_convert_from_secure_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -237,13 +227,33 @@ static int expected_folio_refs(struct folio *folio)
return res;
}
-static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
+/**
+ * make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible; @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ * -EBUSY if the folio is in writeback or has too many references;
+ * -E2BIG if the folio is large;
+ * -EAGAIN if the UVC needs to be attempted again;
+ * -ENXIO if the address is not mapped;
+ * -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ * (it's the same logic as split_folio())
+ */
+int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;
+ if (folio_test_large(folio))
+ return -E2BIG;
if (folio_test_writeback(folio))
- return -EAGAIN;
- expected = expected_folio_refs(folio);
+ return -EBUSY;
+ expected = expected_folio_refs(folio) + 1;
if (!folio_ref_freeze(folio, expected))
return -EBUSY;
set_bit(PG_arch_1, &folio->flags);
@@ -267,251 +277,7 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
return -EAGAIN;
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
-
-/**
- * should_export_before_import - Determine whether an export is needed
- * before an import-like operation
- * @uvcb: the Ultravisor control block of the UVC to be performed
- * @mm: the mm of the process
- *
- * Returns whether an export is needed before every import-like operation.
- * This is needed for shared pages, which don't trigger a secure storage
- * exception when accessed from a different guest.
- *
- * Although considered as one, the Unpin Page UVC is not an actual import,
- * so it is not affected.
- *
- * No export is needed also when there is only one protected VM, because the
- * page cannot belong to the wrong VM in that case (there is no "other VM"
- * it can belong to).
- *
- * Return: true if an export is needed before every import, otherwise false.
- */
-static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
-{
- /*
- * The misc feature indicates, among other things, that importing a
- * shared page from a different protected VM will automatically also
- * transfer its ownership.
- */
- if (uv_has_feature(BIT_UV_FEAT_MISC))
- return false;
- if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
- return false;
- return atomic_read(&mm->context.protected_count) > 1;
-}
-
-/*
- * Drain LRU caches: the local one on first invocation and the ones of all
- * CPUs on successive invocations. Returns "true" on the first invocation.
- */
-static bool drain_lru(bool *drain_lru_called)
-{
- /*
- * If we have tried a local drain and the folio refcount
- * still does not match our expected safe value, try with a
- * system wide drain. This is needed if the pagevecs holding
- * the page are on a different CPU.
- */
- if (*drain_lru_called) {
- lru_add_drain_all();
- /* We give up here, don't retry immediately. */
- return false;
- }
- /*
- * We are here if the folio refcount does not match the
- * expected safe value. The main culprits are usually
- * pagevecs. With lru_add_drain() we drain the pagevecs
- * on the local CPU so that hopefully the refcount will
- * reach the expected safe value.
- */
- lru_add_drain();
- *drain_lru_called = true;
- /* The caller should try again immediately */
- return true;
-}
-
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
-{
- struct vm_area_struct *vma;
- bool drain_lru_called = false;
- spinlock_t *ptelock;
- unsigned long uaddr;
- struct folio *folio;
- pte_t *ptep;
- int rc;
-
-again:
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode. If
- * userspace is playing dirty tricky with mapping huge pages later
- * on this will result in a segmentation fault.
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = -ENXIO;
- ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
- if (!ptep)
- goto out;
- if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- folio = page_folio(pte_page(*ptep));
- rc = -EAGAIN;
- if (folio_test_large(folio)) {
- rc = -E2BIG;
- } else if (folio_trylock(folio)) {
- if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
- rc = make_folio_secure(folio, uvcb);
- folio_unlock(folio);
- }
-
- /*
- * Once we drop the PTL, the folio may get unmapped and
- * freed immediately. We need a temporary reference.
- */
- if (rc == -EAGAIN || rc == -E2BIG)
- folio_get(folio);
- }
- pte_unmap_unlock(ptep, ptelock);
-out:
- mmap_read_unlock(gmap->mm);
-
- switch (rc) {
- case -E2BIG:
- folio_lock(folio);
- rc = split_folio(folio);
- folio_unlock(folio);
- folio_put(folio);
-
- switch (rc) {
- case 0:
- /* Splitting succeeded, try again immediately. */
- goto again;
- case -EAGAIN:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -EBUSY:
- /* Unexpected race. */
- return -EAGAIN;
- }
- WARN_ON_ONCE(1);
- return -ENXIO;
- case -EAGAIN:
- /*
- * If we are here because the UVC returned busy or partial
- * completion, this is just a useless check, but it is safe.
- */
- folio_wait_writeback(folio);
- folio_put(folio);
- return -EAGAIN;
- case -EBUSY:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -ENXIO:
- if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
- return -EFAULT;
- return -EAGAIN;
- }
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_make_secure);
-
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
-
- return gmap_make_secure(gmap, gaddr, &uvcb);
-}
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
-
-/**
- * gmap_destroy_page - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
-{
- struct vm_area_struct *vma;
- struct folio_walk fw;
- unsigned long uaddr;
- struct folio *folio;
- int rc;
-
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Huge pages should not be able to become secure
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = 0;
- folio = folio_walk_start(&fw, vma, uaddr, 0);
- if (!folio)
- goto out;
- /*
- * See gmap_make_secure(): large folios cannot be secure. Small
- * folio implies FW_LEVEL_PTE.
- */
- if (folio_test_large(folio) || !pte_write(fw.pte))
- goto out_walk_end;
- rc = uv_destroy_folio(folio);
- /*
- * Fault handlers can race; it is possible that two CPUs will fault
- * on the same secure page. One CPU can destroy the page, reboot,
- * re-enter secure mode and import it, while the second CPU was
- * stuck at the beginning of the handler. At some point the second
- * CPU will be able to progress, and it will not be able to destroy
- * the page. In that case we do not want to terminate the process,
- * we instead try to export the page.
- */
- if (rc)
- rc = uv_convert_from_secure_folio(folio);
-out_walk_end:
- folio_walk_end(&fw, vma);
-out:
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_destroy_page);
+EXPORT_SYMBOL_GPL(make_folio_secure);
/*
* To be called with the folio locked or with an extra reference! This will
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 37bb4b761229..ad206f2068d8 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -5,7 +5,7 @@
include $(srctree)/lib/vdso/Makefile
obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
-VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c
index 23f7d7619a99..cc8933e04ff7 100644
--- a/arch/s390/kernel/vmcore_info.c
+++ b/arch/s390/kernel/vmcore_info.c
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/vmcore_info.h>
-#include <asm/abs_lowcore.h>
#include <linux/mm.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
#include <asm/setup.h>
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 377b9aaf8c92..ff1ddba96352 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -52,7 +52,6 @@ SECTIONS
SOFTIRQENTRY_TEXT
FTRACE_HOTPATCH_TRAMPOLINES_TEXT
*(.text.*_indirect_*)
- *(.fixup)
*(.gnu.warning)
. = ALIGN(PAGE_SIZE);
_etext = .; /* End of text section */
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 02217fb4ae10..f0ffe874adc2 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 9816b0060fbe..f6fded15633a 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -16,6 +16,7 @@
#include <asm/gmap.h>
#include <asm/dat-bits.h>
#include "kvm-s390.h"
+#include "gmap.h"
#include "gaccess.h"
/*
@@ -1393,6 +1394,44 @@ shadow_pgt:
}
/**
+ * shadow_pgt_lookup() - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow guest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references a contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_lock held in read mode.
+ */
+static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
+ int *dat_protection, int *fake)
+{
+ unsigned long pt_index;
+ unsigned long *table;
+ struct page *page;
+ int rc;
+
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+ if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+ /* Shadow page tables are full pages (pte+pgste) */
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
+ *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
+ *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+ *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
+ rc = 0;
+ } else {
+ rc = -EAGAIN;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+}
+
+/**
* kvm_s390_shadow_fault - handle fault on a shadow page table
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
@@ -1415,6 +1454,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
int dat_protection, fake;
int rc;
+ if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
+ return -EFAULT;
+
mmap_read_lock(sg->mm);
/*
* We don't want any guest-2 tables to change - so the parent
@@ -1423,7 +1465,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
*/
ipte_lock(vcpu->kvm);
- rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+ rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
if (rc)
rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
&fake);
diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c
new file mode 100644
index 000000000000..a6d1dbb04c97
--- /dev/null
+++ b/arch/s390/kvm/gmap-vsie.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390 nested VMs.
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "kvm-s390.h"
+#include "gmap.h"
+
+/**
+ * gmap_find_shadow - find a specific asce in the list of shadow tables
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns the pointer to a gmap if a shadow table with the given asce is
+ * already available, ERR_PTR(-EAGAIN) if another one is just being created,
+ * otherwise NULL
+ *
+ * Context: Called with parent->shadow_lock held
+ */
+static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg;
+
+ lockdep_assert_held(&parent->shadow_lock);
+ list_for_each_entry(sg, &parent->children, list) {
+ if (!gmap_shadow_valid(sg, asce, edat_level))
+ continue;
+ if (!sg->initialized)
+ return ERR_PTR(-EAGAIN);
+ refcount_inc(&sg->ref_count);
+ return sg;
+ }
+ return NULL;
+}
+
+/**
+ * gmap_shadow - create/find a shadow guest address space
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * The pages of the top level page table referred by the asce parameter
+ * will be set to read-only and marked in the PGSTEs of the kvm process.
+ * The shadow table will be removed automatically on any change to the
+ * PTE mapping for the source table.
+ *
+ * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
+ * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
+ * parent gmap table could not be protected.
+ */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+ struct gmap *sg, *new;
+ unsigned long limit;
+ int rc;
+
+ if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) ||
+ KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private))
+ return ERR_PTR(-EFAULT);
+ spin_lock(&parent->shadow_lock);
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ spin_unlock(&parent->shadow_lock);
+ if (sg)
+ return sg;
+ /* Create a new shadow gmap */
+ limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
+ if (asce & _ASCE_REAL_SPACE)
+ limit = -1UL;
+ new = gmap_alloc(limit);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ new->mm = parent->mm;
+ new->parent = gmap_get(parent);
+ new->private = parent->private;
+ new->orig_asce = asce;
+ new->edat_level = edat_level;
+ new->initialized = false;
+ spin_lock(&parent->shadow_lock);
+ /* Recheck if another CPU created the same shadow */
+ sg = gmap_find_shadow(parent, asce, edat_level);
+ if (sg) {
+ spin_unlock(&parent->shadow_lock);
+ gmap_free(new);
+ return sg;
+ }
+ if (asce & _ASCE_REAL_SPACE) {
+ /* only allow one real-space gmap shadow */
+ list_for_each_entry(sg, &parent->children, list) {
+ if (sg->orig_asce & _ASCE_REAL_SPACE) {
+ spin_lock(&sg->guest_table_lock);
+ gmap_unshadow(sg);
+ spin_unlock(&sg->guest_table_lock);
+ list_del(&sg->list);
+ gmap_put(sg);
+ break;
+ }
+ }
+ }
+ refcount_set(&new->ref_count, 2);
+ list_add(&new->list, &parent->children);
+ if (asce & _ASCE_REAL_SPACE) {
+ /* nothing to protect, return right away */
+ new->initialized = true;
+ spin_unlock(&parent->shadow_lock);
+ return new;
+ }
+ spin_unlock(&parent->shadow_lock);
+ /* protect after insertion, so it will get properly invalidated */
+ mmap_read_lock(parent->mm);
+ rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN,
+ ((asce & _ASCE_TABLE_LENGTH) + 1),
+ PROT_READ, GMAP_NOTIFY_SHADOW);
+ mmap_read_unlock(parent->mm);
+ spin_lock(&parent->shadow_lock);
+ new->initialized = true;
+ if (rc) {
+ list_del(&new->list);
+ gmap_free(new);
+ new = ERR_PTR(rc);
+ }
+ spin_unlock(&parent->shadow_lock);
+ return new;
+}
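
gmap_shadow() above is an instance of the check-allocate-recheck pattern:
look up the cache under the lock, allocate outside the lock, then recheck
under the lock before inserting, since another CPU may have created the
same entry in the meantime. A minimal sketch of the pattern with
illustrative names (cache_find, obj_alloc, obj_free and cache_insert are
placeholders, not kernel APIs):

	static struct obj *get_or_create(struct cache *c, unsigned long key)
	{
		struct obj *o, *new;

		spin_lock(&c->lock);
		o = cache_find(c, key);		/* fast path */
		spin_unlock(&c->lock);
		if (o)
			return o;

		new = obj_alloc(key);		/* allocate without the lock */
		if (!new)
			return ERR_PTR(-ENOMEM);

		spin_lock(&c->lock);
		o = cache_find(c, key);		/* recheck: we may have raced */
		if (o) {
			spin_unlock(&c->lock);
			obj_free(new);		/* someone else won the race */
			return o;
		}
		cache_insert(c, new);
		spin_unlock(&c->lock);
		return new;
	}
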
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
new file mode 100644
index 000000000000..02adf151d4de
--- /dev/null
+++ b/arch/s390/kvm/gmap.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "gmap.h"
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * Also, no export is needed when there is only one protected VM, because
+ * the page cannot belong to the wrong VM in that case (there is no
+ * "other VM" it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
+static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+	 * In case userspace does it anyway, this will result in an -EFAULT for
+	 * the unpack. The guest thus never reaches secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
+ */
+ if (folio_test_hugetlb(folio))
+ return -EFAULT;
+ if (folio_test_large(folio)) {
+ mmap_read_unlock(gmap->mm);
+ rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true);
+ mmap_read_lock(gmap->mm);
+ if (rc)
+ return rc;
+ folio = page_folio(page);
+ }
+
+ if (!folio_trylock(folio))
+ return -EAGAIN;
+ if (should_export_before_import(uvcb, gmap->mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+
+ /*
+ * In theory a race is possible and the folio might have become
+ * large again before the folio_trylock() above. In that case, no
+ * action is performed and -EAGAIN is returned; the callers will
+ * have to try again later.
+ * In most cases this implies running the VM again, getting the same
+	 * exception again, and making another attempt in this function.
+ * This is expected to happen extremely rarely.
+ */
+ if (rc == -E2BIG)
+ return -EAGAIN;
+ /* The folio has too many references, try to shake some off */
+ if (rc == -EBUSY) {
+ mmap_read_unlock(gmap->mm);
+ kvm_s390_wiggle_split_folio(gmap->mm, folio, false);
+ mmap_read_lock(gmap->mm);
+ return -EAGAIN;
+ }
+
+ return rc;
+}
+
+/**
+ * gmap_make_secure() - make one guest page secure
+ * @gmap: the guest gmap
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()).
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+ struct kvm *kvm = gmap->private;
+ struct page *page;
+ int rc = 0;
+
+ lockdep_assert_held(&kvm->srcu);
+
+ page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+ mmap_read_lock(gmap->mm);
+ if (page)
+ rc = __gmap_make_secure(gmap, page, uvcb);
+ kvm_release_page_clean(page);
+ mmap_read_unlock(gmap->mm);
+
+ return rc;
+}
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = gmap->guest_handle,
+ .gaddr = gaddr,
+ };
+
+ return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+
+/**
+ * __gmap_destroy_page() - Destroy a guest page.
+ * @gmap: the gmap of the guest
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mm lock for gmap->mm
+ */
+static int __gmap_destroy_page(struct gmap *gmap, struct page *page)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /*
+ * See gmap_make_secure(): large folios cannot be secure. Small
+ * folio implies FW_LEVEL_PTE.
+ */
+ if (folio_test_large(folio))
+ return -EFAULT;
+
+ rc = uv_destroy_folio(folio);
+ /*
+ * Fault handlers can race; it is possible that two CPUs will fault
+ * on the same secure page. One CPU can destroy the page, reboot,
+ * re-enter secure mode and import it, while the second CPU was
+ * stuck at the beginning of the handler. At some point the second
+ * CPU will be able to progress, and it will not be able to destroy
+ * the page. In that case we do not want to terminate the process,
+ * we instead try to export the page.
+ */
+ if (rc)
+ rc = uv_convert_from_secure_folio(folio);
+
+ return rc;
+}
+
+/**
+ * gmap_destroy_page() - Destroy a guest page.
+ * @gmap: the gmap of the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
+{
+ struct page *page;
+ int rc = 0;
+
+ mmap_read_lock(gmap->mm);
+ page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr));
+ if (page)
+ rc = __gmap_destroy_page(gmap, page);
+ kvm_release_page_clean(page);
+ mmap_read_unlock(gmap->mm);
+ return rc;
+}
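
Callers of gmap_make_secure() must treat -EAGAIN as transient: it covers
folio lock contention, racing splits and extra folio references. A hedged
sketch of a caller-side retry loop follows; the cond_resched() placement
is illustrative, and the real callers typically re-enter via KVM_RUN
instead of looping:

	/* Assumes kvm->srcu is held, as gmap_make_secure() requires. */
	static int make_secure_retry(struct gmap *gmap, unsigned long gaddr,
				     void *uvcb)
	{
		int rc;

		do {
			rc = gmap_make_secure(gmap, gaddr, uvcb);
			if (rc == -EAGAIN)
				cond_resched();	/* let the contender finish */
		} while (rc == -EAGAIN);
		return rc;
	}
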
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
new file mode 100644
index 000000000000..c8f031c9ea5f
--- /dev/null
+++ b/arch/s390/kvm/gmap.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016, 2025
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Claudio Imbrenda <imbrenda@linux.ibm.com>
+ */
+
+#ifndef ARCH_KVM_S390_GMAP_H
+#define ARCH_KVM_S390_GMAP_H
+
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
+
+/**
+ * gmap_shadow_valid - check if a shadow guest address space matches the
+ * given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties; the caller can continue using it. Returns 0 otherwise; in
+ * that case the caller has to request a new shadow gmap.
+ */
+static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+ if (sg->removed)
+ return 0;
+ return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
+#endif
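
GMAP_SHADOW_FAKE_TABLE relies on pointer tagging: table origins are
aligned well beyond bit 0 (s390 page tables are at least 2KiB aligned),
so bit 0 is free to carry the "fake" marker, as shadow_pgt_lookup() in
gaccess.c demonstrates when it splits the stored value. A short sketch of
the encode/decode steps (pgt_addr is an illustrative variable):

	unsigned long tagged = pgt_addr | GMAP_SHADOW_FAKE_TABLE;   /* tag */
	bool fake = tagged & GMAP_SHADOW_FAKE_TABLE;                /* flag */
	unsigned long addr = tagged & ~GMAP_SHADOW_FAKE_TABLE;      /* addr */
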
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 5bbaadf75dc6..610dd44a948b 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -21,6 +21,7 @@
#include "gaccess.h"
#include "trace.h"
#include "trace-s390.h"
+#include "gmap.h"
u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
@@ -367,7 +368,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg2, &srcaddr, GACC_FETCH, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0);
+ rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0);
if (rc != 0)
return rc;
@@ -376,7 +377,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
reg1, &dstaddr, GACC_STORE, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
- rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE);
+ rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE);
if (rc != 0)
return rc;
@@ -549,7 +550,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
* If the unpin did not succeed, the guest will exit again for the UVC
* and we will retry the unpin.
*/
- if (rc == -EINVAL)
+ if (rc == -EINVAL || rc == -ENXIO)
return 0;
/*
* If we got -EAGAIN here, we simply return it. It will eventually
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index ea8dce299954..07ff0e10cb7f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2678,9 +2678,13 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
kvm_s390_clear_float_irqs(dev->kvm);
break;
case KVM_DEV_FLIC_APF_ENABLE:
+ if (kvm_is_ucontrol(dev->kvm))
+ return -EINVAL;
dev->kvm->arch.gmap->pfault_enabled = 1;
break;
case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+ if (kvm_is_ucontrol(dev->kvm))
+ return -EINVAL;
dev->kvm->arch.gmap->pfault_enabled = 0;
/*
* Make sure no async faults are in transition when
@@ -2889,20 +2893,25 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- u64 uaddr;
+ u64 uaddr_s, uaddr_i;
+ int idx;
switch (ue->type) {
/* we store the userspace addresses instead of the guest addresses */
case KVM_IRQ_ROUTING_S390_ADAPTER:
+ if (kvm_is_ucontrol(kvm))
+ return -EINVAL;
e->set = set_adapter_int;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
- if (uaddr == -EFAULT)
- return -EFAULT;
- e->adapter.summary_addr = uaddr;
- uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
- if (uaddr == -EFAULT)
+
+ idx = srcu_read_lock(&kvm->srcu);
+ uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr);
+ uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i))
return -EFAULT;
- e->adapter.ind_addr = uaddr;
+ e->adapter.summary_addr = uaddr_s;
+ e->adapter.ind_addr = uaddr_i;
e->adapter.summary_offset = ue->u.adapter.summary_offset;
e->adapter.ind_offset = ue->u.adapter.ind_offset;
e->adapter.adapter_id = ue->u.adapter.adapter_id;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d8080c27d45b..ebecb96bacce 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -50,6 +50,7 @@
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"
+#include "gmap.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -3428,8 +3429,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (type & KVM_VM_S390_UCONTROL) {
+ struct kvm_userspace_memory_region2 fake_memslot = {
+ .slot = KVM_S390_UCONTROL_MEMSLOT,
+ .guest_phys_addr = 0,
+ .userspace_addr = 0,
+ .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
+ .flags = 0,
+ };
+
kvm->arch.gmap = NULL;
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+ /* one flat fake memslot covering the whole address-space */
+ mutex_lock(&kvm->slots_lock);
+ KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
+ mutex_unlock(&kvm->slots_lock);
} else {
if (sclp.hamax == U64_MAX)
kvm->arch.mem_limit = TASK_SIZE_MAX;
@@ -4498,6 +4511,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
+static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm *kvm = gmap->private;
+ gfn_t gfn = gpa_to_gfn(gaddr);
+ bool unlocked;
+ hva_t vmaddr;
+ gpa_t tmp;
+ int rc;
+
+ if (kvm_is_ucontrol(kvm)) {
+ tmp = __gmap_translate(gmap, gaddr);
+ gfn = gpa_to_gfn(tmp);
+ }
+
+ vmaddr = gfn_to_hva(kvm, gfn);
+ rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!rc)
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+ return rc;
+}
+
+/**
+ * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
+ * @gmap: the gmap of the guest
+ * @gpa: the starting guest address
+ * @npages: how many pages to protect
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Return: 0 in case of success, < 0 in case of error - see gmap_protect_one()
+ *
+ * Context: kvm->srcu and gmap->mm need to be held in read mode
+ */
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits)
+{
+ unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+ gpa_t end = gpa + npages * PAGE_SIZE;
+ int rc;
+
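+	/*
+	 * Note: gmap_protect_one() is assumed to return, on success, the
+	 * size of the range it protected (e.g. PAGE_SIZE or HPAGE_SIZE),
+	 * so ALIGN(gpa + 1, rc) advances gpa to the next boundary of the
+	 * range that was just protected.
+	 */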
+ for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ if (rc == -EAGAIN) {
+ __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ }
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
+{
+ gpa_t gaddr = kvm_s390_get_prefix(vcpu);
+ int idx, rc;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ mmap_read_lock(vcpu->arch.gmap->mm);
+
+ rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
+
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return rc;
+}
+
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
@@ -4513,9 +4595,8 @@ retry:
*/
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
int rc;
- rc = gmap_mprotect_notify(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu),
- PAGE_SIZE * 2, PROT_WRITE);
+
+ rc = kvm_s390_mprotect_notify_prefix(vcpu);
if (rc) {
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
return rc;
@@ -4766,11 +4847,111 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
+static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
+{
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+}
+
+/**
+ * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
+ * @vcpu: the vCPU whose gmap is to be fixed up
+ * @gfn: the guest frame number used for memslots (including fake memslots)
+ * @gaddr: the gmap address; it does not have to match @gfn for ucontrol gmaps
+ * @flags: FOLL_* flags
+ *
+ * Return: 0 on success, < 0 in case of error.
+ * Context: The mm lock must not be held when calling. May sleep.
+ */
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm_memory_slot *slot;
+ unsigned int fault_flags;
+ bool writable, unlocked;
+ unsigned long vmaddr;
+ struct page *page;
+ kvm_pfn_t pfn;
+ int rc;
+
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+ return vcpu_post_run_addressing_exception(vcpu);
+
+ fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
+ if (vcpu->arch.gmap->pfault_enabled)
+ flags |= FOLL_NOWAIT;
+ vmaddr = __gfn_to_hva_memslot(slot, gfn);
+
+try_again:
+ pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
+
+ /* Access outside memory, inject addressing exception */
+ if (is_noslot_pfn(pfn))
+ return vcpu_post_run_addressing_exception(vcpu);
+ /* Signal pending: try again */
+ if (pfn == KVM_PFN_ERR_SIGPENDING)
+ return -EAGAIN;
+
+	/* Needs I/O, try to set up async pfault (only possible with FOLL_NOWAIT) */
+ if (pfn == KVM_PFN_ERR_NEEDS_IO) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ vcpu->stat.pfault_sync++;
+		/* Could not set up async pfault, try again synchronously */
+ flags &= ~FOLL_NOWAIT;
+ goto try_again;
+ }
+ /* Any other error */
+ if (is_error_pfn(pfn))
+ return -EFAULT;
+
+ /* Success */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
+ rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
+ if (!rc)
+ rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
+ scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
+ kvm_release_faultin_page(vcpu->kvm, page, false, writable);
+ }
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ return rc;
+}
+
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
+{
+ unsigned long gaddr_tmp;
+ gfn_t gfn;
+
+ gfn = gpa_to_gfn(gaddr);
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ /*
+ * This translates the per-vCPU guest address into a
+ * fake guest address, which can then be used with the
+		 * fake memslots that identity-map userspace.
+ * This allows ucontrol VMs to use the normal fault
+ * resolution path, like normal VMs.
+ */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ if (gaddr_tmp == -EFAULT) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
+ vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
+ return -EREMOTE;
+ }
+ gfn = gpa_to_gfn(gaddr_tmp);
+ }
+ return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
+}
+
static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
{
unsigned int flags = 0;
unsigned long gaddr;
- int rc = 0;
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
if (kvm_s390_cur_gmap_fault_is_write())
@@ -4781,9 +4962,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
vcpu->stat.exit_null++;
break;
case PGM_NON_SECURE_STORAGE_ACCESS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ kvm_s390_assert_primary_as(vcpu);
/*
* This is normal operation; a page belonging to a protected
* guest has not been imported yet. Try to import the page into
@@ -4794,9 +4973,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
break;
case PGM_SECURE_STORAGE_ACCESS:
case PGM_SECURE_STORAGE_VIOLATION:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ kvm_s390_assert_primary_as(vcpu);
/*
* This can happen after a reboot with asynchronous teardown;
* the new guest (normal or protected) will run on top of the
@@ -4825,40 +5002,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
case PGM_REGION_FIRST_TRANS:
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
- if (vcpu->arch.gmap->pfault_enabled) {
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
- if (rc == -EFAULT)
- return vcpu_post_run_addressing_exception(vcpu);
- if (rc == -EAGAIN) {
- trace_kvm_s390_major_guest_pfault(vcpu);
- if (kvm_arch_setup_async_pf(vcpu))
- return 0;
- vcpu->stat.pfault_sync++;
- } else {
- return rc;
- }
- }
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
- if (rc == -EFAULT) {
- if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
- vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
- vcpu->run->s390_ucontrol.pgm_code = 0x10;
- return -EREMOTE;
- }
- return vcpu_post_run_addressing_exception(vcpu);
- }
- break;
+ kvm_s390_assert_primary_as(vcpu);
+ return vcpu_dat_fault_handler(vcpu, gaddr, flags);
default:
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
current->thread.gmap_int_code, current->thread.gmap_teid.val);
send_sig(SIGSEGV, current, 0);
break;
}
- return rc;
+ return 0;
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@@ -5737,7 +5889,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- r = gmap_fault(vcpu->arch.gmap, arg, 0);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = vcpu_dat_fault_handler(vcpu, arg, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_ENABLE_CAP:
@@ -5853,7 +6007,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
gpa_t size;
- if (kvm_is_ucontrol(kvm))
+ if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
return -EINVAL;
/* When we are protected, we should not change the memory slots */
@@ -5905,6 +6059,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int rc = 0;
+ if (kvm_is_ucontrol(kvm))
+ return;
+
switch (change) {
case KVM_MR_DELETE:
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 597d7a71deeb..8d3bbb2dd8d2 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -20,6 +20,8 @@
#include <asm/processor.h>
#include <asm/sclp.h>
+#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
+
static inline void kvm_s390_fpu_store(struct kvm_run *run)
{
fpu_stfpc(&run->s.regs.fpc);
@@ -279,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
return gd;
}
+static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa)
+{
+ hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+ if (!kvm_is_error_hva(hva))
+ hva |= offset_in_page(gpa);
+ return hva;
+}
+
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
@@ -408,6 +419,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags);
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits);
+
+static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags)
+{
+ return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
+}
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 75e81ba26d04..22c012aa5206 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -17,6 +17,7 @@
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
+#include "gmap.h"
bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
@@ -638,10 +639,28 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
.tweak[1] = offset,
};
int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+ unsigned long vmaddr;
+ bool unlocked;
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
+ if (ret == -ENXIO) {
+ mmap_read_lock(kvm->mm);
+ vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(vmaddr)) {
+ ret = -EFAULT;
+ } else {
+ ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!ret)
+ ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
+ }
+ mmap_read_unlock(kvm->mm);
+ if (!ret)
+ return -EAGAIN;
+ return ret;
+ }
+
if (ret && ret != -EAGAIN)
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
uvcb.gaddr, *rc, *rrc);
@@ -660,6 +679,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
addr, size);
+ guard(srcu)(&kvm->srcu);
+
while (offset < size) {
ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
if (ret == -EAGAIN) {
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 150b9387860a..a78df3a4f353 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -13,6 +13,7 @@
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/io.h>
+#include <linux/mman.h>
#include <asm/gmap.h>
#include <asm/mmu_context.h>
@@ -22,6 +23,11 @@
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#include "gmap.h"
+
+enum vsie_page_flags {
+ VSIE_PAGE_IN_USE = 0,
+};
struct vsie_page {
struct kvm_s390_sie_block scb_s; /* 0x0000 */
@@ -46,7 +52,18 @@ struct vsie_page {
gpa_t gvrd_gpa; /* 0x0240 */
gpa_t riccbd_gpa; /* 0x0248 */
gpa_t sdnx_gpa; /* 0x0250 */
- __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */
+ /*
+ * guest address of the original SCB. Remains set for free vsie
+ * pages, so we can properly look them up in our addr_to_page
+ * radix tree.
+ */
+ gpa_t scb_gpa; /* 0x0258 */
+ /*
+ * Flags: must be set/cleared atomically after the vsie page can be
+ * looked up by other CPUs.
+ */
+ unsigned long flags; /* 0x0260 */
+ __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
@@ -584,7 +601,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
struct kvm *kvm = gmap->private;
struct vsie_page *cur;
unsigned long prefix;
- struct page *page;
int i;
if (!gmap_is_shadow(gmap))
@@ -594,10 +610,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
* therefore we can safely reference them all the time.
*/
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = READ_ONCE(kvm->arch.vsie.pages[i]);
- if (!page)
+ cur = READ_ONCE(kvm->arch.vsie.pages[i]);
+ if (!cur)
continue;
- cur = page_to_virt(page);
if (READ_ONCE(cur->gmap) != gmap)
continue;
prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
@@ -854,7 +869,7 @@ unpin:
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
gpa_t gpa)
{
- hpa_t hpa = (hpa_t) vsie_page->scb_o;
+ hpa_t hpa = virt_to_phys(vsie_page->scb_o);
if (hpa)
unpin_guest_page(vcpu->kvm, gpa, hpa);
@@ -1345,6 +1360,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return rc;
}
+/* Try getting a given vsie page, returning "true" on success. */
+static inline bool try_get_vsie_page(struct vsie_page *vsie_page)
+{
+ if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags))
+ return false;
+ return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
+/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */
+static void put_vsie_page(struct vsie_page *vsie_page)
+{
+ clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
/*
* Get or create a vsie page for a scb address.
*
@@ -1355,16 +1384,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
struct vsie_page *vsie_page;
- struct page *page;
int nr_vcpus;
rcu_read_lock();
- page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+ vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
rcu_read_unlock();
- if (page) {
- if (page_ref_inc_return(page) == 2)
- return page_to_virt(page);
- page_ref_dec(page);
+ if (vsie_page) {
+ if (try_get_vsie_page(vsie_page)) {
+ if (vsie_page->scb_gpa == addr)
+ return vsie_page;
+ /*
+ * We raced with someone reusing + putting this vsie
+ * page before we grabbed it.
+ */
+ put_vsie_page(vsie_page);
+ }
}
/*
@@ -1375,36 +1409,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
mutex_lock(&kvm->arch.vsie.mutex);
if (kvm->arch.vsie.page_count < nr_vcpus) {
- page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
- if (!page) {
+ vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
+ if (!vsie_page) {
mutex_unlock(&kvm->arch.vsie.mutex);
return ERR_PTR(-ENOMEM);
}
- page_ref_inc(page);
- kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+ __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+ kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page;
kvm->arch.vsie.page_count++;
} else {
/* reuse an existing entry that belongs to nobody */
while (true) {
- page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
- if (page_ref_inc_return(page) == 2)
+ vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+ if (try_get_vsie_page(vsie_page))
break;
- page_ref_dec(page);
kvm->arch.vsie.next++;
kvm->arch.vsie.next %= nr_vcpus;
}
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
}
- page->index = addr;
- /* double use of the same address */
- if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
- page_ref_dec(page);
+ /* Mark it as invalid until it resides in the tree. */
+ vsie_page->scb_gpa = ULONG_MAX;
+
+ /* Double use of the same address or allocation failure. */
+ if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
+ vsie_page)) {
+ put_vsie_page(vsie_page);
mutex_unlock(&kvm->arch.vsie.mutex);
return NULL;
}
+ vsie_page->scb_gpa = addr;
mutex_unlock(&kvm->arch.vsie.mutex);
- vsie_page = page_to_virt(page);
memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
release_gmap_shadow(vsie_page);
vsie_page->fault_addr = 0;
@@ -1412,14 +1450,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
return vsie_page;
}
-/* put a vsie page acquired via get_vsie_page */
-static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
-{
- struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
-
- page_ref_dec(page);
-}
-
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
struct vsie_page *vsie_page;
@@ -1470,7 +1500,7 @@ out_unshadow:
out_unpin_scb:
unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
- put_vsie_page(vcpu->kvm, vsie_page);
+ put_vsie_page(vsie_page);
return rc < 0 ? rc : 0;
}
@@ -1486,18 +1516,18 @@ void kvm_s390_vsie_init(struct kvm *kvm)
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
struct vsie_page *vsie_page;
- struct page *page;
int i;
mutex_lock(&kvm->arch.vsie.mutex);
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
- page = kvm->arch.vsie.pages[i];
+ vsie_page = kvm->arch.vsie.pages[i];
kvm->arch.vsie.pages[i] = NULL;
- vsie_page = page_to_virt(page);
release_gmap_shadow(vsie_page);
/* free the radix tree entry */
- radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
- __free_page(page);
+ if (vsie_page->scb_gpa != ULONG_MAX)
+ radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+ vsie_page->scb_gpa >> 9);
+ free_page((unsigned long)vsie_page);
}
kvm->arch.vsie.page_count = 0;
mutex_unlock(&kvm->arch.vsie.mutex);
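
The vsie rework above replaces page refcount juggling with a single
VSIE_PAGE_IN_USE flag: try_get_vsie_page() does a cheap test_bit() read
first and only falls through to the atomic test_and_set_bit(), so pages
that are already in use are skipped without a wasted atomic operation.
The pattern in isolation, as a sketch:

	/* Sketch of the optimistic bit-lock used by try_get_vsie_page(). */
	static bool try_claim(unsigned long *flags)
	{
		if (test_bit(0, flags))			/* cheap read */
			return false;
		return !test_and_set_bit(0, flags);	/* atomic claim */
	}

	static void release_claim(unsigned long *flags)
	{
		clear_bit(0, flags);
	}
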
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index f43f897d3fc0..14bbfe50033c 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -24,3 +24,6 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
+
+obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o
+crc32-s390-y := crc32-glue.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c
new file mode 100644
index 000000000000..137080e61f90
--- /dev/null
+++ b/arch/s390/lib/crc32-glue.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CRC-32 implemented with the z/Architecture Vector Extension Facility.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT "crc32-vx"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <asm/fpu.h>
+#include "crc32-vx.h"
+
+#define VX_MIN_LEN 64
+#define VX_ALIGNMENT 16L
+#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
+
+static DEFINE_STATIC_KEY_FALSE(have_vxrs);
+
+/*
+ * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
+ *
+ * Creates a function to perform a particular CRC-32 computation. Depending
+ * on the message buffer, the hardware-accelerated or software implementation
+ * is used. Note that the message buffer is aligned to improve fetch
+ * operations of VECTOR LOAD MULTIPLE instructions.
+ */
+#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
+ u32 ___fname(u32 crc, const u8 *data, size_t datalen) \
+ { \
+ unsigned long prealign, aligned, remaining; \
+ DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
+ \
+ if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \
+ !static_branch_likely(&have_vxrs)) \
+ return ___crc32_sw(crc, data, datalen); \
+ \
+ if ((unsigned long)data & VX_ALIGN_MASK) { \
+ prealign = VX_ALIGNMENT - \
+ ((unsigned long)data & VX_ALIGN_MASK); \
+ datalen -= prealign; \
+ crc = ___crc32_sw(crc, data, prealign); \
+ data = (void *)((unsigned long)data + prealign); \
+ } \
+ \
+ aligned = datalen & ~VX_ALIGN_MASK; \
+ remaining = datalen & VX_ALIGN_MASK; \
+ \
+ kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \
+ crc = ___crc32_vx(crc, data, aligned); \
+ kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \
+ \
+ if (remaining) \
+ crc = ___crc32_sw(crc, data + aligned, remaining); \
+ \
+ return crc; \
+ } \
+ EXPORT_SYMBOL(___fname);
+
+DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
+DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
+DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base)
+
+static int __init crc32_s390_init(void)
+{
+ if (cpu_have_feature(S390_CPU_FEATURE_VXRS))
+ static_branch_enable(&have_vxrs);
+ return 0;
+}
+arch_initcall(crc32_s390_init);
+
+static void __exit crc32_s390_exit(void)
+{
+}
+module_exit(crc32_s390_exit);
+
+u32 crc32_optimizations(void)
+{
+ if (static_key_enabled(&have_vxrs))
+ return CRC32_LE_OPTIMIZATION |
+ CRC32_BE_OPTIMIZATION |
+ CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility");
+MODULE_LICENSE("GPL");
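
The DEFINE_CRC32_VX() macro splits each buffer into an unaligned head
(handled in software), a 16-byte-aligned middle (handled by the vector
unit) and a short tail (software again). A standalone sketch of that
split, with kernel types swapped for standard ones; for data at address
0x1005 and datalen 1000 it yields 11 + 976 + 13 bytes:

	#include <stddef.h>
	#include <stdint.h>

	#define VX_ALIGNMENT	16UL
	#define VX_ALIGN_MASK	(VX_ALIGNMENT - 1)

	struct crc_split { size_t prealign, aligned, remaining; };

	static struct crc_split split(const uint8_t *data, size_t datalen)
	{
		struct crc_split s = { 0, 0, 0 };

		if ((uintptr_t)data & VX_ALIGN_MASK) {
			/* bytes until the next 16-byte boundary */
			s.prealign = VX_ALIGNMENT -
				     ((uintptr_t)data & VX_ALIGN_MASK);
			datalen -= s.prealign;
		}
		s.aligned = datalen & ~VX_ALIGN_MASK;	/* multiple of 16 */
		s.remaining = datalen & VX_ALIGN_MASK;	/* tail, < 16 bytes */
		return s;
	}
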
diff --git a/arch/s390/crypto/crc32-vx.h b/arch/s390/lib/crc32-vx.h
index 652c96e1a822..652c96e1a822 100644
--- a/arch/s390/crypto/crc32-vx.h
+++ b/arch/s390/lib/crc32-vx.h
diff --git a/arch/s390/crypto/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c
index fed7c9c70d05..fed7c9c70d05 100644
--- a/arch/s390/crypto/crc32be-vx.c
+++ b/arch/s390/lib/crc32be-vx.c
diff --git a/arch/s390/crypto/crc32le-vx.c b/arch/s390/lib/crc32le-vx.c
index 2f629f394df7..2f629f394df7 100644
--- a/arch/s390/crypto/crc32le-vx.c
+++ b/arch/s390/lib/crc32le-vx.c
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 08f60a42b9a6..d026debf250c 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -34,8 +34,7 @@ SYM_FUNC_START(__memmove)
la %r3,256(%r3)
brctg %r0,.Lmemmove_forward_loop
.Lmemmove_forward_remainder:
- larl %r5,.Lmemmove_mvc
- ex %r4,0(%r5)
+ exrl %r4,.Lmemmove_mvc
.Lmemmove_exit:
BR_EX %r14
.Lmemmove_reverse:
@@ -83,8 +82,7 @@ SYM_FUNC_START(__memset)
la %r1,256(%r1)
brctg %r3,.Lmemset_clear_loop
.Lmemset_clear_remainder:
- larl %r3,.Lmemset_xc
- ex %r4,0(%r3)
+ exrl %r4,.Lmemset_xc
.Lmemset_exit:
BR_EX %r14
.Lmemset_fill:
@@ -102,8 +100,7 @@ SYM_FUNC_START(__memset)
brctg %r5,.Lmemset_fill_loop
.Lmemset_fill_remainder:
stc %r3,0(%r1)
- larl %r5,.Lmemset_mvc
- ex %r4,0(%r5)
+ exrl %r4,.Lmemset_mvc
BR_EX %r14
.Lmemset_fill_exit:
stc %r3,0(%r1)
@@ -132,8 +129,7 @@ SYM_FUNC_START(__memcpy)
lgr %r1,%r2
jnz .Lmemcpy_loop
.Lmemcpy_remainder:
- larl %r5,.Lmemcpy_mvc
- ex %r4,0(%r5)
+ exrl %r4,.Lmemcpy_mvc
.Lmemcpy_exit:
BR_EX %r14
.Lmemcpy_loop:
@@ -175,8 +171,7 @@ SYM_FUNC_START(__memset\bits)
brctg %r5,.L__memset_loop\bits
.L__memset_remainder\bits:
\insn %r3,0(%r1)
- larl %r5,.L__memset_mvc\bits
- ex %r4,0(%r5)
+ exrl %r4,.L__memset_mvc\bits
BR_EX %r14
.L__memset_store\bits:
\insn %r3,0(%r2)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index c7c269d5c491..f977b7c37efc 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -31,51 +31,6 @@ void debug_user_asce(int exit)
}
#endif /*CONFIG_DEBUG_ENTRY */
-static unsigned long raw_copy_from_user_key(void *to, const void __user *from,
- unsigned long size, unsigned long key)
-{
- unsigned long rem;
- union oac spec = {
- .oac2.key = key,
- .oac2.as = PSW_BITS_AS_SECONDARY,
- .oac2.k = 1,
- .oac2.a = 1,
- };
-
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[from],%[val]\n"
- " slgr %[to],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */
- " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */
- " slgr %[rem],%[from]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
-}
-
-unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- return raw_copy_from_user_key(to, from, n, 0);
-}
-EXPORT_SYMBOL(raw_copy_from_user);
-
unsigned long _copy_from_user_key(void *to, const void __user *from,
unsigned long n, unsigned long key)
{
@@ -93,51 +48,6 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
}
EXPORT_SYMBOL(_copy_from_user_key);
-static unsigned long raw_copy_to_user_key(void __user *to, const void *from,
- unsigned long size, unsigned long key)
-{
- unsigned long rem;
- union oac spec = {
- .oac1.key = key,
- .oac1.as = PSW_BITS_AS_SECONDARY,
- .oac1.k = 1,
- .oac1.a = 1,
- };
-
- asm volatile(
- " lr 0,%[spec]\n"
- "0: mvcos 0(%[to]),0(%[from]),%[size]\n"
- "1: jz 5f\n"
- " algr %[size],%[val]\n"
- " slgr %[to],%[val]\n"
- " slgr %[from],%[val]\n"
- " j 0b\n"
- "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */
- " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */
- " slgr %[rem],%[to]\n"
- " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "3: mvcos 0(%[to]),0(%[from]),%[rem]\n"
- "4: slgr %[size],%[rem]\n"
- " j 6f\n"
- "5: slgr %[size],%[size]\n"
- "6:\n"
- EX_TABLE(0b, 2b)
- EX_TABLE(1b, 2b)
- EX_TABLE(3b, 6b)
- EX_TABLE(4b, 6b)
- : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem)
- : [val] "a" (-4096UL), [spec] "d" (spec.val)
- : "cc", "memory", "0");
- return size;
-}
-
-unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- return raw_copy_to_user_key(to, from, n, 0);
-}
-EXPORT_SYMBOL(raw_copy_to_user);
-
unsigned long _copy_to_user_key(void __user *to, const void *from,
unsigned long n, unsigned long key)
{
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index fb924a8041dc..ce7bcf7c0032 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -15,7 +15,6 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2)
{
asm volatile(
- " larl 1,2f\n"
" aghi %0,-1\n"
" jm 3f\n"
" srlg 0,%0,8\n"
@@ -25,12 +24,12 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
" la %1,256(%1)\n"
" la %2,256(%2)\n"
" brctg 0,0b\n"
- "1: ex %0,0(1)\n"
+ "1: exrl %0,2f\n"
" j 3f\n"
"2: xc 0(1,%1),0(%2)\n"
"3:\n"
: : "d" (bytes), "a" (p1), "a" (p2)
- : "0", "1", "cc", "memory");
+ : "0", "cc", "memory");
}
static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
@@ -38,9 +37,8 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p3)
{
asm volatile(
- " larl 1,2f\n"
" aghi %0,-1\n"
- " jm 3f\n"
+ " jm 4f\n"
" srlg 0,%0,8\n"
" ltgr 0,0\n"
" jz 1f\n"
@@ -50,14 +48,14 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
" la %2,256(%2)\n"
" la %3,256(%3)\n"
" brctg 0,0b\n"
- "1: ex %0,0(1)\n"
- " ex %0,6(1)\n"
- " j 3f\n"
+ "1: exrl %0,2f\n"
+ " exrl %0,3f\n"
+ " j 4f\n"
"2: xc 0(1,%1),0(%2)\n"
- " xc 0(1,%1),0(%3)\n"
- "3:\n"
+ "3: xc 0(1,%1),0(%3)\n"
+ "4:\n"
: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
- : : "0", "1", "cc", "memory");
+ : : "0", "cc", "memory");
}
static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
@@ -66,9 +64,8 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p4)
{
asm volatile(
- " larl 1,2f\n"
" aghi %0,-1\n"
- " jm 3f\n"
+ " jm 5f\n"
" srlg 0,%0,8\n"
" ltgr 0,0\n"
" jz 1f\n"
@@ -80,16 +77,16 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
" la %3,256(%3)\n"
" la %4,256(%4)\n"
" brctg 0,0b\n"
- "1: ex %0,0(1)\n"
- " ex %0,6(1)\n"
- " ex %0,12(1)\n"
- " j 3f\n"
+ "1: exrl %0,2f\n"
+ " exrl %0,3f\n"
+ " exrl %0,4f\n"
+ " j 5f\n"
"2: xc 0(1,%1),0(%2)\n"
- " xc 0(1,%1),0(%3)\n"
- " xc 0(1,%1),0(%4)\n"
- "3:\n"
+ "3: xc 0(1,%1),0(%3)\n"
+ "4: xc 0(1,%1),0(%4)\n"
+ "5:\n"
: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
- : : "0", "1", "cc", "memory");
+ : : "0", "cc", "memory");
}
static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
@@ -101,7 +98,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
asm volatile(
" larl 1,2f\n"
" aghi %0,-1\n"
- " jm 3f\n"
+ " jm 6f\n"
" srlg 0,%0,8\n"
" ltgr 0,0\n"
" jz 1f\n"
@@ -115,19 +112,19 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
" la %4,256(%4)\n"
" la %5,256(%5)\n"
" brctg 0,0b\n"
- "1: ex %0,0(1)\n"
- " ex %0,6(1)\n"
- " ex %0,12(1)\n"
- " ex %0,18(1)\n"
- " j 3f\n"
+ "1: exrl %0,2f\n"
+ " exrl %0,3f\n"
+ " exrl %0,4f\n"
+ " exrl %0,5f\n"
+ " j 6f\n"
"2: xc 0(1,%1),0(%2)\n"
- " xc 0(1,%1),0(%3)\n"
- " xc 0(1,%1),0(%4)\n"
- " xc 0(1,%1),0(%5)\n"
- "3:\n"
+ "3: xc 0(1,%1),0(%3)\n"
+ "4: xc 0(1,%1),0(%4)\n"
+ "5: xc 0(1,%1),0(%5)\n"
+ "6:\n"
: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
"+a" (p5)
- : : "0", "1", "cc", "memory");
+ : : "0", "cc", "memory");
}
struct xor_block_template xor_block_xc = {
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index d01724a715d0..39f44b6256e0 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -204,7 +204,7 @@ static void cmm_set_timer(void)
del_timer(&cmm_timer);
return;
}
- mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC));
+ mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds));
}
static void cmm_timer_fn(struct timer_list *unused)
@@ -332,7 +332,7 @@ static int cmm_timeout_handler(const struct ctl_table *ctl, int write,
return 0;
}
-static struct ctl_table cmm_table[] = {
+static const struct ctl_table cmm_table[] = {
{
.procname = "cmm_pages",
.mode = 0644,
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 0a0738a473af..a046be1715cf 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -7,6 +7,7 @@
#include <linux/panic.h>
#include <asm/asm-extable.h>
#include <asm/extable.h>
+#include <asm/fpu.h>
const struct exception_table_entry *s390_search_extables(unsigned long addr)
{
@@ -26,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r
return true;
}
-static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs)
{
unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
@@ -35,18 +36,6 @@ static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct p
return true;
}
-static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs)
-{
- unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
- unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
- size_t len = FIELD_GET(EX_DATA_LEN, ex->data);
-
- regs->gprs[reg_err] = -EFAULT;
- memset((void *)regs->gprs[reg_addr], 0, len);
- regs->psw.addr = extable_fixup(ex);
- return true;
-}
-
static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
bool pair, struct pt_regs *regs)
{
@@ -77,6 +66,13 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt
return true;
}
+static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ fpu_sfpc(0);
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+}
+
bool fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *ex;
@@ -89,16 +85,16 @@ bool fixup_exception(struct pt_regs *regs)
return ex_handler_fixup(ex, regs);
case EX_TYPE_BPF:
return ex_handler_bpf(ex, regs);
- case EX_TYPE_UA_STORE:
- return ex_handler_ua_store(ex, regs);
- case EX_TYPE_UA_LOAD_MEM:
- return ex_handler_ua_load_mem(ex, regs);
+ case EX_TYPE_UA_FAULT:
+ return ex_handler_ua_fault(ex, regs);
case EX_TYPE_UA_LOAD_REG:
return ex_handler_ua_load_reg(ex, false, regs);
case EX_TYPE_UA_LOAD_REGPAIR:
return ex_handler_ua_load_reg(ex, true, regs);
case EX_TYPE_ZEROPAD:
return ex_handler_zeropad(ex, regs);
+ case EX_TYPE_FPC:
+ return ex_handler_fpc(ex, regs);
}
panic("invalid exception table entry");
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 16b8a36c56de..94d927785800 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -24,6 +24,16 @@
#include <asm/page.h>
#include <asm/tlb.h>
+/*
+ * The address is saved in a radix tree directly; a plain value would be
+ * ambiguous, since 0 is a valid address but NULL is what a lookup returns
+ * when nothing was found. The lower bits are ignored by all users of the
+ * macro, so one of them can be used to distinguish the valid address 0
+ * from NULL.
+ */
+#define VALID_GADDR_FLAG 1
+#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)
+#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)
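+/*
+ * For example (illustrative only): MAKE_VALID_GADDR(0) evaluates to 1, so
+ * a radix tree lookup of guest address 0 returns a non-NULL entry and
+ * IS_GADDR_VALID() still reports it as valid, while a lookup miss simply
+ * returns NULL (0, with the flag bit clear).
+ */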
+
#define GMAP_SHADOW_FAKE_TABLE 1ULL
static struct page *gmap_alloc_crst(void)
@@ -43,7 +53,7 @@ static struct page *gmap_alloc_crst(void)
*
* Returns a guest address space structure.
*/
-static struct gmap *gmap_alloc(unsigned long limit)
+struct gmap *gmap_alloc(unsigned long limit)
{
struct gmap *gmap;
struct page *page;
@@ -70,9 +80,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
if (!gmap)
goto out;
- INIT_LIST_HEAD(&gmap->crst_list);
INIT_LIST_HEAD(&gmap->children);
- INIT_LIST_HEAD(&gmap->pt_list);
INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
@@ -82,8 +90,6 @@ static struct gmap *gmap_alloc(unsigned long limit)
page = gmap_alloc_crst();
if (!page)
goto out_free;
- page->index = 0;
- list_add(&page->lru, &gmap->crst_list);
table = page_to_virt(page);
crst_table_init(table, etype);
gmap->table = table;
@@ -97,6 +103,7 @@ out_free:
out:
return NULL;
}
+EXPORT_SYMBOL_GPL(gmap_alloc);
/**
* gmap_create - create a guest address space
@@ -185,32 +192,46 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
} while (nr > 0);
}
+static void gmap_free_crst(unsigned long *table, bool free_ptes)
+{
+ bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
+ int i;
+
+ if (is_segment) {
+ if (!free_ptes)
+ goto out;
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _SEGMENT_ENTRY_INVALID))
+ page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
+ } else {
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _REGION_ENTRY_INVALID))
+ gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
+ }
+
+out:
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+}
+
/**
* gmap_free - free a guest address space
* @gmap: pointer to the guest address space structure
*
* No locks required. There are no references to this gmap anymore.
*/
-static void gmap_free(struct gmap *gmap)
+void gmap_free(struct gmap *gmap)
{
- struct page *page, *next;
-
/* Flush tlb of all gmaps (if not already done for shadows) */
if (!(gmap_is_shadow(gmap) && gmap->removed))
gmap_flush_tlb(gmap);
/* Free all segment & region tables. */
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, CRST_ALLOC_ORDER);
+ gmap_free_crst(gmap->table, gmap_is_shadow(gmap));
+
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
/* Free additional data for a shadow gmap */
if (gmap_is_shadow(gmap)) {
- struct ptdesc *ptdesc, *n;
-
- /* Free all page tables. */
- list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
- page_table_free_pgste(ptdesc);
gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
/* Release reference to the parent */
gmap_put(gmap->parent);
@@ -218,6 +239,7 @@ static void gmap_free(struct gmap *gmap)
kfree(gmap);
}
+EXPORT_SYMBOL_GPL(gmap_free);
/**
* gmap_get - increase reference counter for guest address space
@@ -298,10 +320,8 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
crst_table_init(new, init);
spin_lock(&gmap->guest_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
- list_add(&page->lru, &gmap->crst_list);
*table = __pa(new) | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
- page->index = gaddr;
page = NULL;
}
spin_unlock(&gmap->guest_table_lock);
@@ -310,21 +330,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
return 0;
}
-/**
- * __gmap_segment_gaddr - find virtual address from segment pointer
- * @entry: pointer to a segment table entry in the guest address space
- *
- * Returns the virtual address in the guest address space for the segment
- */
-static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
{
- struct page *page;
- unsigned long offset;
+ return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
- offset = (unsigned long) entry / sizeof(unsigned long);
- offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
- page = pmd_pgtable_page((pmd_t *) entry);
- return page->index + offset;
+static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
+{
+ return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
+
+static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
+ unsigned long *gaddr)
+{
+ *gaddr = host_to_guest_delete(gmap, vmaddr);
+ if (IS_GADDR_VALID(*gaddr))
+ return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);
+ return NULL;
}
/**
@@ -336,16 +358,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry)
*/
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
- unsigned long *entry;
+ unsigned long gaddr;
int flush = 0;
+ pmd_t *pmdp;
BUG_ON(gmap_is_shadow(gmap));
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
- if (entry) {
- flush = (*entry != _SEGMENT_ENTRY_EMPTY);
- *entry = _SEGMENT_ENTRY_EMPTY;
+
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
+ flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
+
spin_unlock(&gmap->guest_table_lock);
return flush;
}
@@ -464,26 +489,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
EXPORT_SYMBOL_GPL(__gmap_translate);
/**
- * gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- */
-unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
- unsigned long rc;
-
- mmap_read_lock(gmap->mm);
- rc = __gmap_translate(gmap, gaddr);
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_translate);
-
-/**
* gmap_unlink - disconnect a page table from the gmap shadow tables
* @mm: pointer to the parent mm_struct
* @table: pointer to the host page table
@@ -582,7 +587,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
spin_lock(&gmap->guest_table_lock);
if (*table == _SEGMENT_ENTRY_EMPTY) {
rc = radix_tree_insert(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT, table);
+ vmaddr >> PMD_SHIFT,
+ (void *)MAKE_VALID_GADDR(gaddr));
if (!rc) {
if (pmd_leaf(*pmd)) {
*table = (pmd_val(*pmd) &
@@ -605,130 +611,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
radix_tree_preload_end();
return rc;
}
-
-/**
- * fixup_user_fault_nowait - manually resolve a user page fault without waiting
- * @mm: mm_struct of target mm
- * @address: user address
- * @fault_flags:flags to pass down to handle_mm_fault()
- * @unlocked: did we unlock the mmap_lock while retrying
- *
- * This function behaves similarly to fixup_user_fault(), but it guarantees
- * that the fault will be resolved without waiting. The function might drop
- * and re-acquire the mm lock, in which case @unlocked will be set to true.
- *
- * The guarantee is that the fault is handled without waiting, but the
- * function itself might sleep, due to the lock.
- *
- * Context: Needs to be called with mm->mmap_lock held in read mode, and will
- * return with the lock held in read mode; @unlocked will indicate whether
- * the lock has been dropped and re-acquired. This is the same behaviour as
- * fixup_user_fault().
- *
- * Return: 0 on success, -EAGAIN if the fault cannot be resolved without
- * waiting, -EFAULT if the fault cannot be resolved, -ENOMEM if out of
- * memory.
- */
-static int fixup_user_fault_nowait(struct mm_struct *mm, unsigned long address,
- unsigned int fault_flags, bool *unlocked)
-{
- struct vm_area_struct *vma;
- unsigned int test_flags;
- vm_fault_t fault;
- int rc;
-
- fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
- test_flags = fault_flags & FAULT_FLAG_WRITE ? VM_WRITE : VM_READ;
-
- vma = find_vma(mm, address);
- if (unlikely(!vma || address < vma->vm_start))
- return -EFAULT;
- if (unlikely(!(vma->vm_flags & test_flags)))
- return -EFAULT;
-
- fault = handle_mm_fault(vma, address, fault_flags, NULL);
- /* the mm lock has been dropped, take it again */
- if (fault & VM_FAULT_COMPLETED) {
- *unlocked = true;
- mmap_read_lock(mm);
- return 0;
- }
- /* the mm lock has not been dropped */
- if (fault & VM_FAULT_ERROR) {
- rc = vm_fault_to_errno(fault, 0);
- BUG_ON(!rc);
- return rc;
- }
- /* the mm lock has not been dropped because of FAULT_FLAG_RETRY_NOWAIT */
- if (fault & VM_FAULT_RETRY)
- return -EAGAIN;
- /* nothing needed to be done and the mm lock has not been dropped */
- return 0;
-}
-
-/**
- * __gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Context: Needs to be called with mm->mmap_lock held in read mode. Might
- * drop and re-acquire the lock. Will always return with the lock held.
- */
-static int __gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags)
-{
- unsigned long vmaddr;
- bool unlocked;
- int rc = 0;
-
-retry:
- unlocked = false;
-
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr))
- return vmaddr;
-
- if (fault_flags & FAULT_FLAG_RETRY_NOWAIT)
- rc = fixup_user_fault_nowait(gmap->mm, vmaddr, fault_flags, &unlocked);
- else
- rc = fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked);
- if (rc)
- return rc;
- /*
- * In the case that fixup_user_fault unlocked the mmap_lock during
- * fault-in, redo __gmap_translate() to avoid racing with a
- * map/unmap_segment.
- * In particular, __gmap_translate(), fixup_user_fault{,_nowait}(),
- * and __gmap_link() must all be called atomically in one go; if the
- * lock had been dropped in between, a retry is needed.
- */
- if (unlocked)
- goto retry;
-
- return __gmap_link(gmap, gaddr, vmaddr);
-}
-
-/**
- * gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the
- * vm address is already mapped to a different guest segment, and -EAGAIN if
- * FAULT_FLAG_RETRY_NOWAIT was specified and the fault could not be processed
- * immediately.
- */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags)
-{
- int rc;
-
- mmap_read_lock(gmap->mm);
- rc = __gmap_fault(gmap, gaddr, fault_flags);
- mmap_read_unlock(gmap->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_fault);
+EXPORT_SYMBOL(__gmap_link);
/*
* this function is assumed to be called with mmap_lock held
@@ -853,8 +736,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
*
* Note: Can also be called for shadow gmaps.
*/
-static inline unsigned long *gmap_table_walk(struct gmap *gmap,
- unsigned long gaddr, int level)
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)
{
const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
unsigned long *table = gmap->table;
@@ -905,6 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
}
return table;
}
+EXPORT_SYMBOL(gmap_table_walk);
/**
* gmap_pte_op_walk - walk the gmap page table, get the page table lock
@@ -1101,86 +984,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
* @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
* @bits: pgste notification bits to set
*
- * Returns 0 if successfully protected, -ENOMEM if out of memory and
- * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
+ * Returns:
+ * PAGE_SIZE if a small page was successfully protected;
+ * HPAGE_SIZE if a large page was successfully protected;
+ * -ENOMEM if out of memory;
+ * -EFAULT if gaddr is invalid (or mapping for shadows is missing);
+ * -EAGAIN if the guest mapping is missing and should be fixed by the caller.
*
- * Called with sg->mm->mmap_lock in read.
+ * Context: Called with sg->mm->mmap_lock in read.
*/
-static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
- unsigned long len, int prot, unsigned long bits)
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)
{
- unsigned long vmaddr, dist;
pmd_t *pmdp;
- int rc;
+ int rc = 0;
BUG_ON(gmap_is_shadow(gmap));
- while (len) {
- rc = -EAGAIN;
- pmdp = gmap_pmd_op_walk(gmap, gaddr);
- if (pmdp) {
- if (!pmd_leaf(*pmdp)) {
- rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
- bits);
- if (!rc) {
- len -= PAGE_SIZE;
- gaddr += PAGE_SIZE;
- }
- } else {
- rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
- bits);
- if (!rc) {
- dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
- len = len < dist ? 0 : len - dist;
- gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
- }
- }
- gmap_pmd_op_end(gmap, pmdp);
- }
- if (rc) {
- if (rc == -EINVAL)
- return rc;
- /* -EAGAIN, fixup of userspace mm and gmap */
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr))
- return vmaddr;
- rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
- if (rc)
- return rc;
- }
- }
- return 0;
-}
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (!pmdp)
+ return -EAGAIN;
-/**
- * gmap_mprotect_notify - change access rights for a range of ptes and
- * call the notifier if any pte changes again
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: virtual address in the guest address space
- * @len: size of area
- * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
- *
- * Returns 0 if for each page in the given range a gmap mapping exists,
- * the new access rights could be set and the notifier could be armed.
- * If the gmap mapping is missing for one or more pages -EFAULT is
- * returned. If no memory could be allocated -ENOMEM is returned.
- * This function establishes missing page table entries.
- */
-int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
- unsigned long len, int prot)
-{
- int rc;
+ if (!pmd_leaf(*pmdp)) {
+ rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);
+ if (!rc)
+ rc = PAGE_SIZE;
+ } else {
+ rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);
+ if (!rc)
+ rc = HPAGE_SIZE;
+ }
+ gmap_pmd_op_end(gmap, pmdp);
- if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
- return -EINVAL;
- if (!MACHINE_HAS_ESOP && prot == PROT_READ)
- return -EINVAL;
- mmap_read_lock(gmap->mm);
- rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
- mmap_read_unlock(gmap->mm);
return rc;
}
-EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
+EXPORT_SYMBOL_GPL(gmap_protect_one);
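
Since gmap_protect_one() now reports the size of the mapping it protected,
iterating over a range becomes the caller's job. A hedged sketch of such a
loop, modeled on the gmap_protect_range() removed above; __gmap_translate()
and gmap_pte_op_fixup() are used exactly as the deleted code used them, but
the real caller presumably lives in the KVM code and may differ:

	/* sketch: protect [gaddr, gaddr + len), page by page or segment by segment */
	static int gmap_protect_range_sketch(struct gmap *gmap, unsigned long gaddr,
					     unsigned long len, int prot, unsigned long bits)
	{
		unsigned long vmaddr, dist;
		int rc;

		while (len) {
			rc = gmap_protect_one(gmap, gaddr, prot, bits);
			if (rc == -EAGAIN) {
				/* no host mapping yet: fault it in, then retry */
				vmaddr = __gmap_translate(gmap, gaddr);
				if (IS_ERR_VALUE(vmaddr))
					return vmaddr;
				rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
				if (rc)
					return rc;
				continue;
			}
			if (rc < 0)
				return rc;
			/* rc is PAGE_SIZE or HPAGE_SIZE: skip the protected mapping */
			dist = rc - (gaddr & (rc - 1));
			len = len < dist ? 0 : len - dist;
			gaddr = (gaddr & ~((unsigned long)rc - 1)) + rc;
		}
		return 0;
	}
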
/**
* gmap_read_table - get an unsigned long value from a guest page table using
@@ -1414,7 +1251,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
@@ -1442,7 +1278,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
}
@@ -1472,7 +1307,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1500,7 +1334,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1530,7 +1363,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1558,7 +1390,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1588,7 +1419,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
__gmap_unshadow_r2t(sg, raddr, __va(r2t));
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
@@ -1620,7 +1450,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
r1t[i] = _REGION1_ENTRY_EMPTY;
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
@@ -1631,7 +1460,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
*
* Called with sg->guest_table_lock
*/
-static void gmap_unshadow(struct gmap *sg)
+void gmap_unshadow(struct gmap *sg)
{
unsigned long *table;
@@ -1657,143 +1486,7 @@ static void gmap_unshadow(struct gmap *sg)
break;
}
}
-
-/**
- * gmap_find_shadow - find a specific asce in the list of shadow tables
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns the pointer to a gmap if a shadow table with the given asce is
- * already available, ERR_PTR(-EAGAIN) if another one is just being created,
- * otherwise NULL
- */
-static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
- int edat_level)
-{
- struct gmap *sg;
-
- list_for_each_entry(sg, &parent->children, list) {
- if (sg->orig_asce != asce || sg->edat_level != edat_level ||
- sg->removed)
- continue;
- if (!sg->initialized)
- return ERR_PTR(-EAGAIN);
- refcount_inc(&sg->ref_count);
- return sg;
- }
- return NULL;
-}
-
-/**
- * gmap_shadow_valid - check if a shadow guest address space matches the
- * given properties and is still valid
- * @sg: pointer to the shadow guest address space structure
- * @asce: ASCE for which the shadow table is requested
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns 1 if the gmap shadow is still valid and matches the given
- * properties, the caller can continue using it. Returns 0 otherwise, the
- * caller has to request a new shadow gmap in this case.
- *
- */
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
-{
- if (sg->removed)
- return 0;
- return sg->orig_asce == asce && sg->edat_level == edat_level;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow_valid);
-
-/**
- * gmap_shadow - create/find a shadow guest address space
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * The pages of the top level page table referred by the asce parameter
- * will be set to read-only and marked in the PGSTEs of the kvm process.
- * The shadow table will be removed automatically on any change to the
- * PTE mapping for the source table.
- *
- * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
- * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
- * parent gmap table could not be protected.
- */
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
- int edat_level)
-{
- struct gmap *sg, *new;
- unsigned long limit;
- int rc;
-
- BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
- BUG_ON(gmap_is_shadow(parent));
- spin_lock(&parent->shadow_lock);
- sg = gmap_find_shadow(parent, asce, edat_level);
- spin_unlock(&parent->shadow_lock);
- if (sg)
- return sg;
- /* Create a new shadow gmap */
- limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
- if (asce & _ASCE_REAL_SPACE)
- limit = -1UL;
- new = gmap_alloc(limit);
- if (!new)
- return ERR_PTR(-ENOMEM);
- new->mm = parent->mm;
- new->parent = gmap_get(parent);
- new->private = parent->private;
- new->orig_asce = asce;
- new->edat_level = edat_level;
- new->initialized = false;
- spin_lock(&parent->shadow_lock);
- /* Recheck if another CPU created the same shadow */
- sg = gmap_find_shadow(parent, asce, edat_level);
- if (sg) {
- spin_unlock(&parent->shadow_lock);
- gmap_free(new);
- return sg;
- }
- if (asce & _ASCE_REAL_SPACE) {
- /* only allow one real-space gmap shadow */
- list_for_each_entry(sg, &parent->children, list) {
- if (sg->orig_asce & _ASCE_REAL_SPACE) {
- spin_lock(&sg->guest_table_lock);
- gmap_unshadow(sg);
- spin_unlock(&sg->guest_table_lock);
- list_del(&sg->list);
- gmap_put(sg);
- break;
- }
- }
- }
- refcount_set(&new->ref_count, 2);
- list_add(&new->list, &parent->children);
- if (asce & _ASCE_REAL_SPACE) {
- /* nothing to protect, return right away */
- new->initialized = true;
- spin_unlock(&parent->shadow_lock);
- return new;
- }
- spin_unlock(&parent->shadow_lock);
- /* protect after insertion, so it will get properly invalidated */
- mmap_read_lock(parent->mm);
- rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
- ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
- PROT_READ, GMAP_NOTIFY_SHADOW);
- mmap_read_unlock(parent->mm);
- spin_lock(&parent->shadow_lock);
- new->initialized = true;
- if (rc) {
- list_del(&new->list);
- gmap_free(new);
- new = ERR_PTR(rc);
- }
- spin_unlock(&parent->shadow_lock);
- return new;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow);
+EXPORT_SYMBOL(gmap_unshadow);
/**
* gmap_shadow_r2t - create an empty shadow region 2 table
@@ -1827,9 +1520,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = r2t & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_r2t = page_to_phys(page);
/* Install shadow region second table */
spin_lock(&sg->guest_table_lock);
@@ -1851,7 +1541,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r2t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1911,9 +1600,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = r3t & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_r3t = page_to_phys(page);
/* Install shadow region third table */
spin_lock(&sg->guest_table_lock);
@@ -1935,7 +1621,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r3t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -1995,9 +1680,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = sgt & _REGION_ENTRY_ORIGIN;
- if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
s_sgt = page_to_phys(page);
/* Install shadow segment table */
spin_lock(&sg->guest_table_lock);
@@ -2019,7 +1701,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= sgt & _REGION_ENTRY_PROTECT;
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
@@ -2052,45 +1733,22 @@ out_free:
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
-/**
- * gmap_shadow_pgt_lookup - find a shadow page table
- * @sg: pointer to the shadow guest address space structure
- * @saddr: the address in the shadow guest address space
- * @pgt: parent gmap address of the page table to get shadowed
- * @dat_protection: if the pgtable is marked as protected by dat
- * @fake: pgt references contiguous guest memory block, not a pgtable
- *
- * Returns 0 if the shadow page table was found and -EAGAIN if the page
- * table was not found.
- *
- * Called with sg->mm->mmap_lock in read.
- */
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
- unsigned long *pgt, int *dat_protection,
- int *fake)
+static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
{
- unsigned long *table;
- struct page *page;
- int rc;
+ unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));
- BUG_ON(!gmap_is_shadow(sg));
- spin_lock(&sg->guest_table_lock);
- table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
- if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
- /* Shadow page tables are full pages (pte+pgste) */
- page = pfn_to_page(*table >> PAGE_SHIFT);
- *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
- *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
- *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
- rc = 0;
- } else {
- rc = -EAGAIN;
- }
- spin_unlock(&sg->guest_table_lock);
- return rc;
+ pgstes += _PAGE_ENTRIES;
+
+ pgstes[0] &= ~PGSTE_ST2_MASK;
+ pgstes[1] &= ~PGSTE_ST2_MASK;
+ pgstes[2] &= ~PGSTE_ST2_MASK;
+ pgstes[3] &= ~PGSTE_ST2_MASK;
+ pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
+ pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
+ pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
+ pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
}
-EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
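
gmap_pgste_set_pgt_addr() spreads the 64-bit parent table origin over the
16-bit ST2 fields of the first four PGSTEs. A sketch of the round trip,
assuming PGSTE_ST2_MASK is 0x0000ffff00000000UL (bits 32-47, per the pgtable
definitions); the decode direction is not part of this patch and is shown
only to make the layout visible:

	#define PGSTE_ST2_MASK	0x0000ffff00000000UL	/* assumed field position */

	/* encode: pgstes[0..3] receive origin bits 63:48, 47:32, 31:16, 15:0 */
	static void st2_encode(unsigned long *pgstes, unsigned long origin)
	{
		pgstes[0] = (origin >> 16) & PGSTE_ST2_MASK;
		pgstes[1] = origin & PGSTE_ST2_MASK;
		pgstes[2] = (origin << 16) & PGSTE_ST2_MASK;
		pgstes[3] = (origin << 32) & PGSTE_ST2_MASK;
	}

	/* decode: reassemble the origin from the four ST2 fields */
	static unsigned long st2_decode(const unsigned long *pgstes)
	{
		return ((pgstes[0] & PGSTE_ST2_MASK) << 16) |
		       (pgstes[1] & PGSTE_ST2_MASK) |
		       ((pgstes[2] & PGSTE_ST2_MASK) >> 16) |
		       ((pgstes[3] & PGSTE_ST2_MASK) >> 32);
	}
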
/**
* gmap_shadow_pgt - instantiate a shadow page table
@@ -2119,9 +1777,10 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
ptdesc = page_table_alloc_pgste(sg->mm);
if (!ptdesc)
return -ENOMEM;
- ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN;
+ origin = pgt & _SEGMENT_ENTRY_ORIGIN;
if (fake)
- ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE;
+ origin |= GMAP_SHADOW_FAKE_TABLE;
+ gmap_pgste_set_pgt_addr(ptdesc, origin);
s_pgt = page_to_phys(ptdesc_page(ptdesc));
/* Install shadow page table */
spin_lock(&sg->guest_table_lock);
@@ -2140,7 +1799,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
/* mark as invalid as long as the parent table is not protected */
*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
(pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
- list_add(&ptdesc->pt_list, &sg->pt_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2318,7 +1976,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
pte_t *pte, unsigned long bits)
{
unsigned long offset, gaddr = 0;
- unsigned long *table;
struct gmap *gmap, *sg, *next;
offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
@@ -2326,12 +1983,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- table = radix_tree_lookup(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (table)
- gaddr = __gmap_segment_gaddr(table) + offset;
+ gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
spin_unlock(&gmap->guest_table_lock);
- if (!table)
+ if (!IS_GADDR_VALID(gaddr))
continue;
if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
@@ -2391,10 +2045,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
if (pmdp) {
- gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
pmdp_notify_gmap(gmap, pmdp, gaddr);
WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
_SEGMENT_ENTRY_GMAP_UC |
@@ -2438,28 +2090,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
*/
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *entry, gaddr;
+ unsigned long gaddr;
struct gmap *gmap;
pmd_t *pmdp;
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (entry) {
- pmdp = (pmd_t *)entry;
- gaddr = __gmap_segment_gaddr(entry);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
pmdp_notify_gmap(gmap, pmdp, gaddr);
- WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
- _SEGMENT_ENTRY_GMAP_UC |
- _SEGMENT_ENTRY));
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC |
+ _SEGMENT_ENTRY));
if (MACHINE_HAS_TLB_GUEST)
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
gmap->asce, IDTE_LOCAL);
else if (MACHINE_HAS_IDTE)
__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
- *entry = _SEGMENT_ENTRY_EMPTY;
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
spin_unlock(&gmap->guest_table_lock);
}
@@ -2474,22 +2123,19 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
*/
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *entry, gaddr;
+ unsigned long gaddr;
struct gmap *gmap;
pmd_t *pmdp;
rcu_read_lock();
list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (entry) {
- pmdp = (pmd_t *)entry;
- gaddr = __gmap_segment_gaddr(entry);
+ pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+ if (pmdp) {
pmdp_notify_gmap(gmap, pmdp, gaddr);
- WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
- _SEGMENT_ENTRY_GMAP_UC |
- _SEGMENT_ENTRY));
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC |
+ _SEGMENT_ENTRY));
if (MACHINE_HAS_TLB_GUEST)
__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
gmap->asce, IDTE_GLOBAL);
@@ -2497,7 +2143,7 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
else
__pmdp_csp(pmdp);
- *entry = _SEGMENT_ENTRY_EMPTY;
+ *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
}
spin_unlock(&gmap->guest_table_lock);
}
@@ -2943,49 +2589,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
/**
- * s390_unlist_old_asce - Remove the topmost level of page tables from the
- * list of page tables of the gmap.
- * @gmap: the gmap whose table is to be removed
- *
- * On s390x, KVM keeps a list of all pages containing the page tables of the
- * gmap (the CRST list). This list is used at tear down time to free all
- * pages that are now not needed anymore.
- *
- * This function removes the topmost page of the tree (the one pointed to by
- * the ASCE) from the CRST list.
- *
- * This means that it will not be freed when the VM is torn down, and needs
- * to be handled separately by the caller, unless a leak is actually
- * intended. Notice that this function will only remove the page from the
- * list, the page will still be used as a top level page table (and ASCE).
- */
-void s390_unlist_old_asce(struct gmap *gmap)
-{
- struct page *old;
-
- old = virt_to_page(gmap->table);
- spin_lock(&gmap->guest_table_lock);
- list_del(&old->lru);
- /*
- * Sometimes the topmost page might need to be "removed" multiple
- * times, for example if the VM is rebooted into secure mode several
- * times concurrently, or if s390_replace_asce fails after calling
- * s390_remove_old_asce and is attempted again later. In that case
- * the old asce has been removed from the list, and therefore it
- * will not be freed when the VM terminates, but the ASCE is still
- * in use and still pointed to.
- * A subsequent call to replace_asce will follow the pointer and try
- * to remove the same page from the list again.
- * Therefore it's necessary that the page of the ASCE has valid
- * pointers, so list_del can work (and do nothing) without
- * dereferencing stale or invalid pointers.
- */
- INIT_LIST_HEAD(&old->lru);
- spin_unlock(&gmap->guest_table_lock);
-}
-EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
-
-/**
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
* @gmap: the gmap whose ASCE needs to be replaced
*
@@ -3004,8 +2607,6 @@ int s390_replace_asce(struct gmap *gmap)
struct page *page;
void *table;
- s390_unlist_old_asce(gmap);
-
/* Replacing segment type ASCEs would cause serious issues */
if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
return -EINVAL;
@@ -3013,19 +2614,9 @@ int s390_replace_asce(struct gmap *gmap)
page = gmap_alloc_crst();
if (!page)
return -ENOMEM;
- page->index = 0;
table = page_to_virt(page);
memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
- /*
- * The caller has to deal with the old ASCE, but here we make sure
- * the new one is properly added to the CRST list, so that
- * it will be freed when the VM is torn down.
- */
- spin_lock(&gmap->guest_table_lock);
- list_add(&page->lru, &gmap->crst_list);
- spin_unlock(&gmap->guest_table_lock);
-
/* Set new table origin while preserving existing ASCE control bits */
asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
WRITE_ONCE(gmap->asce, asce);
@@ -3035,3 +2626,31 @@ int s390_replace_asce(struct gmap *gmap)
return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);
+
+/**
+ * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split
+ * @mm: the mm containing the folio to work on
+ * @folio: the folio
+ * @split: whether to split a large folio
+ *
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
+ *
+ * Return: 0 if the folio was split successfully; -EAGAIN if the caller
+ * should retry (always the case if @split is false); any other negative
+ * error code is returned by split_folio().
+ */
+int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
+{
+ int rc;
+
+ lockdep_assert_not_held(&mm->mmap_lock);
+ folio_wait_writeback(folio);
+ lru_add_drain_all();
+ if (split) {
+ folio_lock(folio);
+ rc = split_folio(folio);
+ folio_unlock(folio);
+
+ if (rc != -EBUSY)
+ return rc;
+ }
+ return -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio);
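
A hypothetical caller of this helper; the retry loop, the folio_get()/
folio_put() pairing and the cond_resched() are illustrative only and not
taken from this patch:

	static int split_until_done(struct mm_struct *mm, struct folio *folio)
	{
		int rc;

		do {
			folio_get(folio);	/* the helper wants an extra reference held */
			rc = kvm_s390_wiggle_split_folio(mm, folio, true);
			folio_put(folio);
			cond_resched();
		} while (rc == -EAGAIN);
		return rc;
	}
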
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 7a96623a9d2e..f2298f7a3f21 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -56,6 +56,15 @@ pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
struct ctlreg __bootdata_preserved(s390_invalid_asce);
+unsigned long __bootdata_preserved(page_noexec_mask);
+EXPORT_SYMBOL(page_noexec_mask);
+
+unsigned long __bootdata_preserved(segment_noexec_mask);
+EXPORT_SYMBOL(segment_noexec_mask);
+
+unsigned long __bootdata_preserved(region_noexec_mask);
+EXPORT_SYMBOL(region_noexec_mask);
+
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);
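
These masks back the MACHINE_HAS_NX checks removed from pageattr.c, pgtable.c
and vmem.c further down: on machines without the NX facility the boot code
(note the __bootdata_preserved annotations) presumably clears the NOEXEC bits
from the masks once, so every mapping helper can apply the mask
unconditionally. A minimal sketch of the pattern, with the setup location and
mask handling assumed:

	unsigned long page_noexec_mask = ~0UL;	/* keep all bits when NX exists */

	static void __init setup_noexec_masks(int machine_has_nx)
	{
		if (!machine_has_nx)
			page_noexec_mask &= ~_PAGE_NOEXEC;	/* strip the bit once */
	}

	/* callers then filter unconditionally instead of testing the facility */
	static unsigned long mk_kernel_prot(unsigned long prot)
	{
		return prot & page_noexec_mask;
	}
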
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 28a18c42ba99..44426e0f2944 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -17,6 +17,7 @@
#include <asm/asm-extable.h>
#include <asm/abs_lowcore.h>
#include <asm/stacktrace.h>
+#include <asm/sections.h>
#include <asm/maccess.h>
#include <asm/ctlreg.h>
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 33f3504be90b..76f376876e0d 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -196,22 +196,28 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
}
}
-static const pgprot_t protection_map[16] = {
- [VM_NONE] = PAGE_NONE,
- [VM_READ] = PAGE_RO,
- [VM_WRITE] = PAGE_RO,
- [VM_WRITE | VM_READ] = PAGE_RO,
- [VM_EXEC] = PAGE_RX,
- [VM_EXEC | VM_READ] = PAGE_RX,
- [VM_EXEC | VM_WRITE] = PAGE_RX,
- [VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX,
- [VM_SHARED] = PAGE_NONE,
- [VM_SHARED | VM_READ] = PAGE_RO,
- [VM_SHARED | VM_WRITE] = PAGE_RW,
- [VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW,
- [VM_SHARED | VM_EXEC] = PAGE_RX,
- [VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX,
- [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX,
- [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX
-};
+static pgprot_t protection_map[16] __ro_after_init;
+
+void __init setup_protection_map(void)
+{
+ pgprot_t *pm = protection_map;
+
+ pm[VM_NONE] = PAGE_NONE;
+ pm[VM_READ] = PAGE_RO;
+ pm[VM_WRITE] = PAGE_RO;
+ pm[VM_WRITE | VM_READ] = PAGE_RO;
+ pm[VM_EXEC] = PAGE_RX;
+ pm[VM_EXEC | VM_READ] = PAGE_RX;
+ pm[VM_EXEC | VM_WRITE] = PAGE_RX;
+ pm[VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX;
+ pm[VM_SHARED] = PAGE_NONE;
+ pm[VM_SHARED | VM_READ] = PAGE_RO;
+ pm[VM_SHARED | VM_WRITE] = PAGE_RW;
+ pm[VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW;
+ pm[VM_SHARED | VM_EXEC] = PAGE_RX;
+ pm[VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX;
+ pm[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX;
+ pm[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX;
+}
+
DECLARE_VM_GET_PAGE_PROT
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 8f56a21a077f..eae97fb61712 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -109,8 +109,6 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
} else if (flags & SET_MEMORY_DEF) {
new = __pte(pte_val(new) & PAGE_MASK);
new = set_pte_bit(new, PAGE_KERNEL);
- if (!MACHINE_HAS_NX)
- new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
}
pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
ptep++;
@@ -167,8 +165,6 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
} else if (flags & SET_MEMORY_DEF) {
new = __pmd(pmd_val(new) & PMD_MASK);
new = set_pmd_bit(new, SEGMENT_KERNEL);
- if (!MACHINE_HAS_NX)
- new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
}
@@ -256,8 +252,6 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
} else if (flags & SET_MEMORY_DEF) {
new = __pud(pud_val(new) & PUD_MASK);
new = set_pud_bit(new, REGION3_KERNEL);
- if (!MACHINE_HAS_NX)
- new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 58696a0c4e4a..30387a6e98ff 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -21,7 +21,7 @@
int page_table_allocate_pgste = 0;
EXPORT_SYMBOL(page_table_allocate_pgste);
-static struct ctl_table page_table_sysctl[] = {
+static const struct ctl_table page_table_sysctl[] = {
{
.procname = "allocate_pgste",
.data = &page_table_allocate_pgste,
@@ -88,12 +88,14 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
if (unlikely(!p4d))
goto err_p4d;
crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
+ pagetable_p4d_ctor(virt_to_ptdesc(p4d));
}
if (end > _REGION1_SIZE) {
pgd = crst_table_alloc(mm);
if (unlikely(!pgd))
goto err_pgd;
crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
+ pagetable_pgd_ctor(virt_to_ptdesc(pgd));
}
spin_lock_bh(&mm->page_table_lock);
@@ -130,6 +132,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
return 0;
err_pgd:
+ pagetable_dtor(virt_to_ptdesc(p4d));
crst_table_free(mm, p4d);
err_p4d:
return -ENOMEM;
@@ -173,37 +176,16 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
}
table = ptdesc_to_virt(ptdesc);
__arch_set_page_dat(table, 1);
- /* pt_list is used by gmap only */
- INIT_LIST_HEAD(&ptdesc->pt_list);
memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
return table;
}
-static void pagetable_pte_dtor_free(struct ptdesc *ptdesc)
-{
- pagetable_pte_dtor(ptdesc);
- pagetable_free(ptdesc);
-}
-
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
struct ptdesc *ptdesc = virt_to_ptdesc(table);
- pagetable_pte_dtor_free(ptdesc);
-}
-
-void __tlb_remove_table(void *table)
-{
- struct ptdesc *ptdesc = virt_to_ptdesc(table);
- struct page *page = ptdesc_page(ptdesc);
-
- if (compound_order(page) == CRST_ALLOC_ORDER) {
- /* pmd, pud, or p4d */
- pagetable_free(ptdesc);
- return;
- }
- pagetable_pte_dtor_free(ptdesc);
+ pagetable_dtor_free(ptdesc);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -211,7 +193,7 @@ static void pte_free_now(struct rcu_head *head)
{
struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
- pagetable_pte_dtor_free(ptdesc);
+ pagetable_dtor_free(ptdesc);
}
void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index cea5dba80468..f05e62e037c2 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -360,8 +360,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
pgste_t pgste;
struct mm_struct *mm = vma->vm_mm;
- if (!MACHINE_HAS_NX)
- pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC));
if (mm_has_pgste(mm)) {
pgste = pgste_get(ptep);
pgste_set_key(ptep, pgste, pte, mm);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 665b8228afeb..8ead999e340b 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -171,9 +171,6 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
pte_t *pte;
prot = pgprot_val(PAGE_KERNEL);
- if (!MACHINE_HAS_NX)
- prot &= ~_PAGE_NOEXEC;
-
pte = pte_offset_kernel(pmd, addr);
for (; addr < end; addr += PAGE_SIZE, pte++) {
if (!add) {
@@ -230,9 +227,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
pte_t *pte;
prot = pgprot_val(SEGMENT_KERNEL);
- if (!MACHINE_HAS_NX)
- prot &= ~_SEGMENT_ENTRY_NOEXEC;
-
pmd = pmd_offset(pud, addr);
for (; addr < end; addr = next, pmd++) {
next = pmd_addr_end(addr, end);
@@ -324,8 +318,6 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
pmd_t *pmd;
prot = pgprot_val(REGION3_KERNEL);
- if (!MACHINE_HAS_NX)
- prot &= ~_REGION_ENTRY_NOEXEC;
pud = pud_offset(p4d, addr);
for (; addr < end; addr = next, pud++) {
next = pud_addr_end(addr, end);
@@ -670,7 +662,7 @@ void __init vmem_map_init(void)
if (!static_key_enabled(&cpu_has_bear))
set_memory_x(0, 1);
if (debug_pagealloc_enabled())
- __set_memory_4k(__va(0), __va(0) + ident_map_size);
+ __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 2c21f0394c9a..df73c5182990 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -5,6 +5,6 @@
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
- pci_bus.o pci_kvm_hook.o
+ pci_bus.o pci_kvm_hook.o pci_report.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
obj-$(CONFIG_SYSFS) += pci_sysfs.o
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index d5ace00d10f0..39a481ec4a40 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -171,7 +171,6 @@ void zpci_bus_scan_busses(void)
static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
{
return !s390_pci_no_rid && zdev->rid_available &&
- zpci_is_device_configured(zdev) &&
!zdev->vfn;
}
@@ -332,6 +331,17 @@ error:
return rc;
}
+static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ struct pci_dev *pdev;
+
+ pdev = zpci_iov_find_parent_pf(zbus, zdev);
+ if (!pdev)
+ return true;
+ pci_dev_put(pdev);
+ return false;
+}
+
int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
{
bool topo_is_tid = zdev->tid_avail;
@@ -346,6 +356,15 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
topo = topo_is_tid ? zdev->tid : zdev->pchid;
zbus = zpci_bus_get(topo, topo_is_tid);
+ /*
+ * An isolated VF gets its own domain/bus even if there exists
+ * a matching domain/bus already
+ */
+ if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) {
+ zpci_bus_put(zbus);
+ zbus = NULL;
+ }
+
if (!zbus) {
zbus = zpci_bus_alloc(topo, topo_is_tid);
if (!zbus)
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 7f7b732b3f3e..7bd7721c1239 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -16,6 +16,7 @@
#include <asm/sclp.h>
#include "pci_bus.h"
+#include "pci_report.h"
/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
@@ -169,6 +170,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
{
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ char *status_str = "success";
struct pci_driver *driver;
/*
@@ -186,29 +189,37 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (is_passed_through(pdev)) {
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
pci_name(pdev));
+ status_str = "failed (pass-through)";
goto out_unlock;
}
driver = to_pci_driver(pdev->dev.driver);
if (!is_driver_supported(driver)) {
- if (!driver)
+ if (!driver) {
pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
pci_name(pdev));
- else
+ status_str = "failed (no driver)";
+ } else {
pr_info("%s: The %s driver bound to the device does not support error recovery\n",
pci_name(pdev),
driver->name);
+ status_str = "failed (no driver support)";
+ }
goto out_unlock;
}
ers_res = zpci_event_notify_error_detected(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
+ if (ers_result_indicates_abort(ers_res)) {
+ status_str = "failed (abort on detection)";
goto out_unlock;
+ }
if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
+ if (ers_result_indicates_abort(ers_res)) {
+ status_str = "failed (abort on MMIO enable)";
goto out_unlock;
+ }
}
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
@@ -217,6 +228,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
+ status_str = "failed (driver can't recover)";
goto out_unlock;
}
@@ -225,6 +237,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
driver->err_handler->resume(pdev);
out_unlock:
pci_dev_unlock(pdev);
+ zpci_report_status(zdev, "recovery", status_str);
return ers_res;
}
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
index ead062bf2b41..191e56a623f6 100644
--- a/arch/s390/pci/pci_iov.c
+++ b/arch/s390/pci/pci_iov.c
@@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in
return 0;
}
-int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+/**
+ * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function
+ * @zbus: The bus that the PCI function is on, or would be added on
+ * @zdev: The PCI function
+ *
+ * Finds the parent PF, if it exists and is configured, of the given PCI function
+ * and increments its refcount. The PF is searched for on the provided bus, so the
+ * caller has to ensure that this is the correct bus to search. This function may
+ * be used before adding the PCI function to a zbus.
+ *
+ * Return: Pointer to the struct pci_dev of the parent PF, or NULL if it is not
+ * found. If the function is not a VF or has no RequesterID information,
+ * NULL is returned as well.
+ */
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
- int i, cand_devfn;
- struct zpci_dev *zdev;
+ int i, vfid, devfn, cand_devfn;
struct pci_dev *pdev;
- int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
- int rc = 0;
if (!zbus->multifunction)
- return 0;
-
- /* If the parent PF for the given VF is also configured in the
+ return NULL;
+ /* Non-VFs and VFs without RID available don't have a parent */
+ if (!zdev->vfn || !zdev->rid_available)
+ return NULL;
+	/* Linux vfid starts at 0, vfn starts at 1 */
+ vfid = zdev->vfn - 1;
+ devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
+ /*
+ * If the parent PF for the given VF is also configured in the
* instance, it must be on the same zbus.
* We can then identify the parent PF by checking what
* devfn the VF would have if it belonged to that PF using the PF's
@@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn
if (!pdev)
continue;
cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
- if (cand_devfn == virtfn->devfn) {
- rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
- /* balance pci_get_slot() */
- pci_dev_put(pdev);
- break;
- }
+ if (cand_devfn == devfn)
+ return pdev;
/* balance pci_get_slot() */
pci_dev_put(pdev);
}
}
+ return NULL;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ struct zpci_dev *zdev = to_zpci(virtfn);
+ struct pci_dev *pdev_pf;
+ int rc = 0;
+
+ pdev_pf = zpci_iov_find_parent_pf(zbus, zdev);
+ if (pdev_pf) {
+ /* Linux' vfids start at 0 while zdev->vfn starts at 1 */
+ rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1);
+ pci_dev_put(pdev_pf);
+ }
return rc;
}
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
index e3fa4e77fc86..d2c2793eb0f3 100644
--- a/arch/s390/pci/pci_iov.h
+++ b/arch/s390/pci/pci_iov.h
@@ -19,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev);
int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev);
+
#else /* CONFIG_PCI_IOV */
static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
@@ -28,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v
{
return 0;
}
+
+static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ return NULL;
+}
#endif /* CONFIG_PCI_IOV */
#endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c
new file mode 100644
index 000000000000..1b494e5ecc4d
--- /dev/null
+++ b/arch/s390/pci/pci_report.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/sprintf.h>
+#include <linux/pci.h>
+
+#include <asm/sclp.h>
+#include <asm/debug.h>
+#include <asm/pci_debug.h>
+
+#include "pci_report.h"
+
+#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714
+
+struct zpci_report_error_data {
+ u64 timestamp;
+ u64 err_log_id;
+ char log_data[];
+} __packed;
+
+#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb))
+#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data))
+
+struct zpci_report_error {
+ struct zpci_report_error_header header;
+ struct zpci_report_error_data data;
+} __packed;
+
+static const char *zpci_state_str(pci_channel_state_t state)
+{
+ switch (state) {
+ case pci_channel_io_normal:
+ return "normal";
+ case pci_channel_io_frozen:
+ return "frozen";
+ case pci_channel_io_perm_failure:
+ return "permanent-failure";
+ default:
+ return "invalid";
+	}
+}
+
+static int debug_log_header_fn(debug_info_t *id, struct debug_view *view,
+ int area, debug_entry_t *entry, char *out_buf,
+ size_t out_buf_size)
+{
+ unsigned long sec, usec;
+ unsigned int level;
+ char *except_str;
+ int rc = 0;
+
+ level = entry->level;
+ sec = entry->clock;
+ usec = do_div(sec, USEC_PER_SEC);
+
+ if (entry->exception)
+ except_str = "*";
+ else
+ except_str = "-";
+ rc += scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ",
+ sec, usec, level, except_str,
+ entry->cpu);
+ return rc;
+}
+
+static int debug_prolog_header(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size)
+{
+ return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n");
+}
+
+static struct debug_view debug_log_view = {
+ "pci_msg_log",
+ &debug_prolog_header,
+ &debug_log_header_fn,
+ &debug_sprintf_format_fn,
+ NULL,
+ NULL
+};
+
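+
Together, debug_prolog_header() and debug_log_header_fn() give each dumped
s390dbf entry a fixed-width prefix ahead of the sprintf-formatted message;
a dump would look roughly like this (all values invented):

	sec:usec level except cpu msg
	00000001234:567890 3 - 0001 <sprintf-formatted debug message>
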
+/**
+ * zpci_report_status - Report the status of operations on a PCI device
+ * @zdev: The PCI device for which to report status
+ * @operation: A string representing the operation reported
+ * @status: A string representing the status of the operation
+ *
+ * This function creates a human readable report about an operation such as
+ * PCI device recovery and forwards this to the platform using the SCLP Write
+ * Event Data mechanism. Besides the operation and status strings, the report
+ * also contains additional information about the device deemed useful for
+ * debugging, such as the currently bound device driver, if any, and the
+ * error state. Additionally, a string representation of pci_debug_msg_id,
+ * or as much of it as fits, is included.
+ *
+ * Return: 0 on success, an error code < 0 otherwise.
+ */
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status)
+{
+ struct zpci_report_error *report;
+ struct pci_driver *driver = NULL;
+ struct pci_dev *pdev = NULL;
+ char *buf, *end;
+ int ret;
+
+ if (!zdev || !zdev->zbus)
+ return -ENODEV;
+
+ /* Protected virtualization hosts get nothing from us */
+ if (prot_virt_guest)
+ return -ENODATA;
+
+ report = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!report)
+ return -ENOMEM;
+ if (zdev->zbus->bus)
+ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+ if (pdev)
+ driver = to_pci_driver(pdev->dev.driver);
+
+ buf = report->data.log_data;
+ end = report->data.log_data + ZPCI_REPORT_DATA_SIZE;
+ buf += scnprintf(buf, end - buf, "report: %s\n", operation);
+ buf += scnprintf(buf, end - buf, "status: %s\n", status);
+ buf += scnprintf(buf, end - buf, "state: %s\n",
+ (pdev) ? zpci_state_str(pdev->error_state) : "n/a");
+ buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a");
+ ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true);
+ if (ret < 0)
+ pr_err("Reading PCI debug messages failed with code %d\n", ret);
+ else
+ buf += ret;
+
+ report->header.version = 1;
+ report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG;
+ report->header.length = buf - (char *)&report->data;
+ report->data.timestamp = ktime_get_clocktai_seconds();
+ report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT;
+
+ ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid);
+ if (ret)
+ pr_err("Reporting PCI status failed with code %d\n", ret);
+ else
+ pr_info("Reported PCI device status\n");
+
+ free_page((unsigned long)report);
+
+ return ret;
+}
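
The resulting log_data is plain text; a report for a failed recovery might
read as follows (all values invented):

	report: recovery
	status: failed (no driver)
	state: frozen
	driver: n/a
	sec:usec level except cpu msg
	00000001234:567890 3 - 0001 <latest pci_debug_msg_id entries, as far as they fit>
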
diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h
new file mode 100644
index 000000000000..e08003d51a97
--- /dev/null
+++ b/arch/s390/pci/pci_report.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+#ifndef __S390_PCI_REPORT_H
+#define __S390_PCI_REPORT_H
+
+struct zpci_dev;
+
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status);
+
+#endif /* __S390_PCI_REPORT_H */
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 5f46ad58dcd1..2de1ea6c3a8c 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -135,7 +135,7 @@ out:
static DEVICE_ATTR_WO(recover);
static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -145,10 +145,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
return memory_read_from_buffer(buf, count, &off, zdev->util_str,
sizeof(zdev->util_str));
}
-static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+static const BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
struct zpci_report_error_header *report = (void *) buf;
@@ -164,7 +164,7 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
return ret ? ret : count;
}
-static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
+static const BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
static ssize_t uid_is_unique_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -203,7 +203,7 @@ const struct attribute_group zpci_ident_attr_group = {
.is_visible = zpci_index_is_visible,
};
-static struct bin_attribute *zpci_bin_attrs[] = {
+static const struct bin_attribute *const zpci_bin_attrs[] = {
&bin_attr_util_string,
&bin_attr_report_error,
NULL,
@@ -227,7 +227,7 @@ static struct attribute *zpci_dev_attrs[] = {
const struct attribute_group zpci_attr_group = {
.attrs = zpci_dev_attrs,
- .bin_attrs = zpci_bin_attrs,
+ .bin_attrs_new = zpci_bin_attrs,
};
static struct attribute *pfip_attrs[] = {
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 24eccaa29337..bd39b36e7bd6 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -8,20 +8,22 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
-CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
+CFLAGS_sha256.o := -D__NO_FORTIFY
$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
+KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common
KBUILD_CFLAGS += -fno-stack-protector
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += -D__DISABLE_EXPORTS
KBUILD_CFLAGS += $(CLANG_FLAGS)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS += -D__DISABLE_EXPORTS
# Since we link purgatory with -r unresolved symbols are not checked, so we
# also link a purgatory.chk binary without -r to check for unresolved symbols.
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index a1bc02b29c81..7d76c417f83f 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -201,6 +201,17 @@ static int cmp_long_insn(const void *a, const void *b)
return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name);
}
+static void print_insn_name(const char *name)
+{
+ size_t i, len;
+
+ len = strlen(name);
+ printf("{");
+ for (i = 0; i < len; i++)
+ printf(" \'%c\',", name[i]);
+ printf(" }");
+}
+
static void print_long_insn(struct gen_opcode *desc)
{
struct insn *insn;
@@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc)
insn = &desc->insn[i];
if (insn->name_len < 6)
continue;
- printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name);
+ printf("\t[LONG_INSN_%s] = ", insn->upper);
+ print_insn_name(insn->name);
+ printf(", \\\n");
}
printf("}\n\n");
}
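
print_insn_name() swaps the quoted string for a character-array initializer,
which fills the fixed-width name field without the terminating NUL a string
literal would require (the likely motivation is a compiler warning about
unterminated string initialization; the patch itself does not say). For a
long mnemonic such as clfxbr, taken here as an example, the generated line
changes from

	[LONG_INSN_CLFXBR] = "clfxbr", \

to

	[LONG_INSN_CLFXBR] = { 'c', 'l', 'f', 'x', 'b', 'r', }, \
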
@@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr)
if (insn->type->byte != 0)
opcode += 2;
printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format);
- if (insn->name_len < 6)
- printf(".name = \"%s\" ", insn->name);
- else
- printf(".offset = LONG_INSN_%s ", insn->upper);
- printf("}, \\\n");
+ if (insn->name_len < 6) {
+ printf(".name = ");
+ print_insn_name(insn->name);
+ } else {
+ printf(".offset = LONG_INSN_%s", insn->upper);
+ }
+ printf(" }, \\\n");
}
static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset)