summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kbuild1
-rw-r--r--arch/s390/Kconfig16
-rw-r--r--arch/s390/Kconfig.debug2
-rw-r--r--arch/s390/Makefile2
-rw-r--r--arch/s390/appldata/appldata_base.c11
-rw-r--r--arch/s390/appldata/appldata_mem.c4
-rw-r--r--arch/s390/appldata/appldata_net_sum.c4
-rw-r--r--arch/s390/appldata/appldata_os.c10
-rw-r--r--arch/s390/boot/ipl_parm.c2
-rw-r--r--arch/s390/boot/kaslr.c2
-rw-r--r--arch/s390/configs/debug_defconfig48
-rw-r--r--arch/s390/configs/defconfig46
-rw-r--r--arch/s390/configs/zfcpdump_defconfig6
-rw-r--r--arch/s390/crypto/prng.c18
-rw-r--r--arch/s390/crypto/sha1_s390.c12
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/asm-const.h12
-rw-r--r--arch/s390/include/asm/atomic.h14
-rw-r--r--arch/s390/include/asm/bug.h2
-rw-r--r--arch/s390/include/asm/ccwdev.h5
-rw-r--r--arch/s390/include/asm/ccwgroup.h10
-rw-r--r--arch/s390/include/asm/checksum.h19
-rw-r--r--arch/s390/include/asm/chsc.h62
-rw-r--r--arch/s390/include/asm/debug.h15
-rw-r--r--arch/s390/include/asm/diag.h6
-rw-r--r--arch/s390/include/asm/extable.h52
-rw-r--r--arch/s390/include/asm/hugetlb.h10
-rw-r--r--arch/s390/include/asm/io.h2
-rw-r--r--arch/s390/include/asm/ipl.h11
-rw-r--r--arch/s390/include/asm/kasan.h2
-rw-r--r--arch/s390/include/asm/kvm_host.h20
-rw-r--r--arch/s390/include/asm/linkage.h35
-rw-r--r--arch/s390/include/asm/nmi.h2
-rw-r--r--arch/s390/include/asm/pci.h42
-rw-r--r--arch/s390/include/asm/pci_clp.h13
-rw-r--r--arch/s390/include/asm/pci_dma.h11
-rw-r--r--arch/s390/include/asm/percpu.h28
-rw-r--r--arch/s390/include/asm/pgtable.h59
-rw-r--r--arch/s390/include/asm/processor.h20
-rw-r--r--arch/s390/include/asm/ptrace.h5
-rw-r--r--arch/s390/include/asm/qdio.h33
-rw-r--r--arch/s390/include/asm/smp.h6
-rw-r--r--arch/s390/include/asm/syscall.h12
-rw-r--r--arch/s390/include/asm/syscall_wrapper.h6
-rw-r--r--arch/s390/include/asm/thread_info.h1
-rw-r--r--arch/s390/include/asm/timex.h5
-rw-r--r--arch/s390/include/asm/tlb.h1
-rw-r--r--arch/s390/include/asm/tlbflush.h2
-rw-r--r--arch/s390/include/asm/topology.h6
-rw-r--r--arch/s390/include/asm/uaccess.h4
-rw-r--r--arch/s390/include/asm/vdso.h1
-rw-r--r--arch/s390/include/uapi/asm/debug.h35
-rw-r--r--arch/s390/include/uapi/asm/ipl.h25
-rw-r--r--arch/s390/include/uapi/asm/kvm.h7
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h140
-rw-r--r--arch/s390/kernel/Makefile6
-rw-r--r--arch/s390/kernel/asm-offsets.c4
-rw-r--r--arch/s390/kernel/crash_dump.c6
-rw-r--r--arch/s390/kernel/debug.c79
-rw-r--r--arch/s390/kernel/dumpstack.c13
-rw-r--r--arch/s390/kernel/early.c2
-rw-r--r--arch/s390/kernel/entry.S470
-rw-r--r--arch/s390/kernel/entry.h1
-rw-r--r--arch/s390/kernel/ftrace.c20
-rw-r--r--arch/s390/kernel/idle.c20
-rw-r--r--arch/s390/kernel/ipl.c211
-rw-r--r--arch/s390/kernel/kprobes.c4
-rw-r--r--arch/s390/kernel/lgr.c2
-rw-r--r--arch/s390/kernel/machine_kexec.c2
-rw-r--r--arch/s390/kernel/module.c147
-rw-r--r--arch/s390/kernel/nmi.c23
-rw-r--r--arch/s390/kernel/numa.c (renamed from arch/s390/numa/numa.c)0
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c4
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c11
-rw-r--r--arch/s390/kernel/pgm_check.S2
-rw-r--r--arch/s390/kernel/process.c22
-rw-r--r--arch/s390/kernel/ptrace.c293
-rw-r--r--arch/s390/kernel/runtime_instr.c2
-rw-r--r--arch/s390/kernel/setup.c43
-rw-r--r--arch/s390/kernel/smp.c15
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl8
-rw-r--r--arch/s390/kernel/time.c56
-rw-r--r--arch/s390/kernel/topology.c6
-rw-r--r--arch/s390/kernel/traps.c7
-rw-r--r--arch/s390/kernel/uv.c12
-rw-r--r--arch/s390/kernel/vdso.c5
-rw-r--r--arch/s390/kernel/vdso64/Makefile10
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S10
-rw-r--r--arch/s390/kvm/Kconfig6
-rw-r--r--arch/s390/kvm/diag.c1
-rw-r--r--arch/s390/kvm/gaccess.c6
-rw-r--r--arch/s390/kvm/interrupt.c8
-rw-r--r--arch/s390/kvm/kvm-s390.c303
-rw-r--r--arch/s390/kvm/priv.c45
-rw-r--r--arch/s390/kvm/pv.c1
-rw-r--r--arch/s390/kvm/vsie.c7
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/delay.c4
-rw-r--r--arch/s390/lib/error-inject.c14
-rw-r--r--arch/s390/lib/test_unwind.c2
-rw-r--r--arch/s390/mm/cmm.c15
-rw-r--r--arch/s390/mm/dump_pagetables.c1
-rw-r--r--arch/s390/mm/extmem.c34
-rw-r--r--arch/s390/mm/fault.c61
-rw-r--r--arch/s390/mm/gmap.c119
-rw-r--r--arch/s390/mm/hugetlbpage.c26
-rw-r--r--arch/s390/mm/init.c4
-rw-r--r--arch/s390/mm/kasan_init.c2
-rw-r--r--arch/s390/mm/maccess.c19
-rw-r--r--arch/s390/mm/mmap.c1
-rw-r--r--arch/s390/mm/pageattr.c13
-rw-r--r--arch/s390/mm/pgalloc.c2
-rw-r--r--arch/s390/mm/pgtable.c2
-rw-r--r--arch/s390/mm/vmem.c705
-rw-r--r--arch/s390/net/bpf_jit_comp.c220
-rw-r--r--arch/s390/numa/Makefile2
-rw-r--r--arch/s390/pci/Makefile3
-rw-r--r--arch/s390/pci/pci.c237
-rw-r--r--arch/s390/pci/pci_bus.c336
-rw-r--r--arch/s390/pci/pci_bus.h44
-rw-r--r--arch/s390/pci/pci_clp.c8
-rw-r--r--arch/s390/pci/pci_event.c43
-rw-r--r--arch/s390/pci/pci_mmio.c24
-rw-r--r--arch/s390/pci/pci_sysfs.c4
124 files changed, 2745 insertions, 2021 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index e63940bb57cd..8b98c501142d 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -7,5 +7,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/
obj-$(CONFIG_APPLDATA_BASE) += appldata/
obj-y += net/
obj-$(CONFIG_PCI) += pci/
-obj-$(CONFIG_NUMA) += numa/
obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2167bce993ff..b29fcc66ec39 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -30,7 +30,7 @@ config GENERIC_BUG_RELATIVE_POINTERS
def_bool y
config GENERIC_LOCKBREAK
- def_bool y if PREEMPTTION
+ def_bool y if PREEMPTION
config PGSTE
def_bool y if KVM
@@ -59,6 +59,7 @@ config KASAN_SHADOW_OFFSET
config S390
def_bool y
select ARCH_BINFMT_ELF_STATE
+ select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
@@ -101,7 +102,6 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
- select ARCH_KEEP_MEMBLOCK
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -112,6 +112,7 @@ config S390
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
+ select DMA_OPS if PCI
select DYNAMIC_FTRACE if FUNCTION_TRACER
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_AUTOPROBE
@@ -135,7 +136,6 @@ config S390
select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
- select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
@@ -144,6 +144,7 @@ config S390
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FENTRY
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_FUTEX_CMPXCHG if FUTEX
@@ -162,7 +163,6 @@ config S390
select HAVE_LIVEPATCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MEMBLOCK_PHYS_MAP
select MMU_GATHER_NO_GATHER
select HAVE_MOD_ARCH_SPECIFIC
@@ -462,6 +462,7 @@ config NUMA
config NODES_SHIFT
int
+ depends on NEED_MULTIPLE_NODES
default "1"
config SCHED_SMT
@@ -625,10 +626,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y
-config FORCE_MAX_ZONEORDER
- int
- default "9"
-
config MAX_PHYSMEM_BITS
int "Maximum size of supported physical memory in bits (42-53)"
range 42 53
@@ -696,7 +693,7 @@ menu "I/O subsystem"
config QDIO
def_tristate y
prompt "QDIO support"
- ---help---
+ help
This driver provides the Queued Direct I/O base support for
IBM System z.
@@ -768,6 +765,7 @@ config VFIO_AP
def_tristate n
prompt "VFIO support for AP devices"
depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM
+ depends on ZCRYPT
help
This driver grants access to Adjunct Processor (AP) devices
via the VFIO mediated device interface.
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 190527560b2c..761fe2b0b2f6 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -7,7 +7,7 @@ config S390_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
- ---help---
+ help
Say Y here if you want to show the kernel pagetable layout in a
debugfs file. This information is only useful for kernel developers
who are working in architecture specific areas of the kernel.
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 8dfa2cf1f05c..ba94b03c8b2f 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -27,7 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float
KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding)
+KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index aa738cad1338..d74a4c7d5df6 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -51,10 +51,9 @@ static struct platform_device *appldata_pdev;
*/
static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
static int appldata_timer_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
static int appldata_interval_handler(struct ctl_table *ctl, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table_header *appldata_sysctl_header;
static struct ctl_table appldata_table[] = {
@@ -217,7 +216,7 @@ static void __appldata_vtimer_setup(int cmd)
*/
static int
appldata_timer_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int timer_active = appldata_timer_active;
int rc;
@@ -250,7 +249,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write,
*/
static int
appldata_interval_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int interval = appldata_interval;
int rc;
@@ -280,7 +279,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write,
*/
static int
appldata_generic_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
struct list_head *lh;
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index e68136c3c23a..21c3147bd92a 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -29,10 +29,6 @@
* the structure version (product ID, see appldata_base.c) needs to be changed
* as well and all documentation and z/VM applications using it must be
* updated.
- *
- * The record layout is documented in the Linux for zSeries Device Drivers
- * book:
- * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
struct appldata_mem_data {
u64 timestamp;
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 8bc14b0d1def..59c282ca002f 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -25,10 +25,6 @@
* This is accessed as binary data by z/VM. If changes to it can't be avoided,
* the structure version (product ID, see appldata_base.c) needs to be changed
* as well and all documentation and z/VM applications using it must be updated.
- *
- * The record layout is documented in the Linux for zSeries Device Drivers
- * book:
- * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
struct appldata_net_sum_data {
u64 timestamp;
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 8bf46d705957..a363d30ce739 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -32,10 +32,6 @@
* the structure version (product ID, see appldata_base.c) needs to be changed
* as well and all documentation and z/VM applications using it must be
* updated.
- *
- * The record layout is documented in the Linux for zSeries Device Drivers
- * book:
- * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
struct appldata_os_per_cpu {
u32 per_cpu_user; /* timer ticks spent in user mode */
@@ -133,8 +129,7 @@ static void appldata_get_os_data(void *data)
os_data->nr_cpus = j;
- new_size = sizeof(struct appldata_os_data) +
- (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu));
+ new_size = struct_size(os_data, os_cpu, os_data->nr_cpus);
if (ops.size != new_size) {
if (ops.active) {
rc = appldata_diag(APPLDATA_RECORD_OS_ID,
@@ -169,8 +164,7 @@ static int __init appldata_os_init(void)
{
int rc, max_size;
- max_size = sizeof(struct appldata_os_data) +
- (num_possible_cpus() * sizeof(struct appldata_os_per_cpu));
+ max_size = struct_size(appldata_os_data, os_cpu, num_possible_cpus());
if (max_size > APPLDATA_MAX_REC_SIZE) {
pr_err("Maximum OS record size %i exceeds the maximum "
"record size %i\n", max_size, APPLDATA_MAX_REC_SIZE);
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 357adad991d2..8e222a666025 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -2,12 +2,12 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/ctype.h>
+#include <linux/pgtable.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
#include <asm/facility.h>
-#include <asm/pgtable.h>
#include <asm/uv.h>
#include "boot.h"
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 5591243d673e..d4442163ffa9 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -2,8 +2,8 @@
/*
* Copyright IBM Corp. 2019
*/
+#include <linux/pgtable.h>
#include <asm/mem_detect.h>
-#include <asm/pgtable.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/sclp.h>
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 46038bc58c9e..7228aabe9da6 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -1,5 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_WATCH_QUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
@@ -14,7 +15,6 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
@@ -31,9 +31,9 @@ CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
-CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_BPF_LSM=y
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
@@ -51,14 +51,11 @@ CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
CONFIG_CRASH_DUMP=y
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
-CONFIG_VHOST_NET=m
-CONFIG_VHOST_VSOCK=m
+CONFIG_S390_UNWIND_SELFTEST=y
CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
@@ -77,6 +74,8 @@ CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_BSD_DISKLABEL=y
@@ -96,7 +95,6 @@ CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
-CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
@@ -130,6 +128,7 @@ CONFIG_SYN_COOKIES=y
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
+CONFIG_INET_ESPINTCP=y
CONFIG_INET_IPCOMP=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
@@ -144,6 +143,7 @@ CONFIG_TCP_CONG_ILLINOIS=m
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
+CONFIG_INET6_ESPINTCP=y
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_MIP6=m
CONFIG_IPV6_VTI=m
@@ -151,7 +151,10 @@ CONFIG_IPV6_SIT=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_MPTCP=y
CONFIG_NETFILTER=y
+CONFIG_BRIDGE_NETFILTER=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_EVENTS=y
@@ -317,6 +320,7 @@ CONFIG_L2TP_V3=y
CONFIG_L2TP_IP=m
CONFIG_L2TP_ETH=m
CONFIG_BRIDGE=m
+CONFIG_BRIDGE_MRP=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
@@ -341,6 +345,7 @@ CONFIG_NET_SCH_CODEL=m
CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_ETS=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
@@ -364,6 +369,7 @@ CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_GATE=m
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
@@ -374,6 +380,7 @@ CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
# CONFIG_NET_DROP_MONITOR is not set
CONFIG_PCI=y
+# CONFIG_PCIEASPM is not set
CONFIG_PCI_DEBUG=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
@@ -435,6 +442,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
@@ -448,6 +456,8 @@ CONFIG_EQUALIZER=m
CONFIG_IFB=m
CONFIG_MACVLAN=m
CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_BAREUDP=m
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
@@ -481,7 +491,6 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-# CONFIG_MLXFW is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -514,6 +523,7 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -561,6 +571,8 @@ CONFIG_VFIO_MDEV_DEVICE=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -608,11 +620,13 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
CONFIG_HUGETLBFS=y
CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
@@ -650,8 +664,8 @@ CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_UNICODE=y
CONFIG_PERSISTENT_KEYRINGS=y
-CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_FORTIFY_SOURCE=y
@@ -675,8 +689,11 @@ CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
+CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
@@ -685,6 +702,7 @@ CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
@@ -701,6 +719,7 @@ CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
@@ -719,6 +738,9 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -774,6 +796,7 @@ CONFIG_DEBUG_SHIRQ=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_WQ_WATCHDOG=y
+CONFIG_TEST_LOCKUP=m
CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
@@ -785,8 +808,11 @@ CONFIG_DEBUG_NOTIFIERS=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_DEBUG_CREDENTIALS=y
CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_REF_SCALE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=300
+# CONFIG_RCU_TRACE is not set
CONFIG_LATENCYTOP=y
+CONFIG_BOOTTIME_TRACING=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
CONFIG_IRQSOFF_TRACER=y
@@ -794,6 +820,7 @@ CONFIG_PREEMPT_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_S390_PTDUMP=y
CONFIG_NOTIFIER_ERROR_INJECTION=m
@@ -805,13 +832,16 @@ CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
CONFIG_FAIL_FUTEX=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAIL_FUNCTION=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
CONFIG_LKDTM=m
CONFIG_TEST_LIST_SORT=y
+CONFIG_TEST_MIN_HEAP=y
CONFIG_TEST_SORT=y
CONFIG_KPROBES_SANITY_TEST=y
CONFIG_RBTREE_TEST=y
CONFIG_INTERVAL_TREE_TEST=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BITOPS=m
CONFIG_TEST_BPF=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 7cd0648c1f4e..fab03b7a6932 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -1,5 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_WATCH_QUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
@@ -13,7 +14,6 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
@@ -30,9 +30,9 @@ CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
-CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_BPF_LSM=y
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
@@ -41,7 +41,6 @@ CONFIG_LIVEPATCH=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
-# CONFIG_NUMA_EMU is not set
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
@@ -51,14 +50,11 @@ CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
CONFIG_CRASH_DUMP=y
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
-CONFIG_VHOST_NET=m
-CONFIG_VHOST_VSOCK=m
+CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
@@ -74,6 +70,8 @@ CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_BSD_DISKLABEL=y
@@ -91,7 +89,6 @@ CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
-CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
@@ -125,6 +122,7 @@ CONFIG_SYN_COOKIES=y
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
+CONFIG_INET_ESPINTCP=y
CONFIG_INET_IPCOMP=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
@@ -139,6 +137,7 @@ CONFIG_TCP_CONG_ILLINOIS=m
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
+CONFIG_INET6_ESPINTCP=y
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_MIP6=m
CONFIG_IPV6_VTI=m
@@ -146,7 +145,10 @@ CONFIG_IPV6_SIT=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_MPTCP=y
CONFIG_NETFILTER=y
+CONFIG_BRIDGE_NETFILTER=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_EVENTS=y
@@ -311,6 +313,7 @@ CONFIG_L2TP_V3=y
CONFIG_L2TP_IP=m
CONFIG_L2TP_ETH=m
CONFIG_BRIDGE=m
+CONFIG_BRIDGE_MRP=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
@@ -335,6 +338,7 @@ CONFIG_NET_SCH_CODEL=m
CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_ETS=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
@@ -358,6 +362,7 @@ CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_GATE=m
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
@@ -368,6 +373,7 @@ CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
# CONFIG_NET_DROP_MONITOR is not set
CONFIG_PCI=y
+# CONFIG_PCIEASPM is not set
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_UEVENT_HELPER=y
@@ -430,6 +436,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
@@ -444,6 +451,8 @@ CONFIG_EQUALIZER=m
CONFIG_IFB=m
CONFIG_MACVLAN=m
CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_BAREUDP=m
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
@@ -477,7 +486,6 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-# CONFIG_MLXFW is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -510,6 +518,7 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -557,6 +566,8 @@ CONFIG_VFIO_MDEV_DEVICE=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -600,11 +611,13 @@ CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
CONFIG_HUGETLBFS=y
CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
@@ -642,8 +655,8 @@ CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_UNICODE=y
CONFIG_PERSISTENT_KEYRINGS=y
-CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
@@ -667,8 +680,11 @@ CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_CURVE25519=m
+CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
@@ -678,6 +694,7 @@ CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
@@ -694,6 +711,7 @@ CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
@@ -712,6 +730,9 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -725,6 +746,7 @@ CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CORDIC=m
+CONFIG_PRIME_NUMBERS=m
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
@@ -739,15 +761,19 @@ CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
+CONFIG_TEST_LOCKUP=m
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_REF_SCALE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
+CONFIG_BOOTTIME_TRACING=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_STACK_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_S390_PTDUMP=y
CONFIG_LKDTM=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 20c51e5d9353..8f67c55625f9 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -30,6 +30,7 @@ CONFIG_IBM_PARTITION=y
# CONFIG_BOUNCE is not set
CONFIG_NET=y
# CONFIG_IUCV is not set
+# CONFIG_ETHTOOL_NETLINK is not set
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_RAM=y
# CONFIG_BLK_DEV_XPRAM is not set
@@ -55,6 +56,8 @@ CONFIG_RAW_DRIVER=y
# CONFIG_MONWRITER is not set
# CONFIG_S390_VMUR is not set
# CONFIG_HID is not set
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
@@ -62,12 +65,15 @@ CONFIG_CONFIGFS_FS=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity"
+# CONFIG_ZLIB_DFLTCC is not set
CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_PANIC_ON_OOPS=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_RCU_TRACE is not set
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index d977643fa627..5057773f82e9 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -249,7 +249,7 @@ static void prng_tdes_deinstantiate(void)
{
pr_debug("The prng module stopped "
"after running in triple DES mode\n");
- kzfree(prng_data);
+ kfree_sensitive(prng_data);
}
@@ -442,7 +442,7 @@ outfree:
static void prng_sha512_deinstantiate(void)
{
pr_debug("The prng module stopped after running in SHA-512 mode\n");
- kzfree(prng_data);
+ kfree_sensitive(prng_data);
}
@@ -693,7 +693,7 @@ static ssize_t prng_chunksize_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+ return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
}
static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
@@ -712,7 +712,7 @@ static ssize_t prng_counter_show(struct device *dev,
counter = prng_data->prngws.byte_counter;
mutex_unlock(&prng_data->mutex);
- return snprintf(buf, PAGE_SIZE, "%llu\n", counter);
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", counter);
}
static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
@@ -721,7 +721,7 @@ static ssize_t prng_errorflag_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+ return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
}
static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
@@ -731,9 +731,9 @@ static ssize_t prng_mode_show(struct device *dev,
char *buf)
{
if (prng_mode == PRNG_MODE_TDES)
- return snprintf(buf, PAGE_SIZE, "TDES\n");
+ return scnprintf(buf, PAGE_SIZE, "TDES\n");
else
- return snprintf(buf, PAGE_SIZE, "SHA512\n");
+ return scnprintf(buf, PAGE_SIZE, "SHA512\n");
}
static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
@@ -756,7 +756,7 @@ static ssize_t prng_reseed_limit_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+ return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
}
static ssize_t prng_reseed_limit_store(struct device *dev,
struct device_attribute *attr,
@@ -787,7 +787,7 @@ static ssize_t prng_strength_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "256\n");
+ return scnprintf(buf, PAGE_SIZE, "256\n");
}
static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 7c15542d3685..698b1e6d3c14 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -27,7 +27,7 @@
#include "sha.h"
-static int sha1_init(struct shash_desc *desc)
+static int s390_sha1_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
@@ -42,7 +42,7 @@ static int sha1_init(struct shash_desc *desc)
return 0;
}
-static int sha1_export(struct shash_desc *desc, void *out)
+static int s390_sha1_export(struct shash_desc *desc, void *out)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
struct sha1_state *octx = out;
@@ -53,7 +53,7 @@ static int sha1_export(struct shash_desc *desc, void *out)
return 0;
}
-static int sha1_import(struct shash_desc *desc, const void *in)
+static int s390_sha1_import(struct shash_desc *desc, const void *in)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
const struct sha1_state *ictx = in;
@@ -67,11 +67,11 @@ static int sha1_import(struct shash_desc *desc, const void *in)
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_init,
+ .init = s390_sha1_init,
.update = s390_sha_update,
.final = s390_sha_final,
- .export = sha1_export,
- .import = sha1_import,
+ .export = s390_sha1_export,
+ .import = s390_sha1_import,
.descsize = sizeof(struct s390_sha_ctx),
.statesize = sizeof(struct sha1_state),
.base = {
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 83f6e85de7bc..319efa0e6d02 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -6,5 +6,6 @@ generated-y += unistd_nr.h
generic-y += asm-offsets.h
generic-y += export.h
+generic-y += kvm_types.h
generic-y += local64.h
generic-y += mcs_spinlock.h
diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h
new file mode 100644
index 000000000000..11f615eb0066
--- /dev/null
+++ b/arch/s390/include/asm/asm-const.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ASM_CONST_H
+#define _ASM_S390_ASM_CONST_H
+
+#ifdef __ASSEMBLY__
+# define stringify_in_c(...) __VA_ARGS__
+#else
+/* This version of stringify will deal with commas... */
+# define __stringify_in_c(...) #__VA_ARGS__
+# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+#endif
+#endif /* _ASM_S390_ASM_CONST_H */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 491ad53a0d4e..11c5952e1afa 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,8 +15,6 @@
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-#define ATOMIC_INIT(i) { (i) }
-
static inline int atomic_read(const atomic_t *v)
{
int c;
@@ -47,7 +45,11 @@ static inline int atomic_fetch_add(int i, atomic_t *v)
static inline void atomic_add(int i, atomic_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ /*
+ * Order of conditions is important to circumvent gcc 10 bug:
+ * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html
+ */
+ if ((i > -129) && (i < 128) && __builtin_constant_p(i)) {
__atomic_add_const(i, &v->counter);
return;
}
@@ -114,7 +116,11 @@ static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v)
static inline void atomic64_add(s64 i, atomic64_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ /*
+ * Order of conditions is important to circumvent gcc 10 bug:
+ * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html
+ */
+ if ((i > -129) && (i < 128) && __builtin_constant_p(i)) {
__atomic64_add_const(i, (long *)&v->counter);
return;
}
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 7725f8006fdf..0b25f28351ed 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -2,7 +2,7 @@
#ifndef _ASM_S390_BUG_H
#define _ASM_S390_BUG_H
-#include <linux/kernel.h>
+#include <linux/compiler.h>
#ifdef CONFIG_BUG
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 865ce1cb86d5..3cfe1eb89838 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -11,6 +11,7 @@
#include <linux/device.h>
#include <linux/mod_devicetable.h>
+#include <asm/chsc.h>
#include <asm/fcx.h>
#include <asm/irq.h>
#include <asm/schid.h>
@@ -236,4 +237,8 @@ extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int);
u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx);
+int ccw_device_pnso(struct ccw_device *cdev,
+ struct chsc_pnso_area *pnso_area,
+ struct chsc_pnso_resume_token resume_token,
+ int cnc);
#endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 7293c139dd79..ad3acb1e882b 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -36,11 +36,6 @@ struct ccwgroup_device {
* @set_online: function called when device is set online
* @set_offline: function called when device is set offline
* @shutdown: function called when device is shut down
- * @prepare: prepare for pm state transition
- * @complete: undo work done in @prepare
- * @freeze: callback for freezing during hibernation snapshotting
- * @thaw: undo work done in @freeze
- * @restore: callback for restoring after hibernation
* @driver: embedded driver structure
* @ccw_driver: supported ccw_driver (optional)
*/
@@ -50,11 +45,6 @@ struct ccwgroup_driver {
int (*set_online) (struct ccwgroup_device *);
int (*set_offline) (struct ccwgroup_device *);
void (*shutdown)(struct ccwgroup_device *);
- int (*prepare) (struct ccwgroup_device *);
- void (*complete) (struct ccwgroup_device *);
- int (*freeze)(struct ccwgroup_device *);
- int (*thaw) (struct ccwgroup_device *);
- int (*restore)(struct ccwgroup_device *);
struct device_driver driver;
struct ccw_driver *ccw_driver;
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 91e376b0d28c..6d01c96aeb5c 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -39,25 +39,6 @@ csum_partial(const void *buff, int len, __wsum sum)
return sum;
}
-/*
- * the same as csum_partial_copy, but copies from user space.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- *
- * Copy from userspace and compute checksum.
- */
-static inline __wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum,
- int *err_ptr)
-{
- if (unlikely(copy_from_user(dst, src, len)))
- *err_ptr = -EFAULT;
- return csum_partial(dst, len, sum);
-}
-
-
static inline __wsum
csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
{
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
new file mode 100644
index 000000000000..36ce2d25a5fc
--- /dev/null
+++ b/arch/s390/include/asm/chsc.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s): Alexandra Winter <wintera@linux.ibm.com>
+ *
+ * Interface for Channel Subsystem Call
+ */
+#ifndef _ASM_S390_CHSC_H
+#define _ASM_S390_CHSC_H
+
+#include <uapi/asm/chsc.h>
+
+/**
+ * struct chsc_pnso_naid_l2 - network address information descriptor
+ * @nit: Network interface token
+ * @addr_lnid: network address and logical network id (VLAN ID)
+ */
+struct chsc_pnso_naid_l2 {
+ u64 nit;
+ struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
+struct chsc_pnso_resume_token {
+ u64 t1;
+ u64 t2;
+} __packed;
+
+struct chsc_pnso_naihdr {
+ struct chsc_pnso_resume_token resume_token;
+ u32:32;
+ u32 instance;
+ u32:24;
+ u8 naids;
+ u32 reserved[3];
+} __packed;
+
+struct chsc_pnso_area {
+ struct chsc_header request;
+ u8:2;
+ u8 m:1;
+ u8:5;
+ u8:2;
+ u8 ssid:2;
+ u8 fmt:4;
+ u16 sch;
+ u8:8;
+ u8 cssid;
+ u16:16;
+ u8 oc;
+ u32:24;
+ struct chsc_pnso_resume_token resume_token;
+ u32 n:1;
+ u32:31;
+ u32 reserved[3];
+ struct chsc_header response;
+ u32:32;
+ struct chsc_pnso_naihdr naihdr;
+ struct chsc_pnso_naid_l2 entries[0];
+} __packed __aligned(PAGE_SIZE);
+
+#endif /* _ASM_S390_CHSC_H */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 310134015541..c1b82bcc017c 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -2,7 +2,7 @@
/*
* S/390 debug facility
*
- * Copyright IBM Corp. 1999, 2000
+ * Copyright IBM Corp. 1999, 2020
*/
#ifndef DEBUG_H
#define DEBUG_H
@@ -12,7 +12,7 @@
#include <linux/kernel.h>
#include <linux/time.h>
#include <linux/refcount.h>
-#include <uapi/asm/debug.h>
+#include <linux/fs.h>
#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */
#define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */
@@ -26,6 +26,16 @@
#define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */
/* the entry information */
+#define __DEBUG_FEATURE_VERSION 3 /* version of debug feature */
+
+struct __debug_entry {
+ unsigned long clock : 60;
+ unsigned long exception : 1;
+ unsigned long level : 3;
+ void *caller;
+ unsigned short cpu;
+} __packed;
+
typedef struct __debug_entry debug_entry_t;
struct debug_view;
@@ -82,7 +92,6 @@ struct debug_view {
};
extern struct debug_view debug_hex_ascii_view;
-extern struct debug_view debug_raw_view;
extern struct debug_view debug_sprintf_view;
/* do NOT use the _common functions */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 0036eab14391..ca8f85b53a90 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -298,10 +298,8 @@ struct diag26c_mac_resp {
union diag318_info {
unsigned long val;
struct {
- unsigned int cpnc : 8;
- unsigned int cpvc_linux : 24;
- unsigned char cpvc_distro[3];
- unsigned char zero;
+ unsigned long cpnc : 8;
+ unsigned long cpvc : 56;
};
};
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
index ae27f756b409..3beb294fd553 100644
--- a/arch/s390/include/asm/extable.h
+++ b/arch/s390/include/asm/extable.h
@@ -1,12 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __S390_EXTABLE_H
#define __S390_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of three addresses:
+ *
+ * - Address of an instruction that is allowed to fault.
+ * - Address at which the program should continue.
+ * - Optional address of handler that takes pt_regs * argument and runs in
+ * interrupt context.
+ *
+ * No registers are modified, so it is entirely up to the continuation code
+ * to figure out what to do.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -17,6 +25,7 @@
struct exception_table_entry
{
int insn, fixup;
+ long handler;
};
extern struct exception_table_entry *__start_dma_ex_table;
@@ -29,6 +38,39 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
return (unsigned long)&x->fixup + x->fixup;
}
+typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+ struct pt_regs *);
+
+static inline ex_handler_t
+ex_fixup_handler(const struct exception_table_entry *x)
+{
+ if (likely(!x->handler))
+ return NULL;
+ return (ex_handler_t)((unsigned long)&x->handler + x->handler);
+}
+
+static inline bool ex_handle(const struct exception_table_entry *x,
+ struct pt_regs *regs)
+{
+ ex_handler_t handler = ex_fixup_handler(x);
+
+ if (unlikely(handler))
+ return handler(x, regs);
+ regs->psw.addr = extable_fixup(x);
+ return true;
+}
+
#define ARCH_HAS_RELATIVE_EXTABLE
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+ struct exception_table_entry *b,
+ struct exception_table_entry tmp,
+ int delta)
+{
+ a->fixup = b->fixup + delta;
+ b->fixup = tmp.fixup - delta;
+ a->handler = b->handler + delta;
+ b->handler = tmp.handler - delta;
+}
+
#endif
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index de8f0bf5f238..60f9241e5e4a 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -9,8 +9,8 @@
#ifndef _ASM_S390_HUGETLB_H
#define _ASM_S390_HUGETLB_H
+#include <linux/pgtable.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#define hugetlb_free_pgd_range free_pgd_range
#define hugepages_supported() (MACHINE_HAS_EDAT1)
@@ -21,13 +21,6 @@ pte_t huge_ptep_get(pte_t *ptep);
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
-static inline bool is_hugepage_only_range(struct mm_struct *mm,
- unsigned long addr,
- unsigned long len)
-{
- return false;
-}
-
/*
* If the arch doesn't supply something else, assume that hugepage
* size aligned regions are ok without further preparation.
@@ -46,6 +39,7 @@ static inline void arch_clear_hugepage_flags(struct page *page)
{
clear_bit(PG_arch_1, &page->flags);
}
+#define arch_clear_hugepage_flags arch_clear_hugepage_flags
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 5a16f500515a..da014e4f8113 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -26,7 +26,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define IO_SPACE_LIMIT 0
-void __iomem *ioremap(unsigned long offset, unsigned long size);
+void __iomem *ioremap(phys_addr_t addr, size_t size);
void iounmap(volatile void __iomem *addr);
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index b63bd66404b8..7d5cfdda5277 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -21,6 +21,7 @@ struct ipl_parameter_block {
struct ipl_pb0_common common;
struct ipl_pb0_fcp fcp;
struct ipl_pb0_ccw ccw;
+ struct ipl_pb0_nvme nvme;
char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)];
};
} __packed __aligned(PAGE_SIZE);
@@ -30,6 +31,11 @@ struct ipl_parameter_block {
#define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \
sizeof(struct ipl_pb0_fcp))
#define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp))
+
+#define IPL_BP_NVME_LEN (sizeof(struct ipl_pl_hdr) + \
+ sizeof(struct ipl_pb0_nvme))
+#define IPL_BP0_NVME_LEN (sizeof(struct ipl_pb0_nvme))
+
#define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \
sizeof(struct ipl_pb0_ccw))
#define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw))
@@ -59,6 +65,7 @@ enum ipl_type {
IPL_TYPE_FCP = 4,
IPL_TYPE_FCP_DUMP = 8,
IPL_TYPE_NSS = 16,
+ IPL_TYPE_NVME = 32,
};
struct ipl_info
@@ -74,6 +81,10 @@ struct ipl_info
u64 lun;
} fcp;
struct {
+ u32 fid;
+ u32 nsid;
+ } nvme;
+ struct {
char name[NSS_NAME_SIZE + 1];
} nss;
} data;
diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h
index 70930fe5c496..89d6886040c8 100644
--- a/arch/s390/include/asm/kasan.h
+++ b/arch/s390/include/asm/kasan.h
@@ -2,8 +2,6 @@
#ifndef __ASM_KASAN_H
#define __ASM_KASAN_H
-#include <asm/pgtable.h>
-
#ifdef CONFIG_KASAN
#define KASAN_SHADOW_SCALE_SHIFT 3
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d6bcd34f3ec3..463c24e26000 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -31,12 +31,12 @@
#define KVM_USER_MEM_SLOTS 32
/*
- * These seem to be used for allocating ->chip in the routing table,
- * which we don't use. 4096 is an out-of-thin-air value. If we need
- * to look at ->chip later on, we'll need to revisit this.
+ * These seem to be used for allocating ->chip in the routing table, which we
+ * don't use. 1 is as small as we can get to reduce the needed memory. If we
+ * need to look at ->chip later on, we'll need to revisit this.
*/
#define KVM_NR_IRQCHIPS 1
-#define KVM_IRQCHIP_NUM_PINS 4096
+#define KVM_IRQCHIP_NUM_PINS 1
#define KVM_HALT_POLL_NS_DEFAULT 50000
/* s390-specific vcpu->requests bit members */
@@ -260,7 +260,8 @@ struct kvm_s390_sie_block {
__u32 scaol; /* 0x0064 */
__u8 sdf; /* 0x0068 */
__u8 epdx; /* 0x0069 */
- __u8 reserved6a[2]; /* 0x006a */
+ __u8 cpnc; /* 0x006a */
+ __u8 reserved6b; /* 0x006b */
__u32 todpr; /* 0x006c */
#define GISA_FORMAT1 0x00000001
__u32 gd; /* 0x0070 */
@@ -375,6 +376,8 @@ struct kvm_vcpu_stat {
u64 halt_poll_invalid;
u64 halt_no_poll_steal;
u64 halt_wakeup;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
u64 instruction_lctl;
u64 instruction_lctlg;
u64 instruction_stctl;
@@ -743,6 +746,7 @@ struct kvm_vcpu_arch {
bool gs_enabled;
bool skey_enabled;
struct kvm_s390_pv_vcpu pv;
+ union diag318_info diag318_info;
};
struct kvm_vm_stat {
@@ -971,17 +975,19 @@ struct kvm_arch_async_pf {
unsigned long pfault_token;
};
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
+static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {}
+
void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index 7f22262b0e46..a0a7a2c72bd4 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -2,38 +2,27 @@
#ifndef __ASM_LINKAGE_H
#define __ASM_LINKAGE_H
+#include <asm/asm-const.h>
#include <linux/stringify.h>
#define __ALIGN .align 4, 0x07
#define __ALIGN_STR __stringify(__ALIGN)
-#ifndef __ASSEMBLY__
-
/*
* Helper macro for exception table entries
*/
-#define EX_TABLE(_fault, _target) \
- ".section __ex_table,\"a\"\n" \
- ".align 4\n" \
- ".long (" #_fault ") - .\n" \
- ".long (" #_target ") - .\n" \
- ".previous\n"
-
-#else /* __ASSEMBLY__ */
-#define EX_TABLE(_fault, _target) \
- .section __ex_table,"a" ; \
- .align 4 ; \
- .long (_fault) - . ; \
- .long (_target) - . ; \
- .previous
+#define __EX_TABLE(_section, _fault, _target) \
+ stringify_in_c(.section _section,"a";) \
+ stringify_in_c(.align 8;) \
+ stringify_in_c(.long (_fault) - .;) \
+ stringify_in_c(.long (_target) - .;) \
+ stringify_in_c(.quad 0;) \
+ stringify_in_c(.previous)
-#define EX_TABLE_DMA(_fault, _target) \
- .section .dma.ex_table, "a" ; \
- .align 4 ; \
- .long (_fault) - . ; \
- .long (_target) - . ; \
- .previous
+#define EX_TABLE(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target)
+#define EX_TABLE_DMA(_fault, _target) \
+ __EX_TABLE(.dma.ex_table, _fault, _target)
-#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index b160da8fa14b..5afee80cff58 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -99,7 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc);
void nmi_free_per_cpu(struct lowcore *lc);
void s390_handle_mcck(void);
-void s390_do_machine_check(struct pt_regs *regs);
+int s390_do_machine_check(struct pt_regs *regs);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 7485ee561fec..99b92c3e46b0 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -22,12 +22,17 @@ int pci_domain_nr(struct pci_bus *);
int pci_proc_domain(struct pci_bus *);
#define ZPCI_BUS_NR 0 /* default bus number */
-#define ZPCI_DEVFN 0 /* default device number */
#define ZPCI_NR_DMA_SPACES 1
#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
#define ZPCI_DOMAIN_BITMAP_SIZE (1 << 16)
+#ifdef PCI
+#if (ZPCI_NR_DEVICES > ZPCI_DOMAIN_BITMAP_SIZE)
+# error ZPCI_NR_DEVICES can not be bigger than ZPCI_DOMAIN_BITMAP_SIZE
+#endif
+#endif /* PCI */
+
/* PCI Function Controls */
#define ZPCI_FC_FN_ENABLED 0x80
#define ZPCI_FC_ERROR 0x40
@@ -94,10 +99,26 @@ struct zpci_bar_struct {
struct s390_domain;
+#define ZPCI_FUNCTIONS_PER_BUS 256
+struct zpci_bus {
+ struct kref kref;
+ struct pci_bus *bus;
+ struct zpci_dev *function[ZPCI_FUNCTIONS_PER_BUS];
+ struct list_head resources;
+ struct list_head bus_next;
+ struct resource bus_resource;
+ int pchid;
+ int domain_nr;
+ bool multifunction;
+ enum pci_bus_speed max_bus_speed;
+};
+
/* Private data per function */
struct zpci_dev {
- struct pci_bus *bus;
+ struct zpci_bus *zbus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
+ struct list_head bus_next;
+ struct kref kref;
struct hotplug_slot hotplug_slot;
enum zpci_state state;
@@ -107,7 +128,12 @@ struct zpci_dev {
u16 pchid; /* physical channel ID */
u8 pfgid; /* function group ID */
u8 pft; /* pci function type */
- u16 domain;
+ u8 port;
+ u8 rid_available : 1;
+ u8 has_hp_slot : 1;
+ u8 is_physfn : 1;
+ u8 reserved : 5;
+ unsigned int devfn; /* DEVFN part of the RID*/
struct mutex lock;
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
@@ -167,6 +193,7 @@ static inline bool zdev_enabled(struct zpci_dev *zdev)
extern const struct attribute_group *zpci_attr_groups[];
extern unsigned int s390_pci_force_floating __initdata;
+extern unsigned int s390_pci_no_rid;
/* -----------------------------------------------------------------------------
Prototypes
@@ -227,7 +254,14 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
/* Helpers */
static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
{
- return pdev->sysdata;
+ struct zpci_bus *zbus = pdev->sysdata;
+
+ return zbus->function[pdev->devfn];
+}
+
+static inline struct zpci_dev *to_zpci_dev(struct device *dev)
+{
+ return to_zpci(to_pci_dev(dev));
}
struct zpci_dev *get_zdev_by_fid(u32);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index bd2cb4ea7d93..eb51272dd2cc 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -93,7 +93,10 @@ struct clp_req_query_pci {
struct clp_rsp_query_pci {
struct clp_rsp_hdr hdr;
u16 vfn; /* virtual fn number */
- u16 : 6;
+ u16 : 3;
+ u16 rid_avail : 1;
+ u16 is_physfn : 1;
+ u16 reserved1 : 1;
u16 mio_addr_avail : 1;
u16 util_str_avail : 1; /* utility string available? */
u16 pfgid : 8; /* pci function group id */
@@ -102,12 +105,16 @@ struct clp_rsp_query_pci {
u16 pchid;
__le32 bar[PCI_STD_NUM_BARS];
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
- u32 : 16;
+ u16 : 12;
+ u16 port : 4;
u8 fmb_len;
u8 pft; /* pci function type */
u64 sdma; /* start dma as */
u64 edma; /* end dma as */
- u32 reserved[11];
+#define ZPCI_RID_MASK_DEVFN 0x00ff
+ u16 rid; /* BUS/DEVFN PCI address */
+ u16 reserved0;
+ u32 reserved[10];
u32 uid; /* user defined id */
u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
u32 reserved2[16];
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 419fac7a62c0..f62cd3ed2d44 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -131,12 +131,6 @@ static inline void validate_st_entry(unsigned long *entry)
*entry |= ZPCI_TABLE_VALID;
}
-static inline void invalidate_table_entry(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_VALID_MASK;
- *entry |= ZPCI_TABLE_INVALID;
-}
-
static inline void invalidate_pt_entry(unsigned long *entry)
{
WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
@@ -173,11 +167,6 @@ static inline int pt_entry_isvalid(unsigned long entry)
return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
}
-static inline int entry_isprotected(unsigned long entry)
-{
- return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED;
-}
-
static inline unsigned long *get_rt_sto(unsigned long entry)
{
return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 50b4ce8cddfd..918f0ba4f4d2 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -29,7 +29,7 @@
typedef typeof(pcp) pcp_op_T__; \
pcp_op_T__ old__, new__, prev__; \
pcp_op_T__ *ptr__; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
prev__ = *ptr__; \
do { \
@@ -37,7 +37,7 @@
new__ = old__ op (val); \
prev__ = cmpxchg(ptr__, old__, new__); \
} while (prev__ != old__); \
- preempt_enable(); \
+ preempt_enable_notrace(); \
new__; \
})
@@ -68,7 +68,7 @@
typedef typeof(pcp) pcp_op_T__; \
pcp_op_T__ val__ = (val); \
pcp_op_T__ old__, *ptr__; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
if (__builtin_constant_p(val__) && \
((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
@@ -84,7 +84,7 @@
: [val__] "d" (val__) \
: "cc"); \
} \
- preempt_enable(); \
+ preempt_enable_notrace(); \
}
#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
@@ -95,14 +95,14 @@
typedef typeof(pcp) pcp_op_T__; \
pcp_op_T__ val__ = (val); \
pcp_op_T__ old__, *ptr__; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
asm volatile( \
op " %[old__],%[val__],%[ptr__]\n" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
: [val__] "d" (val__) \
: "cc"); \
- preempt_enable(); \
+ preempt_enable_notrace(); \
old__ + val__; \
})
@@ -114,14 +114,14 @@
typedef typeof(pcp) pcp_op_T__; \
pcp_op_T__ val__ = (val); \
pcp_op_T__ old__, *ptr__; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
asm volatile( \
op " %[old__],%[val__],%[ptr__]\n" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
: [val__] "d" (val__) \
: "cc"); \
- preempt_enable(); \
+ preempt_enable_notrace(); \
}
#define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan")
@@ -136,10 +136,10 @@
typedef typeof(pcp) pcp_op_T__; \
pcp_op_T__ ret__; \
pcp_op_T__ *ptr__; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
ret__ = cmpxchg(ptr__, oval, nval); \
- preempt_enable(); \
+ preempt_enable_notrace(); \
ret__; \
})
@@ -152,10 +152,10 @@
({ \
typeof(pcp) *ptr__; \
typeof(pcp) ret__; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
ptr__ = raw_cpu_ptr(&(pcp)); \
ret__ = xchg(ptr__, nval); \
- preempt_enable(); \
+ preempt_enable_notrace(); \
ret__; \
})
@@ -171,11 +171,11 @@
typeof(pcp1) *p1__; \
typeof(pcp2) *p2__; \
int ret__; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
p1__ = raw_cpu_ptr(&(pcp1)); \
p2__ = raw_cpu_ptr(&(pcp2)); \
ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \
- preempt_enable(); \
+ preempt_enable_notrace(); \
ret__; \
})
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6076c8c912d2..b55561cc8786 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1229,7 +1229,6 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
@@ -1260,38 +1259,52 @@ static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address)
}
#define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address)
{
- if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
- return (p4d_t *) pgd_deref(*pgd) + p4d_index(address);
- return (p4d_t *) pgd;
+ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
+ return (p4d_t *) pgd_deref(pgd) + p4d_index(address);
+ return (p4d_t *) pgdp;
}
+#define p4d_offset_lockless p4d_offset_lockless
-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address)
{
- if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
- return (pud_t *) p4d_deref(*p4d) + pud_index(address);
- return (pud_t *) p4d;
+ return p4d_offset_lockless(pgdp, *pgdp, address);
}
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address)
{
- if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
- return (pmd_t *) pud_deref(*pud) + pmd_index(address);
- return (pmd_t *) pud;
+ if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
+ return (pud_t *) p4d_deref(p4d) + pud_index(address);
+ return (pud_t *) p4dp;
}
+#define pud_offset_lockless pud_offset_lockless
-static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
+static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address)
{
- return (pte_t *) pmd_deref(*pmd) + pte_index(address);
+ return pud_offset_lockless(p4dp, *p4dp, address);
}
+#define pud_offset pud_offset
-#define pte_offset_kernel(pmd, address) pte_offset(pmd, address)
-#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
+static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address)
+{
+ if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
+ return (pmd_t *) pud_deref(pud) + pmd_index(address);
+ return (pmd_t *) pudp;
+}
+#define pmd_offset_lockless pmd_offset_lockless
+
+static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address)
+{
+ return pmd_offset_lockless(pudp, *pudp, address);
+}
+#define pmd_offset pmd_offset
-static inline void pte_unmap(pte_t *pte) { }
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+ return (unsigned long) pmd_deref(pmd);
+}
static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
@@ -1560,7 +1573,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
}
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
-static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
+static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmdp, int full)
{
@@ -1569,7 +1582,7 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
return pmd;
}
- return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
}
#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
@@ -1674,7 +1687,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
#define kern_addr_valid(addr) (1)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
-extern int vmem_remove_mapping(unsigned long start, unsigned long size);
+extern void vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);
@@ -1683,6 +1696,4 @@ extern void s390_reset_cmma(struct mm_struct *mm);
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#include <asm-generic/pgtable.h>
-
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 555d148ccf32..962da04234af 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,17 +14,15 @@
#include <linux/bits.h>
-#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
-#define CIF_ASCE_PRIMARY 1 /* primary asce needs fixup / uaccess */
-#define CIF_ASCE_SECONDARY 2 /* secondary asce needs fixup / uaccess */
-#define CIF_NOHZ_DELAY 3 /* delay HZ disable for a tick */
-#define CIF_FPU 4 /* restore FPU registers */
-#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
-#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
-#define CIF_MCCK_GUEST 7 /* machine check happening in guest */
-#define CIF_DEDICATED_CPU 8 /* this CPU is dedicated */
-
-#define _CIF_MCCK_PENDING BIT(CIF_MCCK_PENDING)
+#define CIF_ASCE_PRIMARY 0 /* primary asce needs fixup / uaccess */
+#define CIF_ASCE_SECONDARY 1 /* secondary asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
+#define CIF_FPU 3 /* restore FPU registers */
+#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
+#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
+#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
+#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
+
#define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY)
#define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY)
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index f009a13afe71..16b3e4396312 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -184,5 +184,10 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
return regs->gprs[15];
}
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+ regs->gprs[2] = rc;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 86a3796e9be8..e69dbf438f99 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -365,34 +365,6 @@ struct qdio_initialize {
struct qdio_outbuf_state *output_sbal_state_array;
};
-/**
- * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
- * @l3_ipv6_addr: entry contains IPv6 address
- * @l3_ipv4_addr: entry contains IPv4 address
- * @l2_addr_lnid: entry contains MAC address and VLAN ID
- */
-enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
-
-/**
- * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
- * @nit: Network interface token
- * @addr: Address of one of the three types
- *
- * The struct is passed to the callback function by qdio_brinfo_desc()
- */
-struct qdio_brinfo_entry_l3_ipv6 {
- u64 nit;
- struct { unsigned char _s6_addr[16]; } addr;
-} __packed;
-struct qdio_brinfo_entry_l3_ipv4 {
- u64 nit;
- struct { uint32_t _s_addr; } addr;
-} __packed;
-struct qdio_brinfo_entry_l2 {
- u64 nit;
- struct { u8 mac[6]; u16 lnid; } addr_lnid;
-} __packed;
-
#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */
#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */
#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */
@@ -423,10 +395,5 @@ extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr,
extern int qdio_shutdown(struct ccw_device *, int);
extern int qdio_free(struct ccw_device *);
extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
-extern int qdio_pnso_brinfo(struct subchannel_id schid,
- int cnc, u16 *response,
- void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
- void *entry),
- void *priv);
#endif /* __QDIO_H__ */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 231a51e870fe..7e155fb6c254 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -10,6 +10,7 @@
#include <asm/sigp.h>
#include <asm/lowcore.h>
+#include <asm/processor.h>
#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
@@ -54,9 +55,14 @@ static inline int smp_get_base_cpu(int cpu)
return cpu - (cpu % (smp_cpu_mtid + 1));
}
+static inline void smp_cpus_done(unsigned int max_cpus)
+{
+}
+
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
extern int __cpu_disable(void);
+extern void schedule_mcck_handler(void);
#endif /* __ASM_SMP_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index f073292e9fdb..d9d5de0f67ff 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -33,7 +33,17 @@ static inline void syscall_rollback(struct task_struct *task,
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
- return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0;
+ unsigned long error = regs->gprs[2];
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_31BIT)) {
+ /*
+ * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ * and will match correctly in comparisons.
+ */
+ error = (long)(int)error;
+ }
+#endif
+ return IS_ERR_VALUE(error) ? error : 0;
}
static inline long syscall_get_return_value(struct task_struct *task,
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index 3c3d6fe8e2f0..1320f4213d80 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -30,7 +30,7 @@
})
#define __S390_SYS_STUBx(x, name, ...) \
- asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
+ asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \
asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
{ \
@@ -46,7 +46,7 @@
#define COMPAT_SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
asmlinkage long __s390_compat_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__s390_compat__sys_##sname, ERRNO); \
+ ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \
asmlinkage long __s390_compat_sys_##sname(void)
#define SYSCALL_DEFINE0(sname) \
@@ -72,7 +72,7 @@
asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_compat_sys##name)))); \
- ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \
+ ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \
static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index e582fbe59e20..13a04fcf7762 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -24,7 +24,6 @@
#ifndef __ASSEMBLY__
#include <asm/lowcore.h>
#include <asm/page.h>
-#include <asm/processor.h>
#define STACK_INIT_OFFSET \
(THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 6bf3a45ccfec..289aaff4d365 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -49,11 +49,6 @@ static inline void set_clock_comparator(__u64 time)
asm volatile("sckc %0" : : "Q" (time));
}
-static inline void store_clock_comparator(__u64 *time)
-{
- asm volatile("stckc %0" : "=Q" (*time));
-}
-
void clock_comparator_work(void);
void __init time_early_init(void);
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index aa406c05a350..954fa8ca6cbd 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -36,7 +36,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
#define p4d_free_tlb p4d_free_tlb
#define pud_free_tlb pud_free_tlb
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm-generic/tlb.h>
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 82703e03f35d..acce6a08a1fa 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -5,8 +5,6 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/processor.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
/*
* Flush all TLB entries on the local CPU.
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index fbb507504a3b..3a0ac0c7a9a3 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#define node_distance(a, b) __node_distance(a, b)
-static inline int __node_distance(int a, int b)
-{
- return 0;
-}
-
#else /* !CONFIG_NUMA */
#define numa_node_id numa_node_id
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index a470f1fa9f2a..f09444d6aeab 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -32,7 +32,7 @@
#define USER_DS_SACF (3)
#define get_fs() (current->thread.mm_segment)
-#define segment_eq(a,b) (((a) & 2) == ((b) & 2))
+#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS)
void set_fs(mm_segment_t fs);
@@ -276,6 +276,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
}
int copy_to_user_real(void __user *dest, void *src, unsigned long count);
-void s390_kernel_write(void *dst, const void *src, size_t size);
+void *s390_kernel_write(void *dst, const void *src, size_t size);
#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 3bcfdeb01395..0cd085cdeb4f 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -36,6 +36,7 @@ struct vdso_data {
__u32 tk_shift; /* Shift used for xtime_nsec 0x60 */
__u32 ts_dir; /* TOD steering direction 0x64 */
__u64 ts_end; /* TOD steering end 0x68 */
+ __u32 hrtimer_res; /* hrtimer resolution 0x70 */
};
struct vdso_per_cpu_data {
diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h
deleted file mode 100644
index c7c564d9aea4..000000000000
--- a/arch/s390/include/uapi/asm/debug.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * S/390 debug facility
- *
- * Copyright IBM Corp. 1999, 2000
- */
-
-#ifndef _UAPIDEBUG_H
-#define _UAPIDEBUG_H
-
-#include <linux/fs.h>
-
-/* Note:
- * struct __debug_entry must be defined outside of #ifdef __KERNEL__
- * in order to allow a user program to analyze the 'raw'-view.
- */
-
-struct __debug_entry{
- union {
- struct {
- unsigned long long clock:52;
- unsigned long long exception:1;
- unsigned long long level:3;
- unsigned long long cpuid:8;
- } fields;
-
- unsigned long long stck;
- } id;
- void* caller;
-} __attribute__((packed));
-
-
-#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */
-
-#endif /* _UAPIDEBUG_H */
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
index 451ba7d08905..d1ecd5d722a0 100644
--- a/arch/s390/include/uapi/asm/ipl.h
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -27,6 +27,7 @@ enum ipl_pbt {
IPL_PBT_FCP = 0,
IPL_PBT_SCP_DATA = 1,
IPL_PBT_CCW = 2,
+ IPL_PBT_NVME = 4,
};
/* IPL Parameter Block 0 with common fields */
@@ -67,6 +68,30 @@ struct ipl_pb0_fcp {
#define IPL_PB0_FCP_OPT_IPL 0x10
#define IPL_PB0_FCP_OPT_DUMP 0x20
+/* IPL Parameter Block 0 for NVMe */
+struct ipl_pb0_nvme {
+ __u32 len;
+ __u8 pbt;
+ __u8 reserved1[3];
+ __u8 loadparm[8];
+ __u8 reserved2[304];
+ __u8 opt;
+ __u8 reserved3[3];
+ __u32 fid;
+ __u8 reserved4[12];
+ __u32 nsid;
+ __u8 reserved5[4];
+ __u32 bootprog;
+ __u8 reserved6[12];
+ __u64 br_lba;
+ __u32 scp_data_len;
+ __u8 reserved7[260];
+ __u8 scp_data[];
+} __packed;
+
+#define IPL_PB0_NVME_OPT_IPL 0x10
+#define IPL_PB0_NVME_OPT_DUMP 0x20
+
/* IPL Parameter Block 0 for CCW */
struct ipl_pb0_ccw {
__u32 len;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 436ec7636927..7a6b14874d65 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -231,11 +231,13 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
+#define KVM_SYNC_DIAG318 (1UL << 12)
#define KVM_SYNC_S390_VALID_FIELDS \
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
- KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
+ KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
+ KVM_SYNC_DIAG318)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
@@ -264,7 +266,8 @@ struct kvm_sync_regs {
__u8 reserved2 : 7;
__u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
- __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
+ __u64 diag318; /* diagnose 0x318 info */
+ __u8 padding2[184]; /* sdnx needs to be 256byte aligned */
union {
__u8 sdnx[SDNXL]; /* state description annex */
struct {
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index 5a2177e96e88..22fd202856bc 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -36,12 +36,12 @@
* - length(n_modulus) = inputdatalength
*/
struct ica_rsa_modexpo {
- char __user *inputdata;
- unsigned int inputdatalength;
- char __user *outputdata;
- unsigned int outputdatalength;
- char __user *b_key;
- char __user *n_modulus;
+ __u8 __user *inputdata;
+ __u32 inputdatalength;
+ __u8 __user *outputdata;
+ __u32 outputdatalength;
+ __u8 __user *b_key;
+ __u8 __user *n_modulus;
};
/**
@@ -59,15 +59,15 @@ struct ica_rsa_modexpo {
* - length(u_mult_inv) = inputdatalength/2 + 8
*/
struct ica_rsa_modexpo_crt {
- char __user *inputdata;
- unsigned int inputdatalength;
- char __user *outputdata;
- unsigned int outputdatalength;
- char __user *bp_key;
- char __user *bq_key;
- char __user *np_prime;
- char __user *nq_prime;
- char __user *u_mult_inv;
+ __u8 __user *inputdata;
+ __u32 inputdatalength;
+ __u8 __user *outputdata;
+ __u32 outputdatalength;
+ __u8 __user *bp_key;
+ __u8 __user *bq_key;
+ __u8 __user *np_prime;
+ __u8 __user *nq_prime;
+ __u8 __user *u_mult_inv;
};
/**
@@ -83,67 +83,67 @@ struct ica_rsa_modexpo_crt {
* key block
*/
struct CPRBX {
- unsigned short cprb_len; /* CPRB length 220 */
- unsigned char cprb_ver_id; /* CPRB version id. 0x02 */
- unsigned char pad_000[3]; /* Alignment pad bytes */
- unsigned char func_id[2]; /* function id 0x5432 */
- unsigned char cprb_flags[4]; /* Flags */
- unsigned int req_parml; /* request parameter buffer len */
- unsigned int req_datal; /* request data buffer */
- unsigned int rpl_msgbl; /* reply message block length */
- unsigned int rpld_parml; /* replied parameter block len */
- unsigned int rpl_datal; /* reply data block len */
- unsigned int rpld_datal; /* replied data block len */
- unsigned int req_extbl; /* request extension block len */
- unsigned char pad_001[4]; /* reserved */
- unsigned int rpld_extbl; /* replied extension block len */
- unsigned char padx000[16 - sizeof(char *)];
- unsigned char *req_parmb; /* request parm block 'address' */
- unsigned char padx001[16 - sizeof(char *)];
- unsigned char *req_datab; /* request data block 'address' */
- unsigned char padx002[16 - sizeof(char *)];
- unsigned char *rpl_parmb; /* reply parm block 'address' */
- unsigned char padx003[16 - sizeof(char *)];
- unsigned char *rpl_datab; /* reply data block 'address' */
- unsigned char padx004[16 - sizeof(char *)];
- unsigned char *req_extb; /* request extension block 'addr'*/
- unsigned char padx005[16 - sizeof(char *)];
- unsigned char *rpl_extb; /* reply extension block 'address'*/
- unsigned short ccp_rtcode; /* server return code */
- unsigned short ccp_rscode; /* server reason code */
- unsigned int mac_data_len; /* Mac Data Length */
- unsigned char logon_id[8]; /* Logon Identifier */
- unsigned char mac_value[8]; /* Mac Value */
- unsigned char mac_content_flgs;/* Mac content flag byte */
- unsigned char pad_002; /* Alignment */
- unsigned short domain; /* Domain */
- unsigned char usage_domain[4];/* Usage domain */
- unsigned char cntrl_domain[4];/* Control domain */
- unsigned char S390enf_mask[4];/* S/390 enforcement mask */
- unsigned char pad_004[36]; /* reserved */
+ __u16 cprb_len; /* CPRB length 220 */
+ __u8 cprb_ver_id; /* CPRB version id. 0x02 */
+ __u8 pad_000[3]; /* Alignment pad bytes */
+ __u8 func_id[2]; /* function id 0x5432 */
+ __u8 cprb_flags[4]; /* Flags */
+ __u32 req_parml; /* request parameter buffer len */
+ __u32 req_datal; /* request data buffer */
+ __u32 rpl_msgbl; /* reply message block length */
+ __u32 rpld_parml; /* replied parameter block len */
+ __u32 rpl_datal; /* reply data block len */
+ __u32 rpld_datal; /* replied data block len */
+ __u32 req_extbl; /* request extension block len */
+ __u8 pad_001[4]; /* reserved */
+ __u32 rpld_extbl; /* replied extension block len */
+ __u8 padx000[16 - sizeof(__u8 *)];
+ __u8 __user *req_parmb; /* request parm block 'address' */
+ __u8 padx001[16 - sizeof(__u8 *)];
+ __u8 __user *req_datab; /* request data block 'address' */
+ __u8 padx002[16 - sizeof(__u8 *)];
+ __u8 __user *rpl_parmb; /* reply parm block 'address' */
+ __u8 padx003[16 - sizeof(__u8 *)];
+ __u8 __user *rpl_datab; /* reply data block 'address' */
+ __u8 padx004[16 - sizeof(__u8 *)];
+ __u8 __user *req_extb; /* request extension block 'addr'*/
+ __u8 padx005[16 - sizeof(__u8 *)];
+ __u8 __user *rpl_extb; /* reply extension block 'address'*/
+ __u16 ccp_rtcode; /* server return code */
+ __u16 ccp_rscode; /* server reason code */
+ __u32 mac_data_len; /* Mac Data Length */
+ __u8 logon_id[8]; /* Logon Identifier */
+ __u8 mac_value[8]; /* Mac Value */
+ __u8 mac_content_flgs; /* Mac content flag byte */
+ __u8 pad_002; /* Alignment */
+ __u16 domain; /* Domain */
+ __u8 usage_domain[4]; /* Usage domain */
+ __u8 cntrl_domain[4]; /* Control domain */
+ __u8 S390enf_mask[4]; /* S/390 enforcement mask */
+ __u8 pad_004[36]; /* reserved */
} __attribute__((packed));
/**
* xcRB
*/
struct ica_xcRB {
- unsigned short agent_ID;
- unsigned int user_defined;
- unsigned short request_ID;
- unsigned int request_control_blk_length;
- unsigned char padding1[16 - sizeof(char *)];
- char __user *request_control_blk_addr;
- unsigned int request_data_length;
- char padding2[16 - sizeof(char *)];
- char __user *request_data_address;
- unsigned int reply_control_blk_length;
- char padding3[16 - sizeof(char *)];
- char __user *reply_control_blk_addr;
- unsigned int reply_data_length;
- char padding4[16 - sizeof(char *)];
- char __user *reply_data_addr;
- unsigned short priority_window;
- unsigned int status;
+ __u16 agent_ID;
+ __u32 user_defined;
+ __u16 request_ID;
+ __u32 request_control_blk_length;
+ __u8 _padding1[16 - sizeof(__u8 *)];
+ __u8 __user *request_control_blk_addr;
+ __u32 request_data_length;
+ __u8 _padding2[16 - sizeof(__u8 *)];
+ __u8 __user *request_data_address;
+ __u32 reply_control_blk_length;
+ __u8 _padding3[16 - sizeof(__u8 *)];
+ __u8 __user *reply_control_blk_addr;
+ __u32 reply_data_length;
+ __u8 __padding4[16 - sizeof(__u8 *)];
+ __u8 __user *reply_data_addr;
+ __u16 priority_window;
+ __u32 status;
} __attribute__((packed));
/**
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 75f26d775027..efca70970761 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -33,11 +33,6 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-#
-# Pass UTS_MACHINE for user_regset definition
-#
-CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
@@ -54,6 +49,7 @@ CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e80f0e6f5972..5d8cc1864566 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -11,9 +11,9 @@
#include <linux/kvm_host.h>
#include <linux/sched.h>
#include <linux/purgatory.h>
+#include <linux/pgtable.h>
#include <asm/idle.h>
#include <asm/vdso.h>
-#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/stacktrace.h>
@@ -76,6 +76,7 @@ int main(void)
OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir);
OFFSET(__VDSO_TS_END, vdso_data, ts_end);
+ OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
@@ -86,7 +87,6 @@ int main(void)
DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
- DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
BLANK();
/* idle data offsets */
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f96a5857bbfd..c42ce348103c 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -549,8 +549,7 @@ static int get_mem_chunk_cnt(void)
int cnt = 0;
u64 idx;
- for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
- MEMBLOCK_NONE, NULL, NULL, NULL)
+ for_each_physmem_range(idx, &oldmem_type, NULL, NULL)
cnt++;
return cnt;
}
@@ -563,8 +562,7 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
phys_addr_t start, end;
u64 idx;
- for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
- MEMBLOCK_NONE, &start, &end, NULL) {
+ for_each_physmem_range(idx, &oldmem_type, &start, &end) {
phdr->p_filesz = end - start;
phdr->p_type = PT_LOAD;
phdr->p_offset = start;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 6d321f5f101d..b6619ae9a3e0 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -2,7 +2,7 @@
/*
* S/390 debug facility
*
- * Copyright IBM Corp. 1999, 2012
+ * Copyright IBM Corp. 1999, 2020
*
* Author(s): Michael Holzheu (holzheu@de.ibm.com),
* Holger Smolinski (Holger.Smolinski@de.ibm.com)
@@ -90,27 +90,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
size_t user_buf_size, loff_t *offset);
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, const char *in_buf);
-static int debug_raw_format_fn(debug_info_t *id,
- struct debug_view *view, char *out_buf,
- const char *in_buf);
-static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
- int area, debug_entry_t *entry, char *out_buf);
-
static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, debug_sprintf_entry_t *curr_event);
/* globals */
-struct debug_view debug_raw_view = {
- "raw",
- NULL,
- &debug_raw_header_fn,
- &debug_raw_format_fn,
- NULL,
- NULL
-};
-EXPORT_SYMBOL(debug_raw_view);
-
struct debug_view debug_hex_ascii_view = {
"hex_ascii",
NULL,
@@ -198,9 +182,10 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas)
if (!areas)
goto fail_malloc_areas;
for (i = 0; i < nr_areas; i++) {
+ /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */
areas[i] = kmalloc_array(pages_per_area,
sizeof(debug_entry_t *),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (!areas[i])
goto fail_malloc_areas2;
for (j = 0; j < pages_per_area; j++) {
@@ -448,7 +433,7 @@ static int debug_format_entry(file_private_info_t *p_info)
act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area]
[p_info->act_page] + p_info->act_entry);
- if (act_entry->id.stck == 0LL)
+ if (act_entry->clock == 0LL)
goto out; /* empty entry */
if (view->header_proc)
len += view->header_proc(id_snap, view, p_info->act_area,
@@ -844,12 +829,17 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id)
static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active,
int level, int exception)
{
- active->id.stck = get_tod_clock_fast() -
- *(unsigned long long *) &tod_clock_base[1];
- active->id.fields.cpuid = smp_processor_id();
+ unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ unsigned long timestamp;
+
+ get_tod_clock_ext(clk);
+ timestamp = *(unsigned long *) &clk[0] >> 4;
+ timestamp -= TOD_UNIX_EPOCH >> 12;
+ active->clock = timestamp;
+ active->cpu = smp_processor_id();
active->caller = __builtin_return_address(0);
- active->id.fields.exception = exception;
- active->id.fields.level = level;
+ active->exception = exception;
+ active->level = level;
proceed_active_entry(id);
if (exception)
proceed_active_area(id);
@@ -867,7 +857,7 @@ static int debug_active = 1;
* if debug_active is already off
*/
static int s390dbf_procactive(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
return proc_dointvec(table, write, buffer, lenp, ppos);
@@ -1385,32 +1375,6 @@ out:
}
/*
- * prints debug header in raw format
- */
-static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
- int area, debug_entry_t *entry, char *out_buf)
-{
- int rc;
-
- rc = sizeof(debug_entry_t);
- memcpy(out_buf, entry, sizeof(debug_entry_t));
- return rc;
-}
-
-/*
- * prints debug data in raw format
- */
-static int debug_raw_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *in_buf)
-{
- int rc;
-
- rc = id->buf_size;
- memcpy(out_buf, in_buf, id->buf_size);
- return rc;
-}
-
-/*
* prints debug data in hex/ascii format
*/
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
@@ -1439,25 +1403,24 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
int area, debug_entry_t *entry, char *out_buf)
{
- unsigned long base, sec, usec;
+ unsigned long sec, usec;
unsigned long caller;
unsigned int level;
char *except_str;
int rc = 0;
- level = entry->id.fields.level;
- base = (*(unsigned long *) &tod_clock_base[0]) >> 4;
- sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12);
+ level = entry->level;
+ sec = entry->clock;
usec = do_div(sec, USEC_PER_SEC);
- if (entry->id.fields.exception)
+ if (entry->exception)
except_str = "*";
else
except_str = "-";
caller = (unsigned long) entry->caller;
- rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ",
+ rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK ",
area, sec, usec, level, except_str,
- entry->id.fields.cpuid, (void *)caller);
+ entry->cpu, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 2c122d8bab93..0dc4b258b98d 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -126,15 +126,16 @@ unknown:
return -EINVAL;
}
-void show_stack(struct task_struct *task, unsigned long *stack)
+void show_stack(struct task_struct *task, unsigned long *stack,
+ const char *loglvl)
{
struct unwind_state state;
- printk("Call Trace:\n");
+ printk("%sCall Trace:\n", loglvl);
unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
- printk(state.reliable ? " [<%016lx>] %pSR \n" :
- "([<%016lx>] %pSR)\n",
- state.ip, (void *) state.ip);
+ printk(state.reliable ? "%s [<%016lx>] %pSR \n" :
+ "%s([<%016lx>] %pSR)\n",
+ loglvl, state.ip, (void *) state.ip);
debug_show_held_locks(task ? : current);
}
@@ -175,7 +176,7 @@ void show_regs(struct pt_regs *regs)
show_registers(regs);
/* Show stack backtrace if pt_regs is from kernel mode */
if (!user_mode(regs))
- show_stack(NULL, (unsigned long *) regs->gprs[15]);
+ show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT);
show_last_breaking_event(regs);
}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index cd241ee66eff..078277231858 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -170,6 +170,8 @@ static noinline __init void setup_lowcore_early(void)
psw_t psw;
psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
+ if (IS_ENABLED(CONFIG_KASAN))
+ psw.mask |= PSW_MASK_DAT;
psw.addr = (unsigned long) s390_base_ext_handler;
S390_lowcore.external_new_psw = psw;
psw.addr = (unsigned long) s390_base_pgm_handler;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3ae64914bd14..23edf196d3dc 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -55,14 +55,11 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
- _CIF_ASCE_SECONDARY | _CIF_FPU)
+_CIF_WORK = (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU)
_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
_LPP_OFFSET = __LC_LPP
-#define BASED(name) name-cleanup_critical(%r13)
-
.macro TRACE_IRQS_ON
#ifdef CONFIG_TRACE_IRQFLAGS
basr %r2,%r0
@@ -116,17 +113,39 @@ _LPP_OFFSET = __LC_LPP
.macro SWITCH_ASYNC savearea,timer
tmhh %r8,0x0001 # interrupting from user ?
jnz 2f
+#if IS_ENABLED(CONFIG_KVM)
lgr %r14,%r9
- cghi %r14,__LC_RETURN_LPSWE
- je 0f
- slg %r14,BASED(.Lcritical_start)
- clg %r14,BASED(.Lcritical_length)
- jhe 1f
-0:
+ larl %r13,.Lsie_gmap
+ slgr %r14,%r13
+ lghi %r13,.Lsie_done - .Lsie_gmap
+ clgr %r14,%r13
+ jhe 0f
lghi %r11,\savearea # inside critical section, do cleanup
- brasl %r14,cleanup_critical
- tmhh %r8,0x0001 # retest problem state after cleanup
- jnz 2f
+ brasl %r14,.Lcleanup_sie
+#endif
+0: larl %r13,.Lpsw_idle_exit
+ cgr %r13,%r9
+ jne 1f
+
+ mvc __CLOCK_IDLE_EXIT(8,%r2), __LC_INT_CLOCK
+ mvc __TIMER_IDLE_EXIT(8,%r2), __LC_ASYNC_ENTER_TIMER
+ # account system time going idle
+ ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT
+
+ lg %r13,__LC_STEAL_TIMER
+ alg %r13,__CLOCK_IDLE_ENTER(%r2)
+ slg %r13,__LC_LAST_UPDATE_CLOCK
+ stg %r13,__LC_STEAL_TIMER
+
+ mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
+
+ lg %r13,__LC_SYSTEM_TIMER
+ alg %r13,__LC_LAST_UPDATE_TIMER
+ slg %r13,__TIMER_IDLE_ENTER(%r2)
+ stg %r13,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
+
+ nihh %r8,0xfcfd # clear wait state and irq bits
1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
slgr %r14,%r15
srag %r14,%r14,STACK_SHIFT
@@ -152,12 +171,30 @@ _LPP_OFFSET = __LC_LPP
mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
.endm
- .macro REENABLE_IRQS
+ .macro RESTORE_SM_CLEAR_PER
stg %r8,__LC_RETURN_PSW
ni __LC_RETURN_PSW,0xbf
ssm __LC_RETURN_PSW
.endm
+ .macro ENABLE_INTS
+ stosm __SF_EMPTY(%r15),3
+ .endm
+
+ .macro ENABLE_INTS_TRACE
+ TRACE_IRQS_ON
+ ENABLE_INTS
+ .endm
+
+ .macro DISABLE_INTS
+ stnsm __SF_EMPTY(%r15),0xfc
+ .endm
+
+ .macro DISABLE_INTS_TRACE
+ DISABLE_INTS
+ TRACE_IRQS_OFF
+ .endm
+
.macro STCK savearea
#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
.insn s,0xb27c0000,\savearea # store clock fast
@@ -254,8 +291,6 @@ ENTRY(__switch_to)
BR_EX %r14
ENDPROC(__switch_to)
-.L__critical_start:
-
#if IS_ENABLED(CONFIG_KVM)
/*
* sie64a calling convention:
@@ -288,7 +323,6 @@ ENTRY(sie64a)
BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_entry:
sie 0(%r14)
-.Lsie_exit:
BPOFF
BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_skip:
@@ -336,21 +370,19 @@ EXPORT_SYMBOL(sie_exit)
/*
* SVC interrupt handler routine. System calls are synchronous events and
- * are executed with interrupts enabled.
+ * are entered with interrupts disabled.
*/
ENTRY(system_call)
stpt __LC_SYNC_ENTER_TIMER
-.Lsysc_stmg:
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
BPOFF
lg %r12,__LC_CURRENT
- lghi %r13,__TASK_thread
lghi %r14,_PIF_SYSCALL
.Lsysc_per:
+ lghi %r13,__TASK_thread
lg %r15,__LC_KERNEL_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
-.Lsysc_vtime:
UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
stmg %r0,%r7,__PT_R0(%r11)
@@ -358,6 +390,7 @@ ENTRY(system_call)
mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
stg %r14,__PT_FLAGS(%r11)
+ ENABLE_INTS
.Lsysc_do_svc:
# clear user controlled register to prevent speculative use
xgr %r0,%r0
@@ -368,9 +401,9 @@ ENTRY(system_call)
jnz .Lsysc_nr_ok
# svc 0: system call number in %r1
llgfr %r1,%r1 # clear high word in r1
+ sth %r1,__PT_INT_CODE+2(%r11)
cghi %r1,NR_syscalls
jnl .Lsysc_nr_ok
- sth %r1,__PT_INT_CODE+2(%r11)
slag %r8,%r1,3
.Lsysc_nr_ok:
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
@@ -393,26 +426,26 @@ ENTRY(system_call)
jnz .Lsysc_work
TSTMSK __TI_flags(%r12),_TIF_WORK
jnz .Lsysc_work # check for work
- TSTMSK __LC_CPU_FLAGS,_CIF_WORK
+ TSTMSK __LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU)
jnz .Lsysc_work
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
.Lsysc_restore:
+ DISABLE_INTS
+ TSTMSK __LC_CPU_FLAGS, _CIF_FPU
+ jz .Lsysc_skip_fpu
+ brasl %r14,load_fpu_regs
+.Lsysc_skip_fpu:
lg %r14,__LC_VDSO_PER_CPU
- lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
-.Lsysc_exit_timer:
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
- lmg %r11,%r15,__PT_R11(%r11)
- b __LC_RETURN_LPSWE(%r0)
-.Lsysc_done:
+ lmg %r0,%r15,__PT_R0(%r11)
+ b __LC_RETURN_LPSWE
#
# One of the work bits is on. Find out which one.
#
.Lsysc_work:
- TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
- jo .Lsysc_mcck_pending
TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lsysc_reschedule
TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
@@ -436,11 +469,9 @@ ENTRY(system_call)
jo .Lsysc_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
jo .Lsysc_notify_resume
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- jo .Lsysc_vxrs
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
jnz .Lsysc_asce
- j .Lsysc_return # beware of critical section cleanup
+ j .Lsysc_return
#
# _TIF_NEED_RESCHED is set, call schedule
@@ -450,13 +481,6 @@ ENTRY(system_call)
jg schedule
#
-# _CIF_MCCK_PENDING is set, call handler
-#
-.Lsysc_mcck_pending:
- larl %r14,.Lsysc_return
- jg s390_handle_mcck # TIF bit will be cleared by handler
-
-#
# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
#
.Lsysc_asce:
@@ -475,12 +499,6 @@ ENTRY(system_call)
larl %r14,.Lsysc_return
jg set_fs_fixup
-#
-# CIF_FPU is set, restore floating-point controls and floating-point registers.
-#
-.Lsysc_vxrs:
- larl %r14,.Lsysc_return
- jg load_fpu_regs
#
# _TIF_SIGPENDING is set, call do_signal
@@ -564,7 +582,6 @@ ENTRY(system_call)
jnh .Lsysc_tracenogo
sllg %r8,%r2,3
lg %r9,0(%r8,%r10)
-.Lsysc_tracego:
lmg %r3,%r7,__PT_R3(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lg %r2,__PT_ORIG_GPR2(%r11)
@@ -585,8 +602,6 @@ ENTRY(ret_from_fork)
la %r11,STACK_FRAME_OVERHEAD(%r15)
lg %r12,__LC_CURRENT
brasl %r14,schedule_tail
- TRACE_IRQS_ON
- ssm __LC_SVC_NEW_PSW # reenable interrupts
tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
jne .Lsysc_tracenogo
# it's a kernel thread
@@ -620,15 +635,16 @@ ENTRY(pgm_check_handler)
lghi %r10,1
0: lg %r12,__LC_CURRENT
lghi %r11,0
- larl %r13,cleanup_critical
lmg %r8,%r9,__LC_PGM_OLD_PSW
tmhh %r8,0x0001 # test problem state bit
jnz 3f # -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
# cleanup critical section for program checks in sie64a
lgr %r14,%r9
- slg %r14,BASED(.Lsie_critical_start)
- clg %r14,BASED(.Lsie_critical_length)
+ larl %r13,.Lsie_gmap
+ slgr %r14,%r13
+ lghi %r13,.Lsie_done - .Lsie_gmap
+ clgr %r14,%r13
jhe 1f
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
@@ -680,7 +696,7 @@ ENTRY(pgm_check_handler)
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-6: REENABLE_IRQS
+6: RESTORE_SM_CLEAR_PER
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
@@ -702,7 +718,7 @@ ENTRY(pgm_check_handler)
# PER event in supervisor state, must be kprobes
#
.Lpgm_kprobe:
- REENABLE_IRQS
+ RESTORE_SM_CLEAR_PER
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_per_trap
@@ -713,11 +729,10 @@ ENTRY(pgm_check_handler)
#
.Lpgm_svcper:
mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
- lghi %r13,__TASK_thread
larl %r14,.Lsysc_per
stg %r14,__LC_RETURN_PSW+8
lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
- lpswe __LC_RETURN_PSW # branch to .Lsysc_per and enable irqs
+ lpswe __LC_RETURN_PSW # branch to .Lsysc_per
ENDPROC(pgm_check_handler)
/*
@@ -729,7 +744,6 @@ ENTRY(io_int_handler)
BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r12,__LC_CURRENT
- larl %r13,cleanup_critical
lmg %r8,%r9,__LC_IO_OLD_PSW
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
@@ -749,7 +763,12 @@ ENTRY(io_int_handler)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
jo .Lio_restore
+#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS)
+ tmhh %r8,0x300
+ jz 1f
TRACE_IRQS_OFF
+1:
+#endif
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
.Lio_loop:
lgr %r2,%r11 # pass pointer to pt_regs
@@ -767,25 +786,27 @@ ENTRY(io_int_handler)
j .Lio_loop
.Lio_return:
LOCKDEP_SYS_EXIT
- TRACE_IRQS_ON
-.Lio_tif:
TSTMSK __TI_flags(%r12),_TIF_WORK
jnz .Lio_work # there is work to do (signals etc.)
TSTMSK __LC_CPU_FLAGS,_CIF_WORK
jnz .Lio_work
.Lio_restore:
+#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS)
+ tm __PT_PSW(%r11),3
+ jno 0f
+ TRACE_IRQS_ON
+0:
+#endif
lg %r14,__LC_VDSO_PER_CPU
- lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jno .Lio_exit_kernel
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
-.Lio_exit_timer:
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
.Lio_exit_kernel:
- lmg %r11,%r15,__PT_R11(%r11)
- b __LC_RETURN_LPSWE(%r0)
+ lmg %r0,%r15,__PT_R0(%r11)
+ b __LC_RETURN_LPSWE
.Lio_done:
#
@@ -813,9 +834,6 @@ ENTRY(io_int_handler)
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
- # TRACE_IRQS_ON already done at .Lio_return, call
- # TRACE_IRQS_OFF to keep things symmetrical
- TRACE_IRQS_OFF
brasl %r14,preempt_schedule_irq
j .Lio_return
#else
@@ -835,9 +853,6 @@ ENTRY(io_int_handler)
#
# One of the work bits is on. Find out which one.
#
-.Lio_work_tif:
- TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
- jo .Lio_mcck_pending
TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lio_reschedule
#ifdef CONFIG_LIVEPATCH
@@ -854,15 +869,6 @@ ENTRY(io_int_handler)
jo .Lio_vxrs
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
jnz .Lio_asce
- j .Lio_return # beware of critical section cleanup
-
-#
-# _CIF_MCCK_PENDING is set, call handler
-#
-.Lio_mcck_pending:
- # TRACE_IRQS_ON already done at .Lio_return
- brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler
- TRACE_IRQS_OFF
j .Lio_return
#
@@ -895,23 +901,19 @@ ENTRY(io_int_handler)
# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
#
.Lio_guarded_storage:
- # TRACE_IRQS_ON already done at .Lio_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
+ ENABLE_INTS_TRACE
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,gs_load_bc_cb
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
+ DISABLE_INTS_TRACE
j .Lio_return
#
# _TIF_NEED_RESCHED is set, call schedule
#
.Lio_reschedule:
- # TRACE_IRQS_ON already done at .Lio_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
+ ENABLE_INTS_TRACE
brasl %r14,schedule # call scheduler
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
+ DISABLE_INTS_TRACE
j .Lio_return
#
@@ -928,24 +930,20 @@ ENTRY(io_int_handler)
# _TIF_SIGPENDING or is set, call do_signal
#
.Lio_sigpending:
- # TRACE_IRQS_ON already done at .Lio_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
+ ENABLE_INTS_TRACE
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_signal
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
+ DISABLE_INTS_TRACE
j .Lio_return
#
# _TIF_NOTIFY_RESUME or is set, call do_notify_resume
#
.Lio_notify_resume:
- # TRACE_IRQS_ON already done at .Lio_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
+ ENABLE_INTS_TRACE
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_notify_resume
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
+ DISABLE_INTS_TRACE
j .Lio_return
ENDPROC(io_int_handler)
@@ -958,7 +956,6 @@ ENTRY(ext_int_handler)
BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r12,__LC_CURRENT
- larl %r13,cleanup_critical
lmg %r8,%r9,__LC_EXT_OLD_PSW
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
@@ -981,7 +978,12 @@ ENTRY(ext_int_handler)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
jo .Lio_restore
+#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS)
+ tmhh %r8,0x300
+ jz 1f
TRACE_IRQS_OFF
+1:
+#endif
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
lghi %r3,EXT_INTERRUPT
@@ -990,11 +992,11 @@ ENTRY(ext_int_handler)
ENDPROC(ext_int_handler)
/*
- * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
+ * Load idle PSW.
*/
ENTRY(psw_idle)
stg %r3,__SF_EMPTY(%r15)
- larl %r1,.Lpsw_idle_lpsw+4
+ larl %r1,.Lpsw_idle_exit
stg %r1,__SF_EMPTY+8(%r15)
larl %r1,smp_cpu_mtid
llgf %r1,0(%r1)
@@ -1006,10 +1008,9 @@ ENTRY(psw_idle)
BPON
STCK __CLOCK_IDLE_ENTER(%r2)
stpt __TIMER_IDLE_ENTER(%r2)
-.Lpsw_idle_lpsw:
lpswe __SF_EMPTY(%r15)
+.Lpsw_idle_exit:
BR_EX %r14
-.Lpsw_idle_end:
ENDPROC(psw_idle)
/*
@@ -1020,6 +1021,7 @@ ENDPROC(psw_idle)
* of the register contents at return from io or a system call.
*/
ENTRY(save_fpu_regs)
+ stnsm __SF_EMPTY(%r15),0xfc
lg %r2,__LC_CURRENT
aghi %r2,__TASK_thread
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
@@ -1051,6 +1053,7 @@ ENTRY(save_fpu_regs)
.Lsave_fpu_regs_done:
oi __LC_CPU_FLAGS+7,_CIF_FPU
.Lsave_fpu_regs_exit:
+ ssm __SF_EMPTY(%r15)
BR_EX %r14
.Lsave_fpu_regs_end:
ENDPROC(save_fpu_regs)
@@ -1102,8 +1105,6 @@ load_fpu_regs:
.Lload_fpu_regs_end:
ENDPROC(load_fpu_regs)
-.L__critical_end:
-
/*
* Machine check handler routines
*/
@@ -1116,7 +1117,6 @@ ENTRY(mcck_int_handler)
lam %a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs
lg %r12,__LC_CURRENT
- larl %r13,cleanup_critical
lmg %r8,%r9,__LC_MCK_OLD_PSW
TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
@@ -1202,15 +1202,13 @@ ENTRY(mcck_int_handler)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,s390_do_machine_check
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jno .Lmcck_return
+ cghi %r2,0
+ je .Lmcck_return
lg %r1,__LC_KERNEL_STACK # switch to kernel stack
mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
- TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
- jno .Lmcck_return
TRACE_IRQS_OFF
brasl %r14,s390_handle_mcck
TRACE_IRQS_ON
@@ -1280,265 +1278,23 @@ ENTRY(stack_overflow)
ENDPROC(stack_overflow)
#endif
-ENTRY(cleanup_critical)
- cghi %r9,__LC_RETURN_LPSWE
- je .Lcleanup_lpswe
#if IS_ENABLED(CONFIG_KVM)
- clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap
- jl 0f
- clg %r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
- jl .Lcleanup_sie
-#endif
- clg %r9,BASED(.Lcleanup_table) # system_call
- jl 0f
- clg %r9,BASED(.Lcleanup_table+8) # .Lsysc_do_svc
- jl .Lcleanup_system_call
- clg %r9,BASED(.Lcleanup_table+16) # .Lsysc_tif
- jl 0f
- clg %r9,BASED(.Lcleanup_table+24) # .Lsysc_restore
- jl .Lcleanup_sysc_tif
- clg %r9,BASED(.Lcleanup_table+32) # .Lsysc_done
- jl .Lcleanup_sysc_restore
- clg %r9,BASED(.Lcleanup_table+40) # .Lio_tif
- jl 0f
- clg %r9,BASED(.Lcleanup_table+48) # .Lio_restore
- jl .Lcleanup_io_tif
- clg %r9,BASED(.Lcleanup_table+56) # .Lio_done
- jl .Lcleanup_io_restore
- clg %r9,BASED(.Lcleanup_table+64) # psw_idle
- jl 0f
- clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end
- jl .Lcleanup_idle
- clg %r9,BASED(.Lcleanup_table+80) # save_fpu_regs
- jl 0f
- clg %r9,BASED(.Lcleanup_table+88) # .Lsave_fpu_regs_end
- jl .Lcleanup_save_fpu_regs
- clg %r9,BASED(.Lcleanup_table+96) # load_fpu_regs
- jl 0f
- clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
- jl .Lcleanup_load_fpu_regs
-0: BR_EX %r14,%r11
-ENDPROC(cleanup_critical)
-
- .align 8
-.Lcleanup_table:
- .quad system_call
- .quad .Lsysc_do_svc
- .quad .Lsysc_tif
- .quad .Lsysc_restore
- .quad .Lsysc_done
- .quad .Lio_tif
- .quad .Lio_restore
- .quad .Lio_done
- .quad psw_idle
- .quad .Lpsw_idle_end
- .quad save_fpu_regs
- .quad .Lsave_fpu_regs_end
- .quad load_fpu_regs
- .quad .Lload_fpu_regs_end
-
-#if IS_ENABLED(CONFIG_KVM)
-.Lcleanup_table_sie:
- .quad .Lsie_gmap
- .quad .Lsie_done
-
.Lcleanup_sie:
- cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt?
- je 1f
- slg %r9,BASED(.Lsie_crit_mcck_start)
- clg %r9,BASED(.Lsie_crit_mcck_length)
- jh 1f
- oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt?
+ je 1f
+ larl %r13,.Lsie_entry
+ slgr %r9,%r13
+ larl %r13,.Lsie_skip
+ clgr %r9,%r13
+ jh 1f
+ oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
BR_EX %r14,%r11
-#endif
-.Lcleanup_system_call:
- # check if stpt has been executed
- clg %r9,BASED(.Lcleanup_system_call_insn)
- jh 0f
- mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
- cghi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
-0: # check if stmg has been executed
- clg %r9,BASED(.Lcleanup_system_call_insn+8)
- jh 0f
- mvc __LC_SAVE_AREA_SYNC(64),0(%r11)
-0: # check if base register setup + TIF bit load has been done
- clg %r9,BASED(.Lcleanup_system_call_insn+16)
- jhe 0f
- # set up saved register r12 task struct pointer
- stg %r12,32(%r11)
- # set up saved register r13 __TASK_thread offset
- mvc 40(8,%r11),BASED(.Lcleanup_system_call_const)
-0: # check if the user time update has been done
- clg %r9,BASED(.Lcleanup_system_call_insn+24)
- jh 0f
- lg %r15,__LC_EXIT_TIMER
- slg %r15,__LC_SYNC_ENTER_TIMER
- alg %r15,__LC_USER_TIMER
- stg %r15,__LC_USER_TIMER
-0: # check if the system time update has been done
- clg %r9,BASED(.Lcleanup_system_call_insn+32)
- jh 0f
- lg %r15,__LC_LAST_UPDATE_TIMER
- slg %r15,__LC_EXIT_TIMER
- alg %r15,__LC_SYSTEM_TIMER
- stg %r15,__LC_SYSTEM_TIMER
-0: # update accounting time stamp
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
- BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
- # set up saved register r11
- lg %r15,__LC_KERNEL_STACK
- la %r9,STACK_FRAME_OVERHEAD(%r15)
- stg %r9,24(%r11) # r11 pt_regs pointer
- # fill pt_regs
- mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
- stmg %r0,%r7,__PT_R0(%r9)
- mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW
- mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
- xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
- mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL
- # setup saved register r15
- stg %r15,56(%r11) # r15 stack pointer
- # set new psw address and exit
- larl %r9,.Lsysc_do_svc
- BR_EX %r14,%r11
-.Lcleanup_system_call_insn:
- .quad system_call
- .quad .Lsysc_stmg
- .quad .Lsysc_per
- .quad .Lsysc_vtime+36
- .quad .Lsysc_vtime+42
-.Lcleanup_system_call_const:
- .quad __TASK_thread
-
-.Lcleanup_sysc_tif:
- larl %r9,.Lsysc_tif
- BR_EX %r14,%r11
-
-.Lcleanup_sysc_restore:
- # check if stpt has been executed
- clg %r9,BASED(.Lcleanup_sysc_restore_insn)
- jh 0f
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
- cghi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
-0: clg %r9,BASED(.Lcleanup_sysc_restore_insn+8)
- je 1f
- lg %r9,24(%r11) # get saved pointer to pt_regs
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
- mvc 0(64,%r11),__PT_R8(%r9)
- lmg %r0,%r7,__PT_R0(%r9)
-.Lcleanup_lpswe:
-1: lmg %r8,%r9,__LC_RETURN_PSW
- BR_EX %r14,%r11
-.Lcleanup_sysc_restore_insn:
- .quad .Lsysc_exit_timer
- .quad .Lsysc_done - 4
-
-.Lcleanup_io_tif:
- larl %r9,.Lio_tif
- BR_EX %r14,%r11
-
-.Lcleanup_io_restore:
- # check if stpt has been executed
- clg %r9,BASED(.Lcleanup_io_restore_insn)
- jh 0f
- mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
-0: clg %r9,BASED(.Lcleanup_io_restore_insn+8)
- je 1f
- lg %r9,24(%r11) # get saved r11 pointer to pt_regs
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
- mvc 0(64,%r11),__PT_R8(%r9)
- lmg %r0,%r7,__PT_R0(%r9)
-1: lmg %r8,%r9,__LC_RETURN_PSW
- BR_EX %r14,%r11
-.Lcleanup_io_restore_insn:
- .quad .Lio_exit_timer
- .quad .Lio_done - 4
-
-.Lcleanup_idle:
- ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT
- # copy interrupt clock & cpu timer
- mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
- mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
- cghi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
- mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
-0: # check if stck & stpt have been executed
- clg %r9,BASED(.Lcleanup_idle_insn)
- jhe 1f
- mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
- mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
-1: # calculate idle cycles
- clg %r9,BASED(.Lcleanup_idle_insn)
- jl 3f
- larl %r1,smp_cpu_mtid
- llgf %r1,0(%r1)
- ltgr %r1,%r1
- jz 3f
- .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15)
- larl %r3,mt_cycles
- ag %r3,__LC_PERCPU_OFFSET
- la %r4,__SF_EMPTY+16(%r15)
-2: lg %r0,0(%r3)
- slg %r0,0(%r4)
- alg %r0,64(%r4)
- stg %r0,0(%r3)
- la %r3,8(%r3)
- la %r4,8(%r4)
- brct %r1,2b
-3: # account system time going idle
- lg %r9,__LC_STEAL_TIMER
- alg %r9,__CLOCK_IDLE_ENTER(%r2)
- slg %r9,__LC_LAST_UPDATE_CLOCK
- stg %r9,__LC_STEAL_TIMER
- mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
- lg %r9,__LC_SYSTEM_TIMER
- alg %r9,__LC_LAST_UPDATE_TIMER
- slg %r9,__TIMER_IDLE_ENTER(%r2)
- stg %r9,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
- # prepare return psw
- nihh %r8,0xfcfd # clear irq & wait state bits
- lg %r9,48(%r11) # return from psw_idle
- BR_EX %r14,%r11
-.Lcleanup_idle_insn:
- .quad .Lpsw_idle_lpsw
-
-.Lcleanup_save_fpu_regs:
- larl %r9,save_fpu_regs
- BR_EX %r14,%r11
-
-.Lcleanup_load_fpu_regs:
- larl %r9,load_fpu_regs
- BR_EX %r14,%r11
-
-/*
- * Integer constants
- */
- .align 8
-.Lcritical_start:
- .quad .L__critical_start
-.Lcritical_length:
- .quad .L__critical_end - .L__critical_start
-#if IS_ENABLED(CONFIG_KVM)
-.Lsie_critical_start:
- .quad .Lsie_gmap
-.Lsie_critical_length:
- .quad .Lsie_done - .Lsie_gmap
-.Lsie_crit_mcck_start:
- .quad .Lsie_entry
-.Lsie_crit_mcck_length:
- .quad .Lsie_skip - .Lsie_entry
#endif
.section .rodata, "a"
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index faca269d5f27..a44ddc2f2dec 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -26,6 +26,7 @@ void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
void do_secure_storage_access(struct pt_regs *regs);
void do_non_secure_storage_access(struct pt_regs *regs);
+void do_secure_storage_violation(struct pt_regs *regs);
void addressing_exception(struct pt_regs *regs);
void data_exception(struct pt_regs *regs);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 4cd9b1ada834..b388e87a08bf 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -72,22 +72,6 @@ static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
#endif
}
-static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_KPROBES
- insn->opc = BREAKPOINT_INSTRUCTION;
- insn->disp = KPROBE_ON_FTRACE_NOP;
-#endif
-}
-
-static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_KPROBES
- insn->opc = BREAKPOINT_INSTRUCTION;
- insn->disp = KPROBE_ON_FTRACE_CALL;
-#endif
-}
-
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
@@ -99,7 +83,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
{
struct ftrace_insn orig, new, old;
- if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+ if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
if (addr == MCOUNT_ADDR) {
/* Initial code replacement */
@@ -121,7 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
struct ftrace_insn orig, new, old;
- if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+ if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
/* Replace nop with an ftrace call. */
ftrace_generate_nop_insn(&orig);
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 8f8456816d83..f7f1e64e0d98 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/sched/cputime.h>
+#include <trace/events/power.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include "entry.h"
@@ -24,28 +25,27 @@ void enabled_wait(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
unsigned long long idle_time;
- unsigned long psw_mask;
+ unsigned long psw_mask, flags;
- trace_hardirqs_on();
/* Wait for external, I/O or machine check interrupt. */
psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY);
+ local_irq_save(flags);
/* Call the assembler magic in entry.S */
psw_idle(idle, psw_mask);
-
- trace_hardirqs_off();
+ local_irq_restore(flags);
/* Account time spent with enabled wait psw loaded as idle time. */
- write_seqcount_begin(&idle->seqcount);
+ raw_write_seqcount_begin(&idle->seqcount);
idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
idle->idle_time += idle_time;
idle->idle_count++;
account_idle_time(cputime_to_nsecs(idle_time));
- write_seqcount_end(&idle->seqcount);
+ raw_write_seqcount_end(&idle->seqcount);
}
NOKPROBE_SYMBOL(enabled_wait);
@@ -118,22 +118,16 @@ u64 arch_cpu_idle_time(int cpu)
void arch_cpu_idle_enter(void)
{
- local_mcck_disable();
}
void arch_cpu_idle(void)
{
- if (!test_cpu_flag(CIF_MCCK_PENDING))
- /* Halt the cpu and keep track of cpu time accounting. */
- enabled_wait();
+ enabled_wait();
local_irq_enable();
}
void arch_cpu_idle_exit(void)
{
- local_mcck_enable();
- if (test_cpu_flag(CIF_MCCK_PENDING))
- s390_handle_mcck();
}
void arch_cpu_idle_dead(void)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 4a71061974fd..90a2a17239b0 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -39,6 +39,7 @@
#define IPL_CCW_STR "ccw"
#define IPL_FCP_STR "fcp"
#define IPL_FCP_DUMP_STR "fcp_dump"
+#define IPL_NVME_STR "nvme"
#define IPL_NSS_STR "nss"
#define DUMP_CCW_STR "ccw"
@@ -93,6 +94,8 @@ static char *ipl_type_str(enum ipl_type type)
return IPL_FCP_DUMP_STR;
case IPL_TYPE_NSS:
return IPL_NSS_STR;
+ case IPL_TYPE_NVME:
+ return IPL_NVME_STR;
case IPL_TYPE_UNKNOWN:
default:
return IPL_UNKNOWN_STR;
@@ -133,6 +136,7 @@ static int reipl_capabilities = IPL_TYPE_UNKNOWN;
static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_nvme;
static struct ipl_parameter_block *reipl_block_ccw;
static struct ipl_parameter_block *reipl_block_nss;
static struct ipl_parameter_block *reipl_block_actual;
@@ -177,7 +181,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
char *page) \
{ \
- return snprintf(page, PAGE_SIZE, _format, ##args); \
+ return scnprintf(page, PAGE_SIZE, _format, ##args); \
}
#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
@@ -261,6 +265,8 @@ static __init enum ipl_type get_ipl_type(void)
return IPL_TYPE_FCP_DUMP;
else
return IPL_TYPE_FCP;
+ case IPL_PBT_NVME:
+ return IPL_TYPE_NVME;
}
return IPL_TYPE_UNKNOWN;
}
@@ -317,6 +323,8 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
+ case IPL_TYPE_NVME:
+ return sprintf(page, "%08ux\n", ipl_block.nvme.fid);
default:
return 0;
}
@@ -345,15 +353,35 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
return memory_read_from_buffer(buf, count, &off, scp_data, size);
}
+
+static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ unsigned int size = ipl_block.nvme.scp_data_len;
+ void *scp_data = &ipl_block.nvme.scp_data;
+
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
static struct bin_attribute ipl_scp_data_attr =
__BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE);
+static struct bin_attribute ipl_nvme_scp_data_attr =
+ __BIN_ATTR(scp_data, S_IRUGO, ipl_nvme_scp_data_read, NULL, PAGE_SIZE);
+
static struct bin_attribute *ipl_fcp_bin_attrs[] = {
&ipl_parameter_attr,
&ipl_scp_data_attr,
NULL,
};
+static struct bin_attribute *ipl_nvme_bin_attrs[] = {
+ &ipl_parameter_attr,
+ &ipl_nvme_scp_data_attr,
+ NULL,
+};
+
/* FCP ipl device attributes */
DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
@@ -365,6 +393,16 @@ DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n",
DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
(unsigned long long)ipl_block.fcp.br_lba);
+/* NVMe ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n",
+ (unsigned long long)ipl_block.nvme.fid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n",
+ (unsigned long long)ipl_block.nvme.nsid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n",
+ (unsigned long long)ipl_block.nvme.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n",
+ (unsigned long long)ipl_block.nvme.br_lba);
+
static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
@@ -399,6 +437,24 @@ static struct attribute_group ipl_fcp_attr_group = {
.bin_attrs = ipl_fcp_bin_attrs,
};
+static struct attribute *ipl_nvme_attrs[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_nvme_fid_attr.attr,
+ &sys_ipl_nvme_nsid_attr.attr,
+ &sys_ipl_nvme_bootprog_attr.attr,
+ &sys_ipl_nvme_br_lba_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_nvme_attr_group = {
+ .attrs = ipl_nvme_attrs,
+ .bin_attrs = ipl_nvme_bin_attrs,
+};
+
+
/* CCW ipl device attributes */
static struct attribute *ipl_ccw_attrs_vm[] = {
@@ -474,6 +530,9 @@ static int __init ipl_init(void)
case IPL_TYPE_FCP_DUMP:
rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
break;
+ case IPL_TYPE_NVME:
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
+ break;
default:
rc = sysfs_create_group(&ipl_kset->kobj,
&ipl_unknown_attr_group);
@@ -727,6 +786,93 @@ static struct attribute_group reipl_fcp_attr_group = {
static struct kobj_attribute sys_reipl_fcp_clear_attr =
__ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store);
+/* NVME reipl device attributes */
+
+static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t size = reipl_block_nvme->nvme.scp_data_len;
+ void *scp_data = reipl_block_nvme->nvme.scp_data;
+
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t scpdata_len = count;
+ size_t padding;
+
+ if (off)
+ return -EINVAL;
+
+ memcpy(reipl_block_nvme->nvme.scp_data, buf, count);
+ if (scpdata_len % 8) {
+ padding = 8 - (scpdata_len % 8);
+ memset(reipl_block_nvme->nvme.scp_data + scpdata_len,
+ 0, padding);
+ scpdata_len += padding;
+ }
+
+ reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
+ reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len;
+ reipl_block_nvme->nvme.scp_data_len = scpdata_len;
+
+ return count;
+}
+
+static struct bin_attribute sys_reipl_nvme_scp_data_attr =
+ __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_nvme_scpdata_read,
+ reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE);
+
+static struct bin_attribute *reipl_nvme_bin_attrs[] = {
+ &sys_reipl_nvme_scp_data_attr,
+ NULL,
+};
+
+DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
+ reipl_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
+ reipl_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
+ reipl_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
+ reipl_block_nvme->nvme.br_lba);
+
+/* nvme wrapper */
+static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_nvme, page);
+}
+
+static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_nvme, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_nvme_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nvme_loadparm_show,
+ reipl_nvme_loadparm_store);
+
+static struct attribute *reipl_nvme_attrs[] = {
+ &sys_reipl_nvme_fid_attr.attr,
+ &sys_reipl_nvme_nsid_attr.attr,
+ &sys_reipl_nvme_bootprog_attr.attr,
+ &sys_reipl_nvme_br_lba_attr.attr,
+ &sys_reipl_nvme_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group reipl_nvme_attr_group = {
+ .attrs = reipl_nvme_attrs,
+ .bin_attrs = reipl_nvme_bin_attrs
+};
+
/* CCW reipl device attributes */
DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
@@ -891,6 +1037,9 @@ static int reipl_set_type(enum ipl_type type)
case IPL_TYPE_FCP:
reipl_block_actual = reipl_block_fcp;
break;
+ case IPL_TYPE_NVME:
+ reipl_block_actual = reipl_block_nvme;
+ break;
case IPL_TYPE_NSS:
reipl_block_actual = reipl_block_nss;
break;
@@ -917,6 +1066,8 @@ static ssize_t reipl_type_store(struct kobject *kobj,
rc = reipl_set_type(IPL_TYPE_CCW);
else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
rc = reipl_set_type(IPL_TYPE_FCP);
+ else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_NVME);
else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0)
rc = reipl_set_type(IPL_TYPE_NSS);
return (rc != 0) ? rc : len;
@@ -927,6 +1078,7 @@ static struct kobj_attribute reipl_type_attr =
static struct kset *reipl_kset;
static struct kset *reipl_fcp_kset;
+static struct kset *reipl_nvme_kset;
static void __reipl_run(void *unused)
{
@@ -945,6 +1097,10 @@ static void __reipl_run(void *unused)
else
diag308(DIAG308_LOAD_NORMAL, NULL);
break;
+ case IPL_TYPE_NVME:
+ diag308(DIAG308_SET, reipl_block_nvme);
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ break;
case IPL_TYPE_NSS:
diag308(DIAG308_SET, reipl_block_nss);
diag308(DIAG308_LOAD_CLEAR, NULL);
@@ -1093,6 +1249,49 @@ out1:
return rc;
}
+static int __init reipl_nvme_init(void)
+{
+ int rc;
+
+ reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_nvme)
+ return -ENOMEM;
+
+ /* sysfs: create kset for mixing attr group and bin attrs */
+ reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL,
+ &reipl_kset->kobj);
+ if (!reipl_nvme_kset) {
+ free_page((unsigned long) reipl_block_nvme);
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+ if (rc) {
+ kset_unregister(reipl_nvme_kset);
+ free_page((unsigned long) reipl_block_nvme);
+ return rc;
+ }
+
+ if (ipl_info.type == IPL_TYPE_NVME) {
+ memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block));
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * is invalid in the IPL parameter block, so take it
+ * always from sclp_ipl_info.
+ */
+ memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm,
+ LOADPARM_LEN);
+ } else {
+ reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+ reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+ reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+ reipl_block_nvme->nvme.pbt = IPL_PBT_NVME;
+ reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL;
+ }
+ reipl_capabilities |= IPL_TYPE_NVME;
+ return 0;
+}
+
static int __init reipl_type_init(void)
{
enum ipl_type reipl_type = ipl_info.type;
@@ -1108,6 +1307,9 @@ static int __init reipl_type_init(void)
if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) {
memcpy(reipl_block_fcp, reipl_block, size);
reipl_type = IPL_TYPE_FCP;
+ } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) {
+ memcpy(reipl_block_nvme, reipl_block, size);
+ reipl_type = IPL_TYPE_NVME;
} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) {
memcpy(reipl_block_ccw, reipl_block, size);
reipl_type = IPL_TYPE_CCW;
@@ -1134,6 +1336,9 @@ static int __init reipl_init(void)
rc = reipl_fcp_init();
if (rc)
return rc;
+ rc = reipl_nvme_init();
+ if (rc)
+ return rc;
rc = reipl_nss_init();
if (rc)
return rc;
@@ -1750,6 +1955,10 @@ void __init setup_ipl(void)
ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn;
ipl_info.data.fcp.lun = ipl_block.fcp.lun;
break;
+ case IPL_TYPE_NVME:
+ ipl_info.data.nvme.fid = ipl_block.nvme.fid;
+ ipl_info.data.nvme.nsid = ipl_block.nvme.nsid;
+ break;
case IPL_TYPE_NSS:
case IPL_TYPE_UNKNOWN:
/* We have no info to copy */
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 548d0ea9808d..d2a71d872638 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -523,10 +523,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
* zero, try to fix up.
*/
entry = s390_search_extables(regs->psw.addr);
- if (entry) {
- regs->psw.addr = extable_fixup(entry);
+ if (entry && ex_handle(entry, regs))
return 1;
- }
/*
* fixup_exception() could not handle it,
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 452502f9a0d9..3b895971c3d0 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -167,7 +167,7 @@ static struct timer_list lgr_timer;
*/
static void lgr_timer_set(void)
{
- mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+ mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
}
/*
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3a854cb5a4c6..d91989c7bd6a 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -16,8 +16,6 @@
#include <linux/debug_locks.h>
#include <asm/cio.h>
#include <asm/setup.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/ipl.h>
#include <asm/diag.h>
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index ba8f19bb438b..4055f1c49814 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -19,6 +19,7 @@
#include <linux/kasan.h>
#include <linux/moduleloader.h>
#include <linux/bug.h>
+#include <linux/memory.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
@@ -174,10 +175,12 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
}
static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
- int sign, int bits, int shift)
+ int sign, int bits, int shift,
+ void *(*write)(void *dest, const void *src, size_t len))
{
unsigned long umax;
long min, max;
+ void *dest = (void *)loc;
if (val & ((1UL << shift) - 1))
return -ENOEXEC;
@@ -194,26 +197,33 @@ static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
return -ENOEXEC;
}
- if (bits == 8)
- *(unsigned char *) loc = val;
- else if (bits == 12)
- *(unsigned short *) loc = (val & 0xfff) |
+ if (bits == 8) {
+ unsigned char tmp = val;
+ write(dest, &tmp, 1);
+ } else if (bits == 12) {
+ unsigned short tmp = (val & 0xfff) |
(*(unsigned short *) loc & 0xf000);
- else if (bits == 16)
- *(unsigned short *) loc = val;
- else if (bits == 20)
- *(unsigned int *) loc = (val & 0xfff) << 16 |
- (val & 0xff000) >> 4 |
- (*(unsigned int *) loc & 0xf00000ff);
- else if (bits == 32)
- *(unsigned int *) loc = val;
- else if (bits == 64)
- *(unsigned long *) loc = val;
+ write(dest, &tmp, 2);
+ } else if (bits == 16) {
+ unsigned short tmp = val;
+ write(dest, &tmp, 2);
+ } else if (bits == 20) {
+ unsigned int tmp = (val & 0xfff) << 16 |
+ (val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff);
+ write(dest, &tmp, 4);
+ } else if (bits == 32) {
+ unsigned int tmp = val;
+ write(dest, &tmp, 4);
+ } else if (bits == 64) {
+ unsigned long tmp = val;
+ write(dest, &tmp, 8);
+ }
return 0;
}
static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
- const char *strtab, struct module *me)
+ const char *strtab, struct module *me,
+ void *(*write)(void *dest, const void *src, size_t len))
{
struct mod_arch_syminfo *info;
Elf_Addr loc, val;
@@ -241,17 +251,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_64: /* Direct 64 bit. */
val += rela->r_addend;
if (r_type == R_390_8)
- rc = apply_rela_bits(loc, val, 0, 8, 0);
+ rc = apply_rela_bits(loc, val, 0, 8, 0, write);
else if (r_type == R_390_12)
- rc = apply_rela_bits(loc, val, 0, 12, 0);
+ rc = apply_rela_bits(loc, val, 0, 12, 0, write);
else if (r_type == R_390_16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_20)
- rc = apply_rela_bits(loc, val, 1, 20, 0);
+ rc = apply_rela_bits(loc, val, 1, 20, 0, write);
else if (r_type == R_390_32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_PC16: /* PC relative 16 bit. */
case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
@@ -260,15 +270,15 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PC64: /* PC relative 64 bit. */
val += rela->r_addend - loc;
if (r_type == R_390_PC16)
- rc = apply_rela_bits(loc, val, 1, 16, 0);
+ rc = apply_rela_bits(loc, val, 1, 16, 0, write);
else if (r_type == R_390_PC16DBL)
- rc = apply_rela_bits(loc, val, 1, 16, 1);
+ rc = apply_rela_bits(loc, val, 1, 16, 1, write);
else if (r_type == R_390_PC32DBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
else if (r_type == R_390_PC32)
- rc = apply_rela_bits(loc, val, 1, 32, 0);
+ rc = apply_rela_bits(loc, val, 1, 32, 0, write);
else if (r_type == R_390_PC64)
- rc = apply_rela_bits(loc, val, 1, 64, 0);
+ rc = apply_rela_bits(loc, val, 1, 64, 0, write);
break;
case R_390_GOT12: /* 12 bit GOT offset. */
case R_390_GOT16: /* 16 bit GOT offset. */
@@ -283,33 +293,33 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_GOTPLT64: /* 64 bit offset to jump slot. */
case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */
if (info->got_initialized == 0) {
- Elf_Addr *gotent;
+ Elf_Addr *gotent = me->core_layout.base +
+ me->arch.got_offset +
+ info->got_offset;
- gotent = me->core_layout.base + me->arch.got_offset +
- info->got_offset;
- *gotent = val;
+ write(gotent, &val, sizeof(*gotent));
info->got_initialized = 1;
}
val = info->got_offset + rela->r_addend;
if (r_type == R_390_GOT12 ||
r_type == R_390_GOTPLT12)
- rc = apply_rela_bits(loc, val, 0, 12, 0);
+ rc = apply_rela_bits(loc, val, 0, 12, 0, write);
else if (r_type == R_390_GOT16 ||
r_type == R_390_GOTPLT16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_GOT20 ||
r_type == R_390_GOTPLT20)
- rc = apply_rela_bits(loc, val, 1, 20, 0);
+ rc = apply_rela_bits(loc, val, 1, 20, 0, write);
else if (r_type == R_390_GOT32 ||
r_type == R_390_GOTPLT32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_GOT64 ||
r_type == R_390_GOTPLT64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
else if (r_type == R_390_GOTENT ||
r_type == R_390_GOTPLTENT) {
val += (Elf_Addr) me->core_layout.base - loc;
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
}
break;
case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
@@ -320,25 +330,29 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
- unsigned int *ip;
- ip = me->core_layout.base + me->arch.plt_offset +
- info->plt_offset;
- ip[0] = 0x0d10e310; /* basr 1,0 */
- ip[1] = 0x100a0004; /* lg 1,10(1) */
+ unsigned int insn[5];
+ unsigned int *ip = me->core_layout.base +
+ me->arch.plt_offset +
+ info->plt_offset;
+
+ insn[0] = 0x0d10e310; /* basr 1,0 */
+ insn[1] = 0x100a0004; /* lg 1,10(1) */
if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
unsigned int *ij;
ij = me->core_layout.base +
me->arch.plt_offset +
me->arch.plt_size - PLT_ENTRY_SIZE;
- ip[2] = 0xa7f40000 + /* j __jump_r1 */
+ insn[2] = 0xa7f40000 + /* j __jump_r1 */
(unsigned int)(u16)
(((unsigned long) ij - 8 -
(unsigned long) ip) / 2);
} else {
- ip[2] = 0x07f10000; /* br %r1 */
+ insn[2] = 0x07f10000; /* br %r1 */
}
- ip[3] = (unsigned int) (val >> 32);
- ip[4] = (unsigned int) val;
+ insn[3] = (unsigned int) (val >> 32);
+ insn[4] = (unsigned int) val;
+
+ write(ip, insn, sizeof(insn));
info->plt_initialized = 1;
}
if (r_type == R_390_PLTOFF16 ||
@@ -357,17 +371,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val += rela->r_addend - loc;
}
if (r_type == R_390_PLT16DBL)
- rc = apply_rela_bits(loc, val, 1, 16, 1);
+ rc = apply_rela_bits(loc, val, 1, 16, 1, write);
else if (r_type == R_390_PLTOFF16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_PLT32DBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
else if (r_type == R_390_PLT32 ||
r_type == R_390_PLTOFF32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_PLT64 ||
r_type == R_390_PLTOFF64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_GOTOFF16: /* 16 bit offset to GOT. */
case R_390_GOTOFF32: /* 32 bit offset to GOT. */
@@ -375,20 +389,20 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val = val + rela->r_addend -
((Elf_Addr) me->core_layout.base + me->arch.got_offset);
if (r_type == R_390_GOTOFF16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_GOTOFF32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_GOTOFF64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
val = (Elf_Addr) me->core_layout.base + me->arch.got_offset +
rela->r_addend - loc;
if (r_type == R_390_GOTPC)
- rc = apply_rela_bits(loc, val, 1, 32, 0);
+ rc = apply_rela_bits(loc, val, 1, 32, 0, write);
else if (r_type == R_390_GOTPCDBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
break;
case R_390_COPY:
case R_390_GLOB_DAT: /* Create GOT entry. */
@@ -412,9 +426,10 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
return 0;
}
-int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
unsigned int symindex, unsigned int relsec,
- struct module *me)
+ struct module *me,
+ void *(*write)(void *dest, const void *src, size_t len))
{
Elf_Addr base;
Elf_Sym *symtab;
@@ -430,13 +445,27 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
for (i = 0; i < n; i++, rela++) {
- rc = apply_rela(rela, base, symtab, strtab, me);
+ rc = apply_rela(rela, base, symtab, strtab, me, write);
if (rc)
return rc;
}
return 0;
}
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relsec,
+ struct module *me)
+{
+ bool early = me->state == MODULE_STATE_UNFORMED;
+ void *(*write)(void *, const void *, size_t) = memcpy;
+
+ if (!early)
+ write = s390_kernel_write;
+
+ return __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
+ write);
+}
+
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0a487fae763e..86c8d5370e7f 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -148,7 +148,6 @@ void s390_handle_mcck(void)
local_mcck_disable();
mcck = *this_cpu_ptr(&cpu_mcck);
memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
- clear_cpu_flag(CIF_MCCK_PENDING);
local_mcck_enable();
local_irq_restore(flags);
@@ -333,7 +332,7 @@ NOKPROBE_SYMBOL(s390_backup_mcck_info);
/*
* machine check handler.
*/
-void notrace s390_do_machine_check(struct pt_regs *regs)
+int notrace s390_do_machine_check(struct pt_regs *regs)
{
static int ipd_count;
static DEFINE_SPINLOCK(ipd_lock);
@@ -342,6 +341,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
unsigned long long tmp;
union mci mci;
unsigned long mcck_dam_code;
+ int mcck_pending = 0;
nmi_enter();
inc_irq_stat(NMI_NMI);
@@ -400,7 +400,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
*/
mcck->kill_task = 1;
mcck->mcck_code = mci.val;
- set_cpu_flag(CIF_MCCK_PENDING);
+ mcck_pending = 1;
}
/*
@@ -420,8 +420,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck->stp_queue |= stp_sync_check();
if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
mcck->stp_queue |= stp_island_check();
- if (mcck->stp_queue)
- set_cpu_flag(CIF_MCCK_PENDING);
+ mcck_pending = 1;
}
/*
@@ -442,12 +441,12 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
- set_cpu_flag(CIF_MCCK_PENDING);
+ mcck_pending = 1;
}
if (mci.w) {
/* Warning pending */
mcck->warning = 1;
- set_cpu_flag(CIF_MCCK_PENDING);
+ mcck_pending = 1;
}
/*
@@ -462,7 +461,17 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
*((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
}
clear_cpu_flag(CIF_MCCK_GUEST);
+
+ if (user_mode(regs) && mcck_pending) {
+ nmi_exit();
+ return 1;
+ }
+
+ if (mcck_pending)
+ schedule_mcck_handler();
+
nmi_exit();
+ return 0;
}
NOKPROBE_SYMBOL(s390_do_machine_check);
diff --git a/arch/s390/numa/numa.c b/arch/s390/kernel/numa.c
index 51c5a9f6e525..51c5a9f6e525 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/kernel/numa.c
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 1e3df52b2b65..37265f551a11 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -292,7 +292,7 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CCERROR, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
@@ -629,7 +629,7 @@ static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS),
CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES),
CPUMF_EVENT_PTR(cf_z15, DFLT_CC),
- CPUMF_EVENT_PTR(cf_z15, DFLT_CCERROR),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH),
CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
NULL,
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 85a711d783eb..4f9e4626df55 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -881,12 +881,21 @@ out:
return err;
}
+static bool is_callchain_event(struct perf_event *event)
+{
+ u64 sample_type = event->attr.sample_type;
+
+ return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER |
+ PERF_SAMPLE_STACK_USER);
+}
+
static int cpumsf_pmu_event_init(struct perf_event *event)
{
int err;
/* No support for taken branch sampling */
- if (has_branch_stack(event))
+ /* No support for callchain, stacks and registers */
+ if (has_branch_stack(event) || is_callchain_event(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 2c27907a5ffc..9a92638360ee 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -80,7 +80,7 @@ PGM_CHECK(do_dat_exception) /* 3b */
PGM_CHECK_DEFAULT /* 3c */
PGM_CHECK(do_secure_storage_access) /* 3d */
PGM_CHECK(do_non_secure_storage_access) /* 3e */
-PGM_CHECK_DEFAULT /* 3f */
+PGM_CHECK(do_secure_storage_violation) /* 3f */
PGM_CHECK(monitor_event_exception) /* 40 */
PGM_CHECK_DEFAULT /* 41 */
PGM_CHECK_DEFAULT /* 42 */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index eb6e23ad15a2..ec801d3bbb37 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -80,8 +80,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
-int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
- unsigned long arg, struct task_struct *p, unsigned long tls)
+int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
+ unsigned long arg, struct task_struct *p, unsigned long tls)
{
struct fake_frame
{
@@ -160,24 +160,6 @@ asmlinkage void execve_tail(void)
asm volatile("sfpc %0" : : "d" (0));
}
-/*
- * fill in the FPU structure for a core dump.
- */
-int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
-{
- save_fpu_regs();
- fpregs->fpc = current->thread.fpu.fpc;
- fpregs->pad = 0;
- if (MACHINE_HAS_VX)
- convert_vx_to_fp((freg_t *)&fpregs->fprs,
- current->thread.fpu.vxrs);
- else
- memcpy(&fpregs->fprs, current->thread.fpu.fprs,
- sizeof(fpregs->fprs));
- return 1;
-}
-EXPORT_SYMBOL(dump_fpu);
-
unsigned long get_wchan(struct task_struct *p)
{
struct unwind_state state;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 58faa12542a1..a76dd27fb2e8 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -25,8 +25,6 @@
#include <linux/compat.h>
#include <trace/syscall.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/switch_to.h>
@@ -324,6 +322,25 @@ static inline void __poke_user_per(struct task_struct *child,
child->thread.per_user.end = data;
}
+static void fixup_int_code(struct task_struct *child, addr_t data)
+{
+ struct pt_regs *regs = task_pt_regs(child);
+ int ilc = regs->int_code >> 16;
+ u16 insn;
+
+ if (ilc > 6)
+ return;
+
+ if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16),
+ &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn))
+ return;
+
+ /* double check that tracee stopped on svc instruction */
+ if ((insn >> 8) != 0xa)
+ return;
+
+ regs->int_code = 0x20000 | (data & 0xffff);
+}
/*
* Write a word to the user area of a process at location addr. This
* operation does have an additional problem compared to peek_user.
@@ -335,7 +352,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
struct user *dummy = NULL;
addr_t offset;
+
if (addr < (addr_t) &dummy->regs.acrs) {
+ struct pt_regs *regs = task_pt_regs(child);
/*
* psw and gprs are stored on the stack
*/
@@ -353,7 +372,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
/* Invalid addressing mode bits */
return -EINVAL;
}
- *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
+
+ if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
+ addr == offsetof(struct user, regs.gprs[2]))
+ fixup_int_code(child, data);
+ *(addr_t *)((addr_t) &regs->psw + addr) = data;
} else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
/*
@@ -719,6 +742,10 @@ static int __poke_user_compat(struct task_struct *child,
regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
(__u64)(tmp & PSW32_ADDR_AMODE);
} else {
+
+ if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
+ addr == offsetof(struct compat_user, regs.gprs[2]))
+ fixup_int_code(child, data);
/* gpr 0-15 */
*(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
}
@@ -838,40 +865,66 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
{
unsigned long mask = -1UL;
+ long ret = -1;
+
+ if (is_compat_task())
+ mask = 0xffffffff;
/*
* The sysc_tracesys code in entry.S stored the system
* call number to gprs[2].
*/
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
- (tracehook_report_syscall_entry(regs) ||
- regs->gprs[2] >= NR_syscalls)) {
+ tracehook_report_syscall_entry(regs)) {
/*
- * Tracing decided this syscall should not happen or the
- * debugger stored an invalid system call number. Skip
+ * Tracing decided this syscall should not happen. Skip
* the system call and the system call restart handling.
*/
- clear_pt_regs_flag(regs, PIF_SYSCALL);
- return -1;
+ goto skip;
}
+#ifdef CONFIG_SECCOMP
/* Do the secure computing check after ptrace. */
- if (secure_computing()) {
- /* seccomp failures shouldn't expose any additional code. */
- return -1;
+ if (unlikely(test_thread_flag(TIF_SECCOMP))) {
+ struct seccomp_data sd;
+
+ if (is_compat_task()) {
+ sd.instruction_pointer = regs->psw.addr & 0x7fffffff;
+ sd.arch = AUDIT_ARCH_S390;
+ } else {
+ sd.instruction_pointer = regs->psw.addr;
+ sd.arch = AUDIT_ARCH_S390X;
+ }
+
+ sd.nr = regs->int_code & 0xffff;
+ sd.args[0] = regs->orig_gpr2 & mask;
+ sd.args[1] = regs->gprs[3] & mask;
+ sd.args[2] = regs->gprs[4] & mask;
+ sd.args[3] = regs->gprs[5] & mask;
+ sd.args[4] = regs->gprs[6] & mask;
+ sd.args[5] = regs->gprs[7] & mask;
+
+ if (__secure_computing(&sd) == -1)
+ goto skip;
}
+#endif /* CONFIG_SECCOMP */
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_enter(regs, regs->gprs[2]);
+ trace_sys_enter(regs, regs->int_code & 0xffff);
- if (is_compat_task())
- mask = 0xffffffff;
- audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask,
+ audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask,
regs->gprs[3] &mask, regs->gprs[4] &mask,
regs->gprs[5] &mask);
+ if ((signed long)regs->gprs[2] >= NR_syscalls) {
+ regs->gprs[2] = -ENOSYS;
+ ret = -ENOSYS;
+ }
return regs->gprs[2];
+skip:
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ return ret;
}
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
@@ -891,28 +944,14 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
static int s390_regs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
+ unsigned pos;
if (target == current)
save_access_regs(target->thread.acrs);
- if (kbuf) {
- unsigned long *k = kbuf;
- while (count > 0) {
- *k++ = __peek_user(target, pos);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- unsigned long __user *u = ubuf;
- while (count > 0) {
- if (__put_user(__peek_user(target, pos), u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
+ for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long))
+ membuf_store(&to, __peek_user(target, pos));
return 0;
}
@@ -953,8 +992,8 @@ static int s390_regs_set(struct task_struct *target,
}
static int s390_fpregs_get(struct task_struct *target,
- const struct user_regset *regset, unsigned int pos,
- unsigned int count, void *kbuf, void __user *ubuf)
+ const struct user_regset *regset,
+ struct membuf to)
{
_s390_fp_regs fp_regs;
@@ -964,8 +1003,7 @@ static int s390_fpregs_get(struct task_struct *target,
fp_regs.fpc = target->thread.fpu.fpc;
fpregs_store(&fp_regs, &target->thread.fpu);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &fp_regs, 0, -1);
+ return membuf_write(&to, &fp_regs, sizeof(fp_regs));
}
static int s390_fpregs_set(struct task_struct *target,
@@ -1012,20 +1050,9 @@ static int s390_fpregs_set(struct task_struct *target,
static int s390_last_break_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- if (count > 0) {
- if (kbuf) {
- unsigned long *k = kbuf;
- *k = target->thread.last_break;
- } else {
- unsigned long __user *u = ubuf;
- if (__put_user(target->thread.last_break, u))
- return -EFAULT;
- }
- }
- return 0;
+ return membuf_store(&to, target->thread.last_break);
}
static int s390_last_break_set(struct task_struct *target,
@@ -1038,16 +1065,13 @@ static int s390_last_break_set(struct task_struct *target,
static int s390_tdb_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct pt_regs *regs = task_pt_regs(target);
- unsigned char *data;
if (!(regs->int_code & 0x200))
return -ENODATA;
- data = target->thread.trap_tdb;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256);
+ return membuf_write(&to, target->thread.trap_tdb, 256);
}
static int s390_tdb_set(struct task_struct *target,
@@ -1060,8 +1084,7 @@ static int s390_tdb_set(struct task_struct *target,
static int s390_vxrs_low_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
__u64 vxrs[__NUM_VXRS_LOW];
int i;
@@ -1072,7 +1095,7 @@ static int s390_vxrs_low_get(struct task_struct *target,
save_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ return membuf_write(&to, vxrs, sizeof(vxrs));
}
static int s390_vxrs_low_set(struct task_struct *target,
@@ -1101,18 +1124,14 @@ static int s390_vxrs_low_set(struct task_struct *target,
static int s390_vxrs_high_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- __vector128 vxrs[__NUM_VXRS_HIGH];
-
if (!MACHINE_HAS_VX)
return -ENODEV;
if (target == current)
save_fpu_regs();
- memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ __NUM_VXRS_HIGH * sizeof(__vector128));
}
static int s390_vxrs_high_set(struct task_struct *target,
@@ -1134,12 +1153,9 @@ static int s390_vxrs_high_set(struct task_struct *target,
static int s390_system_call_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- unsigned int *data = &target->thread.system_call;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(unsigned int));
+ return membuf_store(&to, target->thread.system_call);
}
static int s390_system_call_set(struct task_struct *target,
@@ -1154,8 +1170,7 @@ static int s390_system_call_set(struct task_struct *target,
static int s390_gs_cb_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct gs_cb *data = target->thread.gs_cb;
@@ -1165,8 +1180,7 @@ static int s390_gs_cb_get(struct task_struct *target,
return -ENODATA;
if (target == current)
save_gs_cb(data);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct gs_cb));
+ return membuf_write(&to, data, sizeof(struct gs_cb));
}
static int s390_gs_cb_set(struct task_struct *target,
@@ -1210,8 +1224,7 @@ static int s390_gs_cb_set(struct task_struct *target,
static int s390_gs_bc_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct gs_cb *data = target->thread.gs_bc_cb;
@@ -1219,8 +1232,7 @@ static int s390_gs_bc_get(struct task_struct *target,
return -ENODEV;
if (!data)
return -ENODATA;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct gs_cb));
+ return membuf_write(&to, data, sizeof(struct gs_cb));
}
static int s390_gs_bc_set(struct task_struct *target,
@@ -1256,7 +1268,6 @@ static bool is_ri_cb_valid(struct runtime_instr_cb *cb)
cb->pc == 1 &&
cb->qc == 0 &&
cb->reserved2 == 0 &&
- cb->key == PAGE_DEFAULT_KEY &&
cb->reserved3 == 0 &&
cb->reserved4 == 0 &&
cb->reserved5 == 0 &&
@@ -1271,8 +1282,7 @@ static bool is_ri_cb_valid(struct runtime_instr_cb *cb)
static int s390_runtime_instr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct runtime_instr_cb *data = target->thread.ri_cb;
@@ -1281,8 +1291,7 @@ static int s390_runtime_instr_get(struct task_struct *target,
if (!data)
return -ENODATA;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct runtime_instr_cb));
+ return membuf_write(&to, data, sizeof(struct runtime_instr_cb));
}
static int s390_runtime_instr_set(struct task_struct *target,
@@ -1320,7 +1329,11 @@ static int s390_runtime_instr_set(struct task_struct *target,
kfree(data);
return -EINVAL;
}
-
+ /*
+ * Override access key in any case, since user space should
+ * not be able to set it, nor should it care about it.
+ */
+ ri_cb.key = PAGE_DEFAULT_KEY >> 4;
preempt_disable();
if (!target->thread.ri_cb)
target->thread.ri_cb = data;
@@ -1338,7 +1351,7 @@ static const struct user_regset s390_regsets[] = {
.n = sizeof(s390_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_regs_get,
+ .regset_get = s390_regs_get,
.set = s390_regs_set,
},
{
@@ -1346,7 +1359,7 @@ static const struct user_regset s390_regsets[] = {
.n = sizeof(s390_fp_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_fpregs_get,
+ .regset_get = s390_fpregs_get,
.set = s390_fpregs_set,
},
{
@@ -1354,7 +1367,7 @@ static const struct user_regset s390_regsets[] = {
.n = 1,
.size = sizeof(unsigned int),
.align = sizeof(unsigned int),
- .get = s390_system_call_get,
+ .regset_get = s390_system_call_get,
.set = s390_system_call_set,
},
{
@@ -1362,7 +1375,7 @@ static const struct user_regset s390_regsets[] = {
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_last_break_get,
+ .regset_get = s390_last_break_get,
.set = s390_last_break_set,
},
{
@@ -1370,7 +1383,7 @@ static const struct user_regset s390_regsets[] = {
.n = 1,
.size = 256,
.align = 1,
- .get = s390_tdb_get,
+ .regset_get = s390_tdb_get,
.set = s390_tdb_set,
},
{
@@ -1378,7 +1391,7 @@ static const struct user_regset s390_regsets[] = {
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_vxrs_low_get,
+ .regset_get = s390_vxrs_low_get,
.set = s390_vxrs_low_set,
},
{
@@ -1386,7 +1399,7 @@ static const struct user_regset s390_regsets[] = {
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
- .get = s390_vxrs_high_get,
+ .regset_get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
{
@@ -1394,7 +1407,7 @@ static const struct user_regset s390_regsets[] = {
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_cb_get,
+ .regset_get = s390_gs_cb_get,
.set = s390_gs_cb_set,
},
{
@@ -1402,7 +1415,7 @@ static const struct user_regset s390_regsets[] = {
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_bc_get,
+ .regset_get = s390_gs_bc_get,
.set = s390_gs_bc_set,
},
{
@@ -1410,13 +1423,13 @@ static const struct user_regset s390_regsets[] = {
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_runtime_instr_get,
+ .regset_get = s390_runtime_instr_get,
.set = s390_runtime_instr_set,
},
};
static const struct user_regset_view user_s390_view = {
- .name = UTS_MACHINE,
+ .name = "s390x",
.e_machine = EM_S390,
.regsets = s390_regsets,
.n = ARRAY_SIZE(s390_regsets)
@@ -1425,28 +1438,15 @@ static const struct user_regset_view user_s390_view = {
#ifdef CONFIG_COMPAT
static int s390_compat_regs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
+ unsigned n;
+
if (target == current)
save_access_regs(target->thread.acrs);
- if (kbuf) {
- compat_ulong_t *k = kbuf;
- while (count > 0) {
- *k++ = __peek_user_compat(target, pos);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- compat_ulong_t __user *u = ubuf;
- while (count > 0) {
- if (__put_user(__peek_user_compat(target, pos), u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
+ for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t))
+ membuf_store(&to, __peek_user_compat(target, n));
return 0;
}
@@ -1488,29 +1488,14 @@ static int s390_compat_regs_set(struct task_struct *target,
static int s390_compat_regs_high_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
compat_ulong_t *gprs_high;
+ int i;
- gprs_high = (compat_ulong_t *)
- &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
- if (kbuf) {
- compat_ulong_t *k = kbuf;
- while (count > 0) {
- *k++ = *gprs_high;
- gprs_high += 2;
- count -= sizeof(*k);
- }
- } else {
- compat_ulong_t __user *u = ubuf;
- while (count > 0) {
- if (__put_user(*gprs_high, u++))
- return -EFAULT;
- gprs_high += 2;
- count -= sizeof(*u);
- }
- }
+ gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs;
+ for (i = 0; i < NUM_GPRS; i++, gprs_high += 2)
+ membuf_store(&to, *gprs_high);
return 0;
}
@@ -1549,23 +1534,11 @@ static int s390_compat_regs_high_set(struct task_struct *target,
static int s390_compat_last_break_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- compat_ulong_t last_break;
+ compat_ulong_t last_break = target->thread.last_break;
- if (count > 0) {
- last_break = target->thread.last_break;
- if (kbuf) {
- unsigned long *k = kbuf;
- *k = last_break;
- } else {
- unsigned long __user *u = ubuf;
- if (__put_user(last_break, u))
- return -EFAULT;
- }
- }
- return 0;
+ return membuf_store(&to, (unsigned long)last_break);
}
static int s390_compat_last_break_set(struct task_struct *target,
@@ -1582,7 +1555,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
- .get = s390_compat_regs_get,
+ .regset_get = s390_compat_regs_get,
.set = s390_compat_regs_set,
},
{
@@ -1590,7 +1563,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
- .get = s390_fpregs_get,
+ .regset_get = s390_fpregs_get,
.set = s390_fpregs_set,
},
{
@@ -1598,7 +1571,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = 1,
.size = sizeof(compat_uint_t),
.align = sizeof(compat_uint_t),
- .get = s390_system_call_get,
+ .regset_get = s390_system_call_get,
.set = s390_system_call_set,
},
{
@@ -1606,7 +1579,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_compat_last_break_get,
+ .regset_get = s390_compat_last_break_get,
.set = s390_compat_last_break_set,
},
{
@@ -1614,7 +1587,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = 1,
.size = 256,
.align = 1,
- .get = s390_tdb_get,
+ .regset_get = s390_tdb_get,
.set = s390_tdb_set,
},
{
@@ -1622,7 +1595,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_vxrs_low_get,
+ .regset_get = s390_vxrs_low_get,
.set = s390_vxrs_low_set,
},
{
@@ -1630,7 +1603,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
- .get = s390_vxrs_high_get,
+ .regset_get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
{
@@ -1638,7 +1611,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
- .get = s390_compat_regs_high_get,
+ .regset_get = s390_compat_regs_high_get,
.set = s390_compat_regs_high_set,
},
{
@@ -1646,7 +1619,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_cb_get,
+ .regset_get = s390_gs_cb_get,
.set = s390_gs_cb_set,
},
{
@@ -1654,7 +1627,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_bc_get,
+ .regset_get = s390_gs_bc_get,
.set = s390_gs_bc_set,
},
{
@@ -1662,7 +1635,7 @@ static const struct user_regset s390_compat_regsets[] = {
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_runtime_instr_get,
+ .regset_get = s390_runtime_instr_get,
.set = s390_runtime_instr_set,
},
};
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 125c7f6e8715..1788a5454b6f 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -57,7 +57,7 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
cb->k = 1;
cb->ps = 1;
cb->pc = 1;
- cb->key = PAGE_DEFAULT_KEY;
+ cb->key = PAGE_DEFAULT_KEY >> 4;
cb->v = 1;
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 36445dd40fdb..c2c1b4e723ea 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -305,12 +305,9 @@ void *restart_stack __section(.data);
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
- return (unsigned long)
- __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
- VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
+ THREADINFO_GFP, NUMA_NO_NODE,
+ __builtin_return_address(0));
#else
return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
@@ -384,8 +381,7 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_psw.addr = (unsigned long) restart_int_handler;
lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = PSW_KERNEL_BITS |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = (unsigned long) system_call;
lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
@@ -597,9 +593,10 @@ static void __init setup_memory_end(void)
#ifdef CONFIG_CRASH_DUMP
/*
- * When kdump is enabled, we have to ensure that no memory from
- * the area [0 - crashkernel memory size] and
- * [crashk_res.start - crashk_res.end] is set offline.
+ * When kdump is enabled, we have to ensure that no memory from the area
+ * [0 - crashkernel memory size] is set offline - it will be exchanged with
+ * the crashkernel memory region when kdump is triggered. The crashkernel
+ * memory region can never get offlined (pages are unmovable).
*/
static int kdump_mem_notifier(struct notifier_block *nb,
unsigned long action, void *data)
@@ -610,11 +607,7 @@ static int kdump_mem_notifier(struct notifier_block *nb,
return NOTIFY_OK;
if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
return NOTIFY_BAD;
- if (arg->start_pfn > PFN_DOWN(crashk_res.end))
- return NOTIFY_OK;
- if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
- return NOTIFY_OK;
- return NOTIFY_BAD;
+ return NOTIFY_OK;
}
static struct notifier_block kdump_mem_nb = {
@@ -626,7 +619,7 @@ static struct notifier_block kdump_mem_nb = {
/*
* Make sure that the area behind memory_end is protected
*/
-static void reserve_memory_end(void)
+static void __init reserve_memory_end(void)
{
if (memory_end_set)
memblock_reserve(memory_end, ULONG_MAX);
@@ -635,7 +628,7 @@ static void reserve_memory_end(void)
/*
* Make sure that oldmem, where the dump is stored, is protected
*/
-static void reserve_oldmem(void)
+static void __init reserve_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
if (OLDMEM_BASE)
@@ -647,7 +640,7 @@ static void reserve_oldmem(void)
/*
* Make sure that oldmem, where the dump is stored, is protected
*/
-static void remove_oldmem(void)
+static void __init remove_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
if (OLDMEM_BASE)
@@ -1028,8 +1021,7 @@ static void __init setup_control_program_code(void)
{
union diag318_info diag318_info = {
.cpnc = CPNC_LINUX,
- .cpvc_linux = 0,
- .cpvc_distro = {0},
+ .cpvc = 0,
};
if (!sclp.has_diag318)
@@ -1107,6 +1099,7 @@ void __init setup_arch(char **cmdline_p)
if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
nospec_auto_detect();
+ jump_label_init();
parse_early_param();
#ifdef CONFIG_CRASH_DUMP
/* Deactivate elfcorehdr= kernel parameter */
@@ -1133,14 +1126,6 @@ void __init setup_arch(char **cmdline_p)
free_mem_detect_info();
remove_oldmem();
- /*
- * Make sure all chunks are MAX_ORDER aligned so we don't need the
- * extra checks that HOLES_IN_ZONE would require.
- *
- * Is this still required?
- */
- memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
-
if (is_prot_virt_host())
setup_uv();
setup_memory_end();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 10dbb12eb14d..85700bd85f98 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -61,6 +61,7 @@ enum {
ec_schedule = 0,
ec_call_function_single,
ec_stop_cpu,
+ ec_mcck_pending,
};
enum {
@@ -145,7 +146,7 @@ static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
static inline int pcpu_stopped(struct pcpu *pcpu)
{
- u32 uninitialized_var(status);
+ u32 status;
if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
0, &status) != SIGP_CC_STATUS_STORED)
@@ -403,6 +404,11 @@ int smp_find_processor_id(u16 address)
return -1;
}
+void schedule_mcck_handler(void)
+{
+ pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+}
+
bool notrace arch_vcpu_is_preempted(int cpu)
{
if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
@@ -497,6 +503,8 @@ static void smp_handle_ext_call(void)
scheduler_ipi();
if (test_bit(ec_call_function_single, &bits))
generic_smp_call_function_single_interrupt();
+ if (test_bit(ec_mcck_pending, &bits))
+ s390_handle_mcck();
}
static void do_ext_call_interrupt(struct ext_code ext_code,
@@ -1004,10 +1012,6 @@ void __init smp_prepare_boot_cpu(void)
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
}
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-}
-
void __init smp_setup_processor_id(void)
{
pcpu_devices[0].address = stap();
@@ -1137,6 +1141,7 @@ static int smp_cpu_online(unsigned int cpu)
return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
}
+
static int smp_cpu_pre_down(unsigned int cpu)
{
struct device *s = &per_cpu(cpu_device, cpu)->dev;
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bd7bd3581a0f..10456bc936fb 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -138,7 +138,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl compat_sys_sysctl
+149 common _sysctl - -
150 common mlock sys_mlock sys_mlock
151 common munlock sys_munlock sys_munlock
152 common mlockall sys_mlockall sys_mlockall
@@ -372,8 +372,8 @@
362 common connect sys_connect sys_connect
363 common listen sys_listen sys_listen
364 common accept4 sys_accept4 sys_accept4
-365 common getsockopt sys_getsockopt compat_sys_getsockopt
-366 common setsockopt sys_setsockopt compat_sys_setsockopt
+365 common getsockopt sys_getsockopt sys_getsockopt
+366 common setsockopt sys_setsockopt sys_setsockopt
367 common getsockname sys_getsockname sys_getsockname
368 common getpeername sys_getpeername sys_getpeername
369 common sendto sys_sendto sys_sendto
@@ -438,5 +438,7 @@
433 common fspick sys_fspick sys_fspick
434 common pidfd_open sys_pidfd_open sys_pidfd_open
435 common clone3 sys_clone3 sys_clone3
+436 common close_range sys_close_range sys_close_range
437 common openat2 sys_openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2 sys_faccessat2
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index f9d070d016e3..513e59d08a55 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -237,7 +237,7 @@ static u64 read_tod_clock(struct clocksource *cs)
preempt_disable(); /* protect from changes to steering parameters */
now = get_tod_clock();
adj = tod_steering_end - now;
- if (unlikely((s64) adj >= 0))
+ if (unlikely((s64) adj > 0))
/*
* manually steer by 1 cycle every 2^16 cycles. This
* corresponds to shifting the tod delta by 15. 1s is
@@ -253,7 +253,7 @@ static struct clocksource clocksource_tod = {
.name = "tod",
.rating = 400,
.read = read_tod_clock,
- .mask = -1ULL,
+ .mask = CLOCKSOURCE_MASK(64),
.mult = 1000,
.shift = 12,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -301,6 +301,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->tk_mult = tk->tkr_mono.mult;
vdso_data->tk_shift = tk->tkr_mono.shift;
+ vdso_data->hrtimer_res = hrtimer_resolution;
smp_wmb();
++vdso_data->tb_update_count;
}
@@ -668,7 +669,7 @@ static void stp_work_fn(struct work_struct *work)
* There is a usable clock but the synchonization failed.
* Retry after a second.
*/
- mod_timer(&stp_timer, jiffies + HZ);
+ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
out_unlock:
mutex_unlock(&stp_work_mutex);
@@ -682,7 +683,7 @@ static struct bus_type stp_subsys = {
.dev_name = "stp",
};
-static ssize_t stp_ctn_id_show(struct device *dev,
+static ssize_t ctn_id_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -692,9 +693,9 @@ static ssize_t stp_ctn_id_show(struct device *dev,
*(unsigned long long *) stp_info.ctnid);
}
-static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+static DEVICE_ATTR_RO(ctn_id);
-static ssize_t stp_ctn_type_show(struct device *dev,
+static ssize_t ctn_type_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -703,9 +704,9 @@ static ssize_t stp_ctn_type_show(struct device *dev,
return sprintf(buf, "%i\n", stp_info.ctn);
}
-static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+static DEVICE_ATTR_RO(ctn_type);
-static ssize_t stp_dst_offset_show(struct device *dev,
+static ssize_t dst_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -714,9 +715,9 @@ static ssize_t stp_dst_offset_show(struct device *dev,
return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
}
-static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+static DEVICE_ATTR_RO(dst_offset);
-static ssize_t stp_leap_seconds_show(struct device *dev,
+static ssize_t leap_seconds_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -725,9 +726,9 @@ static ssize_t stp_leap_seconds_show(struct device *dev,
return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
}
-static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+static DEVICE_ATTR_RO(leap_seconds);
-static ssize_t stp_stratum_show(struct device *dev,
+static ssize_t stratum_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -736,9 +737,9 @@ static ssize_t stp_stratum_show(struct device *dev,
return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
}
-static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
+static DEVICE_ATTR_RO(stratum);
-static ssize_t stp_time_offset_show(struct device *dev,
+static ssize_t time_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -747,9 +748,9 @@ static ssize_t stp_time_offset_show(struct device *dev,
return sprintf(buf, "%i\n", (int) stp_info.tto);
}
-static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+static DEVICE_ATTR_RO(time_offset);
-static ssize_t stp_time_zone_offset_show(struct device *dev,
+static ssize_t time_zone_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -758,10 +759,9 @@ static ssize_t stp_time_zone_offset_show(struct device *dev,
return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
}
-static DEVICE_ATTR(time_zone_offset, 0400,
- stp_time_zone_offset_show, NULL);
+static DEVICE_ATTR_RO(time_zone_offset);
-static ssize_t stp_timing_mode_show(struct device *dev,
+static ssize_t timing_mode_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -770,9 +770,9 @@ static ssize_t stp_timing_mode_show(struct device *dev,
return sprintf(buf, "%i\n", stp_info.tmd);
}
-static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+static DEVICE_ATTR_RO(timing_mode);
-static ssize_t stp_timing_state_show(struct device *dev,
+static ssize_t timing_state_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
@@ -781,16 +781,16 @@ static ssize_t stp_timing_state_show(struct device *dev,
return sprintf(buf, "%i\n", stp_info.tst);
}
-static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+static DEVICE_ATTR_RO(timing_state);
-static ssize_t stp_online_show(struct device *dev,
+static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%i\n", stp_online);
}
-static ssize_t stp_online_store(struct device *dev,
+static ssize_t online_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -816,18 +816,14 @@ static ssize_t stp_online_store(struct device *dev,
* Can't use DEVICE_ATTR because the attribute should be named
* stp/online but dev_attr_online already exists in this file ..
*/
-static struct device_attribute dev_attr_stp_online = {
- .attr = { .name = "online", .mode = 0600 },
- .show = stp_online_show,
- .store = stp_online_store,
-};
+static DEVICE_ATTR_RW(online);
static struct device_attribute *stp_attributes[] = {
&dev_attr_ctn_id,
&dev_attr_ctn_type,
&dev_attr_dst_offset,
&dev_attr_leap_seconds,
- &dev_attr_stp_online,
+ &dev_attr_online,
&dev_attr_stratum,
&dev_attr_time_offset,
&dev_attr_time_zone_offset,
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 5f70cefc13e4..ca47141a5be9 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -356,9 +356,9 @@ static atomic_t topology_poll = ATOMIC_INIT(0);
static void set_topology_timer(void)
{
if (atomic_add_unless(&topology_poll, -1, 0))
- mod_timer(&topology_timer, jiffies + HZ / 10);
+ mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
else
- mod_timer(&topology_timer, jiffies + HZ * 60);
+ mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
}
void topology_expect_change(void)
@@ -594,7 +594,7 @@ static int __init topology_setup(char *str)
early_param("topology", topology_setup);
static int topology_ctl_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int enabled = topology_is_enabled();
int new_mode;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index ff9cc4c3290e..8d1e8a1a97df 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -50,11 +50,8 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
} else {
const struct exception_table_entry *fixup;
fixup = s390_search_extables(regs->psw.addr);
- if (fixup)
- regs->psw.addr = extable_fixup(fixup);
- else {
+ if (!fixup || !ex_handle(fixup, regs))
die(regs, str);
- }
}
}
@@ -251,7 +248,7 @@ void monitor_event_exception(struct pt_regs *regs)
case BUG_TRAP_TYPE_NONE:
fixup = s390_search_extables(regs->psw.addr);
if (fixup)
- regs->psw.addr = extable_fixup(fixup);
+ ex_handle(fixup, regs);
break;
case BUG_TRAP_TYPE_WARN:
break;
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 4c0677fc8904..c296e5c8dbf9 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -205,7 +205,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
again:
rc = -EFAULT;
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
uaddr = __gmap_translate(gmap, gaddr);
if (IS_ERR_VALUE(uaddr))
@@ -234,7 +234,7 @@ again:
pte_unmap_unlock(ptep, ptelock);
unlock_page(page);
out:
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
if (rc == -EAGAIN) {
wait_on_page_writeback(page);
@@ -331,7 +331,7 @@ EXPORT_SYMBOL_GPL(arch_make_page_accessible);
static ssize_t uv_query_facilities(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
+ return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
uv_info.inst_calls_list[0],
uv_info.inst_calls_list[1],
uv_info.inst_calls_list[2],
@@ -344,7 +344,7 @@ static struct kobj_attribute uv_query_facilities_attr =
static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return snprintf(page, PAGE_SIZE, "%d\n",
+ return scnprintf(page, PAGE_SIZE, "%d\n",
uv_info.max_guest_cpus);
}
@@ -354,7 +354,7 @@ static struct kobj_attribute uv_query_max_guest_cpus_attr =
static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return snprintf(page, PAGE_SIZE, "%d\n",
+ return scnprintf(page, PAGE_SIZE, "%d\n",
uv_info.max_num_sec_conf);
}
@@ -364,7 +364,7 @@ static struct kobj_attribute uv_query_max_guest_vms_attr =
static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return snprintf(page, PAGE_SIZE, "%lx\n",
+ return scnprintf(page, PAGE_SIZE, "%lx\n",
uv_info.max_sec_stor_addr);
}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index bcc9bdb39ba2..c4baefaa6e34 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -21,7 +21,6 @@
#include <linux/memblock.h>
#include <linux/compat.h>
#include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
@@ -208,7 +207,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
* it at vdso_base which is the "natural" base for it, but we might
* fail and end up putting it elsewhere.
*/
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
@@ -239,7 +238,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
rc = 0;
out_up:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return rc;
}
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index bec19e7e6e1c..4a66a1cb919b 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -18,8 +18,8 @@ KBUILD_AFLAGS_64 += -m64 -s
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
-KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- -Wl,--hash-style=both
+ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \
+ --hash-style=both --build-id -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
@@ -37,8 +37,8 @@ KASAN_SANITIZE := n
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
- $(call if_changed,vdso64ld)
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE
+ $(call if_changed,ld)
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -50,8 +50,6 @@ $(obj-vdso64): %.o: %.S FORCE
$(call if_changed_dep,vdso64as)
# actual build commands
-quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
quiet_cmd_vdso64as = VDSO64A $@
cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 081435398e0a..0c79caa32b59 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -17,12 +17,14 @@
.type __kernel_clock_getres,@function
__kernel_clock_getres:
CFI_STARTPROC
- larl %r1,4f
+ larl %r1,3f
+ lg %r0,0(%r1)
cghi %r2,__CLOCK_REALTIME_COARSE
je 0f
cghi %r2,__CLOCK_MONOTONIC_COARSE
je 0f
- larl %r1,3f
+ larl %r1,_vdso_data
+ llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1)
cghi %r2,__CLOCK_REALTIME
je 0f
cghi %r2,__CLOCK_MONOTONIC
@@ -36,7 +38,6 @@ __kernel_clock_getres:
jz 2f
0: ltgr %r3,%r3
jz 1f /* res == NULL */
- lg %r0,0(%r1)
xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */
stg %r0,8(%r3) /* store tp->tv_usec */
1: lghi %r2,0
@@ -45,6 +46,5 @@ __kernel_clock_getres:
svc 0
br %r14
CFI_ENDPROC
-3: .quad __CLOCK_REALTIME_RES
-4: .quad __CLOCK_COARSE_RES
+3: .quad __CLOCK_COARSE_RES
.size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index def3b60f1fe8..67a8e770e369 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -7,7 +7,7 @@ source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
def_bool y
prompt "KVM"
- ---help---
+ help
Say Y here to get to see options for using your Linux host to run other
operating systems inside virtual machines (guests).
This option alone does not add any kernel code.
@@ -33,7 +33,7 @@ config KVM
select HAVE_KVM_NO_POLL
select SRCU
select KVM_VFIO
- ---help---
+ help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
on any 64bit machine.
@@ -49,7 +49,7 @@ config KVM
config KVM_S390_UCONTROL
bool "Userspace controlled virtual machines"
depends on KVM
- ---help---
+ help
Allow CAP_SYS_ADMIN users to create KVM virtual machines that are
controlled by userspace.
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 563429dece03..5b8ec1c447e1 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -10,7 +10,6 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
-#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 47a67a958107..6d6b57059493 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -9,8 +9,8 @@
#include <linux/vmalloc.h>
#include <linux/mm_types.h>
#include <linux/err.h>
+#include <linux/pgtable.h>
-#include <asm/pgtable.h>
#include <asm/gmap.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -1173,7 +1173,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
int dat_protection, fake;
int rc;
- down_read(&sg->mm->mmap_sem);
+ mmap_read_lock(sg->mm);
/*
* We don't want any guest-2 tables to change - so the parent
* tables/pointers we read stay valid - unshadowing is however
@@ -1202,6 +1202,6 @@ shadow_page:
if (!rc)
rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
ipte_unlock(vcpu);
- up_read(&sg->mm->mmap_sem);
+ mmap_read_unlock(sg->mm);
return rc;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index bfb481134994..2f177298c663 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2767,10 +2767,10 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
{
struct page *page = NULL;
- down_read(&kvm->mm->mmap_sem);
- get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
+ mmap_read_lock(kvm->mm);
+ get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
&page, NULL, NULL);
- up_read(&kvm->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
return page;
}
@@ -3082,7 +3082,7 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer)
__airqs_kick_single_vcpu(kvm, pending_mask);
hrtimer_forward_now(timer, ns_to_ktime(gi->expires));
return HRTIMER_RESTART;
- };
+ }
return HRTIMER_NORESTART;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d05bb040fd42..6b74b92c1a58 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -31,11 +31,11 @@
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
+#include <linux/pgtable.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
-#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
@@ -57,110 +57,109 @@
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
(KVM_MAX_VCPUS + LOCAL_IRQS))
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "userspace_handled", VCPU_STAT(exit_userspace) },
- { "exit_null", VCPU_STAT(exit_null) },
- { "exit_validity", VCPU_STAT(exit_validity) },
- { "exit_stop_request", VCPU_STAT(exit_stop_request) },
- { "exit_external_request", VCPU_STAT(exit_external_request) },
- { "exit_io_request", VCPU_STAT(exit_io_request) },
- { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
- { "exit_instruction", VCPU_STAT(exit_instruction) },
- { "exit_pei", VCPU_STAT(exit_pei) },
- { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
- { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
- { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
- { "instruction_lctl", VCPU_STAT(instruction_lctl) },
- { "instruction_stctl", VCPU_STAT(instruction_stctl) },
- { "instruction_stctg", VCPU_STAT(instruction_stctg) },
- { "deliver_ckc", VCPU_STAT(deliver_ckc) },
- { "deliver_cputm", VCPU_STAT(deliver_cputm) },
- { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
- { "deliver_external_call", VCPU_STAT(deliver_external_call) },
- { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
- { "deliver_virtio", VCPU_STAT(deliver_virtio) },
- { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
- { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
- { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
- { "deliver_program", VCPU_STAT(deliver_program) },
- { "deliver_io", VCPU_STAT(deliver_io) },
- { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
- { "exit_wait_state", VCPU_STAT(exit_wait_state) },
- { "inject_ckc", VCPU_STAT(inject_ckc) },
- { "inject_cputm", VCPU_STAT(inject_cputm) },
- { "inject_external_call", VCPU_STAT(inject_external_call) },
- { "inject_float_mchk", VM_STAT(inject_float_mchk) },
- { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
- { "inject_io", VM_STAT(inject_io) },
- { "inject_mchk", VCPU_STAT(inject_mchk) },
- { "inject_pfault_done", VM_STAT(inject_pfault_done) },
- { "inject_program", VCPU_STAT(inject_program) },
- { "inject_restart", VCPU_STAT(inject_restart) },
- { "inject_service_signal", VM_STAT(inject_service_signal) },
- { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
- { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
- { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
- { "inject_virtio", VM_STAT(inject_virtio) },
- { "instruction_epsw", VCPU_STAT(instruction_epsw) },
- { "instruction_gs", VCPU_STAT(instruction_gs) },
- { "instruction_io_other", VCPU_STAT(instruction_io_other) },
- { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
- { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
- { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
- { "instruction_ptff", VCPU_STAT(instruction_ptff) },
- { "instruction_stidp", VCPU_STAT(instruction_stidp) },
- { "instruction_sck", VCPU_STAT(instruction_sck) },
- { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
- { "instruction_spx", VCPU_STAT(instruction_spx) },
- { "instruction_stpx", VCPU_STAT(instruction_stpx) },
- { "instruction_stap", VCPU_STAT(instruction_stap) },
- { "instruction_iske", VCPU_STAT(instruction_iske) },
- { "instruction_ri", VCPU_STAT(instruction_ri) },
- { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
- { "instruction_sske", VCPU_STAT(instruction_sske) },
- { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
- { "instruction_essa", VCPU_STAT(instruction_essa) },
- { "instruction_stsi", VCPU_STAT(instruction_stsi) },
- { "instruction_stfl", VCPU_STAT(instruction_stfl) },
- { "instruction_tb", VCPU_STAT(instruction_tb) },
- { "instruction_tpi", VCPU_STAT(instruction_tpi) },
- { "instruction_tprot", VCPU_STAT(instruction_tprot) },
- { "instruction_tsch", VCPU_STAT(instruction_tsch) },
- { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
- { "instruction_sie", VCPU_STAT(instruction_sie) },
- { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
- { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
- { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
- { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
- { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
- { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
- { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
- { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
- { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
- { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
- { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
- { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
- { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
- { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
- { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
- { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
- { "instruction_diag_10", VCPU_STAT(diagnose_10) },
- { "instruction_diag_44", VCPU_STAT(diagnose_44) },
- { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
- { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
- { "instruction_diag_258", VCPU_STAT(diagnose_258) },
- { "instruction_diag_308", VCPU_STAT(diagnose_308) },
- { "instruction_diag_500", VCPU_STAT(diagnose_500) },
- { "instruction_diag_other", VCPU_STAT(diagnose_other) },
+ VCPU_STAT("userspace_handled", exit_userspace),
+ VCPU_STAT("exit_null", exit_null),
+ VCPU_STAT("exit_validity", exit_validity),
+ VCPU_STAT("exit_stop_request", exit_stop_request),
+ VCPU_STAT("exit_external_request", exit_external_request),
+ VCPU_STAT("exit_io_request", exit_io_request),
+ VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
+ VCPU_STAT("exit_instruction", exit_instruction),
+ VCPU_STAT("exit_pei", exit_pei),
+ VCPU_STAT("exit_program_interruption", exit_program_interruption),
+ VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
+ VCPU_STAT("exit_operation_exception", exit_operation_exception),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VCPU_STAT("instruction_lctlg", instruction_lctlg),
+ VCPU_STAT("instruction_lctl", instruction_lctl),
+ VCPU_STAT("instruction_stctl", instruction_stctl),
+ VCPU_STAT("instruction_stctg", instruction_stctg),
+ VCPU_STAT("deliver_ckc", deliver_ckc),
+ VCPU_STAT("deliver_cputm", deliver_cputm),
+ VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
+ VCPU_STAT("deliver_external_call", deliver_external_call),
+ VCPU_STAT("deliver_service_signal", deliver_service_signal),
+ VCPU_STAT("deliver_virtio", deliver_virtio),
+ VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
+ VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
+ VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
+ VCPU_STAT("deliver_program", deliver_program),
+ VCPU_STAT("deliver_io", deliver_io),
+ VCPU_STAT("deliver_machine_check", deliver_machine_check),
+ VCPU_STAT("exit_wait_state", exit_wait_state),
+ VCPU_STAT("inject_ckc", inject_ckc),
+ VCPU_STAT("inject_cputm", inject_cputm),
+ VCPU_STAT("inject_external_call", inject_external_call),
+ VM_STAT("inject_float_mchk", inject_float_mchk),
+ VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
+ VM_STAT("inject_io", inject_io),
+ VCPU_STAT("inject_mchk", inject_mchk),
+ VM_STAT("inject_pfault_done", inject_pfault_done),
+ VCPU_STAT("inject_program", inject_program),
+ VCPU_STAT("inject_restart", inject_restart),
+ VM_STAT("inject_service_signal", inject_service_signal),
+ VCPU_STAT("inject_set_prefix", inject_set_prefix),
+ VCPU_STAT("inject_stop_signal", inject_stop_signal),
+ VCPU_STAT("inject_pfault_init", inject_pfault_init),
+ VM_STAT("inject_virtio", inject_virtio),
+ VCPU_STAT("instruction_epsw", instruction_epsw),
+ VCPU_STAT("instruction_gs", instruction_gs),
+ VCPU_STAT("instruction_io_other", instruction_io_other),
+ VCPU_STAT("instruction_lpsw", instruction_lpsw),
+ VCPU_STAT("instruction_lpswe", instruction_lpswe),
+ VCPU_STAT("instruction_pfmf", instruction_pfmf),
+ VCPU_STAT("instruction_ptff", instruction_ptff),
+ VCPU_STAT("instruction_stidp", instruction_stidp),
+ VCPU_STAT("instruction_sck", instruction_sck),
+ VCPU_STAT("instruction_sckpf", instruction_sckpf),
+ VCPU_STAT("instruction_spx", instruction_spx),
+ VCPU_STAT("instruction_stpx", instruction_stpx),
+ VCPU_STAT("instruction_stap", instruction_stap),
+ VCPU_STAT("instruction_iske", instruction_iske),
+ VCPU_STAT("instruction_ri", instruction_ri),
+ VCPU_STAT("instruction_rrbe", instruction_rrbe),
+ VCPU_STAT("instruction_sske", instruction_sske),
+ VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
+ VCPU_STAT("instruction_essa", instruction_essa),
+ VCPU_STAT("instruction_stsi", instruction_stsi),
+ VCPU_STAT("instruction_stfl", instruction_stfl),
+ VCPU_STAT("instruction_tb", instruction_tb),
+ VCPU_STAT("instruction_tpi", instruction_tpi),
+ VCPU_STAT("instruction_tprot", instruction_tprot),
+ VCPU_STAT("instruction_tsch", instruction_tsch),
+ VCPU_STAT("instruction_sthyi", instruction_sthyi),
+ VCPU_STAT("instruction_sie", instruction_sie),
+ VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
+ VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
+ VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
+ VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
+ VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
+ VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
+ VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
+ VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
+ VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
+ VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
+ VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
+ VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
+ VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
+ VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
+ VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
+ VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
+ VCPU_STAT("instruction_diag_10", diagnose_10),
+ VCPU_STAT("instruction_diag_44", diagnose_44),
+ VCPU_STAT("instruction_diag_9c", diagnose_9c),
+ VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
+ VCPU_STAT("instruction_diag_258", diagnose_258),
+ VCPU_STAT("instruction_diag_308", diagnose_308),
+ VCPU_STAT("instruction_diag_500", diagnose_500),
+ VCPU_STAT("instruction_diag_other", diagnose_other),
{ NULL }
};
@@ -546,6 +545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_AIS_MIGRATION:
case KVM_CAP_S390_VCPU_RESETS:
case KVM_CAP_SET_GUEST_DEBUG:
+ case KVM_CAP_S390_DIAG318:
r = 1;
break;
case KVM_CAP_S390_HPAGE_1M:
@@ -764,9 +764,9 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
r = -EINVAL;
else {
r = 0;
- down_write(&kvm->mm->mmap_sem);
+ mmap_write_lock(kvm->mm);
kvm->mm->context.allow_gmap_hpage_1m = 1;
- up_write(&kvm->mm->mmap_sem);
+ mmap_write_unlock(kvm->mm);
/*
* We might have to create fake 4k page
* tables. To avoid that the hardware works on
@@ -1816,7 +1816,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (!keys)
return -ENOMEM;
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
srcu_idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -1830,7 +1830,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
break;
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (!r) {
r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
@@ -1874,7 +1874,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
goto out;
i = 0;
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
srcu_idx = srcu_read_lock(&kvm->srcu);
while (i < args->count) {
unlocked = false;
@@ -1892,7 +1892,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
if (r) {
- r = fixup_user_fault(current, current->mm, hva,
+ r = fixup_user_fault(current->mm, hva,
FAULT_FLAG_WRITE, &unlocked);
if (r)
break;
@@ -1901,7 +1901,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
i++;
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
out:
kvfree(keys);
return r;
@@ -2090,14 +2090,14 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
if (!values)
return -ENOMEM;
- down_read(&kvm->mm->mmap_sem);
+ mmap_read_lock(kvm->mm);
srcu_idx = srcu_read_lock(&kvm->srcu);
if (peek)
ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
else
ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
srcu_read_unlock(&kvm->srcu, srcu_idx);
- up_read(&kvm->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
if (kvm->arch.migration_mode)
args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
@@ -2147,7 +2147,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
goto out;
}
- down_read(&kvm->mm->mmap_sem);
+ mmap_read_lock(kvm->mm);
srcu_idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -2162,12 +2162,12 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
set_pgste_bits(kvm->mm, hva, mask, pgstev);
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
- up_read(&kvm->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
if (!kvm->mm->context.uses_cmm) {
- down_write(&kvm->mm->mmap_sem);
+ mmap_write_lock(kvm->mm);
kvm->mm->context.uses_cmm = 1;
- up_write(&kvm->mm->mmap_sem);
+ mmap_write_unlock(kvm->mm);
}
out:
vfree(bits);
@@ -2240,9 +2240,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (r)
break;
- down_write(&current->mm->mmap_sem);
+ mmap_write_lock(current->mm);
r = gmap_mark_unmergeable();
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
if (r)
break;
@@ -3268,7 +3268,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
KVM_SYNC_ACRS |
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
- KVM_SYNC_PFAULT;
+ KVM_SYNC_PFAULT |
+ KVM_SYNC_DIAG318;
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
@@ -3563,6 +3564,7 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->pp = 0;
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->todpr = 0;
+ vcpu->arch.sie_block->cpnc = 0;
}
}
@@ -3580,6 +3582,7 @@ static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
regs->etoken = 0;
regs->etoken_extension = 0;
+ regs->diag318 = 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -3924,11 +3927,13 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
}
}
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+
+ return true;
}
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
@@ -3944,7 +3949,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
/* s390 will always inject the page directly */
}
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
/*
* s390 will always inject the page directly,
@@ -3953,33 +3958,31 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
return true;
}
-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
hva_t hva;
struct kvm_arch_async_pf arch;
- int rc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
- return 0;
+ return false;
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare)
- return 0;
+ return false;
if (psw_extint_disabled(vcpu))
- return 0;
+ return false;
if (kvm_s390_vcpu_has_irq(vcpu, 0))
- return 0;
+ return false;
if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
- return 0;
+ return false;
if (!vcpu->arch.gmap->pfault_enabled)
- return 0;
+ return false;
hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
hva += current->thread.gmap_addr & ~PAGE_MASK;
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
- return 0;
+ return false;
- rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
- return rc;
+ return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
@@ -3999,9 +4002,6 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
if (need_resched())
schedule();
- if (test_cpu_flag(CIF_MCCK_PENDING))
- s390_handle_mcck();
-
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
if (rc)
@@ -4177,8 +4177,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
}
-static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
struct runtime_instr_cb *riccb;
struct gs_cb *gscb;
@@ -4198,6 +4199,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
}
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
+ vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
+ vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
+ }
/*
* If userspace sets the riccb (e.g. after migration) to a valid state,
* we should enable RI here instead of doing the lazy enablement.
@@ -4244,8 +4249,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
-static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
+
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
@@ -4274,7 +4281,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Sync fmt2 only data */
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
- sync_regs_fmt2(vcpu, kvm_run);
+ sync_regs_fmt2(vcpu);
} else {
/*
* In several places we have to modify our internal view to
@@ -4293,12 +4300,15 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->kvm_dirty_regs = 0;
}
-static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
+
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
+ kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
if (MACHINE_HAS_GS) {
__ctl_set_bit(2, 4);
if (vcpu->arch.gs_enabled)
@@ -4314,8 +4324,10 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
-static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void store_regs(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
+
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
@@ -4334,11 +4346,12 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
- store_regs_fmt2(vcpu, kvm_run);
+ store_regs_fmt2(vcpu);
}
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
int rc;
if (kvm_run->immediate_exit)
@@ -4371,7 +4384,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
- sync_regs(vcpu, kvm_run);
+ sync_regs(vcpu);
enable_cpu_timer_accounting(vcpu);
might_fault();
@@ -4393,7 +4406,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
disable_cpu_timer_accounting(vcpu);
- store_regs(vcpu, kvm_run);
+ store_regs(vcpu);
kvm_sigset_deactivate(vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 893893642415..cd74989ce0b0 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -13,6 +13,7 @@
#include <linux/errno.h>
#include <linux/compat.h>
#include <linux/mm_types.h>
+#include <linux/pgtable.h>
#include <asm/asm-offsets.h>
#include <asm/facility.h>
@@ -20,9 +21,7 @@
#include <asm/debug.h>
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
-#include <asm/pgtable.h>
#include <asm/page-states.h>
-#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/io.h>
#include <asm/ptrace.h>
@@ -270,18 +269,18 @@ static int handle_iske(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
rc = get_guest_storage_key(current->mm, vmaddr, &key);
if (rc) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
goto retry;
}
}
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc < 0)
@@ -317,17 +316,17 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
rc = reset_guest_reference_bit(current->mm, vmaddr);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
goto retry;
}
}
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc < 0)
@@ -385,17 +384,17 @@ static int handle_sske(struct kvm_vcpu *vcpu)
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
m3 & SSKE_NQ, m3 & SSKE_MR,
m3 & SSKE_MC);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc < 0)
@@ -1091,15 +1090,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (rc)
return rc;
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
rc = cond_set_guest_storage_key(current->mm, vmaddr,
key, NULL, nq, mr, mc);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc == -EAGAIN)
@@ -1122,7 +1121,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
/*
- * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
+ * Must be called with relevant read locks held (kvm->mm->mmap_lock, kvm->srcu)
*/
static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
{
@@ -1220,9 +1219,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
* already correct, we do nothing and avoid the lock.
*/
if (vcpu->kvm->mm->context.uses_cmm == 0) {
- down_write(&vcpu->kvm->mm->mmap_sem);
+ mmap_write_lock(vcpu->kvm->mm);
vcpu->kvm->mm->context.uses_cmm = 1;
- up_write(&vcpu->kvm->mm->mmap_sem);
+ mmap_write_unlock(vcpu->kvm->mm);
}
/*
* If we are here, we are supposed to have CMMA enabled in
@@ -1239,11 +1238,11 @@ static int handle_essa(struct kvm_vcpu *vcpu)
} else {
int srcu_idx;
- down_read(&vcpu->kvm->mm->mmap_sem);
+ mmap_read_lock(vcpu->kvm->mm);
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
i = __do_essa(vcpu, orc);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
- up_read(&vcpu->kvm->mm->mmap_sem);
+ mmap_read_unlock(vcpu->kvm->mm);
if (i < 0)
return i;
/* Account for the possible extra cbrl entry */
@@ -1251,10 +1250,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
}
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
for (i = 0; i < entries; ++i)
__gmap_zap(gmap, cbrlo[i]);
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
return 0;
}
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 63e330109b63..eb99e2f95ebe 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -9,7 +9,6 @@
#include <linux/kvm_host.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
-#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 4f6c22d72072..4f3cbf6003a9 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -548,6 +548,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
scb_s->hpid = HPID_VSIE;
+ scb_s->cpnc = scb_o->cpnc;
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
@@ -1000,11 +1001,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
handle_last_fault(vcpu, vsie_page);
- if (need_resched())
- schedule();
- if (test_cpu_flag(CIF_MCCK_PENDING))
- s390_handle_mcck();
-
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/* save current guest state of bp isolation override */
@@ -1185,6 +1181,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
kvm_s390_vcpu_has_irq(vcpu, 0) ||
kvm_s390_vcpu_sie_inhibited(vcpu))
break;
+ cond_resched();
}
if (rc == -EFAULT) {
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 28fd66d558ff..678333936f78 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -14,3 +14,5 @@ KASAN_SANITIZE_uaccess.o := n
obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
+
+lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index d4aa10795605..daca7bad66de 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay);
static void __udelay_disabled(unsigned long long usecs)
{
- unsigned long cr0, cr0_new, psw_mask;
+ unsigned long cr0, cr0_new, psw_mask, flags;
struct s390_idle_data idle;
u64 end;
@@ -45,7 +45,9 @@ static void __udelay_disabled(unsigned long long usecs)
psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
set_clock_comparator(end);
set_cpu_flag(CIF_IGNORE_IRQ);
+ local_irq_save(flags);
psw_idle(&idle, psw_mask);
+ local_irq_restore(flags);
clear_cpu_flag(CIF_IGNORE_IRQ);
set_clock_comparator(S390_lowcore.clock_comparator);
__ctl_load(cr0, 0, 0);
diff --git a/arch/s390/lib/error-inject.c b/arch/s390/lib/error-inject.c
new file mode 100644
index 000000000000..8c9d4da87eef
--- /dev/null
+++ b/arch/s390/lib/error-inject.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <asm/ptrace.h>
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ /*
+ * Emulate 'br 14'. 'regs' is captured by kprobes on entry to some
+ * kernel function.
+ */
+ regs->psw.addr = regs->gprs[14];
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 32b7a30b2485..7c988994931f 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -9,6 +9,7 @@
#include <linux/kallsyms.h>
#include <linux/kthread.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/string.h>
#include <linux/kprobes.h>
#include <linux/wait.h>
@@ -63,6 +64,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
break;
if (state.reliable && !addr) {
pr_err("unwind state reliable but addr is 0\n");
+ kfree(bt);
return -EINVAL;
}
sprint_symbol(sym, addr);
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index ae989b740376..1141c8d5c0d0 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -21,7 +21,6 @@
#include <linux/oom.h>
#include <linux/uaccess.h>
-#include <asm/pgalloc.h>
#include <asm/diag.h>
#ifdef CONFIG_CMM_IUCV
@@ -189,7 +188,7 @@ static void cmm_set_timer(void)
del_timer(&cmm_timer);
return;
}
- mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ);
+ mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC));
}
static void cmm_timer_fn(struct timer_list *unused)
@@ -245,7 +244,7 @@ static int cmm_skip_blanks(char *cp, char **endp)
}
static int cmm_pages_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
long nr = cmm_get_pages();
struct ctl_table ctl_entry = {
@@ -264,7 +263,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write,
}
static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp,
+ void *buffer, size_t *lenp,
loff_t *ppos)
{
long nr = cmm_get_timed_pages();
@@ -284,7 +283,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
}
static int cmm_timeout_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
long nr, seconds;
@@ -297,8 +296,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
if (write) {
len = min(*lenp, sizeof(buf));
- if (copy_from_user(buf, buffer, len))
- return -EFAULT;
+ memcpy(buf, buffer, len);
buf[len - 1] = '\0';
cmm_skip_blanks(buf, &p);
nr = simple_strtoul(p, &p, 0);
@@ -311,8 +309,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
cmm_timeout_pages, cmm_timeout_seconds);
if (len > *lenp)
len = *lenp;
- if (copy_to_user(buffer, buf, len))
- return -EFAULT;
+ memcpy(buffer, buf, len);
*lenp = len;
*ppos += len;
}
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 5d67b81c704a..c2ac9b8ae612 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -6,7 +6,6 @@
#include <linux/kasan.h>
#include <asm/kasan.h>
#include <asm/sections.h>
-#include <asm/pgtable.h>
static unsigned long max_addr;
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index fd0dae9d10f4..5060956b8e7d 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -20,9 +20,9 @@
#include <linux/ctype.h>
#include <linux/ioport.h>
#include <linux/refcount.h>
+#include <linux/pgtable.h>
#include <asm/diag.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/ebcdic.h>
#include <asm/errno.h>
#include <asm/extmem.h>
@@ -313,15 +313,10 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
goto out_free;
}
- rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
-
- if (rc)
- goto out_free;
-
seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
if (seg->res == NULL) {
rc = -ENOMEM;
- goto out_shared;
+ goto out_free;
}
seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
seg->res->start = seg->start_addr;
@@ -335,12 +330,17 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
if (rc == SEG_TYPE_SC ||
((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
seg->res->flags |= IORESOURCE_READONLY;
+
+ /* Check for overlapping resources before adding the mapping. */
if (request_resource(&iomem_resource, seg->res)) {
rc = -EBUSY;
- kfree(seg->res);
- goto out_shared;
+ goto out_free_resource;
}
+ rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+ if (rc)
+ goto out_resource;
+
if (do_nonshared)
diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
&start_addr, &end_addr);
@@ -351,14 +351,14 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
dcss_diag(&purgeseg_scode, seg->dcss_name,
&dummy, &dummy);
rc = diag_cc;
- goto out_resource;
+ goto out_mapping;
}
if (diag_cc > 1) {
pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
rc = dcss_diag_translate_rc(end_addr);
dcss_diag(&purgeseg_scode, seg->dcss_name,
&dummy, &dummy);
- goto out_resource;
+ goto out_mapping;
}
seg->start_addr = start_addr;
seg->end = end_addr;
@@ -377,11 +377,12 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
(void*) seg->end, segtype_string[seg->vm_segtype]);
}
goto out;
+ out_mapping:
+ vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
out_resource:
release_resource(seg->res);
+ out_free_resource:
kfree(seg->res);
- out_shared:
- vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
out_free:
kfree(seg);
out:
@@ -400,8 +401,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
* -EIO : could not perform query or load diagnose
* -ENOENT : no such segment
* -EOPNOTSUPP: multi-part segment cannot be used with linux
- * -ENOSPC : segment cannot be used (overlaps with storage)
- * -EBUSY : segment can temporarily not be used (overlaps with dcss)
+ * -EBUSY : segment cannot be used (overlaps with dcss or storage)
* -ERANGE : segment cannot be used (exceeds kernel mapping range)
* -EPERM : segment is currently loaded with incompatible permissions
* -ENOMEM : out of memory
@@ -626,10 +626,6 @@ void segment_warning(int rc, char *seg_name)
pr_err("DCSS %s has multiple page ranges and cannot be "
"loaded or queried\n", seg_name);
break;
- case -ENOSPC:
- pr_err("DCSS %s overlaps with used storage and cannot "
- "be loaded\n", seg_name);
- break;
case -EBUSY:
pr_err("%s needs used memory resources and cannot be "
"loaded or queried\n", seg_name);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index dedc28be27ab..996884dcc9fd 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -33,7 +33,6 @@
#include <linux/hugetlb.h>
#include <asm/asm-offsets.h>
#include <asm/diag.h>
-#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
@@ -106,7 +105,7 @@ static int bad_address(void *p)
{
unsigned long dummy;
- return probe_kernel_address((unsigned long *)p, dummy);
+ return get_kernel_nofault(dummy, (unsigned long *)p);
}
static void dump_pagetable(unsigned long asce, unsigned long address)
@@ -256,10 +255,8 @@ static noinline void do_no_context(struct pt_regs *regs)
/* Are we prepared to handle this kernel fault? */
fixup = s390_search_extables(regs->psw.addr);
- if (fixup) {
- regs->psw.addr = extable_fixup(fixup);
+ if (fixup && ex_handle(fixup, regs))
return;
- }
/*
* Oops. The kernel tried to access some bad page. We'll have to
@@ -377,7 +374,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access,
* routines.
*
* interruption code (int_code):
- * 04 Protection -> Write-Protection (suprression)
+ * 04 Protection -> Write-Protection (suppression)
* 10 Segment translation -> Not present (nullification)
* 11 Page translation -> Not present (nullification)
* 3b Region third trans. -> Not present (nullification)
@@ -434,7 +431,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
flags |= FAULT_FLAG_USER;
if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
flags |= FAULT_FLAG_WRITE;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
gmap = NULL;
if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
@@ -479,7 +476,7 @@ retry:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
if (fault_signal_pending(fault, regs)) {
fault = VM_FAULT_SIGNAL;
if (flags & FAULT_FLAG_RETRY_NOWAIT)
@@ -489,33 +486,19 @@ retry:
if (unlikely(fault & VM_FAULT_ERROR))
goto out_up;
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
if (fault & VM_FAULT_RETRY) {
if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
(flags & FAULT_FLAG_RETRY_NOWAIT)) {
/* FAULT_FLAG_RETRY_NOWAIT has been set,
- * mmap_sem has not been released */
+ * mmap_lock has not been released */
current->thread.gmap_pfault = 1;
fault = VM_FAULT_PFAULT;
goto out_up;
}
flags &= ~FAULT_FLAG_RETRY_NOWAIT;
flags |= FAULT_FLAG_TRIED;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
goto retry;
}
}
@@ -533,7 +516,7 @@ retry:
}
fault = 0;
out_up:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out:
return fault;
}
@@ -825,22 +808,22 @@ void do_secure_storage_access(struct pt_regs *regs)
switch (get_fault_type(regs)) {
case USER_FAULT:
mm = current->mm;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_vma(mm, addr);
if (!vma) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
break;
}
page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
if (IS_ERR_OR_NULL(page)) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
break;
}
if (arch_make_page_accessible(page))
send_sig(SIGSEGV, current, 0);
put_page(page);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
break;
case KERNEL_FAULT:
page = phys_to_page(addr);
@@ -876,6 +859,21 @@ void do_non_secure_storage_access(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(do_non_secure_storage_access);
+void do_secure_storage_violation(struct pt_regs *regs)
+{
+ /*
+ * Either KVM messed up the secure guest mapping or the same
+ * page is mapped into multiple secure guests.
+ *
+ * This exception is only triggered when a guest 2 is running
+ * and can therefore never occur in kernel context.
+ */
+ printk_ratelimited(KERN_WARNING
+ "Secure storage violation in task: %s, pid %d\n",
+ current->comm, current->pid);
+ send_sig(SIGSEGV, current, 0);
+}
+
#else
void do_secure_storage_access(struct pt_regs *regs)
{
@@ -886,4 +884,9 @@ void do_non_secure_storage_access(struct pt_regs *regs)
{
default_trap_handler(regs);
}
+
+void do_secure_storage_violation(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
#endif
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 1a95d8809cc3..373542ca1113 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -17,8 +17,8 @@
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
+#include <linux/pgtable.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>
@@ -300,7 +300,7 @@ struct gmap *gmap_get_enabled(void)
EXPORT_SYMBOL_GPL(gmap_get_enabled);
/*
- * gmap_alloc_table is assumed to be called with mmap_sem held
+ * gmap_alloc_table is assumed to be called with mmap_lock held
*/
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
unsigned long init, unsigned long gaddr)
@@ -405,10 +405,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
return -EINVAL;
flush = 0;
- down_write(&gmap->mm->mmap_sem);
+ mmap_write_lock(gmap->mm);
for (off = 0; off < len; off += PMD_SIZE)
flush |= __gmap_unmap_by_gaddr(gmap, to + off);
- up_write(&gmap->mm->mmap_sem);
+ mmap_write_unlock(gmap->mm);
if (flush)
gmap_flush_tlb(gmap);
return 0;
@@ -438,7 +438,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
return -EINVAL;
flush = 0;
- down_write(&gmap->mm->mmap_sem);
+ mmap_write_lock(gmap->mm);
for (off = 0; off < len; off += PMD_SIZE) {
/* Remove old translation */
flush |= __gmap_unmap_by_gaddr(gmap, to + off);
@@ -448,7 +448,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
(void *) from + off))
break;
}
- up_write(&gmap->mm->mmap_sem);
+ mmap_write_unlock(gmap->mm);
if (flush)
gmap_flush_tlb(gmap);
if (off >= len)
@@ -466,7 +466,7 @@ EXPORT_SYMBOL_GPL(gmap_map_segment);
* Returns user space address which corresponds to the guest address or
* -EFAULT if no such mapping exists.
* This function does not establish potentially missing page table entries.
- * The mmap_sem of the mm that belongs to the address space must be held
+ * The mmap_lock of the mm that belongs to the address space must be held
* when this function gets called.
*
* Note: Can also be called for shadow gmaps.
@@ -495,9 +495,9 @@ unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
unsigned long rc;
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
rc = __gmap_translate(gmap, gaddr);
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
@@ -534,7 +534,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
*
* Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
* if the vm address is already mapped to a different guest segment.
- * The mmap_sem of the mm that belongs to the address space must be held
+ * The mmap_lock of the mm that belongs to the address space must be held
* when this function gets called.
*/
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
@@ -640,7 +640,7 @@ int gmap_fault(struct gmap *gmap, unsigned long gaddr,
int rc;
bool unlocked;
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
retry:
unlocked = false;
@@ -649,13 +649,13 @@ retry:
rc = vmaddr;
goto out_up;
}
- if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+ if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
&unlocked)) {
rc = -EFAULT;
goto out_up;
}
/*
- * In the case that fixup_user_fault unlocked the mmap_sem during
+ * In the case that fixup_user_fault unlocked the mmap_lock during
* faultin redo __gmap_translate to not race with a map/unmap_segment.
*/
if (unlocked)
@@ -663,13 +663,13 @@ retry:
rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);
/*
- * this function is assumed to be called with mmap_sem held
+ * this function is assumed to be called with mmap_lock held
*/
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
@@ -696,7 +696,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
unsigned long gaddr, vmaddr, size;
struct vm_area_struct *vma;
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
for (gaddr = from; gaddr < to;
gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
/* Find the vm address for the guest address */
@@ -719,7 +719,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
zap_page_range(vma, vmaddr, size);
}
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
}
EXPORT_SYMBOL_GPL(gmap_discard);
@@ -788,19 +788,19 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
unsigned long gaddr, int level)
{
const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
- unsigned long *table;
+ unsigned long *table = gmap->table;
- if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
- return NULL;
if (gmap_is_shadow(gmap) && gmap->removed)
return NULL;
+ if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
+ return NULL;
+
if (asce_type != _ASCE_TYPE_REGION1 &&
gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
return NULL;
- table = gmap->table;
- switch (gmap->asce & _ASCE_TYPE_MASK) {
+ switch (asce_type) {
case _ASCE_TYPE_REGION1:
table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
if (level == 4)
@@ -879,10 +879,10 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
BUG_ON(gmap_is_shadow(gmap));
fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
- if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
+ if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
return -EFAULT;
if (unlocked)
- /* lost mmap_sem, caller has to retry __gmap_translate */
+ /* lost mmap_lock, caller has to retry __gmap_translate */
return 0;
/* Connect the page tables */
return __gmap_link(gmap, gaddr, vmaddr);
@@ -953,7 +953,7 @@ static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
* -EAGAIN if a fixup is needed
* -EINVAL if unsupported notifier bits have been specified
*
- * Expected to be called with sg->mm->mmap_sem in read and
+ * Expected to be called with sg->mm->mmap_lock in read and
* guest_table_lock held.
*/
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
@@ -999,7 +999,7 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
* Returns 0 if successfully protected, -ENOMEM if out of memory and
* -EAGAIN if a fixup is needed.
*
- * Expected to be called with sg->mm->mmap_sem in read
+ * Expected to be called with sg->mm->mmap_lock in read
*/
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
pmd_t *pmdp, int prot, unsigned long bits)
@@ -1035,7 +1035,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
* Returns 0 if successfully protected, -ENOMEM if out of memory and
* -EFAULT if gaddr is invalid (or mapping for shadows is missing).
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
unsigned long len, int prot, unsigned long bits)
@@ -1106,9 +1106,9 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
return -EINVAL;
if (!MACHINE_HAS_ESOP && prot == PROT_READ)
return -EINVAL;
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
@@ -1124,7 +1124,7 @@ EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
* if reading using the virtual address failed. -EINVAL if called on a gmap
* shadow.
*
- * Called with gmap->mm->mmap_sem in read.
+ * Called with gmap->mm->mmap_lock in read.
*/
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
@@ -1696,11 +1696,11 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
}
spin_unlock(&parent->shadow_lock);
/* protect after insertion, so it will get properly invalidated */
- down_read(&parent->mm->mmap_sem);
+ mmap_read_lock(parent->mm);
rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
PROT_READ, GMAP_NOTIFY_SHADOW);
- up_read(&parent->mm->mmap_sem);
+ mmap_read_unlock(parent->mm);
spin_lock(&parent->shadow_lock);
new->initialized = true;
if (rc) {
@@ -1729,7 +1729,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow);
* shadow table structure is incomplete, -ENOMEM if out of memory and
* -EFAULT if an address in the parent gmap could not be resolved.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
int fake)
@@ -1813,7 +1813,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
* shadow table structure is incomplete, -ENOMEM if out of memory and
* -EFAULT if an address in the parent gmap could not be resolved.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
int fake)
@@ -1897,7 +1897,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
* shadow table structure is incomplete, -ENOMEM if out of memory and
* -EFAULT if an address in the parent gmap could not be resolved.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
int fake)
@@ -1981,7 +1981,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
* Returns 0 if the shadow page table was found and -EAGAIN if the page
* table was not found.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
unsigned long *pgt, int *dat_protection,
@@ -2021,7 +2021,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
* shadow table structure is incomplete, -ENOMEM if out of memory,
* -EFAULT if an address in the parent gmap could not be resolved and
*
- * Called with gmap->mm->mmap_sem in read
+ * Called with gmap->mm->mmap_lock in read
*/
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
int fake)
@@ -2100,7 +2100,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_pgt);
* shadow table structure is incomplete, -ENOMEM if out of memory and
* -EFAULT if an address in the parent gmap could not be resolved.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
@@ -2485,23 +2485,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+
+ split_huge_pmd(vma, pmd, addr);
+ return 0;
+}
+
+static const struct mm_walk_ops thp_split_walk_ops = {
+ .pmd_entry = thp_split_walk_pmd_entry,
+};
+
static inline void thp_split_mm(struct mm_struct *mm)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct vm_area_struct *vma;
- unsigned long addr;
for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
- for (addr = vma->vm_start;
- addr < vma->vm_end;
- addr += PAGE_SIZE)
- follow_page(vma, addr, FOLL_SPLIT);
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
+ walk_page_vma(vma, &thp_split_walk_ops, NULL);
}
mm->def_flags |= VM_NOHUGEPAGE;
-#endif
}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* Remove all empty zero pages from the mapping for lazy refaulting
@@ -2543,12 +2556,12 @@ int s390_enable_sie(void)
/* Fail if the page tables are 2K */
if (!mm_alloc_pgste(mm))
return -EINVAL;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
@@ -2617,7 +2630,7 @@ int s390_enable_skey(void)
struct mm_struct *mm = current->mm;
int rc = 0;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
if (mm_uses_skeys(mm))
goto out_up;
@@ -2630,7 +2643,7 @@ int s390_enable_skey(void)
walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
out_up:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
@@ -2651,9 +2664,9 @@ static const struct mm_walk_ops reset_cmma_walk_ops = {
void s390_reset_cmma(struct mm_struct *mm)
{
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
@@ -2685,9 +2698,9 @@ void s390_reset_acc(struct mm_struct *mm)
*/
if (!mmget_not_zero(mm))
return;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
mmput(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_acc);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 4632d4e26b66..3b5a4d25ca9b 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -117,7 +117,7 @@ static inline pte_t __rste_to_pte(unsigned long rste)
_PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
- _PAGE_DIRTY);
+ _PAGE_SOFT_DIRTY);
#endif
pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
_PAGE_NOEXEC);
@@ -254,25 +254,15 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}
-static __init int setup_hugepagesz(char *opt)
+bool __init arch_hugetlb_valid_size(unsigned long size)
{
- unsigned long size;
- char *string = opt;
-
- size = memparse(opt, &opt);
- if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
- hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
- } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
- hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
- } else {
- hugetlb_bad_size();
- pr_err("hugepagesz= specifies an unsupported page size %s\n",
- string);
- return 0;
- }
- return 1;
+ if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
+ return true;
+ else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE)
+ return true;
+ else
+ return false;
}
-__setup("hugepagesz=", setup_hugepagesz);
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
unsigned long addr, unsigned long len,
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 87b2d024e75a..0d282081dc1f 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -33,7 +33,6 @@
#include <linux/dma-direct.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/lowcore.h>
@@ -116,13 +115,12 @@ void __init paging_init(void)
__load_psw_mask(psw.mask);
kasan_free_early_identity();
- sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
- free_area_init_nodes(max_zone_pfns);
+ free_area_init(max_zone_pfns);
}
void mark_rodata_ro(void)
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 06345616a646..99dd1c63a065 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -2,8 +2,8 @@
#include <linux/kasan.h>
#include <linux/sched/task.h>
#include <linux/memblock.h>
+#include <linux/pgtable.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/kasan.h>
#include <asm/mem_detect.h>
#include <asm/processor.h>
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index de7ca4b6718f..1d17413b319a 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -55,19 +55,26 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
*/
static DEFINE_SPINLOCK(s390_kernel_write_lock);
-void notrace s390_kernel_write(void *dst, const void *src, size_t size)
+notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
{
+ void *tmp = dst;
unsigned long flags;
long copied;
spin_lock_irqsave(&s390_kernel_write_lock, flags);
- while (size) {
- copied = s390_kernel_write_odd(dst, src, size);
- dst += copied;
- src += copied;
- size -= copied;
+ if (!(flags & PSW_MASK_DAT)) {
+ memcpy(dst, src, size);
+ } else {
+ while (size) {
+ copied = s390_kernel_write_odd(tmp, src, size);
+ tmp += copied;
+ src += copied;
+ size -= copied;
+ }
}
spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
+
+ return dst;
}
static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 1b78f630a9ca..e54f928503c5 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -17,7 +17,6 @@
#include <linux/random.h>
#include <linux/compat.h>
#include <linux/security.h>
-#include <asm/pgalloc.h>
#include <asm/elf.h>
static unsigned long stack_maxrandom_size(void)
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index e22c06d5f206..c5c52ec2b46f 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -7,7 +7,6 @@
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/facility.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/set_memory.h>
@@ -86,7 +85,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
{
pte_t *ptep, new;
- ptep = pte_offset(pmdp, addr);
+ ptep = pte_offset_kernel(pmdp, addr);
do {
new = *ptep;
if (pte_none(new))
@@ -338,19 +337,11 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
{
unsigned long address;
int nr, i, j;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
pte_t *pte;
for (i = 0; i < numpages;) {
address = page_to_phys(page + i);
- pgd = pgd_offset_k(address);
- p4d = p4d_offset(pgd, address);
- pud = pud_offset(p4d, address);
- pmd = pmd_offset(pud, address);
- pte = pte_offset_kernel(pmd, address);
+ pte = virt_to_kpte(address);
nr = (unsigned long)pte >> ilog2(sizeof(long));
nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
nr = min(numpages - i, nr);
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index fff169d64711..11d2c8395e2a 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -114,7 +114,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
spin_lock_bh(&mm->page_table_lock);
/*
- * This routine gets called with mmap_sem lock held and there is
+ * This routine gets called with mmap_lock lock held and there is
* no reason to optimize for the case of otherwise. However, if
* that would ever change, the below check will let us know.
*/
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 9ebd01219812..0d25f743b270 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -19,8 +19,6 @@
#include <linux/ksm.h>
#include <linux/mman.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index f810930aff42..eddf71c22875 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -13,7 +13,6 @@
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
@@ -21,14 +20,6 @@
static DEFINE_MUTEX(vmem_mutex);
-struct memory_segment {
- struct list_head list;
- unsigned long start;
- unsigned long size;
-};
-
-static LIST_HEAD(mem_segs);
-
static void __ref *vmem_alloc_pages(unsigned int order)
{
unsigned long size = PAGE_SIZE << order;
@@ -38,6 +29,15 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return (void *) memblock_phys_alloc(size, size);
}
+static void vmem_free_pages(unsigned long addr, int order)
+{
+ /* We don't expect boot memory to be removed ever. */
+ if (!slab_is_available() ||
+ WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+ return;
+ free_pages(addr, order);
+}
+
void *vmem_crst_alloc(unsigned long val)
{
unsigned long *table;
@@ -63,332 +63,487 @@ pte_t __ref *vmem_pte_alloc(void)
return pte;
}
+static void vmem_pte_free(unsigned long *table)
+{
+ /* We don't expect boot memory to be removed ever. */
+ if (!slab_is_available() ||
+ WARN_ON_ONCE(PageReserved(virt_to_page(table))))
+ return;
+ page_table_free(&init_mm, table);
+}
+
+#define PAGE_UNUSED 0xFD
+
/*
- * Add a physical memory range to the 1:1 mapping.
+ * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges
+ * from unused_pmd_start to next PMD_SIZE boundary.
*/
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static unsigned long unused_pmd_start;
+
+static void vmemmap_flush_unused_pmd(void)
{
- unsigned long pgt_prot, sgt_prot, r3_prot;
- unsigned long pages4k, pages1m, pages2g;
- unsigned long end = start + size;
- unsigned long address = start;
- pgd_t *pg_dir;
- p4d_t *p4_dir;
- pud_t *pu_dir;
- pmd_t *pm_dir;
- pte_t *pt_dir;
- int ret = -ENOMEM;
+ if (!unused_pmd_start)
+ return;
+ memset(__va(unused_pmd_start), PAGE_UNUSED,
+ ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
+ unused_pmd_start = 0;
+}
+
+static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+ /*
+ * As we expect to add in the same granularity as we remove, it's
+ * sufficient to mark only some piece used to block the memmap page from
+ * getting removed (just in case the memmap never gets initialized,
+ * e.g., because the memory block never gets onlined).
+ */
+ memset(__va(start), 0, sizeof(struct page));
+}
- pgt_prot = pgprot_val(PAGE_KERNEL);
- sgt_prot = pgprot_val(SEGMENT_KERNEL);
- r3_prot = pgprot_val(REGION3_KERNEL);
- if (!MACHINE_HAS_NX) {
- pgt_prot &= ~_PAGE_NOEXEC;
- sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
- r3_prot &= ~_REGION_ENTRY_NOEXEC;
+static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+ /*
+ * We only optimize if the new used range directly follows the
+ * previously unused range (esp., when populating consecutive sections).
+ */
+ if (unused_pmd_start == start) {
+ unused_pmd_start = end;
+ if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
+ unused_pmd_start = 0;
+ return;
}
- pages4k = pages1m = pages2g = 0;
- while (address < end) {
- pg_dir = pgd_offset_k(address);
- if (pgd_none(*pg_dir)) {
- p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
- if (!p4_dir)
- goto out;
- pgd_populate(&init_mm, pg_dir, p4_dir);
- }
- p4_dir = p4d_offset(pg_dir, address);
- if (p4d_none(*p4_dir)) {
- pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
- if (!pu_dir)
- goto out;
- p4d_populate(&init_mm, p4_dir, pu_dir);
- }
- pu_dir = pud_offset(p4_dir, address);
- if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
- !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
- !debug_pagealloc_enabled()) {
- pud_val(*pu_dir) = address | r3_prot;
- address += PUD_SIZE;
- pages2g++;
- continue;
- }
- if (pud_none(*pu_dir)) {
- pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
- if (!pm_dir)
- goto out;
- pud_populate(&init_mm, pu_dir, pm_dir);
- }
- pm_dir = pmd_offset(pu_dir, address);
- if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
- !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
- !debug_pagealloc_enabled()) {
- pmd_val(*pm_dir) = address | sgt_prot;
- address += PMD_SIZE;
- pages1m++;
+ vmemmap_flush_unused_pmd();
+ __vmemmap_use_sub_pmd(start, end);
+}
+
+static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+{
+ void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+ vmemmap_flush_unused_pmd();
+
+ /* Could be our memmap page is filled with PAGE_UNUSED already ... */
+ __vmemmap_use_sub_pmd(start, end);
+
+ /* Mark the unused parts of the new memmap page PAGE_UNUSED. */
+ if (!IS_ALIGNED(start, PMD_SIZE))
+ memset(page, PAGE_UNUSED, start - __pa(page));
+ /*
+ * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
+ * consecutive sections. Remember for the last added PMD the last
+ * unused range in the populated PMD.
+ */
+ if (!IS_ALIGNED(end, PMD_SIZE))
+ unused_pmd_start = end;
+}
+
+/* Returns true if the PMD is completely unused and can be freed. */
+static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
+{
+ void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+ vmemmap_flush_unused_pmd();
+ memset(__va(start), PAGE_UNUSED, end - start);
+ return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
+ unsigned long end, bool add, bool direct)
+{
+ unsigned long prot, pages = 0;
+ int ret = -ENOMEM;
+ pte_t *pte;
+
+ prot = pgprot_val(PAGE_KERNEL);
+ if (!MACHINE_HAS_NX)
+ prot &= ~_PAGE_NOEXEC;
+
+ pte = pte_offset_kernel(pmd, addr);
+ for (; addr < end; addr += PAGE_SIZE, pte++) {
+ if (!add) {
+ if (pte_none(*pte))
+ continue;
+ if (!direct)
+ vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
+ pte_clear(&init_mm, addr, pte);
+ } else if (pte_none(*pte)) {
+ if (!direct) {
+ void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+
+ if (!new_page)
+ goto out;
+ pte_val(*pte) = __pa(new_page) | prot;
+ } else {
+ pte_val(*pte) = addr | prot;
+ }
+ } else {
continue;
}
- if (pmd_none(*pm_dir)) {
- pt_dir = vmem_pte_alloc();
- if (!pt_dir)
- goto out;
- pmd_populate(&init_mm, pm_dir, pt_dir);
- }
-
- pt_dir = pte_offset_kernel(pm_dir, address);
- pte_val(*pt_dir) = address | pgt_prot;
- address += PAGE_SIZE;
- pages4k++;
+ pages++;
}
ret = 0;
out:
- update_page_count(PG_DIRECT_MAP_4K, pages4k);
- update_page_count(PG_DIRECT_MAP_1M, pages1m);
- update_page_count(PG_DIRECT_MAP_2G, pages2g);
+ if (direct)
+ update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
return ret;
}
-/*
- * Remove a physical memory range from the 1:1 mapping.
- * Currently only invalidates page table entries.
- */
-static void vmem_remove_range(unsigned long start, unsigned long size)
+static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
- unsigned long pages4k, pages1m, pages2g;
- unsigned long end = start + size;
- unsigned long address = start;
- pgd_t *pg_dir;
- p4d_t *p4_dir;
- pud_t *pu_dir;
- pmd_t *pm_dir;
- pte_t *pt_dir;
-
- pages4k = pages1m = pages2g = 0;
- while (address < end) {
- pg_dir = pgd_offset_k(address);
- if (pgd_none(*pg_dir)) {
- address += PGDIR_SIZE;
- continue;
- }
- p4_dir = p4d_offset(pg_dir, address);
- if (p4d_none(*p4_dir)) {
- address += P4D_SIZE;
- continue;
- }
- pu_dir = pud_offset(p4_dir, address);
- if (pud_none(*pu_dir)) {
- address += PUD_SIZE;
- continue;
- }
- if (pud_large(*pu_dir)) {
- pud_clear(pu_dir);
- address += PUD_SIZE;
- pages2g++;
- continue;
- }
- pm_dir = pmd_offset(pu_dir, address);
- if (pmd_none(*pm_dir)) {
- address += PMD_SIZE;
- continue;
- }
- if (pmd_large(*pm_dir)) {
- pmd_clear(pm_dir);
- address += PMD_SIZE;
- pages1m++;
- continue;
- }
- pt_dir = pte_offset_kernel(pm_dir, address);
- pte_clear(&init_mm, address, pt_dir);
- address += PAGE_SIZE;
- pages4k++;
+ pte_t *pte;
+ int i;
+
+ /* We can safely assume this is fully in 1:1 mapping & vmemmap area */
+ pte = pte_offset_kernel(pmd, start);
+ for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+ if (!pte_none(*pte))
+ return;
}
- flush_tlb_kernel_range(start, end);
- update_page_count(PG_DIRECT_MAP_4K, -pages4k);
- update_page_count(PG_DIRECT_MAP_1M, -pages1m);
- update_page_count(PG_DIRECT_MAP_2G, -pages2g);
+ vmem_pte_free(__va(pmd_deref(*pmd)));
+ pmd_clear(pmd);
}
-/*
- * Add a backed mem_map array to the virtual mem_map array.
- */
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
- struct vmem_altmap *altmap)
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
+ unsigned long end, bool add, bool direct)
{
- unsigned long pgt_prot, sgt_prot;
- unsigned long address = start;
- pgd_t *pg_dir;
- p4d_t *p4_dir;
- pud_t *pu_dir;
- pmd_t *pm_dir;
- pte_t *pt_dir;
+ unsigned long next, prot, pages = 0;
int ret = -ENOMEM;
+ pmd_t *pmd;
+ pte_t *pte;
- pgt_prot = pgprot_val(PAGE_KERNEL);
- sgt_prot = pgprot_val(SEGMENT_KERNEL);
- if (!MACHINE_HAS_NX) {
- pgt_prot &= ~_PAGE_NOEXEC;
- sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
- }
- for (address = start; address < end;) {
- pg_dir = pgd_offset_k(address);
- if (pgd_none(*pg_dir)) {
- p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
- if (!p4_dir)
- goto out;
- pgd_populate(&init_mm, pg_dir, p4_dir);
- }
+ prot = pgprot_val(SEGMENT_KERNEL);
+ if (!MACHINE_HAS_NX)
+ prot &= ~_SEGMENT_ENTRY_NOEXEC;
- p4_dir = p4d_offset(pg_dir, address);
- if (p4d_none(*p4_dir)) {
- pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
- if (!pu_dir)
- goto out;
- p4d_populate(&init_mm, p4_dir, pu_dir);
- }
+ pmd = pmd_offset(pud, addr);
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+ if (!add) {
+ if (pmd_none(*pmd))
+ continue;
+ if (pmd_large(*pmd) && !add) {
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE)) {
+ if (!direct)
+ vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+ pmd_clear(pmd);
+ pages++;
+ } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
+ vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+ pmd_clear(pmd);
+ }
+ continue;
+ }
+ } else if (pmd_none(*pmd)) {
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE) &&
+ MACHINE_HAS_EDAT1 && addr && direct &&
+ !debug_pagealloc_enabled()) {
+ pmd_val(*pmd) = addr | prot;
+ pages++;
+ continue;
+ } else if (!direct && MACHINE_HAS_EDAT1) {
+ void *new_page;
- pu_dir = pud_offset(p4_dir, address);
- if (pud_none(*pu_dir)) {
- pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
- if (!pm_dir)
+ /*
+ * Use 1MB frames for vmemmap if available. We
+ * always use large frames even if they are only
+ * partially used. Otherwise we would have also
+ * page tables since vmemmap_populate gets
+ * called for each section separately.
+ */
+ new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+ if (new_page) {
+ pmd_val(*pmd) = __pa(new_page) | prot;
+ if (!IS_ALIGNED(addr, PMD_SIZE) ||
+ !IS_ALIGNED(next, PMD_SIZE)) {
+ vmemmap_use_new_sub_pmd(addr, next);
+ }
+ continue;
+ }
+ }
+ pte = vmem_pte_alloc();
+ if (!pte)
goto out;
- pud_populate(&init_mm, pu_dir, pm_dir);
+ pmd_populate(&init_mm, pmd, pte);
+ } else if (pmd_large(*pmd)) {
+ if (!direct)
+ vmemmap_use_sub_pmd(addr, next);
+ continue;
}
+ ret = modify_pte_table(pmd, addr, next, add, direct);
+ if (ret)
+ goto out;
+ if (!add)
+ try_free_pte_table(pmd, addr & PMD_MASK);
+ }
+ ret = 0;
+out:
+ if (direct)
+ update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
+ return ret;
+}
- pm_dir = pmd_offset(pu_dir, address);
- if (pmd_none(*pm_dir)) {
- /* Use 1MB frames for vmemmap if available. We always
- * use large frames even if they are only partially
- * used.
- * Otherwise we would have also page tables since
- * vmemmap_populate gets called for each section
- * separately. */
- if (MACHINE_HAS_EDAT1) {
- void *new_page;
+static void try_free_pmd_table(pud_t *pud, unsigned long start)
+{
+ const unsigned long end = start + PUD_SIZE;
+ pmd_t *pmd;
+ int i;
+
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (end > VMALLOC_START)
+ return;
+#ifdef CONFIG_KASAN
+ if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
+ return;
+#endif
+ pmd = pmd_offset(pud, start);
+ for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
+ if (!pmd_none(*pmd))
+ return;
+ vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+ pud_clear(pud);
+}
- new_page = vmemmap_alloc_block(PMD_SIZE, node);
- if (!new_page)
- goto out;
- pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
- address = (address + PMD_SIZE) & PMD_MASK;
+static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
+ bool add, bool direct)
+{
+ unsigned long next, prot, pages = 0;
+ int ret = -ENOMEM;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ prot = pgprot_val(REGION3_KERNEL);
+ if (!MACHINE_HAS_NX)
+ prot &= ~_REGION_ENTRY_NOEXEC;
+ pud = pud_offset(p4d, addr);
+ for (; addr < end; addr = next, pud++) {
+ next = pud_addr_end(addr, end);
+ if (!add) {
+ if (pud_none(*pud))
+ continue;
+ if (pud_large(*pud)) {
+ if (IS_ALIGNED(addr, PUD_SIZE) &&
+ IS_ALIGNED(next, PUD_SIZE)) {
+ pud_clear(pud);
+ pages++;
+ }
+ continue;
+ }
+ } else if (pud_none(*pud)) {
+ if (IS_ALIGNED(addr, PUD_SIZE) &&
+ IS_ALIGNED(next, PUD_SIZE) &&
+ MACHINE_HAS_EDAT2 && addr && direct &&
+ !debug_pagealloc_enabled()) {
+ pud_val(*pud) = addr | prot;
+ pages++;
continue;
}
- pt_dir = vmem_pte_alloc();
- if (!pt_dir)
+ pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!pmd)
goto out;
- pmd_populate(&init_mm, pm_dir, pt_dir);
- } else if (pmd_large(*pm_dir)) {
- address = (address + PMD_SIZE) & PMD_MASK;
+ pud_populate(&init_mm, pud, pmd);
+ } else if (pud_large(*pud)) {
continue;
}
+ ret = modify_pmd_table(pud, addr, next, add, direct);
+ if (ret)
+ goto out;
+ if (!add)
+ try_free_pmd_table(pud, addr & PUD_MASK);
+ }
+ ret = 0;
+out:
+ if (direct)
+ update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
+ return ret;
+}
- pt_dir = pte_offset_kernel(pm_dir, address);
- if (pte_none(*pt_dir)) {
- void *new_page;
+static void try_free_pud_table(p4d_t *p4d, unsigned long start)
+{
+ const unsigned long end = start + P4D_SIZE;
+ pud_t *pud;
+ int i;
+
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (end > VMALLOC_START)
+ return;
+#ifdef CONFIG_KASAN
+ if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
+ return;
+#endif
+
+ pud = pud_offset(p4d, start);
+ for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+ if (!pud_none(*pud))
+ return;
+ }
+ vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+ p4d_clear(p4d);
+}
- new_page = vmemmap_alloc_block(PAGE_SIZE, node);
- if (!new_page)
+static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
+ bool add, bool direct)
+{
+ unsigned long next;
+ int ret = -ENOMEM;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ p4d = p4d_offset(pgd, addr);
+ for (; addr < end; addr = next, p4d++) {
+ next = p4d_addr_end(addr, end);
+ if (!add) {
+ if (p4d_none(*p4d))
+ continue;
+ } else if (p4d_none(*p4d)) {
+ pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!pud)
goto out;
- pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
+ p4d_populate(&init_mm, p4d, pud);
}
- address += PAGE_SIZE;
+ ret = modify_pud_table(p4d, addr, next, add, direct);
+ if (ret)
+ goto out;
+ if (!add)
+ try_free_pud_table(p4d, addr & P4D_MASK);
}
ret = 0;
out:
return ret;
}
-void vmemmap_free(unsigned long start, unsigned long end,
- struct vmem_altmap *altmap)
+static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
+ const unsigned long end = start + PGDIR_SIZE;
+ p4d_t *p4d;
+ int i;
+
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (end > VMALLOC_START)
+ return;
+#ifdef CONFIG_KASAN
+ if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
+ return;
+#endif
+
+ p4d = p4d_offset(pgd, start);
+ for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
+ if (!p4d_none(*p4d))
+ return;
+ }
+ vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+ pgd_clear(pgd);
}
-/*
- * Add memory segment to the segment list if it doesn't overlap with
- * an already present segment.
- */
-static int insert_memory_segment(struct memory_segment *seg)
+static int modify_pagetable(unsigned long start, unsigned long end, bool add,
+ bool direct)
{
- struct memory_segment *tmp;
+ unsigned long addr, next;
+ int ret = -ENOMEM;
+ pgd_t *pgd;
+ p4d_t *p4d;
- if (seg->start + seg->size > VMEM_MAX_PHYS ||
- seg->start + seg->size < seg->start)
- return -ERANGE;
+ if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
+ return -EINVAL;
+ for (addr = start; addr < end; addr = next) {
+ next = pgd_addr_end(addr, end);
+ pgd = pgd_offset_k(addr);
- list_for_each_entry(tmp, &mem_segs, list) {
- if (seg->start >= tmp->start + tmp->size)
- continue;
- if (seg->start + seg->size <= tmp->start)
- continue;
- return -ENOSPC;
+ if (!add) {
+ if (pgd_none(*pgd))
+ continue;
+ } else if (pgd_none(*pgd)) {
+ p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!p4d)
+ goto out;
+ pgd_populate(&init_mm, pgd, p4d);
+ }
+ ret = modify_p4d_table(pgd, addr, next, add, direct);
+ if (ret)
+ goto out;
+ if (!add)
+ try_free_p4d_table(pgd, addr & PGDIR_MASK);
}
- list_add(&seg->list, &mem_segs);
- return 0;
+ ret = 0;
+out:
+ if (!add)
+ flush_tlb_kernel_range(start, end);
+ return ret;
+}
+
+static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+ return modify_pagetable(start, end, true, direct);
+}
+
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+ return modify_pagetable(start, end, false, direct);
}
/*
- * Remove memory segment from the segment list.
+ * Add a physical memory range to the 1:1 mapping.
*/
-static void remove_memory_segment(struct memory_segment *seg)
+static int vmem_add_range(unsigned long start, unsigned long size)
{
- list_del(&seg->list);
+ return add_pagetable(start, start + size, true);
}
-static void __remove_shared_memory(struct memory_segment *seg)
+/*
+ * Remove a physical memory range from the 1:1 mapping.
+ */
+static void vmem_remove_range(unsigned long start, unsigned long size)
{
- remove_memory_segment(seg);
- vmem_remove_range(seg->start, seg->size);
+ remove_pagetable(start, start + size, true);
}
-int vmem_remove_mapping(unsigned long start, unsigned long size)
+/*
+ * Add a backed mem_map array to the virtual mem_map array.
+ */
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
- struct memory_segment *seg;
int ret;
mutex_lock(&vmem_mutex);
+ /* We don't care about the node, just use NUMA_NO_NODE on allocations */
+ ret = add_pagetable(start, end, false);
+ if (ret)
+ remove_pagetable(start, end, false);
+ mutex_unlock(&vmem_mutex);
+ return ret;
+}
- ret = -ENOENT;
- list_for_each_entry(seg, &mem_segs, list) {
- if (seg->start == start && seg->size == size)
- break;
- }
-
- if (seg->start != start || seg->size != size)
- goto out;
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+ mutex_lock(&vmem_mutex);
+ remove_pagetable(start, end, false);
+ mutex_unlock(&vmem_mutex);
+}
- ret = 0;
- __remove_shared_memory(seg);
- kfree(seg);
-out:
+void vmem_remove_mapping(unsigned long start, unsigned long size)
+{
+ mutex_lock(&vmem_mutex);
+ vmem_remove_range(start, size);
mutex_unlock(&vmem_mutex);
- return ret;
}
int vmem_add_mapping(unsigned long start, unsigned long size)
{
- struct memory_segment *seg;
int ret;
- mutex_lock(&vmem_mutex);
- ret = -ENOMEM;
- seg = kzalloc(sizeof(*seg), GFP_KERNEL);
- if (!seg)
- goto out;
- seg->start = start;
- seg->size = size;
-
- ret = insert_memory_segment(seg);
- if (ret)
- goto out_free;
+ if (start + size > VMEM_MAX_PHYS ||
+ start + size < start)
+ return -ERANGE;
- ret = vmem_add_mem(start, size);
+ mutex_lock(&vmem_mutex);
+ ret = vmem_add_range(start, size);
if (ret)
- goto out_remove;
- goto out;
-
-out_remove:
- __remove_shared_memory(seg);
-out_free:
- kfree(seg);
-out:
+ vmem_remove_range(start, size);
mutex_unlock(&vmem_mutex);
return ret;
}
@@ -403,7 +558,7 @@ void __init vmem_map_init(void)
struct memblock_region *reg;
for_each_memblock(memory, reg)
- vmem_add_mem(reg->base, reg->size);
+ vmem_add_range(reg->base, reg->size);
__set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
@@ -422,27 +577,3 @@ void __init vmem_map_init(void)
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
-
-/*
- * Convert memblock.memory to a memory segment list so there is a single
- * list that contains all memory segments.
- */
-static int __init vmem_convert_memory_chunk(void)
-{
- struct memblock_region *reg;
- struct memory_segment *seg;
-
- mutex_lock(&vmem_mutex);
- for_each_memblock(memory, reg) {
- seg = kzalloc(sizeof(*seg), GFP_KERNEL);
- if (!seg)
- panic("Out of memory...\n");
- seg->start = reg->base;
- seg->size = reg->size;
- insert_memory_segment(seg);
- }
- mutex_unlock(&vmem_mutex);
- return 0;
-}
-
-core_initcall(vmem_convert_memory_chunk);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 8d2134136290..be4b8532dd3c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -49,6 +49,7 @@ struct bpf_jit {
int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
+ int excnt; /* Number of exception table entries */
int labels[1]; /* Labels for local jumps */
};
@@ -489,6 +490,24 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
} while (re <= last);
}
+static void bpf_skip(struct bpf_jit *jit, int size)
+{
+ if (size >= 6 && !is_valid_rel(size)) {
+ /* brcl 0xf,size */
+ EMIT6_PCREL_RIL(0xc0f4000000, size);
+ size -= 6;
+ } else if (size >= 4 && is_valid_rel(size)) {
+ /* brc 0xf,size */
+ EMIT4_PCREL(0xa7f40000, size);
+ size -= 4;
+ }
+ while (size >= 2) {
+ /* bcr 0,%0 */
+ _EMIT2(0x0700);
+ size -= 2;
+ }
+}
+
/*
* Emit function prologue
*
@@ -501,9 +520,11 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
} else {
- /* j tail_call_start: NOP if no tail calls are used */
- EMIT4_PCREL(0xa7f40000, 6);
- _EMIT2(0);
+ /*
+ * There are no tail calls. Insert nops in order to have
+ * tail_call_start at a predictable offset.
+ */
+ bpf_skip(jit, 6);
}
/* Tail calls have to skip above initialization */
jit->tail_call_start = jit->prg;
@@ -587,6 +608,84 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
}
}
+static int get_probe_mem_regno(const u8 *insn)
+{
+ /*
+ * insn must point to llgc, llgh, llgf or lg, which have destination
+ * register at the same position.
+ */
+ if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
+ return -1;
+ if (insn[5] != 0x90 && /* llgc */
+ insn[5] != 0x91 && /* llgh */
+ insn[5] != 0x16 && /* llgf */
+ insn[5] != 0x04) /* lg */
+ return -1;
+ return insn[1] >> 4;
+}
+
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+ struct pt_regs *regs)
+{
+ int regno;
+ u8 *insn;
+
+ regs->psw.addr = extable_fixup(x);
+ insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16);
+ regno = get_probe_mem_regno(insn);
+ if (WARN_ON_ONCE(regno < 0))
+ /* JIT bug - unexpected instruction. */
+ return false;
+ regs->gprs[regno] = 0;
+ return true;
+}
+
+static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
+ int probe_prg, int nop_prg)
+{
+ struct exception_table_entry *ex;
+ s64 delta;
+ u8 *insn;
+ int prg;
+ int i;
+
+ if (!fp->aux->extable)
+ /* Do nothing during early JIT passes. */
+ return 0;
+ insn = jit->prg_buf + probe_prg;
+ if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0))
+ /* JIT bug - unexpected probe instruction. */
+ return -1;
+ if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
+ /* JIT bug - gap between probe and nop instructions. */
+ return -1;
+ for (i = 0; i < 2; i++) {
+ if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries))
+ /* Verifier bug - not enough entries. */
+ return -1;
+ ex = &fp->aux->extable[jit->excnt];
+ /* Add extable entries for probe and nop instructions. */
+ prg = i == 0 ? probe_prg : nop_prg;
+ delta = jit->prg_buf + prg - (u8 *)&ex->insn;
+ if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+ /* JIT bug - code and extable must be close. */
+ return -1;
+ ex->insn = delta;
+ /*
+ * Always land on the nop. Note that extable infrastructure
+ * ignores fixup field, it is handled by ex_handler_bpf().
+ */
+ delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
+ if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+ /* JIT bug - landing pad and extable must be close. */
+ return -1;
+ ex->fixup = delta;
+ ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+ jit->excnt++;
+ }
+ return 0;
+}
+
/*
* Compile one eBPF instruction into s390x code
*
@@ -594,7 +693,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
* stack space for the large switch statement.
*/
static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
- int i, bool extra_pass)
+ int i, bool extra_pass, u32 stack_depth)
{
struct bpf_insn *insn = &fp->insnsi[i];
u32 dst_reg = insn->dst_reg;
@@ -603,7 +702,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
+ int probe_prg = -1;
unsigned int mask;
+ int nop_prg;
+ int err;
+
+ if (BPF_CLASS(insn->code) == BPF_LDX &&
+ BPF_MODE(insn->code) == BPF_PROBE_MEM)
+ probe_prg = jit->prg;
switch (insn->code) {
/*
@@ -1118,6 +1224,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* BPF_LDX
*/
case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
/* llgc %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
@@ -1125,6 +1232,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* llgh %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
@@ -1132,6 +1240,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* llgf %dst,off(%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
@@ -1139,6 +1248,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
/* lg %dst,0(off,%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
@@ -1207,7 +1317,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
*/
if (jit->seen & SEEN_STACK)
- off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth;
+ off = STK_OFF_TCCNT + STK_OFF + stack_depth;
else
off = STK_OFF_TCCNT;
/* lhi %w0,1 */
@@ -1249,7 +1359,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/*
* Restore registers before calling function
*/
- save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth);
+ save_restore_regs(jit, REGS_RESTORE, stack_depth);
/*
* goto *(prog->bpf_func + tail_call_start);
@@ -1267,8 +1377,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
last = (i == fp->len - 1) ? 1 : 0;
if (last)
break;
- /* j <exit> */
- EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
+ /* brc 0xf, <exit> */
+ EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
+ else
+ /* brcl 0xf, <exit> */
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
break;
/*
* Branch relative (number of skipped instructions) to offset on
@@ -1416,21 +1530,10 @@ branch_ks:
}
break;
branch_ku:
- is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* clfi or clgfi %dst,imm */
- EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000,
- dst_reg, imm);
- if (!is_first_pass(jit) &&
- can_use_rel(jit, addrs[i + off + 1])) {
- /* brc mask,off */
- EMIT4_PCREL_RIC(0xa7040000,
- mask >> 12, addrs[i + off + 1]);
- } else {
- /* brcl mask,off */
- EMIT6_PCREL_RILC(0xc0040000,
- mask >> 12, addrs[i + off + 1]);
- }
- break;
+ /* lgfi %w1,imm (load sign extend imm) */
+ src_reg = REG_1;
+ EMIT6_IMM(0xc0010000, src_reg, imm);
+ goto branch_xu;
branch_xs:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
if (!is_first_pass(jit) &&
@@ -1484,6 +1587,23 @@ branch_oc:
pr_err("Unknown opcode %02x\n", insn->code);
return -1;
}
+
+ if (probe_prg != -1) {
+ /*
+ * Handlers of certain exceptions leave psw.addr pointing to
+ * the instruction directly after the failing one. Therefore,
+ * create two exception table entries and also add a nop in
+ * case two probing instructions come directly after each
+ * other.
+ */
+ nop_prg = jit->prg;
+ /* bcr 0,%0 */
+ _EMIT2(0x0700);
+ err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
+ if (err < 0)
+ return err;
+ }
+
return insn_count;
}
@@ -1509,7 +1629,14 @@ static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
*/
static int bpf_set_addr(struct bpf_jit *jit, int i)
{
- if (!bpf_is_new_addr_sane(jit, i))
+ int delta;
+
+ if (is_codegen_pass(jit)) {
+ delta = jit->prg - jit->addrs[i];
+ if (delta < 0)
+ bpf_skip(jit, -delta);
+ }
+ if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
return -1;
jit->addrs[i] = jit->prg;
return 0;
@@ -1519,26 +1646,27 @@ static int bpf_set_addr(struct bpf_jit *jit, int i)
* Compile eBPF program into s390x code
*/
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
- bool extra_pass)
+ bool extra_pass, u32 stack_depth)
{
int i, insn_count, lit32_size, lit64_size;
jit->lit32 = jit->lit32_start;
jit->lit64 = jit->lit64_start;
jit->prg = 0;
+ jit->excnt = 0;
- bpf_jit_prologue(jit, fp->aux->stack_depth);
+ bpf_jit_prologue(jit, stack_depth);
if (bpf_set_addr(jit, 0) < 0)
return -1;
for (i = 0; i < fp->len; i += insn_count) {
- insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
+ insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
if (insn_count < 0)
return -1;
/* Next instruction address */
if (bpf_set_addr(jit, i + insn_count) < 0)
return -1;
}
- bpf_jit_epilogue(jit, fp->aux->stack_depth);
+ bpf_jit_epilogue(jit, stack_depth);
lit32_size = jit->lit32 - jit->lit32_start;
lit64_size = jit->lit64 - jit->lit64_start;
@@ -1550,6 +1678,12 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
jit->lit64_start = ALIGN(jit->lit64_start, 8);
jit->size = jit->lit64_start + lit64_size;
jit->size_prg = jit->prg;
+
+ if (WARN_ON_ONCE(fp->aux->extable &&
+ jit->excnt != fp->aux->num_exentries))
+ /* Verifier bug - too many entries. */
+ return -1;
+
return 0;
}
@@ -1564,11 +1698,35 @@ struct s390_jit_data {
int pass;
};
+static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
+ struct bpf_prog *fp)
+{
+ struct bpf_binary_header *header;
+ u32 extable_size;
+ u32 code_size;
+
+ /* We need two entries per insn. */
+ fp->aux->num_exentries *= 2;
+
+ code_size = roundup(jit->size,
+ __alignof__(struct exception_table_entry));
+ extable_size = fp->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+ header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf,
+ 8, jit_fill_hole);
+ if (!header)
+ return NULL;
+ fp->aux->extable = (struct exception_table_entry *)
+ (jit->prg_buf + code_size);
+ return header;
+}
+
/*
* Compile eBPF program "fp"
*/
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
+ u32 stack_depth = round_up(fp->aux->stack_depth, 8);
struct bpf_prog *tmp, *orig_fp = fp;
struct bpf_binary_header *header;
struct s390_jit_data *jit_data;
@@ -1621,7 +1779,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
* - 3: Calculate program size and addrs arrray
*/
for (pass = 1; pass <= 3; pass++) {
- if (bpf_jit_prog(&jit, fp, extra_pass)) {
+ if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
fp = orig_fp;
goto free_addrs;
}
@@ -1629,13 +1787,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/*
* Final pass: Allocate and generate program
*/
- header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole);
+ header = bpf_jit_alloc(&jit, fp);
if (!header) {
fp = orig_fp;
goto free_addrs;
}
skip_init_ctx:
- if (bpf_jit_prog(&jit, fp, extra_pass)) {
+ if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
bpf_jit_binary_free(header);
fp = orig_fp;
goto free_addrs;
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
deleted file mode 100644
index c89d26f4f77d..000000000000
--- a/arch/s390/numa/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-y += numa.o
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 748626a33028..b4e3c84772a1 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -4,4 +4,5 @@
#
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
- pci_event.o pci_debug.o pci_insn.o pci_mmio.o
+ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
+ pci_bus.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 94ca121933de..1804230dd8d8 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -36,18 +36,21 @@
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
+#include "pci_bus.h"
+
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);
-static unsigned int zpci_num_domains_allocated;
#define ZPCI_IOMAP_ENTRIES \
min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2), \
ZPCI_IOMAP_MAX_ENTRIES)
+unsigned int s390_pci_no_rid;
+
static DEFINE_SPINLOCK(zpci_iomap_lock);
static unsigned long *zpci_iomap_bitmap;
struct zpci_iomap_entry *zpci_iomap_start;
@@ -88,17 +91,12 @@ void zpci_remove_reserved_devices(void)
spin_unlock(&zpci_list_lock);
list_for_each_entry_safe(zdev, tmp, &remove, entry)
- zpci_remove_device(zdev);
-}
-
-static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
-{
- return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
+ zpci_zdev_put(zdev);
}
int pci_domain_nr(struct pci_bus *bus)
{
- return ((struct zpci_dev *) bus->sysdata)->domain;
+ return ((struct zpci_bus *) bus->sysdata)->domain_nr;
}
EXPORT_SYMBOL_GPL(pci_domain_nr);
@@ -228,28 +226,29 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
zpci_memcpy_toio(to, from, count);
}
-void __iomem *ioremap(unsigned long ioaddr, unsigned long size)
+void __iomem *ioremap(phys_addr_t addr, size_t size)
{
+ unsigned long offset, vaddr;
struct vm_struct *area;
- unsigned long offset;
+ phys_addr_t last_addr;
- if (!size)
+ last_addr = addr + size - 1;
+ if (!size || last_addr < addr)
return NULL;
if (!static_branch_unlikely(&have_mio))
- return (void __iomem *) ioaddr;
+ return (void __iomem *) addr;
- offset = ioaddr & ~PAGE_MASK;
- ioaddr &= PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ addr &= PAGE_MASK;
size = PAGE_ALIGN(size + offset);
area = get_vm_area(size, VM_IOREMAP);
if (!area)
return NULL;
- if (ioremap_page_range((unsigned long) area->addr,
- (unsigned long) area->addr + size,
- ioaddr, PAGE_KERNEL)) {
- vunmap(area->addr);
+ vaddr = (unsigned long) area->addr;
+ if (ioremap_page_range(vaddr, vaddr + size, addr, PAGE_KERNEL)) {
+ free_vm_area(area);
return NULL;
}
return (void __iomem *) ((unsigned long) area->addr + offset);
@@ -373,29 +372,17 @@ EXPORT_SYMBOL(pci_iounmap);
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 *val)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus);
- int ret;
+ struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
- if (!zdev || devfn != ZPCI_DEVFN)
- ret = -ENODEV;
- else
- ret = zpci_cfg_load(zdev, where, val, size);
-
- return ret;
+ return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV;
}
static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 val)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus);
- int ret;
+ struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
- if (!zdev || devfn != ZPCI_DEVFN)
- ret = -ENODEV;
- else
- ret = zpci_cfg_store(zdev, where, val, size);
-
- return ret;
+ return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV;
}
static struct pci_ops pci_root_ops = {
@@ -506,15 +493,15 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
return r;
}
-static int zpci_setup_bus_resources(struct zpci_dev *zdev,
- struct list_head *resources)
+int zpci_setup_bus_resources(struct zpci_dev *zdev,
+ struct list_head *resources)
{
unsigned long addr, size, flags;
struct resource *res;
int i, entry;
snprintf(zdev->res_name, sizeof(zdev->res_name),
- "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR);
+ "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
if (!zdev->bars[i].size)
@@ -608,98 +595,53 @@ void pcibios_disable_device(struct pci_dev *pdev)
zpci_debug_exit_device(zdev);
}
-static int zpci_alloc_domain(struct zpci_dev *zdev)
+static int __zpci_register_domain(int domain)
{
spin_lock(&zpci_domain_lock);
- if (zpci_num_domains_allocated > (ZPCI_NR_DEVICES - 1)) {
+ if (test_bit(domain, zpci_domain)) {
spin_unlock(&zpci_domain_lock);
- pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n",
- zdev->fid, ZPCI_NR_DEVICES);
- return -ENOSPC;
+ pr_err("Domain %04x is already assigned\n", domain);
+ return -EEXIST;
}
+ set_bit(domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
+ return domain;
+}
- if (zpci_unique_uid) {
- zdev->domain = (u16) zdev->uid;
- if (zdev->domain == 0) {
- pr_warn("UID checking is active but no UID is set for PCI function %08x, so automatic domain allocation is used instead\n",
- zdev->fid);
- update_uid_checking(false);
- goto auto_allocate;
- }
+static int __zpci_alloc_domain(void)
+{
+ int domain;
- if (test_bit(zdev->domain, zpci_domain)) {
- spin_unlock(&zpci_domain_lock);
- pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n",
- zdev->fid, zdev->domain);
- return -EEXIST;
- }
- set_bit(zdev->domain, zpci_domain);
- zpci_num_domains_allocated++;
- spin_unlock(&zpci_domain_lock);
- return 0;
- }
-auto_allocate:
+ spin_lock(&zpci_domain_lock);
/*
* We can always auto allocate domains below ZPCI_NR_DEVICES.
* There is either a free domain or we have reached the maximum in
* which case we would have bailed earlier.
*/
- zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
- set_bit(zdev->domain, zpci_domain);
- zpci_num_domains_allocated++;
+ domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
+ set_bit(domain, zpci_domain);
spin_unlock(&zpci_domain_lock);
- return 0;
+ return domain;
}
-static void zpci_free_domain(struct zpci_dev *zdev)
+int zpci_alloc_domain(int domain)
{
- spin_lock(&zpci_domain_lock);
- clear_bit(zdev->domain, zpci_domain);
- zpci_num_domains_allocated--;
- spin_unlock(&zpci_domain_lock);
+ if (zpci_unique_uid) {
+ if (domain)
+ return __zpci_register_domain(domain);
+ pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n");
+ update_uid_checking(false);
+ }
+ return __zpci_alloc_domain();
}
-void pcibios_remove_bus(struct pci_bus *bus)
+void zpci_free_domain(int domain)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus);
-
- zpci_exit_slot(zdev);
- zpci_cleanup_bus_resources(zdev);
- zpci_destroy_iommu(zdev);
- zpci_free_domain(zdev);
-
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
-
- zpci_dbg(3, "rem fid:%x\n", zdev->fid);
- kfree(zdev);
+ spin_lock(&zpci_domain_lock);
+ clear_bit(domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
}
-static int zpci_scan_bus(struct zpci_dev *zdev)
-{
- LIST_HEAD(resources);
- int ret;
-
- ret = zpci_setup_bus_resources(zdev, &resources);
- if (ret)
- goto error;
-
- zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
- zdev, &resources);
- if (!zdev->bus) {
- ret = -EIO;
- goto error;
- }
- zdev->bus->max_bus_speed = zdev->max_bus_speed;
- pci_bus_add_devices(zdev->bus);
- return 0;
-
-error:
- zpci_cleanup_bus_resources(zdev);
- pci_free_resource_list(&resources);
- return ret;
-}
int zpci_enable_device(struct zpci_dev *zdev)
{
@@ -726,21 +668,40 @@ EXPORT_SYMBOL_GPL(zpci_enable_device);
int zpci_disable_device(struct zpci_dev *zdev)
{
zpci_dma_exit_device(zdev);
+ /*
+ * The zPCI function may already be disabled by the platform, this is
+ * detected in clp_disable_fh() which becomes a no-op.
+ */
return clp_disable_fh(zdev);
}
EXPORT_SYMBOL_GPL(zpci_disable_device);
+void zpci_remove_device(struct zpci_dev *zdev)
+{
+ struct zpci_bus *zbus = zdev->zbus;
+ struct pci_dev *pdev;
+
+ pdev = pci_get_slot(zbus->bus, zdev->devfn);
+ if (pdev) {
+ if (pdev->is_virtfn)
+ return zpci_remove_virtfn(pdev, zdev->vfn);
+ pci_stop_and_remove_bus_device_locked(pdev);
+ }
+}
+
int zpci_create_device(struct zpci_dev *zdev)
{
int rc;
- rc = zpci_alloc_domain(zdev);
- if (rc)
- goto out;
+ kref_init(&zdev->kref);
+
+ spin_lock(&zpci_list_lock);
+ list_add_tail(&zdev->entry, &zpci_list);
+ spin_unlock(&zpci_list_lock);
rc = zpci_init_iommu(zdev);
if (rc)
- goto out_free;
+ goto out;
mutex_init(&zdev->lock);
if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
@@ -748,36 +709,54 @@ int zpci_create_device(struct zpci_dev *zdev)
if (rc)
goto out_destroy_iommu;
}
- rc = zpci_scan_bus(zdev);
+
+ rc = zpci_bus_device_register(zdev, &pci_root_ops);
if (rc)
goto out_disable;
- spin_lock(&zpci_list_lock);
- list_add_tail(&zdev->entry, &zpci_list);
- spin_unlock(&zpci_list_lock);
-
- zpci_init_slot(zdev);
-
return 0;
out_disable:
if (zdev->state == ZPCI_FN_STATE_ONLINE)
zpci_disable_device(zdev);
+
out_destroy_iommu:
zpci_destroy_iommu(zdev);
-out_free:
- zpci_free_domain(zdev);
out:
+ spin_lock(&zpci_list_lock);
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
return rc;
}
-void zpci_remove_device(struct zpci_dev *zdev)
+void zpci_release_device(struct kref *kref)
{
- if (!zdev->bus)
- return;
+ struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
+
+ if (zdev->zbus->bus)
+ zpci_remove_device(zdev);
- pci_stop_root_bus(zdev->bus);
- pci_remove_root_bus(zdev->bus);
+ switch (zdev->state) {
+ case ZPCI_FN_STATE_ONLINE:
+ case ZPCI_FN_STATE_CONFIGURED:
+ zpci_disable_device(zdev);
+ fallthrough;
+ case ZPCI_FN_STATE_STANDBY:
+ if (zdev->has_hp_slot)
+ zpci_exit_slot(zdev);
+ zpci_cleanup_bus_resources(zdev);
+ zpci_bus_device_unregister(zdev);
+ zpci_destroy_iommu(zdev);
+ fallthrough;
+ default:
+ break;
+ }
+
+ spin_lock(&zpci_list_lock);
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
+ zpci_dbg(3, "rem fid:%x\n", zdev->fid);
+ kfree(zdev);
}
int zpci_report_error(struct pci_dev *pdev,
@@ -844,6 +823,10 @@ char * __init pcibios_setup(char *str)
s390_pci_force_floating = 1;
return NULL;
}
+ if (!strcmp(str, "norid")) {
+ s390_pci_no_rid = 1;
+ return NULL;
+ }
return str;
}
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
new file mode 100644
index 000000000000..5967f3014156
--- /dev/null
+++ b/arch/s390/pci/pci_bus.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/jump_label.h>
+#include <linux/pci.h>
+#include <linux/printk.h>
+
+#include <asm/pci_clp.h>
+#include <asm/pci_dma.h>
+
+#include "pci_bus.h"
+
+static LIST_HEAD(zbus_list);
+static DEFINE_SPINLOCK(zbus_list_lock);
+static int zpci_nb_devices;
+
+/* zpci_bus_scan
+ * @zbus: the zbus holding the zdevices
+ * @ops: the pci operations
+ *
+ * The domain number must be set before pci_scan_root_bus is called.
+ * This function can be called once the domain is known, hence
+ * when the function_0 is dicovered.
+ */
+static int zpci_bus_scan(struct zpci_bus *zbus, int domain, struct pci_ops *ops)
+{
+ struct pci_bus *bus;
+ int rc;
+
+ rc = zpci_alloc_domain(domain);
+ if (rc < 0)
+ return rc;
+ zbus->domain_nr = rc;
+
+ bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources);
+ if (!bus) {
+ zpci_free_domain(zbus->domain_nr);
+ return -EFAULT;
+ }
+
+ zbus->bus = bus;
+ pci_bus_add_devices(bus);
+ return 0;
+}
+
+static void zpci_bus_release(struct kref *kref)
+{
+ struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
+
+ if (zbus->bus) {
+ pci_lock_rescan_remove();
+ pci_stop_root_bus(zbus->bus);
+
+ zpci_free_domain(zbus->domain_nr);
+ pci_free_resource_list(&zbus->resources);
+
+ pci_remove_root_bus(zbus->bus);
+ pci_unlock_rescan_remove();
+ }
+
+ spin_lock(&zbus_list_lock);
+ list_del(&zbus->bus_next);
+ spin_unlock(&zbus_list_lock);
+ kfree(zbus);
+}
+
+static void zpci_bus_put(struct zpci_bus *zbus)
+{
+ kref_put(&zbus->kref, zpci_bus_release);
+}
+
+static struct zpci_bus *zpci_bus_get(int pchid)
+{
+ struct zpci_bus *zbus;
+
+ spin_lock(&zbus_list_lock);
+ list_for_each_entry(zbus, &zbus_list, bus_next) {
+ if (pchid == zbus->pchid) {
+ kref_get(&zbus->kref);
+ goto out_unlock;
+ }
+ }
+ zbus = NULL;
+out_unlock:
+ spin_unlock(&zbus_list_lock);
+ return zbus;
+}
+
+static struct zpci_bus *zpci_bus_alloc(int pchid)
+{
+ struct zpci_bus *zbus;
+
+ zbus = kzalloc(sizeof(*zbus), GFP_KERNEL);
+ if (!zbus)
+ return NULL;
+
+ zbus->pchid = pchid;
+ INIT_LIST_HEAD(&zbus->bus_next);
+ spin_lock(&zbus_list_lock);
+ list_add_tail(&zbus->bus_next, &zbus_list);
+ spin_unlock(&zbus_list_lock);
+
+ kref_init(&zbus->kref);
+ INIT_LIST_HEAD(&zbus->resources);
+
+ zbus->bus_resource.start = 0;
+ zbus->bus_resource.end = ZPCI_BUS_NR;
+ zbus->bus_resource.flags = IORESOURCE_BUS;
+ pci_add_resource(&zbus->resources, &zbus->bus_resource);
+
+ return zbus;
+}
+
+#ifdef CONFIG_PCI_IOV
+static int zpci_bus_link_virtfn(struct pci_dev *pdev,
+ struct pci_dev *virtfn, int vfid)
+{
+ int rc;
+
+ rc = pci_iov_sysfs_link(pdev, virtfn, vfid);
+ if (rc)
+ return rc;
+
+ virtfn->is_virtfn = 1;
+ virtfn->multifunction = 0;
+ virtfn->physfn = pci_dev_get(pdev);
+
+ return 0;
+}
+
+static int zpci_bus_setup_virtfn(struct zpci_bus *zbus,
+ struct pci_dev *virtfn, int vfn)
+{
+ int i, cand_devfn;
+ struct zpci_dev *zdev;
+ struct pci_dev *pdev;
+ int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
+ int rc = 0;
+
+ if (!zbus->multifunction)
+ return 0;
+
+ /* If the parent PF for the given VF is also configured in the
+ * instance, it must be on the same zbus.
+ * We can then identify the parent PF by checking what
+ * devfn the VF would have if it belonged to that PF using the PF's
+ * stride and offset. Only if this candidate devfn matches the
+ * actual devfn will we link both functions.
+ */
+ for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) {
+ zdev = zbus->function[i];
+ if (zdev && zdev->is_physfn) {
+ pdev = pci_get_slot(zbus->bus, zdev->devfn);
+ if (!pdev)
+ continue;
+ cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
+ if (cand_devfn == virtfn->devfn) {
+ rc = zpci_bus_link_virtfn(pdev, virtfn, vfid);
+ /* balance pci_get_slot() */
+ pci_dev_put(pdev);
+ break;
+ }
+ /* balance pci_get_slot() */
+ pci_dev_put(pdev);
+ }
+ }
+ return rc;
+}
+#else
+static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus,
+ struct pci_dev *virtfn, int vfn)
+{
+ return 0;
+}
+#endif
+
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ /*
+ * With pdev->no_vf_scan the common PCI probing code does not
+ * perform PF/VF linking.
+ */
+ if (zdev->vfn)
+ zpci_bus_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
+
+}
+
+static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ struct pci_bus *bus;
+ struct resource_entry *window, *n;
+ struct resource *res;
+ struct pci_dev *pdev;
+ int rc;
+
+ bus = zbus->bus;
+ if (!bus)
+ return -EINVAL;
+
+ pdev = pci_get_slot(bus, zdev->devfn);
+ if (pdev) {
+ /* Device is already known. */
+ pci_dev_put(pdev);
+ return 0;
+ }
+
+ rc = zpci_init_slot(zdev);
+ if (rc)
+ return rc;
+ zdev->has_hp_slot = 1;
+
+ resource_list_for_each_entry_safe(window, n, &zbus->resources) {
+ res = window->res;
+ pci_bus_add_resource(bus, res, 0);
+ }
+
+ pdev = pci_scan_single_device(bus, zdev->devfn);
+ if (pdev)
+ pci_bus_add_device(pdev);
+
+ return 0;
+}
+
+static void zpci_bus_add_devices(struct zpci_bus *zbus)
+{
+ int i;
+
+ for (i = 1; i < ZPCI_FUNCTIONS_PER_BUS; i++)
+ if (zbus->function[i])
+ zpci_bus_add_device(zbus, zbus->function[i]);
+
+ pci_lock_rescan_remove();
+ pci_bus_add_devices(zbus->bus);
+ pci_unlock_rescan_remove();
+}
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
+{
+ struct zpci_bus *zbus = NULL;
+ int rc = -EBADF;
+
+ if (zpci_nb_devices == ZPCI_NR_DEVICES) {
+ pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n",
+ zdev->fid, ZPCI_NR_DEVICES);
+ return -ENOSPC;
+ }
+ zpci_nb_devices++;
+
+ if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS)
+ return -EINVAL;
+
+ if (!s390_pci_no_rid && zdev->rid_available)
+ zbus = zpci_bus_get(zdev->pchid);
+
+ if (!zbus) {
+ zbus = zpci_bus_alloc(zdev->pchid);
+ if (!zbus)
+ return -ENOMEM;
+ }
+
+ zdev->zbus = zbus;
+ if (zbus->function[zdev->devfn]) {
+ pr_err("devfn %04x is already assigned\n", zdev->devfn);
+ goto error; /* rc already set */
+ }
+ zbus->function[zdev->devfn] = zdev;
+
+ zpci_setup_bus_resources(zdev, &zbus->resources);
+
+ if (zbus->bus) {
+ if (!zbus->multifunction) {
+ WARN_ONCE(1, "zbus is not multifunction\n");
+ goto error_bus;
+ }
+ if (!zdev->rid_available) {
+ WARN_ONCE(1, "rid_available not set for multifunction\n");
+ goto error_bus;
+ }
+ rc = zpci_bus_add_device(zbus, zdev);
+ if (rc)
+ goto error_bus;
+ } else if (zdev->devfn == 0) {
+ if (zbus->multifunction && !zdev->rid_available) {
+ WARN_ONCE(1, "rid_available not set on function 0 for multifunction\n");
+ goto error_bus;
+ }
+ rc = zpci_bus_scan(zbus, (u16)zdev->uid, ops);
+ if (rc)
+ goto error_bus;
+ zpci_bus_add_devices(zbus);
+ rc = zpci_init_slot(zdev);
+ if (rc)
+ goto error_bus;
+ zdev->has_hp_slot = 1;
+ zbus->multifunction = zdev->rid_available;
+ zbus->max_bus_speed = zdev->max_bus_speed;
+ } else {
+ zbus->multifunction = 1;
+ }
+
+ return 0;
+
+error_bus:
+ zpci_nb_devices--;
+ zbus->function[zdev->devfn] = NULL;
+error:
+ pr_err("Adding PCI function %08x failed\n", zdev->fid);
+ zpci_bus_put(zbus);
+ return rc;
+}
+
+void zpci_bus_device_unregister(struct zpci_dev *zdev)
+{
+ struct zpci_bus *zbus = zdev->zbus;
+
+ zpci_nb_devices--;
+ zbus->function[zdev->devfn] = NULL;
+ zpci_bus_put(zbus);
+}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
new file mode 100644
index 000000000000..4972433df458
--- /dev/null
+++ b/arch/s390/pci/pci_bus.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
+void zpci_bus_device_unregister(struct zpci_dev *zdev);
+int zpci_bus_init(void);
+
+void zpci_release_device(struct kref *kref);
+static inline void zpci_zdev_put(struct zpci_dev *zdev)
+{
+ kref_put(&zdev->kref, zpci_release_device);
+}
+
+int zpci_alloc_domain(int domain);
+void zpci_free_domain(int domain);
+int zpci_setup_bus_resources(struct zpci_dev *zdev,
+ struct list_head *resources);
+
+static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus,
+ unsigned int devfn)
+{
+ struct zpci_bus *zbus = bus->sysdata;
+
+ return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn];
+}
+
+#ifdef CONFIG_PCI_IOV
+static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn)
+{
+
+ pci_lock_rescan_remove();
+ /* Linux' vfid's start at 0 vfn at 1 */
+ pci_iov_remove_virtfn(pdev->physfn, vfn - 1);
+ pci_unlock_rescan_remove();
+}
+#else /* CONFIG_PCI_IOV */
+static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) {}
+#endif /* CONFIG_PCI_IOV */
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index ea794ae755ae..7e735f41a0a6 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -155,8 +155,13 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
zdev->pfgid = response->pfgid;
zdev->pft = response->pft;
zdev->vfn = response->vfn;
+ zdev->port = response->port;
zdev->uid = response->uid;
zdev->fmb_length = sizeof(u32) * response->fmb_len;
+ zdev->rid_available = response->rid_avail;
+ zdev->is_physfn = response->is_physfn;
+ if (!s390_pci_no_rid && zdev->rid_available)
+ zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN;
memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
if (response->util_str_avail) {
@@ -309,14 +314,13 @@ out:
int clp_disable_fh(struct zpci_dev *zdev)
{
- u32 fh = zdev->fh;
int rc;
if (!zdev_enabled(zdev))
return 0;
rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN);
- zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+ zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
return rc;
}
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 8d6ee4af4230..d9ae7456dd4c 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -14,6 +14,8 @@
#include <asm/pci_debug.h>
#include <asm/sclp.h>
+#include "pci_bus.h"
+
/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
u32 reserved1;
@@ -53,7 +55,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
zpci_err_hex(ccdf, sizeof(*ccdf));
if (zdev)
- pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
@@ -78,42 +80,48 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
enum zpci_state state;
int ret;
- if (zdev)
- pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+ if (zdev && zdev->zbus && zdev->zbus->bus)
+ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
- pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
- pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
zpci_err("avail CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
switch (ccdf->pec) {
case 0x0301: /* Reserved|Standby -> Configured */
if (!zdev) {
- ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
- if (ret)
- break;
- zdev = get_zdev_by_fid(ccdf->fid);
+ ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 1);
+ break;
}
- if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
+ /* the configuration request may be stale */
+ if (zdev->state != ZPCI_FN_STATE_STANDBY)
break;
- zdev->state = ZPCI_FN_STATE_CONFIGURED;
zdev->fh = ccdf->fh;
+ zdev->state = ZPCI_FN_STATE_CONFIGURED;
ret = zpci_enable_device(zdev);
if (ret)
break;
+
+ pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn);
+ if (!pdev)
+ break;
+
+ pci_bus_add_device(pdev);
pci_lock_rescan_remove();
- pci_rescan_bus(zdev->bus);
+ pci_bus_add_devices(zdev->zbus->bus);
pci_unlock_rescan_remove();
break;
case 0x0302: /* Reserved -> Standby */
- if (!zdev)
+ if (!zdev) {
clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+ break;
+ }
+ zdev->fh = ccdf->fh;
break;
case 0x0303: /* Deconfiguration requested */
if (!zdev)
break;
if (pdev)
- pci_stop_and_remove_bus_device_locked(pdev);
+ zpci_remove_device(zdev);
ret = zpci_disable_device(zdev);
if (ret)
@@ -132,7 +140,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
/* Give the driver a hint that the function is
* already unusable. */
pdev->error_state = pci_channel_io_perm_failure;
- pci_stop_and_remove_bus_device_locked(pdev);
+ zpci_remove_device(zdev);
}
zdev->fh = ccdf->fh;
@@ -140,7 +148,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zdev->state = ZPCI_FN_STATE_STANDBY;
if (!clp_get_state(ccdf->fid, &state) &&
state == ZPCI_FN_STATE_RESERVED) {
- zpci_remove_device(zdev);
+ zpci_zdev_put(zdev);
}
break;
case 0x0306: /* 0x308 or 0x302 for multiple devices */
@@ -149,12 +157,11 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
case 0x0308: /* Standby -> Reserved */
if (!zdev)
break;
- zpci_remove_device(zdev);
+ zpci_zdev_put(zdev);
break;
default:
break;
}
- pci_dev_put(pdev);
}
void zpci_event_availability(void *data)
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 020a2c514d96..401cf670a243 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -125,7 +125,7 @@ static long get_pfn(unsigned long user_addr, unsigned long access,
struct vm_area_struct *vma;
long ret;
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
ret = -EINVAL;
vma = find_vma(current->mm, user_addr);
if (!vma)
@@ -135,7 +135,7 @@ static long get_pfn(unsigned long user_addr, unsigned long access,
goto out;
ret = follow_pfn(vma, user_addr, pfn);
out:
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
return ret;
}
@@ -155,10 +155,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
return -EINVAL;
/*
- * Only support read access to MIO capable devices on a MIO enabled
- * system. Otherwise we would have to check for every address if it is
- * a special ZPCI_ADDR and we would have to do a get_pfn() which we
- * don't need for MIO capable devices.
+ * We only support write access to MIO capable devices if we are on
+ * a MIO enabled system. Otherwise we would have to check for every
+ * address if it is a special ZPCI_ADDR and would have to do
+ * a get_pfn() which we don't need for MIO capable devices. Currently
+ * ISM devices are the only devices without MIO support and there is no
+ * known need for accessing these from userspace.
*/
if (static_branch_likely(&have_mio)) {
ret = __memcpy_toio_inuser((void __iomem *) mmio_addr,
@@ -282,10 +284,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
return -EINVAL;
/*
- * Only support write access to MIO capable devices on a MIO enabled
- * system. Otherwise we would have to check for every address if it is
- * a special ZPCI_ADDR and we would have to do a get_pfn() which we
- * don't need for MIO capable devices.
+ * We only support read access to MIO capable devices if we are on
+ * a MIO enabled system. Otherwise we would have to check for every
+ * address if it is a special ZPCI_ADDR and would have to do
+ * a get_pfn() which we don't need for MIO capable devices. Currently
+ * ISM devices are the only devices without MIO support and there is no
+ * known need for accessing these from userspace.
*/
if (static_branch_likely(&have_mio)) {
ret = __memcpy_fromio_inuser(
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 215f17437a4f..5c028bee91b9 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -33,6 +33,7 @@ zpci_attr(pchid, "0x%04x\n", pchid);
zpci_attr(pfgid, "0x%02x\n", pfgid);
zpci_attr(vfn, "0x%04x\n", vfn);
zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(port, "%d\n", port);
zpci_attr(uid, "0x%x\n", uid);
zpci_attr(segment0, "0x%02x\n", pfip[0]);
zpci_attr(segment1, "0x%02x\n", pfip[1]);
@@ -88,7 +89,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
ret = zpci_enable_device(zdev);
if (ret)
goto out;
- pci_rescan_bus(zdev->bus);
+ pci_rescan_bus(zdev->zbus->bus);
}
out:
pci_unlock_rescan_remove();
@@ -142,6 +143,7 @@ static struct attribute *zpci_dev_attrs[] = {
&dev_attr_pchid.attr,
&dev_attr_pfgid.attr,
&dev_attr_pft.attr,
+ &dev_attr_port.attr,
&dev_attr_vfn.attr,
&dev_attr_uid.attr,
&dev_attr_recover.attr,