diff options
Diffstat (limited to 'arch/s390')
124 files changed, 2745 insertions, 2021 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index e63940bb57cd..8b98c501142d 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -7,5 +7,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ -obj-$(CONFIG_NUMA) += numa/ obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2167bce993ff..b29fcc66ec39 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -30,7 +30,7 @@ config GENERIC_BUG_RELATIVE_POINTERS def_bool y config GENERIC_LOCKBREAK - def_bool y if PREEMPTTION + def_bool y if PREEMPTION config PGSTE def_bool y if KVM @@ -59,6 +59,7 @@ config KASAN_SHADOW_OFFSET config S390 def_bool y select ARCH_BINFMT_ELF_STATE + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE @@ -101,7 +102,6 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE - select ARCH_KEEP_MEMBLOCK select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING @@ -112,6 +112,7 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 + select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_CLOCKEVENTS select GENERIC_CPU_AUTOPROBE @@ -135,7 +136,6 @@ config S390 select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL - select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE @@ -144,6 +144,7 @@ config S390 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX @@ -162,7 +163,6 @@ config S390 select HAVE_LIVEPATCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP select MMU_GATHER_NO_GATHER select HAVE_MOD_ARCH_SPECIFIC @@ -462,6 +462,7 @@ config NUMA config NODES_SHIFT int + depends on NEED_MULTIPLE_NODES default "1" config SCHED_SMT @@ -625,10 +626,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE config ARCH_ENABLE_SPLIT_PMD_PTLOCK def_bool y -config FORCE_MAX_ZONEORDER - int - default "9" - config MAX_PHYSMEM_BITS int "Maximum size of supported physical memory in bits (42-53)" range 42 53 @@ -696,7 +693,7 @@ menu "I/O subsystem" config QDIO def_tristate y prompt "QDIO support" - ---help--- + help This driver provides the Queued Direct I/O base support for IBM System z. @@ -768,6 +765,7 @@ config VFIO_AP def_tristate n prompt "VFIO support for AP devices" depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM + depends on ZCRYPT help This driver grants access to Adjunct Processor (AP) devices via the VFIO mediated device interface. diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 190527560b2c..761fe2b0b2f6 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -7,7 +7,7 @@ config S390_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL select DEBUG_FS - ---help--- + help Say Y here if you want to show the kernel pagetable layout in a debugfs file. This information is only useful for kernel developers who are working in architecture specific areas of the kernel. diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 8dfa2cf1f05c..ba94b03c8b2f 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -27,7 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables -KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) +KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index aa738cad1338..d74a4c7d5df6 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -51,10 +51,9 @@ static struct platform_device *appldata_pdev; */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; static int appldata_timer_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static int appldata_interval_handler(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table_header *appldata_sysctl_header; static struct ctl_table appldata_table[] = { @@ -217,7 +216,7 @@ static void __appldata_vtimer_setup(int cmd) */ static int appldata_timer_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int timer_active = appldata_timer_active; int rc; @@ -250,7 +249,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write, */ static int appldata_interval_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int interval = appldata_interval; int rc; @@ -280,7 +279,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write, */ static int appldata_generic_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; struct list_head *lh; diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index e68136c3c23a..21c3147bd92a 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -29,10 +29,6 @@ * the structure version (product ID, see appldata_base.c) needs to be changed * as well and all documentation and z/VM applications using it must be * updated. - * - * The record layout is documented in the Linux for zSeries Device Drivers - * book: - * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ struct appldata_mem_data { u64 timestamp; diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 8bc14b0d1def..59c282ca002f 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -25,10 +25,6 @@ * This is accessed as binary data by z/VM. If changes to it can't be avoided, * the structure version (product ID, see appldata_base.c) needs to be changed * as well and all documentation and z/VM applications using it must be updated. - * - * The record layout is documented in the Linux for zSeries Device Drivers - * book: - * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ struct appldata_net_sum_data { u64 timestamp; diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 8bf46d705957..a363d30ce739 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -32,10 +32,6 @@ * the structure version (product ID, see appldata_base.c) needs to be changed * as well and all documentation and z/VM applications using it must be * updated. - * - * The record layout is documented in the Linux for zSeries Device Drivers - * book: - * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ struct appldata_os_per_cpu { u32 per_cpu_user; /* timer ticks spent in user mode */ @@ -133,8 +129,7 @@ static void appldata_get_os_data(void *data) os_data->nr_cpus = j; - new_size = sizeof(struct appldata_os_data) + - (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu)); + new_size = struct_size(os_data, os_cpu, os_data->nr_cpus); if (ops.size != new_size) { if (ops.active) { rc = appldata_diag(APPLDATA_RECORD_OS_ID, @@ -169,8 +164,7 @@ static int __init appldata_os_init(void) { int rc, max_size; - max_size = sizeof(struct appldata_os_data) + - (num_possible_cpus() * sizeof(struct appldata_os_per_cpu)); + max_size = struct_size(appldata_os_data, os_cpu, num_possible_cpus()); if (max_size > APPLDATA_MAX_REC_SIZE) { pr_err("Maximum OS record size %i exceeds the maximum " "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE); diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 357adad991d2..8e222a666025 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -2,12 +2,12 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/ctype.h> +#include <linux/pgtable.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> #include <asm/boot_data.h> #include <asm/facility.h> -#include <asm/pgtable.h> #include <asm/uv.h> #include "boot.h" diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 5591243d673e..d4442163ffa9 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -2,8 +2,8 @@ /* * Copyright IBM Corp. 2019 */ +#include <linux/pgtable.h> #include <asm/mem_detect.h> -#include <asm/pgtable.h> #include <asm/cpacf.h> #include <asm/timex.h> #include <asm/sclp.h> diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 46038bc58c9e..7228aabe9da6 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -1,5 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y @@ -14,7 +15,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -31,9 +31,9 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y -CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_BPF_LSM=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -51,14 +51,11 @@ CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m +CONFIG_S390_UNWIND_SELFTEST=y CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y @@ -77,6 +74,8 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -96,7 +95,6 @@ CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y @@ -130,6 +128,7 @@ CONFIG_SYN_COOKIES=y CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESPINTCP=y CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m @@ -144,6 +143,7 @@ CONFIG_TCP_CONG_ILLINOIS=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESPINTCP=y CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m CONFIG_IPV6_VTI=m @@ -151,7 +151,10 @@ CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_RPL_LWTUNNEL=y +CONFIG_MPTCP=y CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -317,6 +320,7 @@ CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m CONFIG_BRIDGE=m +CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y @@ -341,6 +345,7 @@ CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m @@ -364,6 +369,7 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_GATE=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m @@ -374,6 +380,7 @@ CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m # CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y +# CONFIG_PCIEASPM is not set CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y @@ -435,6 +442,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -448,6 +456,8 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_BAREUDP=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -481,7 +491,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -# CONFIG_MLXFW is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -514,6 +523,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -561,6 +571,8 @@ CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -608,11 +620,13 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_NTFS_FS=m CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -650,8 +664,8 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_FORTIFY_SOURCE=y @@ -675,8 +689,11 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m +CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -685,6 +702,7 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -701,6 +719,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m @@ -719,6 +738,9 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -774,6 +796,7 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y +CONFIG_TEST_LOCKUP=m CONFIG_DEBUG_TIMEKEEPING=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y @@ -785,8 +808,11 @@ CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 +# CONFIG_RCU_TRACE is not set CONFIG_LATENCYTOP=y +CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y CONFIG_IRQSOFF_TRACER=y @@ -794,6 +820,7 @@ CONFIG_PREEMPT_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_S390_PTDUMP=y CONFIG_NOTIFIER_ERROR_INJECTION=m @@ -805,13 +832,16 @@ CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y +CONFIG_TEST_MIN_HEAP=y CONFIG_TEST_SORT=y CONFIG_KPROBES_SANITY_TEST=y CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 7cd0648c1f4e..fab03b7a6932 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -1,5 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_WATCH_QUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y @@ -13,7 +14,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y @@ -30,9 +30,9 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y -CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_BPF_LSM=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -41,7 +41,6 @@ CONFIG_LIVEPATCH=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y -# CONFIG_NUMA_EMU is not set CONFIG_HZ_100=y CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y @@ -51,14 +50,11 @@ CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m +CONFIG_S390_UNWIND_SELFTEST=m CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y @@ -74,6 +70,8 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y @@ -91,7 +89,6 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y @@ -125,6 +122,7 @@ CONFIG_SYN_COOKIES=y CONFIG_NET_IPVTI=m CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESPINTCP=y CONFIG_INET_IPCOMP=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m @@ -139,6 +137,7 @@ CONFIG_TCP_CONG_ILLINOIS=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESPINTCP=y CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m CONFIG_IPV6_VTI=m @@ -146,7 +145,10 @@ CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_RPL_LWTUNNEL=y +CONFIG_MPTCP=y CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -311,6 +313,7 @@ CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m CONFIG_BRIDGE=m +CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y @@ -335,6 +338,7 @@ CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m @@ -358,6 +362,7 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_GATE=m CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m @@ -368,6 +373,7 @@ CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m # CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y +# CONFIG_PCIEASPM is not set CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y @@ -430,6 +436,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_MULTIPATH_HST=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -444,6 +451,8 @@ CONFIG_EQUALIZER=m CONFIG_IFB=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m +CONFIG_VXLAN=m +CONFIG_BAREUDP=m CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m @@ -477,7 +486,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -# CONFIG_MLXFW is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -510,6 +518,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m @@ -557,6 +566,8 @@ CONFIG_VFIO_MDEV_DEVICE=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -600,11 +611,13 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_NTFS_FS=m CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m @@ -642,8 +655,8 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m +CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y @@ -667,8 +680,11 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_CURVE25519=m +CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m +CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_OFB=m @@ -678,6 +694,7 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m +CONFIG_CRYPTO_BLAKE2S=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -694,6 +711,7 @@ CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DES=m CONFIG_CRYPTO_FCRYPT=m CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m @@ -712,6 +730,9 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y +CONFIG_CRYPTO_LIB_BLAKE2S=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -725,6 +746,7 @@ CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CORDIC=m +CONFIG_PRIME_NUMBERS=m CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m @@ -739,15 +761,19 @@ CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y +CONFIG_TEST_LOCKUP=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y +CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_S390_PTDUMP=y CONFIG_LKDTM=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 20c51e5d9353..8f67c55625f9 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -30,6 +30,7 @@ CONFIG_IBM_PARTITION=y # CONFIG_BOUNCE is not set CONFIG_NET=y # CONFIG_IUCV is not set +# CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_RAM=y # CONFIG_BLK_DEV_XPRAM is not set @@ -55,6 +56,8 @@ CONFIG_RAW_DRIVER=y # CONFIG_MONWRITER is not set # CONFIG_S390_VMUR is not set # CONFIG_HID is not set +# CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set @@ -62,12 +65,15 @@ CONFIG_CONFIGFS_FS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" +# CONFIG_ZLIB_DFLTCC is not set CONFIG_PRINTK_TIME=y +# CONFIG_SYMBOLIC_ERRNAME is not set CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index d977643fa627..5057773f82e9 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -249,7 +249,7 @@ static void prng_tdes_deinstantiate(void) { pr_debug("The prng module stopped " "after running in triple DES mode\n"); - kzfree(prng_data); + kfree_sensitive(prng_data); } @@ -442,7 +442,7 @@ outfree: static void prng_sha512_deinstantiate(void) { pr_debug("The prng module stopped after running in SHA-512 mode\n"); - kzfree(prng_data); + kfree_sensitive(prng_data); } @@ -693,7 +693,7 @@ static ssize_t prng_chunksize_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); + return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); } static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL); @@ -712,7 +712,7 @@ static ssize_t prng_counter_show(struct device *dev, counter = prng_data->prngws.byte_counter; mutex_unlock(&prng_data->mutex); - return snprintf(buf, PAGE_SIZE, "%llu\n", counter); + return scnprintf(buf, PAGE_SIZE, "%llu\n", counter); } static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL); @@ -721,7 +721,7 @@ static ssize_t prng_errorflag_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); + return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); } static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL); @@ -731,9 +731,9 @@ static ssize_t prng_mode_show(struct device *dev, char *buf) { if (prng_mode == PRNG_MODE_TDES) - return snprintf(buf, PAGE_SIZE, "TDES\n"); + return scnprintf(buf, PAGE_SIZE, "TDES\n"); else - return snprintf(buf, PAGE_SIZE, "SHA512\n"); + return scnprintf(buf, PAGE_SIZE, "SHA512\n"); } static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL); @@ -756,7 +756,7 @@ static ssize_t prng_reseed_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); + return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); } static ssize_t prng_reseed_limit_store(struct device *dev, struct device_attribute *attr, @@ -787,7 +787,7 @@ static ssize_t prng_strength_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "256\n"); + return scnprintf(buf, PAGE_SIZE, "256\n"); } static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL); diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 7c15542d3685..698b1e6d3c14 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -27,7 +27,7 @@ #include "sha.h" -static int sha1_init(struct shash_desc *desc) +static int s390_sha1_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); @@ -42,7 +42,7 @@ static int sha1_init(struct shash_desc *desc) return 0; } -static int sha1_export(struct shash_desc *desc, void *out) +static int s390_sha1_export(struct shash_desc *desc, void *out) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); struct sha1_state *octx = out; @@ -53,7 +53,7 @@ static int sha1_export(struct shash_desc *desc, void *out) return 0; } -static int sha1_import(struct shash_desc *desc, const void *in) +static int s390_sha1_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); const struct sha1_state *ictx = in; @@ -67,11 +67,11 @@ static int sha1_import(struct shash_desc *desc, const void *in) static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_init, + .init = s390_sha1_init, .update = s390_sha_update, .final = s390_sha_final, - .export = sha1_export, - .import = sha1_import, + .export = s390_sha1_export, + .import = s390_sha1_import, .descsize = sizeof(struct s390_sha_ctx), .statesize = sizeof(struct sha1_state), .base = { diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 83f6e85de7bc..319efa0e6d02 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -6,5 +6,6 @@ generated-y += unistd_nr.h generic-y += asm-offsets.h generic-y += export.h +generic-y += kvm_types.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h new file mode 100644 index 000000000000..11f615eb0066 --- /dev/null +++ b/arch/s390/include/asm/asm-const.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_ASM_CONST_H +#define _ASM_S390_ASM_CONST_H + +#ifdef __ASSEMBLY__ +# define stringify_in_c(...) __VA_ARGS__ +#else +/* This version of stringify will deal with commas... */ +# define __stringify_in_c(...) #__VA_ARGS__ +# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " " +#endif +#endif /* _ASM_S390_ASM_CONST_H */ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 491ad53a0d4e..11c5952e1afa 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -15,8 +15,6 @@ #include <asm/barrier.h> #include <asm/cmpxchg.h> -#define ATOMIC_INIT(i) { (i) } - static inline int atomic_read(const atomic_t *v) { int c; @@ -47,7 +45,11 @@ static inline int atomic_fetch_add(int i, atomic_t *v) static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic_add_const(i, &v->counter); return; } @@ -114,7 +116,11 @@ static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) static inline void atomic64_add(s64 i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic64_add_const(i, (long *)&v->counter); return; } diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 7725f8006fdf..0b25f28351ed 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -2,7 +2,7 @@ #ifndef _ASM_S390_BUG_H #define _ASM_S390_BUG_H -#include <linux/kernel.h> +#include <linux/compiler.h> #ifdef CONFIG_BUG diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 865ce1cb86d5..3cfe1eb89838 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -11,6 +11,7 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> +#include <asm/chsc.h> #include <asm/fcx.h> #include <asm/irq.h> #include <asm/schid.h> @@ -236,4 +237,8 @@ extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *); struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int); u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx); +int ccw_device_pnso(struct ccw_device *cdev, + struct chsc_pnso_area *pnso_area, + struct chsc_pnso_resume_token resume_token, + int cnc); #endif /* _S390_CCWDEV_H_ */ diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 7293c139dd79..ad3acb1e882b 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -36,11 +36,6 @@ struct ccwgroup_device { * @set_online: function called when device is set online * @set_offline: function called when device is set offline * @shutdown: function called when device is shut down - * @prepare: prepare for pm state transition - * @complete: undo work done in @prepare - * @freeze: callback for freezing during hibernation snapshotting - * @thaw: undo work done in @freeze - * @restore: callback for restoring after hibernation * @driver: embedded driver structure * @ccw_driver: supported ccw_driver (optional) */ @@ -50,11 +45,6 @@ struct ccwgroup_driver { int (*set_online) (struct ccwgroup_device *); int (*set_offline) (struct ccwgroup_device *); void (*shutdown)(struct ccwgroup_device *); - int (*prepare) (struct ccwgroup_device *); - void (*complete) (struct ccwgroup_device *); - int (*freeze)(struct ccwgroup_device *); - int (*thaw) (struct ccwgroup_device *); - int (*restore)(struct ccwgroup_device *); struct device_driver driver; struct ccw_driver *ccw_driver; diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 91e376b0d28c..6d01c96aeb5c 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -39,25 +39,6 @@ csum_partial(const void *buff, int len, __wsum sum) return sum; } -/* - * the same as csum_partial_copy, but copies from user space. - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - * - * Copy from userspace and compute checksum. - */ -static inline __wsum -csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, - int *err_ptr) -{ - if (unlikely(copy_from_user(dst, src, len))) - *err_ptr = -EFAULT; - return csum_partial(dst, len, sum); -} - - static inline __wsum csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) { diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h new file mode 100644 index 000000000000..36ce2d25a5fc --- /dev/null +++ b/arch/s390/include/asm/chsc.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2020 + * + * Author(s): Alexandra Winter <wintera@linux.ibm.com> + * + * Interface for Channel Subsystem Call + */ +#ifndef _ASM_S390_CHSC_H +#define _ASM_S390_CHSC_H + +#include <uapi/asm/chsc.h> + +/** + * struct chsc_pnso_naid_l2 - network address information descriptor + * @nit: Network interface token + * @addr_lnid: network address and logical network id (VLAN ID) + */ +struct chsc_pnso_naid_l2 { + u64 nit; + struct { u8 mac[6]; u16 lnid; } addr_lnid; +} __packed; + +struct chsc_pnso_resume_token { + u64 t1; + u64 t2; +} __packed; + +struct chsc_pnso_naihdr { + struct chsc_pnso_resume_token resume_token; + u32:32; + u32 instance; + u32:24; + u8 naids; + u32 reserved[3]; +} __packed; + +struct chsc_pnso_area { + struct chsc_header request; + u8:2; + u8 m:1; + u8:5; + u8:2; + u8 ssid:2; + u8 fmt:4; + u16 sch; + u8:8; + u8 cssid; + u16:16; + u8 oc; + u32:24; + struct chsc_pnso_resume_token resume_token; + u32 n:1; + u32:31; + u32 reserved[3]; + struct chsc_header response; + u32:32; + struct chsc_pnso_naihdr naihdr; + struct chsc_pnso_naid_l2 entries[0]; +} __packed __aligned(PAGE_SIZE); + +#endif /* _ASM_S390_CHSC_H */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 310134015541..c1b82bcc017c 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2000 + * Copyright IBM Corp. 1999, 2020 */ #ifndef DEBUG_H #define DEBUG_H @@ -12,7 +12,7 @@ #include <linux/kernel.h> #include <linux/time.h> #include <linux/refcount.h> -#include <uapi/asm/debug.h> +#include <linux/fs.h> #define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ #define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ @@ -26,6 +26,16 @@ #define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */ /* the entry information */ +#define __DEBUG_FEATURE_VERSION 3 /* version of debug feature */ + +struct __debug_entry { + unsigned long clock : 60; + unsigned long exception : 1; + unsigned long level : 3; + void *caller; + unsigned short cpu; +} __packed; + typedef struct __debug_entry debug_entry_t; struct debug_view; @@ -82,7 +92,6 @@ struct debug_view { }; extern struct debug_view debug_hex_ascii_view; -extern struct debug_view debug_raw_view; extern struct debug_view debug_sprintf_view; /* do NOT use the _common functions */ diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 0036eab14391..ca8f85b53a90 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -298,10 +298,8 @@ struct diag26c_mac_resp { union diag318_info { unsigned long val; struct { - unsigned int cpnc : 8; - unsigned int cpvc_linux : 24; - unsigned char cpvc_distro[3]; - unsigned char zero; + unsigned long cpnc : 8; + unsigned long cpvc : 56; }; }; diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index ae27f756b409..3beb294fd553 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -1,12 +1,20 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __S390_EXTABLE_H #define __S390_EXTABLE_H + +#include <asm/ptrace.h> +#include <linux/compiler.h> + /* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * The exception table consists of three addresses: + * + * - Address of an instruction that is allowed to fault. + * - Address at which the program should continue. + * - Optional address of handler that takes pt_regs * argument and runs in + * interrupt context. + * + * No registers are modified, so it is entirely up to the continuation code + * to figure out what to do. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, @@ -17,6 +25,7 @@ struct exception_table_entry { int insn, fixup; + long handler; }; extern struct exception_table_entry *__start_dma_ex_table; @@ -29,6 +38,39 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) return (unsigned long)&x->fixup + x->fixup; } +typedef bool (*ex_handler_t)(const struct exception_table_entry *, + struct pt_regs *); + +static inline ex_handler_t +ex_fixup_handler(const struct exception_table_entry *x) +{ + if (likely(!x->handler)) + return NULL; + return (ex_handler_t)((unsigned long)&x->handler + x->handler); +} + +static inline bool ex_handle(const struct exception_table_entry *x, + struct pt_regs *regs) +{ + ex_handler_t handler = ex_fixup_handler(x); + + if (unlikely(handler)) + return handler(x, regs); + regs->psw.addr = extable_fixup(x); + return true; +} + #define ARCH_HAS_RELATIVE_EXTABLE +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->handler = b->handler + delta; + b->handler = tmp.handler - delta; +} + #endif diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index de8f0bf5f238..60f9241e5e4a 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -9,8 +9,8 @@ #ifndef _ASM_S390_HUGETLB_H #define _ASM_S390_HUGETLB_H +#include <linux/pgtable.h> #include <asm/page.h> -#include <asm/pgtable.h> #define hugetlb_free_pgd_range free_pgd_range #define hugepages_supported() (MACHINE_HAS_EDAT1) @@ -21,13 +21,6 @@ pte_t huge_ptep_get(pte_t *ptep); pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -static inline bool is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) -{ - return false; -} - /* * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. @@ -46,6 +39,7 @@ static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_arch_1, &page->flags); } +#define arch_clear_hugepage_flags arch_clear_hugepage_flags static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 5a16f500515a..da014e4f8113 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -26,7 +26,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define IO_SPACE_LIMIT 0 -void __iomem *ioremap(unsigned long offset, unsigned long size); +void __iomem *ioremap(phys_addr_t addr, size_t size); void iounmap(volatile void __iomem *addr); static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index b63bd66404b8..7d5cfdda5277 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -21,6 +21,7 @@ struct ipl_parameter_block { struct ipl_pb0_common common; struct ipl_pb0_fcp fcp; struct ipl_pb0_ccw ccw; + struct ipl_pb0_nvme nvme; char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)]; }; } __packed __aligned(PAGE_SIZE); @@ -30,6 +31,11 @@ struct ipl_parameter_block { #define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \ sizeof(struct ipl_pb0_fcp)) #define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp)) + +#define IPL_BP_NVME_LEN (sizeof(struct ipl_pl_hdr) + \ + sizeof(struct ipl_pb0_nvme)) +#define IPL_BP0_NVME_LEN (sizeof(struct ipl_pb0_nvme)) + #define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \ sizeof(struct ipl_pb0_ccw)) #define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw)) @@ -59,6 +65,7 @@ enum ipl_type { IPL_TYPE_FCP = 4, IPL_TYPE_FCP_DUMP = 8, IPL_TYPE_NSS = 16, + IPL_TYPE_NVME = 32, }; struct ipl_info @@ -74,6 +81,10 @@ struct ipl_info u64 lun; } fcp; struct { + u32 fid; + u32 nsid; + } nvme; + struct { char name[NSS_NAME_SIZE + 1]; } nss; } data; diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 70930fe5c496..89d6886040c8 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -2,8 +2,6 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#include <asm/pgtable.h> - #ifdef CONFIG_KASAN #define KASAN_SHADOW_SCALE_SHIFT 3 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d6bcd34f3ec3..463c24e26000 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -31,12 +31,12 @@ #define KVM_USER_MEM_SLOTS 32 /* - * These seem to be used for allocating ->chip in the routing table, - * which we don't use. 4096 is an out-of-thin-air value. If we need - * to look at ->chip later on, we'll need to revisit this. + * These seem to be used for allocating ->chip in the routing table, which we + * don't use. 1 is as small as we can get to reduce the needed memory. If we + * need to look at ->chip later on, we'll need to revisit this. */ #define KVM_NR_IRQCHIPS 1 -#define KVM_IRQCHIP_NUM_PINS 4096 +#define KVM_IRQCHIP_NUM_PINS 1 #define KVM_HALT_POLL_NS_DEFAULT 50000 /* s390-specific vcpu->requests bit members */ @@ -260,7 +260,8 @@ struct kvm_s390_sie_block { __u32 scaol; /* 0x0064 */ __u8 sdf; /* 0x0068 */ __u8 epdx; /* 0x0069 */ - __u8 reserved6a[2]; /* 0x006a */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ __u32 todpr; /* 0x006c */ #define GISA_FORMAT1 0x00000001 __u32 gd; /* 0x0070 */ @@ -375,6 +376,8 @@ struct kvm_vcpu_stat { u64 halt_poll_invalid; u64 halt_no_poll_steal; u64 halt_wakeup; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; u64 instruction_lctl; u64 instruction_lctlg; u64 instruction_stctl; @@ -743,6 +746,7 @@ struct kvm_vcpu_arch { bool gs_enabled; bool skey_enabled; struct kvm_s390_pv_vcpu pv; + union diag318_info diag318_info; }; struct kvm_vm_stat { @@ -971,17 +975,19 @@ struct kvm_arch_async_pf { unsigned long pfault_token; }; -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); +static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {} + void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 7f22262b0e46..a0a7a2c72bd4 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -2,38 +2,27 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H +#include <asm/asm-const.h> #include <linux/stringify.h> #define __ALIGN .align 4, 0x07 #define __ALIGN_STR __stringify(__ALIGN) -#ifndef __ASSEMBLY__ - /* * Helper macro for exception table entries */ -#define EX_TABLE(_fault, _target) \ - ".section __ex_table,\"a\"\n" \ - ".align 4\n" \ - ".long (" #_fault ") - .\n" \ - ".long (" #_target ") - .\n" \ - ".previous\n" - -#else /* __ASSEMBLY__ */ -#define EX_TABLE(_fault, _target) \ - .section __ex_table,"a" ; \ - .align 4 ; \ - .long (_fault) - . ; \ - .long (_target) - . ; \ - .previous +#define __EX_TABLE(_section, _fault, _target) \ + stringify_in_c(.section _section,"a";) \ + stringify_in_c(.align 8;) \ + stringify_in_c(.long (_fault) - .;) \ + stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.quad 0;) \ + stringify_in_c(.previous) -#define EX_TABLE_DMA(_fault, _target) \ - .section .dma.ex_table, "a" ; \ - .align 4 ; \ - .long (_fault) - . ; \ - .long (_target) - . ; \ - .previous +#define EX_TABLE(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target) +#define EX_TABLE_DMA(_fault, _target) \ + __EX_TABLE(.dma.ex_table, _fault, _target) -#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index b160da8fa14b..5afee80cff58 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -99,7 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc); void nmi_free_per_cpu(struct lowcore *lc); void s390_handle_mcck(void); -void s390_do_machine_check(struct pt_regs *regs); +int s390_do_machine_check(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_NMI_H */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 7485ee561fec..99b92c3e46b0 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -22,12 +22,17 @@ int pci_domain_nr(struct pci_bus *); int pci_proc_domain(struct pci_bus *); #define ZPCI_BUS_NR 0 /* default bus number */ -#define ZPCI_DEVFN 0 /* default device number */ #define ZPCI_NR_DMA_SPACES 1 #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS #define ZPCI_DOMAIN_BITMAP_SIZE (1 << 16) +#ifdef PCI +#if (ZPCI_NR_DEVICES > ZPCI_DOMAIN_BITMAP_SIZE) +# error ZPCI_NR_DEVICES can not be bigger than ZPCI_DOMAIN_BITMAP_SIZE +#endif +#endif /* PCI */ + /* PCI Function Controls */ #define ZPCI_FC_FN_ENABLED 0x80 #define ZPCI_FC_ERROR 0x40 @@ -94,10 +99,26 @@ struct zpci_bar_struct { struct s390_domain; +#define ZPCI_FUNCTIONS_PER_BUS 256 +struct zpci_bus { + struct kref kref; + struct pci_bus *bus; + struct zpci_dev *function[ZPCI_FUNCTIONS_PER_BUS]; + struct list_head resources; + struct list_head bus_next; + struct resource bus_resource; + int pchid; + int domain_nr; + bool multifunction; + enum pci_bus_speed max_bus_speed; +}; + /* Private data per function */ struct zpci_dev { - struct pci_bus *bus; + struct zpci_bus *zbus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + struct list_head bus_next; + struct kref kref; struct hotplug_slot hotplug_slot; enum zpci_state state; @@ -107,7 +128,12 @@ struct zpci_dev { u16 pchid; /* physical channel ID */ u8 pfgid; /* function group ID */ u8 pft; /* pci function type */ - u16 domain; + u8 port; + u8 rid_available : 1; + u8 has_hp_slot : 1; + u8 is_physfn : 1; + u8 reserved : 5; + unsigned int devfn; /* DEVFN part of the RID*/ struct mutex lock; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ @@ -167,6 +193,7 @@ static inline bool zdev_enabled(struct zpci_dev *zdev) extern const struct attribute_group *zpci_attr_groups[]; extern unsigned int s390_pci_force_floating __initdata; +extern unsigned int s390_pci_no_rid; /* ----------------------------------------------------------------------------- Prototypes @@ -227,7 +254,14 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {} /* Helpers */ static inline struct zpci_dev *to_zpci(struct pci_dev *pdev) { - return pdev->sysdata; + struct zpci_bus *zbus = pdev->sysdata; + + return zbus->function[pdev->devfn]; +} + +static inline struct zpci_dev *to_zpci_dev(struct device *dev) +{ + return to_zpci(to_pci_dev(dev)); } struct zpci_dev *get_zdev_by_fid(u32); diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index bd2cb4ea7d93..eb51272dd2cc 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -93,7 +93,10 @@ struct clp_req_query_pci { struct clp_rsp_query_pci { struct clp_rsp_hdr hdr; u16 vfn; /* virtual fn number */ - u16 : 6; + u16 : 3; + u16 rid_avail : 1; + u16 is_physfn : 1; + u16 reserved1 : 1; u16 mio_addr_avail : 1; u16 util_str_avail : 1; /* utility string available? */ u16 pfgid : 8; /* pci function group id */ @@ -102,12 +105,16 @@ struct clp_rsp_query_pci { u16 pchid; __le32 bar[PCI_STD_NUM_BARS]; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ - u32 : 16; + u16 : 12; + u16 port : 4; u8 fmb_len; u8 pft; /* pci function type */ u64 sdma; /* start dma as */ u64 edma; /* end dma as */ - u32 reserved[11]; +#define ZPCI_RID_MASK_DEVFN 0x00ff + u16 rid; /* BUS/DEVFN PCI address */ + u16 reserved0; + u32 reserved[10]; u32 uid; /* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ u32 reserved2[16]; diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 419fac7a62c0..f62cd3ed2d44 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -131,12 +131,6 @@ static inline void validate_st_entry(unsigned long *entry) *entry |= ZPCI_TABLE_VALID; } -static inline void invalidate_table_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry |= ZPCI_TABLE_INVALID; -} - static inline void invalidate_pt_entry(unsigned long *entry) { WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); @@ -173,11 +167,6 @@ static inline int pt_entry_isvalid(unsigned long entry) return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; } -static inline int entry_isprotected(unsigned long entry) -{ - return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED; -} - static inline unsigned long *get_rt_sto(unsigned long entry) { return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 50b4ce8cddfd..918f0ba4f4d2 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -29,7 +29,7 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ old__, new__, prev__; \ pcp_op_T__ *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ prev__ = *ptr__; \ do { \ @@ -37,7 +37,7 @@ new__ = old__ op (val); \ prev__ = cmpxchg(ptr__, old__, new__); \ } while (prev__ != old__); \ - preempt_enable(); \ + preempt_enable_notrace(); \ new__; \ }) @@ -68,7 +68,7 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ if (__builtin_constant_p(val__) && \ ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \ @@ -84,7 +84,7 @@ : [val__] "d" (val__) \ : "cc"); \ } \ - preempt_enable(); \ + preempt_enable_notrace(); \ } #define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int) @@ -95,14 +95,14 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ : [val__] "d" (val__) \ : "cc"); \ - preempt_enable(); \ + preempt_enable_notrace(); \ old__ + val__; \ }) @@ -114,14 +114,14 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ : [val__] "d" (val__) \ : "cc"); \ - preempt_enable(); \ + preempt_enable_notrace(); \ } #define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan") @@ -136,10 +136,10 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ ret__; \ pcp_op_T__ *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = cmpxchg(ptr__, oval, nval); \ - preempt_enable(); \ + preempt_enable_notrace(); \ ret__; \ }) @@ -152,10 +152,10 @@ ({ \ typeof(pcp) *ptr__; \ typeof(pcp) ret__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = xchg(ptr__, nval); \ - preempt_enable(); \ + preempt_enable_notrace(); \ ret__; \ }) @@ -171,11 +171,11 @@ typeof(pcp1) *p1__; \ typeof(pcp2) *p2__; \ int ret__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ p1__ = raw_cpu_ptr(&(pcp1)); \ p2__ = raw_cpu_ptr(&(pcp2)); \ ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ - preempt_enable(); \ + preempt_enable_notrace(); \ ret__; \ }) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6076c8c912d2..b55561cc8786 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1229,7 +1229,6 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) @@ -1260,38 +1259,52 @@ static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address) } #define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address) -#define pgd_offset_k(address) pgd_offset(&init_mm, address) -static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address) { - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) - return (p4d_t *) pgd_deref(*pgd) + p4d_index(address); - return (p4d_t *) pgd; + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) + return (p4d_t *) pgd_deref(pgd) + p4d_index(address); + return (p4d_t *) pgdp; } +#define p4d_offset_lockless p4d_offset_lockless -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address) { - if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) - return (pud_t *) p4d_deref(*p4d) + pud_index(address); - return (pud_t *) p4d; + return p4d_offset_lockless(pgdp, *pgdp, address); } -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address) { - if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) - return (pmd_t *) pud_deref(*pud) + pmd_index(address); - return (pmd_t *) pud; + if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) + return (pud_t *) p4d_deref(p4d) + pud_index(address); + return (pud_t *) p4dp; } +#define pud_offset_lockless pud_offset_lockless -static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) +static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address) { - return (pte_t *) pmd_deref(*pmd) + pte_index(address); + return pud_offset_lockless(p4dp, *p4dp, address); } +#define pud_offset pud_offset -#define pte_offset_kernel(pmd, address) pte_offset(pmd, address) -#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) +static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address) +{ + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) + return (pmd_t *) pud_deref(pud) + pmd_index(address); + return (pmd_t *) pudp; +} +#define pmd_offset_lockless pmd_offset_lockless + +static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address) +{ + return pmd_offset_lockless(pudp, *pudp, address); +} +#define pmd_offset pmd_offset -static inline void pte_unmap(pte_t *pte) { } +static inline unsigned long pmd_page_vaddr(pmd_t pmd) +{ + return (unsigned long) pmd_deref(pmd); +} static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { @@ -1560,7 +1573,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL -static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, +static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp, int full) { @@ -1569,7 +1582,7 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); return pmd; } - return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH @@ -1674,7 +1687,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define kern_addr_valid(addr) (1) extern int vmem_add_mapping(unsigned long start, unsigned long size); -extern int vmem_remove_mapping(unsigned long start, unsigned long size); +extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); @@ -1683,6 +1696,4 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#include <asm-generic/pgtable.h> - #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 555d148ccf32..962da04234af 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,17 +14,15 @@ #include <linux/bits.h> -#define CIF_MCCK_PENDING 0 /* machine check handling is pending */ -#define CIF_ASCE_PRIMARY 1 /* primary asce needs fixup / uaccess */ -#define CIF_ASCE_SECONDARY 2 /* secondary asce needs fixup / uaccess */ -#define CIF_NOHZ_DELAY 3 /* delay HZ disable for a tick */ -#define CIF_FPU 4 /* restore FPU registers */ -#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */ -#define CIF_ENABLED_WAIT 6 /* in enabled wait state */ -#define CIF_MCCK_GUEST 7 /* machine check happening in guest */ -#define CIF_DEDICATED_CPU 8 /* this CPU is dedicated */ - -#define _CIF_MCCK_PENDING BIT(CIF_MCCK_PENDING) +#define CIF_ASCE_PRIMARY 0 /* primary asce needs fixup / uaccess */ +#define CIF_ASCE_SECONDARY 1 /* secondary asce needs fixup / uaccess */ +#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ +#define CIF_FPU 3 /* restore FPU registers */ +#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */ +#define CIF_ENABLED_WAIT 5 /* in enabled wait state */ +#define CIF_MCCK_GUEST 6 /* machine check happening in guest */ +#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ + #define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY) #define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index f009a13afe71..16b3e4396312 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -184,5 +184,10 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) return regs->gprs[15]; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gprs[2] = rc; +} + #endif /* __ASSEMBLY__ */ #endif /* _S390_PTRACE_H */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 86a3796e9be8..e69dbf438f99 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -365,34 +365,6 @@ struct qdio_initialize { struct qdio_outbuf_state *output_sbal_state_array; }; -/** - * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc() - * @l3_ipv6_addr: entry contains IPv6 address - * @l3_ipv4_addr: entry contains IPv4 address - * @l2_addr_lnid: entry contains MAC address and VLAN ID - */ -enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid}; - -/** - * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc() - * @nit: Network interface token - * @addr: Address of one of the three types - * - * The struct is passed to the callback function by qdio_brinfo_desc() - */ -struct qdio_brinfo_entry_l3_ipv6 { - u64 nit; - struct { unsigned char _s6_addr[16]; } addr; -} __packed; -struct qdio_brinfo_entry_l3_ipv4 { - u64 nit; - struct { uint32_t _s_addr; } addr; -} __packed; -struct qdio_brinfo_entry_l2 { - u64 nit; - struct { u8 mac[6]; u16 lnid; } addr_lnid; -} __packed; - #define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ #define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ #define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ @@ -423,10 +395,5 @@ extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr, extern int qdio_shutdown(struct ccw_device *, int); extern int qdio_free(struct ccw_device *); extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); -extern int qdio_pnso_brinfo(struct subchannel_id schid, - int cnc, u16 *response, - void (*cb)(void *priv, enum qdio_brinfo_entry_type type, - void *entry), - void *priv); #endif /* __QDIO_H__ */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 231a51e870fe..7e155fb6c254 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -10,6 +10,7 @@ #include <asm/sigp.h> #include <asm/lowcore.h> +#include <asm/processor.h> #define raw_smp_processor_id() (S390_lowcore.cpu_nr) @@ -54,9 +55,14 @@ static inline int smp_get_base_cpu(int cpu) return cpu - (cpu % (smp_cpu_mtid + 1)); } +static inline void smp_cpus_done(unsigned int max_cpus) +{ +} + extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); extern int __cpu_disable(void); +extern void schedule_mcck_handler(void); #endif /* __ASM_SMP_H */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index f073292e9fdb..d9d5de0f67ff 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -33,7 +33,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; + unsigned long error = regs->gprs[2]; +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) { + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long)(int)error; + } +#endif + return IS_ERR_VALUE(error) ? error : 0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 3c3d6fe8e2f0..1320f4213d80 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -30,7 +30,7 @@ }) #define __S390_SYS_STUBx(x, name, ...) \ - asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ + asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ { \ @@ -46,7 +46,7 @@ #define COMPAT_SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long __s390_compat_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__s390_compat__sys_##sname, ERRNO); \ + ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \ asmlinkage long __s390_compat_sys_##sname(void) #define SYSCALL_DEFINE0(sname) \ @@ -72,7 +72,7 @@ asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ - ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ + ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index e582fbe59e20..13a04fcf7762 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -24,7 +24,6 @@ #ifndef __ASSEMBLY__ #include <asm/lowcore.h> #include <asm/page.h> -#include <asm/processor.h> #define STACK_INIT_OFFSET \ (THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs)) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 6bf3a45ccfec..289aaff4d365 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -49,11 +49,6 @@ static inline void set_clock_comparator(__u64 time) asm volatile("sckc %0" : : "Q" (time)); } -static inline void store_clock_comparator(__u64 *time) -{ - asm volatile("stckc %0" : "=Q" (*time)); -} - void clock_comparator_work(void); void __init time_early_init(void); diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index aa406c05a350..954fa8ca6cbd 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -36,7 +36,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, #define p4d_free_tlb p4d_free_tlb #define pud_free_tlb pud_free_tlb -#include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm-generic/tlb.h> diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 82703e03f35d..acce6a08a1fa 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -5,8 +5,6 @@ #include <linux/mm.h> #include <linux/sched.h> #include <asm/processor.h> -#include <asm/pgalloc.h> -#include <asm/pgtable.h> /* * Flush all TLB entries on the local CPU. diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index fbb507504a3b..3a0ac0c7a9a3 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node) #define pcibus_to_node(bus) __pcibus_to_node(bus) -#define node_distance(a, b) __node_distance(a, b) -static inline int __node_distance(int a, int b) -{ - return 0; -} - #else /* !CONFIG_NUMA */ #define numa_node_id numa_node_id diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a470f1fa9f2a..f09444d6aeab 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -32,7 +32,7 @@ #define USER_DS_SACF (3) #define get_fs() (current->thread.mm_segment) -#define segment_eq(a,b) (((a) & 2) == ((b) & 2)) +#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS) void set_fs(mm_segment_t fs); @@ -276,6 +276,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo } int copy_to_user_real(void __user *dest, void *src, unsigned long count); -void s390_kernel_write(void *dst, const void *src, size_t size); +void *s390_kernel_write(void *dst, const void *src, size_t size); #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 3bcfdeb01395..0cd085cdeb4f 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -36,6 +36,7 @@ struct vdso_data { __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ __u32 ts_dir; /* TOD steering direction 0x64 */ __u64 ts_end; /* TOD steering end 0x68 */ + __u32 hrtimer_res; /* hrtimer resolution 0x70 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h deleted file mode 100644 index c7c564d9aea4..000000000000 --- a/arch/s390/include/uapi/asm/debug.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * S/390 debug facility - * - * Copyright IBM Corp. 1999, 2000 - */ - -#ifndef _UAPIDEBUG_H -#define _UAPIDEBUG_H - -#include <linux/fs.h> - -/* Note: - * struct __debug_entry must be defined outside of #ifdef __KERNEL__ - * in order to allow a user program to analyze the 'raw'-view. - */ - -struct __debug_entry{ - union { - struct { - unsigned long long clock:52; - unsigned long long exception:1; - unsigned long long level:3; - unsigned long long cpuid:8; - } fields; - - unsigned long long stck; - } id; - void* caller; -} __attribute__((packed)); - - -#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ - -#endif /* _UAPIDEBUG_H */ diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h index 451ba7d08905..d1ecd5d722a0 100644 --- a/arch/s390/include/uapi/asm/ipl.h +++ b/arch/s390/include/uapi/asm/ipl.h @@ -27,6 +27,7 @@ enum ipl_pbt { IPL_PBT_FCP = 0, IPL_PBT_SCP_DATA = 1, IPL_PBT_CCW = 2, + IPL_PBT_NVME = 4, }; /* IPL Parameter Block 0 with common fields */ @@ -67,6 +68,30 @@ struct ipl_pb0_fcp { #define IPL_PB0_FCP_OPT_IPL 0x10 #define IPL_PB0_FCP_OPT_DUMP 0x20 +/* IPL Parameter Block 0 for NVMe */ +struct ipl_pb0_nvme { + __u32 len; + __u8 pbt; + __u8 reserved1[3]; + __u8 loadparm[8]; + __u8 reserved2[304]; + __u8 opt; + __u8 reserved3[3]; + __u32 fid; + __u8 reserved4[12]; + __u32 nsid; + __u8 reserved5[4]; + __u32 bootprog; + __u8 reserved6[12]; + __u64 br_lba; + __u32 scp_data_len; + __u8 reserved7[260]; + __u8 scp_data[]; +} __packed; + +#define IPL_PB0_NVME_OPT_IPL 0x10 +#define IPL_PB0_NVME_OPT_DUMP 0x20 + /* IPL Parameter Block 0 for CCW */ struct ipl_pb0_ccw { __u32 len; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 436ec7636927..7a6b14874d65 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -231,11 +231,13 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_BPBC (1UL << 10) #define KVM_SYNC_ETOKEN (1UL << 11) +#define KVM_SYNC_DIAG318 (1UL << 12) #define KVM_SYNC_S390_VALID_FIELDS \ (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \ KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \ - KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN) + KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \ + KVM_SYNC_DIAG318) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 @@ -264,7 +266,8 @@ struct kvm_sync_regs { __u8 reserved2 : 7; __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ - __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + __u64 diag318; /* diagnose 0x318 info */ + __u8 padding2[184]; /* sdnx needs to be 256byte aligned */ union { __u8 sdnx[SDNXL]; /* state description annex */ struct { diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 5a2177e96e88..22fd202856bc 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -36,12 +36,12 @@ * - length(n_modulus) = inputdatalength */ struct ica_rsa_modexpo { - char __user *inputdata; - unsigned int inputdatalength; - char __user *outputdata; - unsigned int outputdatalength; - char __user *b_key; - char __user *n_modulus; + __u8 __user *inputdata; + __u32 inputdatalength; + __u8 __user *outputdata; + __u32 outputdatalength; + __u8 __user *b_key; + __u8 __user *n_modulus; }; /** @@ -59,15 +59,15 @@ struct ica_rsa_modexpo { * - length(u_mult_inv) = inputdatalength/2 + 8 */ struct ica_rsa_modexpo_crt { - char __user *inputdata; - unsigned int inputdatalength; - char __user *outputdata; - unsigned int outputdatalength; - char __user *bp_key; - char __user *bq_key; - char __user *np_prime; - char __user *nq_prime; - char __user *u_mult_inv; + __u8 __user *inputdata; + __u32 inputdatalength; + __u8 __user *outputdata; + __u32 outputdatalength; + __u8 __user *bp_key; + __u8 __user *bq_key; + __u8 __user *np_prime; + __u8 __user *nq_prime; + __u8 __user *u_mult_inv; }; /** @@ -83,67 +83,67 @@ struct ica_rsa_modexpo_crt { * key block */ struct CPRBX { - unsigned short cprb_len; /* CPRB length 220 */ - unsigned char cprb_ver_id; /* CPRB version id. 0x02 */ - unsigned char pad_000[3]; /* Alignment pad bytes */ - unsigned char func_id[2]; /* function id 0x5432 */ - unsigned char cprb_flags[4]; /* Flags */ - unsigned int req_parml; /* request parameter buffer len */ - unsigned int req_datal; /* request data buffer */ - unsigned int rpl_msgbl; /* reply message block length */ - unsigned int rpld_parml; /* replied parameter block len */ - unsigned int rpl_datal; /* reply data block len */ - unsigned int rpld_datal; /* replied data block len */ - unsigned int req_extbl; /* request extension block len */ - unsigned char pad_001[4]; /* reserved */ - unsigned int rpld_extbl; /* replied extension block len */ - unsigned char padx000[16 - sizeof(char *)]; - unsigned char *req_parmb; /* request parm block 'address' */ - unsigned char padx001[16 - sizeof(char *)]; - unsigned char *req_datab; /* request data block 'address' */ - unsigned char padx002[16 - sizeof(char *)]; - unsigned char *rpl_parmb; /* reply parm block 'address' */ - unsigned char padx003[16 - sizeof(char *)]; - unsigned char *rpl_datab; /* reply data block 'address' */ - unsigned char padx004[16 - sizeof(char *)]; - unsigned char *req_extb; /* request extension block 'addr'*/ - unsigned char padx005[16 - sizeof(char *)]; - unsigned char *rpl_extb; /* reply extension block 'address'*/ - unsigned short ccp_rtcode; /* server return code */ - unsigned short ccp_rscode; /* server reason code */ - unsigned int mac_data_len; /* Mac Data Length */ - unsigned char logon_id[8]; /* Logon Identifier */ - unsigned char mac_value[8]; /* Mac Value */ - unsigned char mac_content_flgs;/* Mac content flag byte */ - unsigned char pad_002; /* Alignment */ - unsigned short domain; /* Domain */ - unsigned char usage_domain[4];/* Usage domain */ - unsigned char cntrl_domain[4];/* Control domain */ - unsigned char S390enf_mask[4];/* S/390 enforcement mask */ - unsigned char pad_004[36]; /* reserved */ + __u16 cprb_len; /* CPRB length 220 */ + __u8 cprb_ver_id; /* CPRB version id. 0x02 */ + __u8 pad_000[3]; /* Alignment pad bytes */ + __u8 func_id[2]; /* function id 0x5432 */ + __u8 cprb_flags[4]; /* Flags */ + __u32 req_parml; /* request parameter buffer len */ + __u32 req_datal; /* request data buffer */ + __u32 rpl_msgbl; /* reply message block length */ + __u32 rpld_parml; /* replied parameter block len */ + __u32 rpl_datal; /* reply data block len */ + __u32 rpld_datal; /* replied data block len */ + __u32 req_extbl; /* request extension block len */ + __u8 pad_001[4]; /* reserved */ + __u32 rpld_extbl; /* replied extension block len */ + __u8 padx000[16 - sizeof(__u8 *)]; + __u8 __user *req_parmb; /* request parm block 'address' */ + __u8 padx001[16 - sizeof(__u8 *)]; + __u8 __user *req_datab; /* request data block 'address' */ + __u8 padx002[16 - sizeof(__u8 *)]; + __u8 __user *rpl_parmb; /* reply parm block 'address' */ + __u8 padx003[16 - sizeof(__u8 *)]; + __u8 __user *rpl_datab; /* reply data block 'address' */ + __u8 padx004[16 - sizeof(__u8 *)]; + __u8 __user *req_extb; /* request extension block 'addr'*/ + __u8 padx005[16 - sizeof(__u8 *)]; + __u8 __user *rpl_extb; /* reply extension block 'address'*/ + __u16 ccp_rtcode; /* server return code */ + __u16 ccp_rscode; /* server reason code */ + __u32 mac_data_len; /* Mac Data Length */ + __u8 logon_id[8]; /* Logon Identifier */ + __u8 mac_value[8]; /* Mac Value */ + __u8 mac_content_flgs; /* Mac content flag byte */ + __u8 pad_002; /* Alignment */ + __u16 domain; /* Domain */ + __u8 usage_domain[4]; /* Usage domain */ + __u8 cntrl_domain[4]; /* Control domain */ + __u8 S390enf_mask[4]; /* S/390 enforcement mask */ + __u8 pad_004[36]; /* reserved */ } __attribute__((packed)); /** * xcRB */ struct ica_xcRB { - unsigned short agent_ID; - unsigned int user_defined; - unsigned short request_ID; - unsigned int request_control_blk_length; - unsigned char padding1[16 - sizeof(char *)]; - char __user *request_control_blk_addr; - unsigned int request_data_length; - char padding2[16 - sizeof(char *)]; - char __user *request_data_address; - unsigned int reply_control_blk_length; - char padding3[16 - sizeof(char *)]; - char __user *reply_control_blk_addr; - unsigned int reply_data_length; - char padding4[16 - sizeof(char *)]; - char __user *reply_data_addr; - unsigned short priority_window; - unsigned int status; + __u16 agent_ID; + __u32 user_defined; + __u16 request_ID; + __u32 request_control_blk_length; + __u8 _padding1[16 - sizeof(__u8 *)]; + __u8 __user *request_control_blk_addr; + __u32 request_data_length; + __u8 _padding2[16 - sizeof(__u8 *)]; + __u8 __user *request_data_address; + __u32 reply_control_blk_length; + __u8 _padding3[16 - sizeof(__u8 *)]; + __u8 __user *reply_control_blk_addr; + __u32 reply_data_length; + __u8 __padding4[16 - sizeof(__u8 *)]; + __u8 __user *reply_data_addr; + __u16 priority_window; + __u32 status; } __attribute__((packed)); /** diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 75f26d775027..efca70970761 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -33,11 +33,6 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls CFLAGS_dumpstack.o += -fno-optimize-sibling-calls CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls -# -# Pass UTS_MACHINE for user_regset definition -# -CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' - obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o @@ -54,6 +49,7 @@ CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e80f0e6f5972..5d8cc1864566 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -11,9 +11,9 @@ #include <linux/kvm_host.h> #include <linux/sched.h> #include <linux/purgatory.h> +#include <linux/pgtable.h> #include <asm/idle.h> #include <asm/vdso.h> -#include <asm/pgtable.h> #include <asm/gmap.h> #include <asm/nmi.h> #include <asm/stacktrace.h> @@ -76,6 +76,7 @@ int main(void) OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir); OFFSET(__VDSO_TS_END, vdso_data, ts_end); + OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); @@ -86,7 +87,6 @@ int main(void) DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index f96a5857bbfd..c42ce348103c 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -549,8 +549,7 @@ static int get_mem_chunk_cnt(void) int cnt = 0; u64 idx; - for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE, - MEMBLOCK_NONE, NULL, NULL, NULL) + for_each_physmem_range(idx, &oldmem_type, NULL, NULL) cnt++; return cnt; } @@ -563,8 +562,7 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) phys_addr_t start, end; u64 idx; - for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE, - MEMBLOCK_NONE, &start, &end, NULL) { + for_each_physmem_range(idx, &oldmem_type, &start, &end) { phdr->p_filesz = end - start; phdr->p_type = PT_LOAD; phdr->p_offset = start; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6d321f5f101d..b6619ae9a3e0 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2012 + * Copyright IBM Corp. 1999, 2020 * * Author(s): Michael Holzheu (holzheu@de.ibm.com), * Holger Smolinski (Holger.Smolinski@de.ibm.com) @@ -90,27 +90,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, size_t user_buf_size, loff_t *offset); static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, const char *in_buf); -static int debug_raw_format_fn(debug_info_t *id, - struct debug_view *view, char *out_buf, - const char *in_buf); -static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf); - static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, debug_sprintf_entry_t *curr_event); /* globals */ -struct debug_view debug_raw_view = { - "raw", - NULL, - &debug_raw_header_fn, - &debug_raw_format_fn, - NULL, - NULL -}; -EXPORT_SYMBOL(debug_raw_view); - struct debug_view debug_hex_ascii_view = { "hex_ascii", NULL, @@ -198,9 +182,10 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas) if (!areas) goto fail_malloc_areas; for (i = 0; i < nr_areas; i++) { + /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */ areas[i] = kmalloc_array(pages_per_area, sizeof(debug_entry_t *), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!areas[i]) goto fail_malloc_areas2; for (j = 0; j < pages_per_area; j++) { @@ -448,7 +433,7 @@ static int debug_format_entry(file_private_info_t *p_info) act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area] [p_info->act_page] + p_info->act_entry); - if (act_entry->id.stck == 0LL) + if (act_entry->clock == 0LL) goto out; /* empty entry */ if (view->header_proc) len += view->header_proc(id_snap, view, p_info->act_area, @@ -844,12 +829,17 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, int level, int exception) { - active->id.stck = get_tod_clock_fast() - - *(unsigned long long *) &tod_clock_base[1]; - active->id.fields.cpuid = smp_processor_id(); + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + unsigned long timestamp; + + get_tod_clock_ext(clk); + timestamp = *(unsigned long *) &clk[0] >> 4; + timestamp -= TOD_UNIX_EPOCH >> 12; + active->clock = timestamp; + active->cpu = smp_processor_id(); active->caller = __builtin_return_address(0); - active->id.fields.exception = exception; - active->id.fields.level = level; + active->exception = exception; + active->level = level; proceed_active_entry(id); if (exception) proceed_active_area(id); @@ -867,7 +857,7 @@ static int debug_active = 1; * if debug_active is already off */ static int s390dbf_procactive(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) return proc_dointvec(table, write, buffer, lenp, ppos); @@ -1385,32 +1375,6 @@ out: } /* - * prints debug header in raw format - */ -static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf) -{ - int rc; - - rc = sizeof(debug_entry_t); - memcpy(out_buf, entry, sizeof(debug_entry_t)); - return rc; -} - -/* - * prints debug data in raw format - */ -static int debug_raw_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf) -{ - int rc; - - rc = id->buf_size; - memcpy(out_buf, in_buf, id->buf_size); - return rc; -} - -/* * prints debug data in hex/ascii format */ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, @@ -1439,25 +1403,24 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf) { - unsigned long base, sec, usec; + unsigned long sec, usec; unsigned long caller; unsigned int level; char *except_str; int rc = 0; - level = entry->id.fields.level; - base = (*(unsigned long *) &tod_clock_base[0]) >> 4; - sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12); + level = entry->level; + sec = entry->clock; usec = do_div(sec, USEC_PER_SEC); - if (entry->id.fields.exception) + if (entry->exception) except_str = "*"; else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK ", area, sec, usec, level, except_str, - entry->id.fields.cpuid, (void *)caller); + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2c122d8bab93..0dc4b258b98d 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -126,15 +126,16 @@ unknown: return -EINVAL; } -void show_stack(struct task_struct *task, unsigned long *stack) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { struct unwind_state state; - printk("Call Trace:\n"); + printk("%sCall Trace:\n", loglvl); unwind_for_each_frame(&state, task, NULL, (unsigned long) stack) - printk(state.reliable ? " [<%016lx>] %pSR \n" : - "([<%016lx>] %pSR)\n", - state.ip, (void *) state.ip); + printk(state.reliable ? "%s [<%016lx>] %pSR \n" : + "%s([<%016lx>] %pSR)\n", + loglvl, state.ip, (void *) state.ip); debug_show_held_locks(task ? : current); } @@ -175,7 +176,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_stack(NULL, (unsigned long *) regs->gprs[15]); + show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT); show_last_breaking_event(regs); } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cd241ee66eff..078277231858 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -170,6 +170,8 @@ static noinline __init void setup_lowcore_early(void) psw_t psw; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; + if (IS_ENABLED(CONFIG_KASAN)) + psw.mask |= PSW_MASK_DAT; psw.addr = (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = (unsigned long) s390_base_pgm_handler; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3ae64914bd14..23edf196d3dc 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -55,14 +55,11 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ - _CIF_ASCE_SECONDARY | _CIF_FPU) +_CIF_WORK = (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU) _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) _LPP_OFFSET = __LC_LPP -#define BASED(name) name-cleanup_critical(%r13) - .macro TRACE_IRQS_ON #ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 @@ -116,17 +113,39 @@ _LPP_OFFSET = __LC_LPP .macro SWITCH_ASYNC savearea,timer tmhh %r8,0x0001 # interrupting from user ? jnz 2f +#if IS_ENABLED(CONFIG_KVM) lgr %r14,%r9 - cghi %r14,__LC_RETURN_LPSWE - je 0f - slg %r14,BASED(.Lcritical_start) - clg %r14,BASED(.Lcritical_length) - jhe 1f -0: + larl %r13,.Lsie_gmap + slgr %r14,%r13 + lghi %r13,.Lsie_done - .Lsie_gmap + clgr %r14,%r13 + jhe 0f lghi %r11,\savearea # inside critical section, do cleanup - brasl %r14,cleanup_critical - tmhh %r8,0x0001 # retest problem state after cleanup - jnz 2f + brasl %r14,.Lcleanup_sie +#endif +0: larl %r13,.Lpsw_idle_exit + cgr %r13,%r9 + jne 1f + + mvc __CLOCK_IDLE_EXIT(8,%r2), __LC_INT_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2), __LC_ASYNC_ENTER_TIMER + # account system time going idle + ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT + + lg %r13,__LC_STEAL_TIMER + alg %r13,__CLOCK_IDLE_ENTER(%r2) + slg %r13,__LC_LAST_UPDATE_CLOCK + stg %r13,__LC_STEAL_TIMER + + mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) + + lg %r13,__LC_SYSTEM_TIMER + alg %r13,__LC_LAST_UPDATE_TIMER + slg %r13,__TIMER_IDLE_ENTER(%r2) + stg %r13,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) + + nihh %r8,0xfcfd # clear wait state and irq bits 1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT @@ -152,12 +171,30 @@ _LPP_OFFSET = __LC_LPP mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm - .macro REENABLE_IRQS + .macro RESTORE_SM_CLEAR_PER stg %r8,__LC_RETURN_PSW ni __LC_RETURN_PSW,0xbf ssm __LC_RETURN_PSW .endm + .macro ENABLE_INTS + stosm __SF_EMPTY(%r15),3 + .endm + + .macro ENABLE_INTS_TRACE + TRACE_IRQS_ON + ENABLE_INTS + .endm + + .macro DISABLE_INTS + stnsm __SF_EMPTY(%r15),0xfc + .endm + + .macro DISABLE_INTS_TRACE + DISABLE_INTS + TRACE_IRQS_OFF + .endm + .macro STCK savearea #ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES .insn s,0xb27c0000,\savearea # store clock fast @@ -254,8 +291,6 @@ ENTRY(__switch_to) BR_EX %r14 ENDPROC(__switch_to) -.L__critical_start: - #if IS_ENABLED(CONFIG_KVM) /* * sie64a calling convention: @@ -288,7 +323,6 @@ ENTRY(sie64a) BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_entry: sie 0(%r14) -.Lsie_exit: BPOFF BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: @@ -336,21 +370,19 @@ EXPORT_SYMBOL(sie_exit) /* * SVC interrupt handler routine. System calls are synchronous events and - * are executed with interrupts enabled. + * are entered with interrupts disabled. */ ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER -.Lsysc_stmg: stmg %r8,%r15,__LC_SAVE_AREA_SYNC BPOFF lg %r12,__LC_CURRENT - lghi %r13,__TASK_thread lghi %r14,_PIF_SYSCALL .Lsysc_per: + lghi %r13,__TASK_thread lg %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs -.Lsysc_vtime: UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP stmg %r0,%r7,__PT_R0(%r11) @@ -358,6 +390,7 @@ ENTRY(system_call) mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC stg %r14,__PT_FLAGS(%r11) + ENABLE_INTS .Lsysc_do_svc: # clear user controlled register to prevent speculative use xgr %r0,%r0 @@ -368,9 +401,9 @@ ENTRY(system_call) jnz .Lsysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 + sth %r1,__PT_INT_CODE+2(%r11) cghi %r1,NR_syscalls jnl .Lsysc_nr_ok - sth %r1,__PT_INT_CODE+2(%r11) slag %r8,%r1,3 .Lsysc_nr_ok: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) @@ -393,26 +426,26 @@ ENTRY(system_call) jnz .Lsysc_work TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lsysc_work # check for work - TSTMSK __LC_CPU_FLAGS,_CIF_WORK + TSTMSK __LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU) jnz .Lsysc_work BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP .Lsysc_restore: + DISABLE_INTS + TSTMSK __LC_CPU_FLAGS, _CIF_FPU + jz .Lsysc_skip_fpu + brasl %r14,load_fpu_regs +.Lsysc_skip_fpu: lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) -.Lsysc_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER - lmg %r11,%r15,__PT_R11(%r11) - b __LC_RETURN_LPSWE(%r0) -.Lsysc_done: + lmg %r0,%r15,__PT_R0(%r11) + b __LC_RETURN_LPSWE # # One of the work bits is on. Find out which one. # .Lsysc_work: - TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING - jo .Lsysc_mcck_pending TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lsysc_reschedule TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART @@ -436,11 +469,9 @@ ENTRY(system_call) jo .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lsysc_notify_resume - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jo .Lsysc_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) jnz .Lsysc_asce - j .Lsysc_return # beware of critical section cleanup + j .Lsysc_return # # _TIF_NEED_RESCHED is set, call schedule @@ -450,13 +481,6 @@ ENTRY(system_call) jg schedule # -# _CIF_MCCK_PENDING is set, call handler -# -.Lsysc_mcck_pending: - larl %r14,.Lsysc_return - jg s390_handle_mcck # TIF bit will be cleared by handler - -# # _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce # .Lsysc_asce: @@ -475,12 +499,6 @@ ENTRY(system_call) larl %r14,.Lsysc_return jg set_fs_fixup -# -# CIF_FPU is set, restore floating-point controls and floating-point registers. -# -.Lsysc_vxrs: - larl %r14,.Lsysc_return - jg load_fpu_regs # # _TIF_SIGPENDING is set, call do_signal @@ -564,7 +582,6 @@ ENTRY(system_call) jnh .Lsysc_tracenogo sllg %r8,%r2,3 lg %r9,0(%r8,%r10) -.Lsysc_tracego: lmg %r3,%r7,__PT_R3(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r2,__PT_ORIG_GPR2(%r11) @@ -585,8 +602,6 @@ ENTRY(ret_from_fork) la %r11,STACK_FRAME_OVERHEAD(%r15) lg %r12,__LC_CURRENT brasl %r14,schedule_tail - TRACE_IRQS_ON - ssm __LC_SVC_NEW_PSW # reenable interrupts tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? jne .Lsysc_tracenogo # it's a kernel thread @@ -620,15 +635,16 @@ ENTRY(pgm_check_handler) lghi %r10,1 0: lg %r12,__LC_CURRENT lghi %r11,0 - larl %r13,cleanup_critical lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # test problem state bit jnz 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a lgr %r14,%r9 - slg %r14,BASED(.Lsie_critical_start) - clg %r14,BASED(.Lsie_critical_length) + larl %r13,.Lsie_gmap + slgr %r14,%r13 + lghi %r13,.Lsie_done - .Lsie_gmap + clgr %r14,%r13 jhe 1f lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE @@ -680,7 +696,7 @@ ENTRY(pgm_check_handler) mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -6: REENABLE_IRQS +6: RESTORE_SM_CLEAR_PER xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) @@ -702,7 +718,7 @@ ENTRY(pgm_check_handler) # PER event in supervisor state, must be kprobes # .Lpgm_kprobe: - REENABLE_IRQS + RESTORE_SM_CLEAR_PER xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_per_trap @@ -713,11 +729,10 @@ ENTRY(pgm_check_handler) # .Lpgm_svcper: mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW - lghi %r13,__TASK_thread larl %r14,.Lsysc_per stg %r14,__LC_RETURN_PSW+8 lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP - lpswe __LC_RETURN_PSW # branch to .Lsysc_per and enable irqs + lpswe __LC_RETURN_PSW # branch to .Lsysc_per ENDPROC(pgm_check_handler) /* @@ -729,7 +744,6 @@ ENTRY(io_int_handler) BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT - larl %r13,cleanup_critical lmg %r8,%r9,__LC_IO_OLD_PSW SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER stmg %r0,%r7,__PT_R0(%r11) @@ -749,7 +763,12 @@ ENTRY(io_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore +#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) + tmhh %r8,0x300 + jz 1f TRACE_IRQS_OFF +1: +#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) .Lio_loop: lgr %r2,%r11 # pass pointer to pt_regs @@ -767,25 +786,27 @@ ENTRY(io_int_handler) j .Lio_loop .Lio_return: LOCKDEP_SYS_EXIT - TRACE_IRQS_ON -.Lio_tif: TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lio_work # there is work to do (signals etc.) TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lio_work .Lio_restore: +#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) + tm __PT_PSW(%r11),3 + jno 0f + TRACE_IRQS_ON +0: +#endif lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP -.Lio_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER .Lio_exit_kernel: - lmg %r11,%r15,__PT_R11(%r11) - b __LC_RETURN_LPSWE(%r0) + lmg %r0,%r15,__PT_R0(%r11) + b __LC_RETURN_LPSWE .Lio_done: # @@ -813,9 +834,6 @@ ENTRY(io_int_handler) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - # TRACE_IRQS_ON already done at .Lio_return, call - # TRACE_IRQS_OFF to keep things symmetrical - TRACE_IRQS_OFF brasl %r14,preempt_schedule_irq j .Lio_return #else @@ -835,9 +853,6 @@ ENTRY(io_int_handler) # # One of the work bits is on. Find out which one. # -.Lio_work_tif: - TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING - jo .Lio_mcck_pending TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lio_reschedule #ifdef CONFIG_LIVEPATCH @@ -854,15 +869,6 @@ ENTRY(io_int_handler) jo .Lio_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) jnz .Lio_asce - j .Lio_return # beware of critical section cleanup - -# -# _CIF_MCCK_PENDING is set, call handler -# -.Lio_mcck_pending: - # TRACE_IRQS_ON already done at .Lio_return - brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler - TRACE_IRQS_OFF j .Lio_return # @@ -895,23 +901,19 @@ ENTRY(io_int_handler) # _TIF_GUARDED_STORAGE is set, call guarded_storage_load # .Lio_guarded_storage: - # TRACE_IRQS_ON already done at .Lio_return - ssm __LC_SVC_NEW_PSW # reenable interrupts + ENABLE_INTS_TRACE lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,gs_load_bc_cb - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF + DISABLE_INTS_TRACE j .Lio_return # # _TIF_NEED_RESCHED is set, call schedule # .Lio_reschedule: - # TRACE_IRQS_ON already done at .Lio_return - ssm __LC_SVC_NEW_PSW # reenable interrupts + ENABLE_INTS_TRACE brasl %r14,schedule # call scheduler - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF + DISABLE_INTS_TRACE j .Lio_return # @@ -928,24 +930,20 @@ ENTRY(io_int_handler) # _TIF_SIGPENDING or is set, call do_signal # .Lio_sigpending: - # TRACE_IRQS_ON already done at .Lio_return - ssm __LC_SVC_NEW_PSW # reenable interrupts + ENABLE_INTS_TRACE lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_signal - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF + DISABLE_INTS_TRACE j .Lio_return # # _TIF_NOTIFY_RESUME or is set, call do_notify_resume # .Lio_notify_resume: - # TRACE_IRQS_ON already done at .Lio_return - ssm __LC_SVC_NEW_PSW # reenable interrupts + ENABLE_INTS_TRACE lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_notify_resume - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF + DISABLE_INTS_TRACE j .Lio_return ENDPROC(io_int_handler) @@ -958,7 +956,6 @@ ENTRY(ext_int_handler) BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT - larl %r13,cleanup_critical lmg %r8,%r9,__LC_EXT_OLD_PSW SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER stmg %r0,%r7,__PT_R0(%r11) @@ -981,7 +978,12 @@ ENTRY(ext_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore +#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) + tmhh %r8,0x300 + jz 1f TRACE_IRQS_OFF +1: +#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,EXT_INTERRUPT @@ -990,11 +992,11 @@ ENTRY(ext_int_handler) ENDPROC(ext_int_handler) /* - * Load idle PSW. The second "half" of this function is in .Lcleanup_idle. + * Load idle PSW. */ ENTRY(psw_idle) stg %r3,__SF_EMPTY(%r15) - larl %r1,.Lpsw_idle_lpsw+4 + larl %r1,.Lpsw_idle_exit stg %r1,__SF_EMPTY+8(%r15) larl %r1,smp_cpu_mtid llgf %r1,0(%r1) @@ -1006,10 +1008,9 @@ ENTRY(psw_idle) BPON STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) -.Lpsw_idle_lpsw: lpswe __SF_EMPTY(%r15) +.Lpsw_idle_exit: BR_EX %r14 -.Lpsw_idle_end: ENDPROC(psw_idle) /* @@ -1020,6 +1021,7 @@ ENDPROC(psw_idle) * of the register contents at return from io or a system call. */ ENTRY(save_fpu_regs) + stnsm __SF_EMPTY(%r15),0xfc lg %r2,__LC_CURRENT aghi %r2,__TASK_thread TSTMSK __LC_CPU_FLAGS,_CIF_FPU @@ -1051,6 +1053,7 @@ ENTRY(save_fpu_regs) .Lsave_fpu_regs_done: oi __LC_CPU_FLAGS+7,_CIF_FPU .Lsave_fpu_regs_exit: + ssm __SF_EMPTY(%r15) BR_EX %r14 .Lsave_fpu_regs_end: ENDPROC(save_fpu_regs) @@ -1102,8 +1105,6 @@ load_fpu_regs: .Lload_fpu_regs_end: ENDPROC(load_fpu_regs) -.L__critical_end: - /* * Machine check handler routines */ @@ -1116,7 +1117,6 @@ ENTRY(mcck_int_handler) lam %a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs lg %r12,__LC_CURRENT - larl %r13,cleanup_critical lmg %r8,%r9,__LC_MCK_OLD_PSW TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid @@ -1202,15 +1202,13 @@ ENTRY(mcck_int_handler) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lmcck_return + cghi %r2,0 + je .Lmcck_return lg %r1,__LC_KERNEL_STACK # switch to kernel stack mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING - jno .Lmcck_return TRACE_IRQS_OFF brasl %r14,s390_handle_mcck TRACE_IRQS_ON @@ -1280,265 +1278,23 @@ ENTRY(stack_overflow) ENDPROC(stack_overflow) #endif -ENTRY(cleanup_critical) - cghi %r9,__LC_RETURN_LPSWE - je .Lcleanup_lpswe #if IS_ENABLED(CONFIG_KVM) - clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap - jl 0f - clg %r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done - jl .Lcleanup_sie -#endif - clg %r9,BASED(.Lcleanup_table) # system_call - jl 0f - clg %r9,BASED(.Lcleanup_table+8) # .Lsysc_do_svc - jl .Lcleanup_system_call - clg %r9,BASED(.Lcleanup_table+16) # .Lsysc_tif - jl 0f - clg %r9,BASED(.Lcleanup_table+24) # .Lsysc_restore - jl .Lcleanup_sysc_tif - clg %r9,BASED(.Lcleanup_table+32) # .Lsysc_done - jl .Lcleanup_sysc_restore - clg %r9,BASED(.Lcleanup_table+40) # .Lio_tif - jl 0f - clg %r9,BASED(.Lcleanup_table+48) # .Lio_restore - jl .Lcleanup_io_tif - clg %r9,BASED(.Lcleanup_table+56) # .Lio_done - jl .Lcleanup_io_restore - clg %r9,BASED(.Lcleanup_table+64) # psw_idle - jl 0f - clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end - jl .Lcleanup_idle - clg %r9,BASED(.Lcleanup_table+80) # save_fpu_regs - jl 0f - clg %r9,BASED(.Lcleanup_table+88) # .Lsave_fpu_regs_end - jl .Lcleanup_save_fpu_regs - clg %r9,BASED(.Lcleanup_table+96) # load_fpu_regs - jl 0f - clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end - jl .Lcleanup_load_fpu_regs -0: BR_EX %r14,%r11 -ENDPROC(cleanup_critical) - - .align 8 -.Lcleanup_table: - .quad system_call - .quad .Lsysc_do_svc - .quad .Lsysc_tif - .quad .Lsysc_restore - .quad .Lsysc_done - .quad .Lio_tif - .quad .Lio_restore - .quad .Lio_done - .quad psw_idle - .quad .Lpsw_idle_end - .quad save_fpu_regs - .quad .Lsave_fpu_regs_end - .quad load_fpu_regs - .quad .Lload_fpu_regs_end - -#if IS_ENABLED(CONFIG_KVM) -.Lcleanup_table_sie: - .quad .Lsie_gmap - .quad .Lsie_done - .Lcleanup_sie: - cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt? - je 1f - slg %r9,BASED(.Lsie_crit_mcck_start) - clg %r9,BASED(.Lsie_crit_mcck_length) - jh 1f - oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST -1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt? + je 1f + larl %r13,.Lsie_entry + slgr %r9,%r13 + larl %r13,.Lsie_skip + clgr %r9,%r13 + jh 1f + oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST +1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit BR_EX %r14,%r11 -#endif -.Lcleanup_system_call: - # check if stpt has been executed - clg %r9,BASED(.Lcleanup_system_call_insn) - jh 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER - cghi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER -0: # check if stmg has been executed - clg %r9,BASED(.Lcleanup_system_call_insn+8) - jh 0f - mvc __LC_SAVE_AREA_SYNC(64),0(%r11) -0: # check if base register setup + TIF bit load has been done - clg %r9,BASED(.Lcleanup_system_call_insn+16) - jhe 0f - # set up saved register r12 task struct pointer - stg %r12,32(%r11) - # set up saved register r13 __TASK_thread offset - mvc 40(8,%r11),BASED(.Lcleanup_system_call_const) -0: # check if the user time update has been done - clg %r9,BASED(.Lcleanup_system_call_insn+24) - jh 0f - lg %r15,__LC_EXIT_TIMER - slg %r15,__LC_SYNC_ENTER_TIMER - alg %r15,__LC_USER_TIMER - stg %r15,__LC_USER_TIMER -0: # check if the system time update has been done - clg %r9,BASED(.Lcleanup_system_call_insn+32) - jh 0f - lg %r15,__LC_LAST_UPDATE_TIMER - slg %r15,__LC_EXIT_TIMER - alg %r15,__LC_SYSTEM_TIMER - stg %r15,__LC_SYSTEM_TIMER -0: # update accounting time stamp - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - # set up saved register r11 - lg %r15,__LC_KERNEL_STACK - la %r9,STACK_FRAME_OVERHEAD(%r15) - stg %r9,24(%r11) # r11 pt_regs pointer - # fill pt_regs - mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC - stmg %r0,%r7,__PT_R0(%r9) - mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC - xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9) - mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL - # setup saved register r15 - stg %r15,56(%r11) # r15 stack pointer - # set new psw address and exit - larl %r9,.Lsysc_do_svc - BR_EX %r14,%r11 -.Lcleanup_system_call_insn: - .quad system_call - .quad .Lsysc_stmg - .quad .Lsysc_per - .quad .Lsysc_vtime+36 - .quad .Lsysc_vtime+42 -.Lcleanup_system_call_const: - .quad __TASK_thread - -.Lcleanup_sysc_tif: - larl %r9,.Lsysc_tif - BR_EX %r14,%r11 - -.Lcleanup_sysc_restore: - # check if stpt has been executed - clg %r9,BASED(.Lcleanup_sysc_restore_insn) - jh 0f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - cghi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER -0: clg %r9,BASED(.Lcleanup_sysc_restore_insn+8) - je 1f - lg %r9,24(%r11) # get saved pointer to pt_regs - mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) - mvc 0(64,%r11),__PT_R8(%r9) - lmg %r0,%r7,__PT_R0(%r9) -.Lcleanup_lpswe: -1: lmg %r8,%r9,__LC_RETURN_PSW - BR_EX %r14,%r11 -.Lcleanup_sysc_restore_insn: - .quad .Lsysc_exit_timer - .quad .Lsysc_done - 4 - -.Lcleanup_io_tif: - larl %r9,.Lio_tif - BR_EX %r14,%r11 - -.Lcleanup_io_restore: - # check if stpt has been executed - clg %r9,BASED(.Lcleanup_io_restore_insn) - jh 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER -0: clg %r9,BASED(.Lcleanup_io_restore_insn+8) - je 1f - lg %r9,24(%r11) # get saved r11 pointer to pt_regs - mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) - mvc 0(64,%r11),__PT_R8(%r9) - lmg %r0,%r7,__PT_R0(%r9) -1: lmg %r8,%r9,__LC_RETURN_PSW - BR_EX %r14,%r11 -.Lcleanup_io_restore_insn: - .quad .Lio_exit_timer - .quad .Lio_done - 4 - -.Lcleanup_idle: - ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT - # copy interrupt clock & cpu timer - mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER - cghi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER -0: # check if stck & stpt have been executed - clg %r9,BASED(.Lcleanup_idle_insn) - jhe 1f - mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) - mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) -1: # calculate idle cycles - clg %r9,BASED(.Lcleanup_idle_insn) - jl 3f - larl %r1,smp_cpu_mtid - llgf %r1,0(%r1) - ltgr %r1,%r1 - jz 3f - .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) - larl %r3,mt_cycles - ag %r3,__LC_PERCPU_OFFSET - la %r4,__SF_EMPTY+16(%r15) -2: lg %r0,0(%r3) - slg %r0,0(%r4) - alg %r0,64(%r4) - stg %r0,0(%r3) - la %r3,8(%r3) - la %r4,8(%r4) - brct %r1,2b -3: # account system time going idle - lg %r9,__LC_STEAL_TIMER - alg %r9,__CLOCK_IDLE_ENTER(%r2) - slg %r9,__LC_LAST_UPDATE_CLOCK - stg %r9,__LC_STEAL_TIMER - mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) - lg %r9,__LC_SYSTEM_TIMER - alg %r9,__LC_LAST_UPDATE_TIMER - slg %r9,__TIMER_IDLE_ENTER(%r2) - stg %r9,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) - # prepare return psw - nihh %r8,0xfcfd # clear irq & wait state bits - lg %r9,48(%r11) # return from psw_idle - BR_EX %r14,%r11 -.Lcleanup_idle_insn: - .quad .Lpsw_idle_lpsw - -.Lcleanup_save_fpu_regs: - larl %r9,save_fpu_regs - BR_EX %r14,%r11 - -.Lcleanup_load_fpu_regs: - larl %r9,load_fpu_regs - BR_EX %r14,%r11 - -/* - * Integer constants - */ - .align 8 -.Lcritical_start: - .quad .L__critical_start -.Lcritical_length: - .quad .L__critical_end - .L__critical_start -#if IS_ENABLED(CONFIG_KVM) -.Lsie_critical_start: - .quad .Lsie_gmap -.Lsie_critical_length: - .quad .Lsie_done - .Lsie_gmap -.Lsie_crit_mcck_start: - .quad .Lsie_entry -.Lsie_crit_mcck_length: - .quad .Lsie_skip - .Lsie_entry #endif .section .rodata, "a" #define SYSCALL(esame,emu) .quad __s390x_ ## esame diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index faca269d5f27..a44ddc2f2dec 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -26,6 +26,7 @@ void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); void do_secure_storage_access(struct pt_regs *regs); void do_non_secure_storage_access(struct pt_regs *regs); +void do_secure_storage_violation(struct pt_regs *regs); void addressing_exception(struct pt_regs *regs); void data_exception(struct pt_regs *regs); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 4cd9b1ada834..b388e87a08bf 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -72,22 +72,6 @@ static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) #endif } -static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn) -{ -#ifdef CONFIG_KPROBES - insn->opc = BREAKPOINT_INSTRUCTION; - insn->disp = KPROBE_ON_FTRACE_NOP; -#endif -} - -static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn) -{ -#ifdef CONFIG_KPROBES - insn->opc = BREAKPOINT_INSTRUCTION; - insn->disp = KPROBE_ON_FTRACE_CALL; -#endif -} - int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -99,7 +83,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ @@ -121,7 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 8f8456816d83..f7f1e64e0d98 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -14,6 +14,7 @@ #include <linux/init.h> #include <linux/cpu.h> #include <linux/sched/cputime.h> +#include <trace/events/power.h> #include <asm/nmi.h> #include <asm/smp.h> #include "entry.h" @@ -24,28 +25,27 @@ void enabled_wait(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); unsigned long long idle_time; - unsigned long psw_mask; + unsigned long psw_mask, flags; - trace_hardirqs_on(); /* Wait for external, I/O or machine check interrupt. */ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); + local_irq_save(flags); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); - - trace_hardirqs_off(); + local_irq_restore(flags); /* Account time spent with enabled wait psw loaded as idle time. */ - write_seqcount_begin(&idle->seqcount); + raw_write_seqcount_begin(&idle->seqcount); idle_time = idle->clock_idle_exit - idle->clock_idle_enter; idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; idle->idle_time += idle_time; idle->idle_count++; account_idle_time(cputime_to_nsecs(idle_time)); - write_seqcount_end(&idle->seqcount); + raw_write_seqcount_end(&idle->seqcount); } NOKPROBE_SYMBOL(enabled_wait); @@ -118,22 +118,16 @@ u64 arch_cpu_idle_time(int cpu) void arch_cpu_idle_enter(void) { - local_mcck_disable(); } void arch_cpu_idle(void) { - if (!test_cpu_flag(CIF_MCCK_PENDING)) - /* Halt the cpu and keep track of cpu time accounting. */ - enabled_wait(); + enabled_wait(); local_irq_enable(); } void arch_cpu_idle_exit(void) { - local_mcck_enable(); - if (test_cpu_flag(CIF_MCCK_PENDING)) - s390_handle_mcck(); } void arch_cpu_idle_dead(void) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 4a71061974fd..90a2a17239b0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -39,6 +39,7 @@ #define IPL_CCW_STR "ccw" #define IPL_FCP_STR "fcp" #define IPL_FCP_DUMP_STR "fcp_dump" +#define IPL_NVME_STR "nvme" #define IPL_NSS_STR "nss" #define DUMP_CCW_STR "ccw" @@ -93,6 +94,8 @@ static char *ipl_type_str(enum ipl_type type) return IPL_FCP_DUMP_STR; case IPL_TYPE_NSS: return IPL_NSS_STR; + case IPL_TYPE_NVME: + return IPL_NVME_STR; case IPL_TYPE_UNKNOWN: default: return IPL_UNKNOWN_STR; @@ -133,6 +136,7 @@ static int reipl_capabilities = IPL_TYPE_UNKNOWN; static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; static struct ipl_parameter_block *reipl_block_fcp; +static struct ipl_parameter_block *reipl_block_nvme; static struct ipl_parameter_block *reipl_block_ccw; static struct ipl_parameter_block *reipl_block_nss; static struct ipl_parameter_block *reipl_block_actual; @@ -177,7 +181,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, \ char *page) \ { \ - return snprintf(page, PAGE_SIZE, _format, ##args); \ + return scnprintf(page, PAGE_SIZE, _format, ##args); \ } #define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \ @@ -261,6 +265,8 @@ static __init enum ipl_type get_ipl_type(void) return IPL_TYPE_FCP_DUMP; else return IPL_TYPE_FCP; + case IPL_PBT_NVME: + return IPL_TYPE_NVME; } return IPL_TYPE_UNKNOWN; } @@ -317,6 +323,8 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno); + case IPL_TYPE_NVME: + return sprintf(page, "%08ux\n", ipl_block.nvme.fid); default: return 0; } @@ -345,15 +353,35 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, scp_data, size); } + +static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int size = ipl_block.nvme.scp_data_len; + void *scp_data = &ipl_block.nvme.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + static struct bin_attribute ipl_scp_data_attr = __BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE); +static struct bin_attribute ipl_nvme_scp_data_attr = + __BIN_ATTR(scp_data, S_IRUGO, ipl_nvme_scp_data_read, NULL, PAGE_SIZE); + static struct bin_attribute *ipl_fcp_bin_attrs[] = { &ipl_parameter_attr, &ipl_scp_data_attr, NULL, }; +static struct bin_attribute *ipl_nvme_bin_attrs[] = { + &ipl_parameter_attr, + &ipl_nvme_scp_data_attr, + NULL, +}; + /* FCP ipl device attributes */ DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", @@ -365,6 +393,16 @@ DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)ipl_block.fcp.br_lba); +/* NVMe ipl device attributes */ +DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.fid); +DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.nsid); +DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n", + (unsigned long long)ipl_block.nvme.bootprog); +DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n", + (unsigned long long)ipl_block.nvme.br_lba); + static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -399,6 +437,24 @@ static struct attribute_group ipl_fcp_attr_group = { .bin_attrs = ipl_fcp_bin_attrs, }; +static struct attribute *ipl_nvme_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_nvme_fid_attr.attr, + &sys_ipl_nvme_nsid_attr.attr, + &sys_ipl_nvme_bootprog_attr.attr, + &sys_ipl_nvme_br_lba_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, + NULL, +}; + +static struct attribute_group ipl_nvme_attr_group = { + .attrs = ipl_nvme_attrs, + .bin_attrs = ipl_nvme_bin_attrs, +}; + + /* CCW ipl device attributes */ static struct attribute *ipl_ccw_attrs_vm[] = { @@ -474,6 +530,9 @@ static int __init ipl_init(void) case IPL_TYPE_FCP_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); break; + case IPL_TYPE_NVME: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group); + break; default: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_unknown_attr_group); @@ -727,6 +786,93 @@ static struct attribute_group reipl_fcp_attr_group = { static struct kobj_attribute sys_reipl_fcp_clear_attr = __ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store); +/* NVME reipl device attributes */ + +static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_nvme->nvme.scp_data_len; + void *scp_data = reipl_block_nvme->nvme.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t scpdata_len = count; + size_t padding; + + if (off) + return -EINVAL; + + memcpy(reipl_block_nvme->nvme.scp_data, buf, count); + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_nvme->nvme.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len; + reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len; + reipl_block_nvme->nvme.scp_data_len = scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_nvme_scp_data_attr = + __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_nvme_scpdata_read, + reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE); + +static struct bin_attribute *reipl_nvme_bin_attrs[] = { + &sys_reipl_nvme_scp_data_attr, + NULL, +}; + +DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.fid); +DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.nsid); +DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.bootprog); +DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.br_lba); + +/* nvme wrapper */ +static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_nvme, page); +} + +static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_nvme, buf, len); +} + +static struct kobj_attribute sys_reipl_nvme_loadparm_attr = + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nvme_loadparm_show, + reipl_nvme_loadparm_store); + +static struct attribute *reipl_nvme_attrs[] = { + &sys_reipl_nvme_fid_attr.attr, + &sys_reipl_nvme_nsid_attr.attr, + &sys_reipl_nvme_bootprog_attr.attr, + &sys_reipl_nvme_br_lba_attr.attr, + &sys_reipl_nvme_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_nvme_attr_group = { + .attrs = reipl_nvme_attrs, + .bin_attrs = reipl_nvme_bin_attrs +}; + /* CCW reipl device attributes */ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); @@ -891,6 +1037,9 @@ static int reipl_set_type(enum ipl_type type) case IPL_TYPE_FCP: reipl_block_actual = reipl_block_fcp; break; + case IPL_TYPE_NVME: + reipl_block_actual = reipl_block_nvme; + break; case IPL_TYPE_NSS: reipl_block_actual = reipl_block_nss; break; @@ -917,6 +1066,8 @@ static ssize_t reipl_type_store(struct kobject *kobj, rc = reipl_set_type(IPL_TYPE_CCW); else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) rc = reipl_set_type(IPL_TYPE_FCP); + else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_NVME); else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0) rc = reipl_set_type(IPL_TYPE_NSS); return (rc != 0) ? rc : len; @@ -927,6 +1078,7 @@ static struct kobj_attribute reipl_type_attr = static struct kset *reipl_kset; static struct kset *reipl_fcp_kset; +static struct kset *reipl_nvme_kset; static void __reipl_run(void *unused) { @@ -945,6 +1097,10 @@ static void __reipl_run(void *unused) else diag308(DIAG308_LOAD_NORMAL, NULL); break; + case IPL_TYPE_NVME: + diag308(DIAG308_SET, reipl_block_nvme); + diag308(DIAG308_LOAD_CLEAR, NULL); + break; case IPL_TYPE_NSS: diag308(DIAG308_SET, reipl_block_nss); diag308(DIAG308_LOAD_CLEAR, NULL); @@ -1093,6 +1249,49 @@ out1: return rc; } +static int __init reipl_nvme_init(void) +{ + int rc; + + reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_nvme) + return -ENOMEM; + + /* sysfs: create kset for mixing attr group and bin attrs */ + reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL, + &reipl_kset->kobj); + if (!reipl_nvme_kset) { + free_page((unsigned long) reipl_block_nvme); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); + if (rc) { + kset_unregister(reipl_nvme_kset); + free_page((unsigned long) reipl_block_nvme); + return rc; + } + + if (ipl_info.type == IPL_TYPE_NVME) { + memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block)); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * is invalid in the IPL parameter block, so take it + * always from sclp_ipl_info. + */ + memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm, + LOADPARM_LEN); + } else { + reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN; + reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN; + reipl_block_nvme->nvme.pbt = IPL_PBT_NVME; + reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_NVME; + return 0; +} + static int __init reipl_type_init(void) { enum ipl_type reipl_type = ipl_info.type; @@ -1108,6 +1307,9 @@ static int __init reipl_type_init(void) if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) { memcpy(reipl_block_fcp, reipl_block, size); reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) { + memcpy(reipl_block_nvme, reipl_block, size); + reipl_type = IPL_TYPE_NVME; } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) { memcpy(reipl_block_ccw, reipl_block, size); reipl_type = IPL_TYPE_CCW; @@ -1134,6 +1336,9 @@ static int __init reipl_init(void) rc = reipl_fcp_init(); if (rc) return rc; + rc = reipl_nvme_init(); + if (rc) + return rc; rc = reipl_nss_init(); if (rc) return rc; @@ -1750,6 +1955,10 @@ void __init setup_ipl(void) ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn; ipl_info.data.fcp.lun = ipl_block.fcp.lun; break; + case IPL_TYPE_NVME: + ipl_info.data.nvme.fid = ipl_block.nvme.fid; + ipl_info.data.nvme.nsid = ipl_block.nvme.nsid; + break; case IPL_TYPE_NSS: case IPL_TYPE_UNKNOWN: /* We have no info to copy */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 548d0ea9808d..d2a71d872638 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -523,10 +523,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. */ entry = s390_search_extables(regs->psw.addr); - if (entry) { - regs->psw.addr = extable_fixup(entry); + if (entry && ex_handle(entry, regs)) return 1; - } /* * fixup_exception() could not handle it, diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 452502f9a0d9..3b895971c3d0 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -167,7 +167,7 @@ static struct timer_list lgr_timer; */ static void lgr_timer_set(void) { - mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); + mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC)); } /* diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3a854cb5a4c6..d91989c7bd6a 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -16,8 +16,6 @@ #include <linux/debug_locks.h> #include <asm/cio.h> #include <asm/setup.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> #include <asm/smp.h> #include <asm/ipl.h> #include <asm/diag.h> diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index ba8f19bb438b..4055f1c49814 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -19,6 +19,7 @@ #include <linux/kasan.h> #include <linux/moduleloader.h> #include <linux/bug.h> +#include <linux/memory.h> #include <asm/alternative.h> #include <asm/nospec-branch.h> #include <asm/facility.h> @@ -174,10 +175,12 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, } static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, - int sign, int bits, int shift) + int sign, int bits, int shift, + void *(*write)(void *dest, const void *src, size_t len)) { unsigned long umax; long min, max; + void *dest = (void *)loc; if (val & ((1UL << shift) - 1)) return -ENOEXEC; @@ -194,26 +197,33 @@ static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, return -ENOEXEC; } - if (bits == 8) - *(unsigned char *) loc = val; - else if (bits == 12) - *(unsigned short *) loc = (val & 0xfff) | + if (bits == 8) { + unsigned char tmp = val; + write(dest, &tmp, 1); + } else if (bits == 12) { + unsigned short tmp = (val & 0xfff) | (*(unsigned short *) loc & 0xf000); - else if (bits == 16) - *(unsigned short *) loc = val; - else if (bits == 20) - *(unsigned int *) loc = (val & 0xfff) << 16 | - (val & 0xff000) >> 4 | - (*(unsigned int *) loc & 0xf00000ff); - else if (bits == 32) - *(unsigned int *) loc = val; - else if (bits == 64) - *(unsigned long *) loc = val; + write(dest, &tmp, 2); + } else if (bits == 16) { + unsigned short tmp = val; + write(dest, &tmp, 2); + } else if (bits == 20) { + unsigned int tmp = (val & 0xfff) << 16 | + (val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff); + write(dest, &tmp, 4); + } else if (bits == 32) { + unsigned int tmp = val; + write(dest, &tmp, 4); + } else if (bits == 64) { + unsigned long tmp = val; + write(dest, &tmp, 8); + } return 0; } static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, - const char *strtab, struct module *me) + const char *strtab, struct module *me, + void *(*write)(void *dest, const void *src, size_t len)) { struct mod_arch_syminfo *info; Elf_Addr loc, val; @@ -241,17 +251,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_64: /* Direct 64 bit. */ val += rela->r_addend; if (r_type == R_390_8) - rc = apply_rela_bits(loc, val, 0, 8, 0); + rc = apply_rela_bits(loc, val, 0, 8, 0, write); else if (r_type == R_390_12) - rc = apply_rela_bits(loc, val, 0, 12, 0); + rc = apply_rela_bits(loc, val, 0, 12, 0, write); else if (r_type == R_390_16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_20) - rc = apply_rela_bits(loc, val, 1, 20, 0); + rc = apply_rela_bits(loc, val, 1, 20, 0, write); else if (r_type == R_390_32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); break; case R_390_PC16: /* PC relative 16 bit. */ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ @@ -260,15 +270,15 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PC64: /* PC relative 64 bit. */ val += rela->r_addend - loc; if (r_type == R_390_PC16) - rc = apply_rela_bits(loc, val, 1, 16, 0); + rc = apply_rela_bits(loc, val, 1, 16, 0, write); else if (r_type == R_390_PC16DBL) - rc = apply_rela_bits(loc, val, 1, 16, 1); + rc = apply_rela_bits(loc, val, 1, 16, 1, write); else if (r_type == R_390_PC32DBL) - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); else if (r_type == R_390_PC32) - rc = apply_rela_bits(loc, val, 1, 32, 0); + rc = apply_rela_bits(loc, val, 1, 32, 0, write); else if (r_type == R_390_PC64) - rc = apply_rela_bits(loc, val, 1, 64, 0); + rc = apply_rela_bits(loc, val, 1, 64, 0, write); break; case R_390_GOT12: /* 12 bit GOT offset. */ case R_390_GOT16: /* 16 bit GOT offset. */ @@ -283,33 +293,33 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_GOTPLT64: /* 64 bit offset to jump slot. */ case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */ if (info->got_initialized == 0) { - Elf_Addr *gotent; + Elf_Addr *gotent = me->core_layout.base + + me->arch.got_offset + + info->got_offset; - gotent = me->core_layout.base + me->arch.got_offset + - info->got_offset; - *gotent = val; + write(gotent, &val, sizeof(*gotent)); info->got_initialized = 1; } val = info->got_offset + rela->r_addend; if (r_type == R_390_GOT12 || r_type == R_390_GOTPLT12) - rc = apply_rela_bits(loc, val, 0, 12, 0); + rc = apply_rela_bits(loc, val, 0, 12, 0, write); else if (r_type == R_390_GOT16 || r_type == R_390_GOTPLT16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_GOT20 || r_type == R_390_GOTPLT20) - rc = apply_rela_bits(loc, val, 1, 20, 0); + rc = apply_rela_bits(loc, val, 1, 20, 0, write); else if (r_type == R_390_GOT32 || r_type == R_390_GOTPLT32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { val += (Elf_Addr) me->core_layout.base - loc; - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); } break; case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ @@ -320,25 +330,29 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { - unsigned int *ip; - ip = me->core_layout.base + me->arch.plt_offset + - info->plt_offset; - ip[0] = 0x0d10e310; /* basr 1,0 */ - ip[1] = 0x100a0004; /* lg 1,10(1) */ + unsigned int insn[5]; + unsigned int *ip = me->core_layout.base + + me->arch.plt_offset + + info->plt_offset; + + insn[0] = 0x0d10e310; /* basr 1,0 */ + insn[1] = 0x100a0004; /* lg 1,10(1) */ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { unsigned int *ij; ij = me->core_layout.base + me->arch.plt_offset + me->arch.plt_size - PLT_ENTRY_SIZE; - ip[2] = 0xa7f40000 + /* j __jump_r1 */ + insn[2] = 0xa7f40000 + /* j __jump_r1 */ (unsigned int)(u16) (((unsigned long) ij - 8 - (unsigned long) ip) / 2); } else { - ip[2] = 0x07f10000; /* br %r1 */ + insn[2] = 0x07f10000; /* br %r1 */ } - ip[3] = (unsigned int) (val >> 32); - ip[4] = (unsigned int) val; + insn[3] = (unsigned int) (val >> 32); + insn[4] = (unsigned int) val; + + write(ip, insn, sizeof(insn)); info->plt_initialized = 1; } if (r_type == R_390_PLTOFF16 || @@ -357,17 +371,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val += rela->r_addend - loc; } if (r_type == R_390_PLT16DBL) - rc = apply_rela_bits(loc, val, 1, 16, 1); + rc = apply_rela_bits(loc, val, 1, 16, 1, write); else if (r_type == R_390_PLTOFF16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_PLT32DBL) - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); else if (r_type == R_390_PLT32 || r_type == R_390_PLTOFF32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_PLT64 || r_type == R_390_PLTOFF64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); break; case R_390_GOTOFF16: /* 16 bit offset to GOT. */ case R_390_GOTOFF32: /* 32 bit offset to GOT. */ @@ -375,20 +389,20 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = val + rela->r_addend - ((Elf_Addr) me->core_layout.base + me->arch.got_offset); if (r_type == R_390_GOTOFF16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_GOTOFF32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_GOTOFF64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ val = (Elf_Addr) me->core_layout.base + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) - rc = apply_rela_bits(loc, val, 1, 32, 0); + rc = apply_rela_bits(loc, val, 1, 32, 0, write); else if (r_type == R_390_GOTPCDBL) - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); break; case R_390_COPY: case R_390_GLOB_DAT: /* Create GOT entry. */ @@ -412,9 +426,10 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, return 0; } -int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, +static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, - struct module *me) + struct module *me, + void *(*write)(void *dest, const void *src, size_t len)) { Elf_Addr base; Elf_Sym *symtab; @@ -430,13 +445,27 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, n = sechdrs[relsec].sh_size / sizeof(Elf_Rela); for (i = 0; i < n; i++, rela++) { - rc = apply_rela(rela, base, symtab, strtab, me); + rc = apply_rela(rela, base, symtab, strtab, me, write); if (rc) return rc; } return 0; } +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) +{ + bool early = me->state == MODULE_STATE_UNFORMED; + void *(*write)(void *, const void *, size_t) = memcpy; + + if (!early) + write = s390_kernel_write; + + return __apply_relocate_add(sechdrs, strtab, symindex, relsec, me, + write); +} + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 0a487fae763e..86c8d5370e7f 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -148,7 +148,6 @@ void s390_handle_mcck(void) local_mcck_disable(); mcck = *this_cpu_ptr(&cpu_mcck); memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck)); - clear_cpu_flag(CIF_MCCK_PENDING); local_mcck_enable(); local_irq_restore(flags); @@ -333,7 +332,7 @@ NOKPROBE_SYMBOL(s390_backup_mcck_info); /* * machine check handler. */ -void notrace s390_do_machine_check(struct pt_regs *regs) +int notrace s390_do_machine_check(struct pt_regs *regs) { static int ipd_count; static DEFINE_SPINLOCK(ipd_lock); @@ -342,6 +341,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) unsigned long long tmp; union mci mci; unsigned long mcck_dam_code; + int mcck_pending = 0; nmi_enter(); inc_irq_stat(NMI_NMI); @@ -400,7 +400,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) */ mcck->kill_task = 1; mcck->mcck_code = mci.val; - set_cpu_flag(CIF_MCCK_PENDING); + mcck_pending = 1; } /* @@ -420,8 +420,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck->stp_queue |= stp_sync_check(); if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) mcck->stp_queue |= stp_island_check(); - if (mcck->stp_queue) - set_cpu_flag(CIF_MCCK_PENDING); + mcck_pending = 1; } /* @@ -442,12 +441,12 @@ void notrace s390_do_machine_check(struct pt_regs *regs) if (mci.cp) { /* Channel report word pending */ mcck->channel_report = 1; - set_cpu_flag(CIF_MCCK_PENDING); + mcck_pending = 1; } if (mci.w) { /* Warning pending */ mcck->warning = 1; - set_cpu_flag(CIF_MCCK_PENDING); + mcck_pending = 1; } /* @@ -462,7 +461,17 @@ void notrace s390_do_machine_check(struct pt_regs *regs) *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR; } clear_cpu_flag(CIF_MCCK_GUEST); + + if (user_mode(regs) && mcck_pending) { + nmi_exit(); + return 1; + } + + if (mcck_pending) + schedule_mcck_handler(); + nmi_exit(); + return 0; } NOKPROBE_SYMBOL(s390_do_machine_check); diff --git a/arch/s390/numa/numa.c b/arch/s390/kernel/numa.c index 51c5a9f6e525..51c5a9f6e525 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/kernel/numa.c diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 1e3df52b2b65..37265f551a11 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -292,7 +292,7 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); -CPUMF_EVENT_ATTR(cf_z15, DFLT_CCERROR, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); @@ -629,7 +629,7 @@ static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS), CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES), CPUMF_EVENT_PTR(cf_z15, DFLT_CC), - CPUMF_EVENT_PTR(cf_z15, DFLT_CCERROR), + CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH), CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE), CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE), NULL, diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 85a711d783eb..4f9e4626df55 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -881,12 +881,21 @@ out: return err; } +static bool is_callchain_event(struct perf_event *event) +{ + u64 sample_type = event->attr.sample_type; + + return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_STACK_USER); +} + static int cpumsf_pmu_event_init(struct perf_event *event) { int err; /* No support for taken branch sampling */ - if (has_branch_stack(event)) + /* No support for callchain, stacks and registers */ + if (has_branch_stack(event) || is_callchain_event(event)) return -EOPNOTSUPP; switch (event->attr.type) { diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 2c27907a5ffc..9a92638360ee 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -80,7 +80,7 @@ PGM_CHECK(do_dat_exception) /* 3b */ PGM_CHECK_DEFAULT /* 3c */ PGM_CHECK(do_secure_storage_access) /* 3d */ PGM_CHECK(do_non_secure_storage_access) /* 3e */ -PGM_CHECK_DEFAULT /* 3f */ +PGM_CHECK(do_secure_storage_violation) /* 3f */ PGM_CHECK(monitor_event_exception) /* 40 */ PGM_CHECK_DEFAULT /* 41 */ PGM_CHECK_DEFAULT /* 42 */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index eb6e23ad15a2..ec801d3bbb37 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -80,8 +80,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } -int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, - unsigned long arg, struct task_struct *p, unsigned long tls) +int copy_thread(unsigned long clone_flags, unsigned long new_stackp, + unsigned long arg, struct task_struct *p, unsigned long tls) { struct fake_frame { @@ -160,24 +160,6 @@ asmlinkage void execve_tail(void) asm volatile("sfpc %0" : : "d" (0)); } -/* - * fill in the FPU structure for a core dump. - */ -int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) -{ - save_fpu_regs(); - fpregs->fpc = current->thread.fpu.fpc; - fpregs->pad = 0; - if (MACHINE_HAS_VX) - convert_vx_to_fp((freg_t *)&fpregs->fprs, - current->thread.fpu.vxrs); - else - memcpy(&fpregs->fprs, current->thread.fpu.fprs, - sizeof(fpregs->fprs)); - return 1; -} -EXPORT_SYMBOL(dump_fpu); - unsigned long get_wchan(struct task_struct *p) { struct unwind_state state; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 58faa12542a1..a76dd27fb2e8 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -25,8 +25,6 @@ #include <linux/compat.h> #include <trace/syscall.h> #include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> #include <linux/uaccess.h> #include <asm/unistd.h> #include <asm/switch_to.h> @@ -324,6 +322,25 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } +static void fixup_int_code(struct task_struct *child, addr_t data) +{ + struct pt_regs *regs = task_pt_regs(child); + int ilc = regs->int_code >> 16; + u16 insn; + + if (ilc > 6) + return; + + if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), + &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) + return; + + /* double check that tracee stopped on svc instruction */ + if ((insn >> 8) != 0xa) + return; + + regs->int_code = 0x20000 | (data & 0xffff); +} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -335,7 +352,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) struct user *dummy = NULL; addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ @@ -353,7 +372,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* Invalid addressing mode bits */ return -EINVAL; } - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) + fixup_int_code(child, data); + *(addr_t *)((addr_t) ®s->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* @@ -719,6 +742,10 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct compat_user, regs.gprs[2])) + fixup_int_code(child, data); /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } @@ -838,40 +865,66 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { unsigned long mask = -1UL; + long ret = -1; + + if (is_compat_task()) + mask = 0xffffffff; /* * The sysc_tracesys code in entry.S stored the system * call number to gprs[2]. */ if (test_thread_flag(TIF_SYSCALL_TRACE) && - (tracehook_report_syscall_entry(regs) || - regs->gprs[2] >= NR_syscalls)) { + tracehook_report_syscall_entry(regs)) { /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip + * Tracing decided this syscall should not happen. Skip * the system call and the system call restart handling. */ - clear_pt_regs_flag(regs, PIF_SYSCALL); - return -1; + goto skip; } +#ifdef CONFIG_SECCOMP /* Do the secure computing check after ptrace. */ - if (secure_computing()) { - /* seccomp failures shouldn't expose any additional code. */ - return -1; + if (unlikely(test_thread_flag(TIF_SECCOMP))) { + struct seccomp_data sd; + + if (is_compat_task()) { + sd.instruction_pointer = regs->psw.addr & 0x7fffffff; + sd.arch = AUDIT_ARCH_S390; + } else { + sd.instruction_pointer = regs->psw.addr; + sd.arch = AUDIT_ARCH_S390X; + } + + sd.nr = regs->int_code & 0xffff; + sd.args[0] = regs->orig_gpr2 & mask; + sd.args[1] = regs->gprs[3] & mask; + sd.args[2] = regs->gprs[4] & mask; + sd.args[3] = regs->gprs[5] & mask; + sd.args[4] = regs->gprs[6] & mask; + sd.args[5] = regs->gprs[7] & mask; + + if (__secure_computing(&sd) == -1) + goto skip; } +#endif /* CONFIG_SECCOMP */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gprs[2]); + trace_sys_enter(regs, regs->int_code & 0xffff); - if (is_compat_task()) - mask = 0xffffffff; - audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask, + audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask, regs->gprs[3] &mask, regs->gprs[4] &mask, regs->gprs[5] &mask); + if ((signed long)regs->gprs[2] >= NR_syscalls) { + regs->gprs[2] = -ENOSYS; + ret = -ENOSYS; + } return regs->gprs[2]; +skip: + clear_pt_regs_flag(regs, PIF_SYSCALL); + return ret; } asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) @@ -891,28 +944,14 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) static int s390_regs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { + unsigned pos; if (target == current) save_access_regs(target->thread.acrs); - if (kbuf) { - unsigned long *k = kbuf; - while (count > 0) { - *k++ = __peek_user(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - unsigned long __user *u = ubuf; - while (count > 0) { - if (__put_user(__peek_user(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long)) + membuf_store(&to, __peek_user(target, pos)); return 0; } @@ -953,8 +992,8 @@ static int s390_regs_set(struct task_struct *target, } static int s390_fpregs_get(struct task_struct *target, - const struct user_regset *regset, unsigned int pos, - unsigned int count, void *kbuf, void __user *ubuf) + const struct user_regset *regset, + struct membuf to) { _s390_fp_regs fp_regs; @@ -964,8 +1003,7 @@ static int s390_fpregs_get(struct task_struct *target, fp_regs.fpc = target->thread.fpu.fpc; fpregs_store(&fp_regs, &target->thread.fpu); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fp_regs, 0, -1); + return membuf_write(&to, &fp_regs, sizeof(fp_regs)); } static int s390_fpregs_set(struct task_struct *target, @@ -1012,20 +1050,9 @@ static int s390_fpregs_set(struct task_struct *target, static int s390_last_break_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - if (count > 0) { - if (kbuf) { - unsigned long *k = kbuf; - *k = target->thread.last_break; - } else { - unsigned long __user *u = ubuf; - if (__put_user(target->thread.last_break, u)) - return -EFAULT; - } - } - return 0; + return membuf_store(&to, target->thread.last_break); } static int s390_last_break_set(struct task_struct *target, @@ -1038,16 +1065,13 @@ static int s390_last_break_set(struct task_struct *target, static int s390_tdb_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct pt_regs *regs = task_pt_regs(target); - unsigned char *data; if (!(regs->int_code & 0x200)) return -ENODATA; - data = target->thread.trap_tdb; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256); + return membuf_write(&to, target->thread.trap_tdb, 256); } static int s390_tdb_set(struct task_struct *target, @@ -1060,8 +1084,7 @@ static int s390_tdb_set(struct task_struct *target, static int s390_vxrs_low_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { __u64 vxrs[__NUM_VXRS_LOW]; int i; @@ -1072,7 +1095,7 @@ static int s390_vxrs_low_get(struct task_struct *target, save_fpu_regs(); for (i = 0; i < __NUM_VXRS_LOW; i++) vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + return membuf_write(&to, vxrs, sizeof(vxrs)); } static int s390_vxrs_low_set(struct task_struct *target, @@ -1101,18 +1124,14 @@ static int s390_vxrs_low_set(struct task_struct *target, static int s390_vxrs_high_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - __vector128 vxrs[__NUM_VXRS_HIGH]; - if (!MACHINE_HAS_VX) return -ENODEV; if (target == current) save_fpu_regs(); - memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs)); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW, + __NUM_VXRS_HIGH * sizeof(__vector128)); } static int s390_vxrs_high_set(struct task_struct *target, @@ -1134,12 +1153,9 @@ static int s390_vxrs_high_set(struct task_struct *target, static int s390_system_call_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - unsigned int *data = &target->thread.system_call; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(unsigned int)); + return membuf_store(&to, target->thread.system_call); } static int s390_system_call_set(struct task_struct *target, @@ -1154,8 +1170,7 @@ static int s390_system_call_set(struct task_struct *target, static int s390_gs_cb_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct gs_cb *data = target->thread.gs_cb; @@ -1165,8 +1180,7 @@ static int s390_gs_cb_get(struct task_struct *target, return -ENODATA; if (target == current) save_gs_cb(data); - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct gs_cb)); + return membuf_write(&to, data, sizeof(struct gs_cb)); } static int s390_gs_cb_set(struct task_struct *target, @@ -1210,8 +1224,7 @@ static int s390_gs_cb_set(struct task_struct *target, static int s390_gs_bc_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct gs_cb *data = target->thread.gs_bc_cb; @@ -1219,8 +1232,7 @@ static int s390_gs_bc_get(struct task_struct *target, return -ENODEV; if (!data) return -ENODATA; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct gs_cb)); + return membuf_write(&to, data, sizeof(struct gs_cb)); } static int s390_gs_bc_set(struct task_struct *target, @@ -1256,7 +1268,6 @@ static bool is_ri_cb_valid(struct runtime_instr_cb *cb) cb->pc == 1 && cb->qc == 0 && cb->reserved2 == 0 && - cb->key == PAGE_DEFAULT_KEY && cb->reserved3 == 0 && cb->reserved4 == 0 && cb->reserved5 == 0 && @@ -1271,8 +1282,7 @@ static bool is_ri_cb_valid(struct runtime_instr_cb *cb) static int s390_runtime_instr_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { struct runtime_instr_cb *data = target->thread.ri_cb; @@ -1281,8 +1291,7 @@ static int s390_runtime_instr_get(struct task_struct *target, if (!data) return -ENODATA; - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - data, 0, sizeof(struct runtime_instr_cb)); + return membuf_write(&to, data, sizeof(struct runtime_instr_cb)); } static int s390_runtime_instr_set(struct task_struct *target, @@ -1320,7 +1329,11 @@ static int s390_runtime_instr_set(struct task_struct *target, kfree(data); return -EINVAL; } - + /* + * Override access key in any case, since user space should + * not be able to set it, nor should it care about it. + */ + ri_cb.key = PAGE_DEFAULT_KEY >> 4; preempt_disable(); if (!target->thread.ri_cb) target->thread.ri_cb = data; @@ -1338,7 +1351,7 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(s390_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = s390_regs_get, + .regset_get = s390_regs_get, .set = s390_regs_set, }, { @@ -1346,7 +1359,7 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(s390_fp_regs) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .get = s390_fpregs_get, + .regset_get = s390_fpregs_get, .set = s390_fpregs_set, }, { @@ -1354,7 +1367,7 @@ static const struct user_regset s390_regsets[] = { .n = 1, .size = sizeof(unsigned int), .align = sizeof(unsigned int), - .get = s390_system_call_get, + .regset_get = s390_system_call_get, .set = s390_system_call_set, }, { @@ -1362,7 +1375,7 @@ static const struct user_regset s390_regsets[] = { .n = 1, .size = sizeof(long), .align = sizeof(long), - .get = s390_last_break_get, + .regset_get = s390_last_break_get, .set = s390_last_break_set, }, { @@ -1370,7 +1383,7 @@ static const struct user_regset s390_regsets[] = { .n = 1, .size = 256, .align = 1, - .get = s390_tdb_get, + .regset_get = s390_tdb_get, .set = s390_tdb_set, }, { @@ -1378,7 +1391,7 @@ static const struct user_regset s390_regsets[] = { .n = __NUM_VXRS_LOW, .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_vxrs_low_get, + .regset_get = s390_vxrs_low_get, .set = s390_vxrs_low_set, }, { @@ -1386,7 +1399,7 @@ static const struct user_regset s390_regsets[] = { .n = __NUM_VXRS_HIGH, .size = sizeof(__vector128), .align = sizeof(__vector128), - .get = s390_vxrs_high_get, + .regset_get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, { @@ -1394,7 +1407,7 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_cb_get, + .regset_get = s390_gs_cb_get, .set = s390_gs_cb_set, }, { @@ -1402,7 +1415,7 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_bc_get, + .regset_get = s390_gs_bc_get, .set = s390_gs_bc_set, }, { @@ -1410,13 +1423,13 @@ static const struct user_regset s390_regsets[] = { .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_runtime_instr_get, + .regset_get = s390_runtime_instr_get, .set = s390_runtime_instr_set, }, }; static const struct user_regset_view user_s390_view = { - .name = UTS_MACHINE, + .name = "s390x", .e_machine = EM_S390, .regsets = s390_regsets, .n = ARRAY_SIZE(s390_regsets) @@ -1425,28 +1438,15 @@ static const struct user_regset_view user_s390_view = { #ifdef CONFIG_COMPAT static int s390_compat_regs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { + unsigned n; + if (target == current) save_access_regs(target->thread.acrs); - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count > 0) { - *k++ = __peek_user_compat(target, pos); - count -= sizeof(*k); - pos += sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count > 0) { - if (__put_user(__peek_user_compat(target, pos), u++)) - return -EFAULT; - count -= sizeof(*u); - pos += sizeof(*u); - } - } + for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t)) + membuf_store(&to, __peek_user_compat(target, n)); return 0; } @@ -1488,29 +1488,14 @@ static int s390_compat_regs_set(struct task_struct *target, static int s390_compat_regs_high_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { compat_ulong_t *gprs_high; + int i; - gprs_high = (compat_ulong_t *) - &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; - if (kbuf) { - compat_ulong_t *k = kbuf; - while (count > 0) { - *k++ = *gprs_high; - gprs_high += 2; - count -= sizeof(*k); - } - } else { - compat_ulong_t __user *u = ubuf; - while (count > 0) { - if (__put_user(*gprs_high, u++)) - return -EFAULT; - gprs_high += 2; - count -= sizeof(*u); - } - } + gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs; + for (i = 0; i < NUM_GPRS; i++, gprs_high += 2) + membuf_store(&to, *gprs_high); return 0; } @@ -1549,23 +1534,11 @@ static int s390_compat_regs_high_set(struct task_struct *target, static int s390_compat_last_break_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) + struct membuf to) { - compat_ulong_t last_break; + compat_ulong_t last_break = target->thread.last_break; - if (count > 0) { - last_break = target->thread.last_break; - if (kbuf) { - unsigned long *k = kbuf; - *k = last_break; - } else { - unsigned long __user *u = ubuf; - if (__put_user(last_break, u)) - return -EFAULT; - } - } - return 0; + return membuf_store(&to, (unsigned long)last_break); } static int s390_compat_last_break_set(struct task_struct *target, @@ -1582,7 +1555,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = s390_compat_regs_get, + .regset_get = s390_compat_regs_get, .set = s390_compat_regs_set, }, { @@ -1590,7 +1563,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = s390_fpregs_get, + .regset_get = s390_fpregs_get, .set = s390_fpregs_set, }, { @@ -1598,7 +1571,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = 1, .size = sizeof(compat_uint_t), .align = sizeof(compat_uint_t), - .get = s390_system_call_get, + .regset_get = s390_system_call_get, .set = s390_system_call_set, }, { @@ -1606,7 +1579,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = 1, .size = sizeof(long), .align = sizeof(long), - .get = s390_compat_last_break_get, + .regset_get = s390_compat_last_break_get, .set = s390_compat_last_break_set, }, { @@ -1614,7 +1587,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = 1, .size = 256, .align = 1, - .get = s390_tdb_get, + .regset_get = s390_tdb_get, .set = s390_tdb_set, }, { @@ -1622,7 +1595,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = __NUM_VXRS_LOW, .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_vxrs_low_get, + .regset_get = s390_vxrs_low_get, .set = s390_vxrs_low_set, }, { @@ -1630,7 +1603,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = __NUM_VXRS_HIGH, .size = sizeof(__vector128), .align = sizeof(__vector128), - .get = s390_vxrs_high_get, + .regset_get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, { @@ -1638,7 +1611,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), - .get = s390_compat_regs_high_get, + .regset_get = s390_compat_regs_high_get, .set = s390_compat_regs_high_set, }, { @@ -1646,7 +1619,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_cb_get, + .regset_get = s390_gs_cb_get, .set = s390_gs_cb_set, }, { @@ -1654,7 +1627,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(struct gs_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_gs_bc_get, + .regset_get = s390_gs_bc_get, .set = s390_gs_bc_set, }, { @@ -1662,7 +1635,7 @@ static const struct user_regset s390_compat_regsets[] = { .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), .align = sizeof(__u64), - .get = s390_runtime_instr_get, + .regset_get = s390_runtime_instr_get, .set = s390_runtime_instr_set, }, }; diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 125c7f6e8715..1788a5454b6f 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -57,7 +57,7 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb) cb->k = 1; cb->ps = 1; cb->pc = 1; - cb->key = PAGE_DEFAULT_KEY; + cb->key = PAGE_DEFAULT_KEY >> 4; cb->v = 1; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 36445dd40fdb..c2c1b4e723ea 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,12 +305,9 @@ void *restart_stack __section(.data); unsigned long stack_alloc(void) { #ifdef CONFIG_VMAP_STACK - return (unsigned long) - __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, - VMALLOC_START, VMALLOC_END, - THREADINFO_GFP, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE, + THREADINFO_GFP, NUMA_NO_NODE, + __builtin_return_address(0)); #else return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); #endif @@ -384,8 +381,7 @@ static void __init setup_lowcore_dat_off(void) lc->restart_psw.addr = (unsigned long) restart_int_handler; lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->external_new_psw.addr = (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->svc_new_psw.addr = (unsigned long) system_call; lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->program_new_psw.addr = (unsigned long) pgm_check_handler; @@ -597,9 +593,10 @@ static void __init setup_memory_end(void) #ifdef CONFIG_CRASH_DUMP /* - * When kdump is enabled, we have to ensure that no memory from - * the area [0 - crashkernel memory size] and - * [crashk_res.start - crashk_res.end] is set offline. + * When kdump is enabled, we have to ensure that no memory from the area + * [0 - crashkernel memory size] is set offline - it will be exchanged with + * the crashkernel memory region when kdump is triggered. The crashkernel + * memory region can never get offlined (pages are unmovable). */ static int kdump_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) @@ -610,11 +607,7 @@ static int kdump_mem_notifier(struct notifier_block *nb, return NOTIFY_OK; if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) return NOTIFY_BAD; - if (arg->start_pfn > PFN_DOWN(crashk_res.end)) - return NOTIFY_OK; - if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) - return NOTIFY_OK; - return NOTIFY_BAD; + return NOTIFY_OK; } static struct notifier_block kdump_mem_nb = { @@ -626,7 +619,7 @@ static struct notifier_block kdump_mem_nb = { /* * Make sure that the area behind memory_end is protected */ -static void reserve_memory_end(void) +static void __init reserve_memory_end(void) { if (memory_end_set) memblock_reserve(memory_end, ULONG_MAX); @@ -635,7 +628,7 @@ static void reserve_memory_end(void) /* * Make sure that oldmem, where the dump is stored, is protected */ -static void reserve_oldmem(void) +static void __init reserve_oldmem(void) { #ifdef CONFIG_CRASH_DUMP if (OLDMEM_BASE) @@ -647,7 +640,7 @@ static void reserve_oldmem(void) /* * Make sure that oldmem, where the dump is stored, is protected */ -static void remove_oldmem(void) +static void __init remove_oldmem(void) { #ifdef CONFIG_CRASH_DUMP if (OLDMEM_BASE) @@ -1028,8 +1021,7 @@ static void __init setup_control_program_code(void) { union diag318_info diag318_info = { .cpnc = CPNC_LINUX, - .cpvc_linux = 0, - .cpvc_distro = {0}, + .cpvc = 0, }; if (!sclp.has_diag318) @@ -1107,6 +1099,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) nospec_auto_detect(); + jump_label_init(); parse_early_param(); #ifdef CONFIG_CRASH_DUMP /* Deactivate elfcorehdr= kernel parameter */ @@ -1133,14 +1126,6 @@ void __init setup_arch(char **cmdline_p) free_mem_detect_info(); remove_oldmem(); - /* - * Make sure all chunks are MAX_ORDER aligned so we don't need the - * extra checks that HOLES_IN_ZONE would require. - * - * Is this still required? - */ - memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT)); - if (is_prot_virt_host()) setup_uv(); setup_memory_end(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 10dbb12eb14d..85700bd85f98 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -61,6 +61,7 @@ enum { ec_schedule = 0, ec_call_function_single, ec_stop_cpu, + ec_mcck_pending, }; enum { @@ -145,7 +146,7 @@ static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) static inline int pcpu_stopped(struct pcpu *pcpu) { - u32 uninitialized_var(status); + u32 status; if (__pcpu_sigp(pcpu->address, SIGP_SENSE, 0, &status) != SIGP_CC_STATUS_STORED) @@ -403,6 +404,11 @@ int smp_find_processor_id(u16 address) return -1; } +void schedule_mcck_handler(void) +{ + pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending); +} + bool notrace arch_vcpu_is_preempted(int cpu) { if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) @@ -497,6 +503,8 @@ static void smp_handle_ext_call(void) scheduler_ipi(); if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); + if (test_bit(ec_mcck_pending, &bits)) + s390_handle_mcck(); } static void do_ext_call_interrupt(struct ext_code ext_code, @@ -1004,10 +1012,6 @@ void __init smp_prepare_boot_cpu(void) smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } -void __init smp_cpus_done(unsigned int max_cpus) -{ -} - void __init smp_setup_processor_id(void) { pcpu_devices[0].address = stap(); @@ -1137,6 +1141,7 @@ static int smp_cpu_online(unsigned int cpu) return sysfs_create_group(&s->kobj, &cpu_online_attr_group); } + static int smp_cpu_pre_down(unsigned int cpu) { struct device *s = &per_cpu(cpu_device, cpu)->dev; diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index bd7bd3581a0f..10456bc936fb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -138,7 +138,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl - - 150 common mlock sys_mlock sys_mlock 151 common munlock sys_munlock sys_munlock 152 common mlockall sys_mlockall sys_mlockall @@ -372,8 +372,8 @@ 362 common connect sys_connect sys_connect 363 common listen sys_listen sys_listen 364 common accept4 sys_accept4 sys_accept4 -365 common getsockopt sys_getsockopt compat_sys_getsockopt -366 common setsockopt sys_setsockopt compat_sys_setsockopt +365 common getsockopt sys_getsockopt sys_getsockopt +366 common setsockopt sys_setsockopt sys_setsockopt 367 common getsockname sys_getsockname sys_getsockname 368 common getpeername sys_getpeername sys_getpeername 369 common sendto sys_sendto sys_sendto @@ -438,5 +438,7 @@ 433 common fspick sys_fspick sys_fspick 434 common pidfd_open sys_pidfd_open sys_pidfd_open 435 common clone3 sys_clone3 sys_clone3 +436 common close_range sys_close_range sys_close_range 437 common openat2 sys_openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 sys_faccessat2 diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index f9d070d016e3..513e59d08a55 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -237,7 +237,7 @@ static u64 read_tod_clock(struct clocksource *cs) preempt_disable(); /* protect from changes to steering parameters */ now = get_tod_clock(); adj = tod_steering_end - now; - if (unlikely((s64) adj >= 0)) + if (unlikely((s64) adj > 0)) /* * manually steer by 1 cycle every 2^16 cycles. This * corresponds to shifting the tod delta by 15. 1s is @@ -253,7 +253,7 @@ static struct clocksource clocksource_tod = { .name = "tod", .rating = 400, .read = read_tod_clock, - .mask = -1ULL, + .mask = CLOCKSOURCE_MASK(64), .mult = 1000, .shift = 12, .flags = CLOCK_SOURCE_IS_CONTINUOUS, @@ -301,6 +301,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->tk_mult = tk->tkr_mono.mult; vdso_data->tk_shift = tk->tkr_mono.shift; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++vdso_data->tb_update_count; } @@ -668,7 +669,7 @@ static void stp_work_fn(struct work_struct *work) * There is a usable clock but the synchonization failed. * Retry after a second. */ - mod_timer(&stp_timer, jiffies + HZ); + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); out_unlock: mutex_unlock(&stp_work_mutex); @@ -682,7 +683,7 @@ static struct bus_type stp_subsys = { .dev_name = "stp", }; -static ssize_t stp_ctn_id_show(struct device *dev, +static ssize_t ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -692,9 +693,9 @@ static ssize_t stp_ctn_id_show(struct device *dev, *(unsigned long long *) stp_info.ctnid); } -static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); +static DEVICE_ATTR_RO(ctn_id); -static ssize_t stp_ctn_type_show(struct device *dev, +static ssize_t ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -703,9 +704,9 @@ static ssize_t stp_ctn_type_show(struct device *dev, return sprintf(buf, "%i\n", stp_info.ctn); } -static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); +static DEVICE_ATTR_RO(ctn_type); -static ssize_t stp_dst_offset_show(struct device *dev, +static ssize_t dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -714,9 +715,9 @@ static ssize_t stp_dst_offset_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); } -static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); +static DEVICE_ATTR_RO(dst_offset); -static ssize_t stp_leap_seconds_show(struct device *dev, +static ssize_t leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -725,9 +726,9 @@ static ssize_t stp_leap_seconds_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); } -static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); +static DEVICE_ATTR_RO(leap_seconds); -static ssize_t stp_stratum_show(struct device *dev, +static ssize_t stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -736,9 +737,9 @@ static ssize_t stp_stratum_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); } -static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); +static DEVICE_ATTR_RO(stratum); -static ssize_t stp_time_offset_show(struct device *dev, +static ssize_t time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -747,9 +748,9 @@ static ssize_t stp_time_offset_show(struct device *dev, return sprintf(buf, "%i\n", (int) stp_info.tto); } -static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); +static DEVICE_ATTR_RO(time_offset); -static ssize_t stp_time_zone_offset_show(struct device *dev, +static ssize_t time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -758,10 +759,9 @@ static ssize_t stp_time_zone_offset_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); } -static DEVICE_ATTR(time_zone_offset, 0400, - stp_time_zone_offset_show, NULL); +static DEVICE_ATTR_RO(time_zone_offset); -static ssize_t stp_timing_mode_show(struct device *dev, +static ssize_t timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -770,9 +770,9 @@ static ssize_t stp_timing_mode_show(struct device *dev, return sprintf(buf, "%i\n", stp_info.tmd); } -static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); +static DEVICE_ATTR_RO(timing_mode); -static ssize_t stp_timing_state_show(struct device *dev, +static ssize_t timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -781,16 +781,16 @@ static ssize_t stp_timing_state_show(struct device *dev, return sprintf(buf, "%i\n", stp_info.tst); } -static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); +static DEVICE_ATTR_RO(timing_state); -static ssize_t stp_online_show(struct device *dev, +static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", stp_online); } -static ssize_t stp_online_store(struct device *dev, +static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -816,18 +816,14 @@ static ssize_t stp_online_store(struct device *dev, * Can't use DEVICE_ATTR because the attribute should be named * stp/online but dev_attr_online already exists in this file .. */ -static struct device_attribute dev_attr_stp_online = { - .attr = { .name = "online", .mode = 0600 }, - .show = stp_online_show, - .store = stp_online_store, -}; +static DEVICE_ATTR_RW(online); static struct device_attribute *stp_attributes[] = { &dev_attr_ctn_id, &dev_attr_ctn_type, &dev_attr_dst_offset, &dev_attr_leap_seconds, - &dev_attr_stp_online, + &dev_attr_online, &dev_attr_stratum, &dev_attr_time_offset, &dev_attr_time_zone_offset, diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 5f70cefc13e4..ca47141a5be9 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -356,9 +356,9 @@ static atomic_t topology_poll = ATOMIC_INIT(0); static void set_topology_timer(void) { if (atomic_add_unless(&topology_poll, -1, 0)) - mod_timer(&topology_timer, jiffies + HZ / 10); + mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); else - mod_timer(&topology_timer, jiffies + HZ * 60); + mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); } void topology_expect_change(void) @@ -594,7 +594,7 @@ static int __init topology_setup(char *str) early_param("topology", topology_setup); static int topology_ctl_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int enabled = topology_is_enabled(); int new_mode; diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ff9cc4c3290e..8d1e8a1a97df 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -50,11 +50,8 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) } else { const struct exception_table_entry *fixup; fixup = s390_search_extables(regs->psw.addr); - if (fixup) - regs->psw.addr = extable_fixup(fixup); - else { + if (!fixup || !ex_handle(fixup, regs)) die(regs, str); - } } } @@ -251,7 +248,7 @@ void monitor_event_exception(struct pt_regs *regs) case BUG_TRAP_TYPE_NONE: fixup = s390_search_extables(regs->psw.addr); if (fixup) - regs->psw.addr = extable_fixup(fixup); + ex_handle(fixup, regs); break; case BUG_TRAP_TYPE_WARN: break; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 4c0677fc8904..c296e5c8dbf9 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -205,7 +205,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) again: rc = -EFAULT; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); uaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(uaddr)) @@ -234,7 +234,7 @@ again: pte_unmap_unlock(ptep, ptelock); unlock_page(page); out: - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); if (rc == -EAGAIN) { wait_on_page_writeback(page); @@ -331,7 +331,7 @@ EXPORT_SYMBOL_GPL(arch_make_page_accessible); static ssize_t uv_query_facilities(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", uv_info.inst_calls_list[0], uv_info.inst_calls_list[1], uv_info.inst_calls_list[2], @@ -344,7 +344,7 @@ static struct kobj_attribute uv_query_facilities_attr = static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_guest_cpus); } @@ -354,7 +354,7 @@ static struct kobj_attribute uv_query_max_guest_cpus_attr = static ssize_t uv_query_max_guest_vms(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_num_sec_conf); } @@ -364,7 +364,7 @@ static struct kobj_attribute uv_query_max_guest_vms_attr = static ssize_t uv_query_max_guest_addr(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.max_sec_stor_addr); } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index bcc9bdb39ba2..c4baefaa6e34 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -21,7 +21,6 @@ #include <linux/memblock.h> #include <linux/compat.h> #include <asm/asm-offsets.h> -#include <asm/pgtable.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -208,7 +207,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * it at vdso_base which is the "natural" base for it, but we might * fail and end up putting it elsewhere. */ - if (down_write_killable(&mm->mmap_sem)) + if (mmap_write_lock_killable(mm)) return -EINTR; vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); if (IS_ERR_VALUE(vdso_base)) { @@ -239,7 +238,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) rc = 0; out_up: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return rc; } diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index bec19e7e6e1c..4a66a1cb919b 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -18,8 +18,8 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - -Wl,--hash-style=both +ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ + --hash-style=both --build-id -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) @@ -37,8 +37,8 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE + $(call if_changed,ld) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -50,8 +50,6 @@ $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 081435398e0a..0c79caa32b59 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -17,12 +17,14 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: CFI_STARTPROC - larl %r1,4f + larl %r1,3f + lg %r0,0(%r1) cghi %r2,__CLOCK_REALTIME_COARSE je 0f cghi %r2,__CLOCK_MONOTONIC_COARSE je 0f - larl %r1,3f + larl %r1,_vdso_data + llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1) cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -36,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ 1: lghi %r2,0 @@ -45,6 +46,5 @@ __kernel_clock_getres: svc 0 br %r14 CFI_ENDPROC -3: .quad __CLOCK_REALTIME_RES -4: .quad __CLOCK_COARSE_RES +3: .quad __CLOCK_COARSE_RES .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index def3b60f1fe8..67a8e770e369 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -7,7 +7,7 @@ source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION def_bool y prompt "KVM" - ---help--- + help Say Y here to get to see options for using your Linux host to run other operating systems inside virtual machines (guests). This option alone does not add any kernel code. @@ -33,7 +33,7 @@ config KVM select HAVE_KVM_NO_POLL select SRCU select KVM_VFIO - ---help--- + help Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work on any 64bit machine. @@ -49,7 +49,7 @@ config KVM config KVM_S390_UCONTROL bool "Userspace controlled virtual machines" depends on KVM - ---help--- + help Allow CAP_SYS_ADMIN users to create KVM virtual machines that are controlled by userspace. diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 563429dece03..5b8ec1c447e1 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -10,7 +10,6 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> -#include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/virtio-ccw.h> #include "kvm-s390.h" diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 47a67a958107..6d6b57059493 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -9,8 +9,8 @@ #include <linux/vmalloc.h> #include <linux/mm_types.h> #include <linux/err.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" @@ -1173,7 +1173,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, int dat_protection, fake; int rc; - down_read(&sg->mm->mmap_sem); + mmap_read_lock(sg->mm); /* * We don't want any guest-2 tables to change - so the parent * tables/pointers we read stay valid - unshadowing is however @@ -1202,6 +1202,6 @@ shadow_page: if (!rc) rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); ipte_unlock(vcpu); - up_read(&sg->mm->mmap_sem); + mmap_read_unlock(sg->mm); return rc; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index bfb481134994..2f177298c663 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2767,10 +2767,10 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr) { struct page *page = NULL; - down_read(&kvm->mm->mmap_sem); - get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE, + mmap_read_lock(kvm->mm); + get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE, &page, NULL, NULL); - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); return page; } @@ -3082,7 +3082,7 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer) __airqs_kick_single_vcpu(kvm, pending_mask); hrtimer_forward_now(timer, ns_to_ktime(gi->expires)); return HRTIMER_RESTART; - }; + } return HRTIMER_NORESTART; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d05bb040fd42..6b74b92c1a58 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -31,11 +31,11 @@ #include <linux/bitmap.h> #include <linux/sched/signal.h> #include <linux/string.h> +#include <linux/pgtable.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> #include <asm/stp.h> -#include <asm/pgtable.h> #include <asm/gmap.h> #include <asm/nmi.h> #include <asm/switch_to.h> @@ -57,110 +57,109 @@ #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ (KVM_MAX_VCPUS + LOCAL_IRQS)) -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM - struct kvm_stats_debugfs_item debugfs_entries[] = { - { "userspace_handled", VCPU_STAT(exit_userspace) }, - { "exit_null", VCPU_STAT(exit_null) }, - { "exit_validity", VCPU_STAT(exit_validity) }, - { "exit_stop_request", VCPU_STAT(exit_stop_request) }, - { "exit_external_request", VCPU_STAT(exit_external_request) }, - { "exit_io_request", VCPU_STAT(exit_io_request) }, - { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, - { "exit_instruction", VCPU_STAT(exit_instruction) }, - { "exit_pei", VCPU_STAT(exit_pei) }, - { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, - { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, - { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, - { "instruction_lctl", VCPU_STAT(instruction_lctl) }, - { "instruction_stctl", VCPU_STAT(instruction_stctl) }, - { "instruction_stctg", VCPU_STAT(instruction_stctg) }, - { "deliver_ckc", VCPU_STAT(deliver_ckc) }, - { "deliver_cputm", VCPU_STAT(deliver_cputm) }, - { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, - { "deliver_external_call", VCPU_STAT(deliver_external_call) }, - { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, - { "deliver_virtio", VCPU_STAT(deliver_virtio) }, - { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, - { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, - { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, - { "deliver_program", VCPU_STAT(deliver_program) }, - { "deliver_io", VCPU_STAT(deliver_io) }, - { "deliver_machine_check", VCPU_STAT(deliver_machine_check) }, - { "exit_wait_state", VCPU_STAT(exit_wait_state) }, - { "inject_ckc", VCPU_STAT(inject_ckc) }, - { "inject_cputm", VCPU_STAT(inject_cputm) }, - { "inject_external_call", VCPU_STAT(inject_external_call) }, - { "inject_float_mchk", VM_STAT(inject_float_mchk) }, - { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) }, - { "inject_io", VM_STAT(inject_io) }, - { "inject_mchk", VCPU_STAT(inject_mchk) }, - { "inject_pfault_done", VM_STAT(inject_pfault_done) }, - { "inject_program", VCPU_STAT(inject_program) }, - { "inject_restart", VCPU_STAT(inject_restart) }, - { "inject_service_signal", VM_STAT(inject_service_signal) }, - { "inject_set_prefix", VCPU_STAT(inject_set_prefix) }, - { "inject_stop_signal", VCPU_STAT(inject_stop_signal) }, - { "inject_pfault_init", VCPU_STAT(inject_pfault_init) }, - { "inject_virtio", VM_STAT(inject_virtio) }, - { "instruction_epsw", VCPU_STAT(instruction_epsw) }, - { "instruction_gs", VCPU_STAT(instruction_gs) }, - { "instruction_io_other", VCPU_STAT(instruction_io_other) }, - { "instruction_lpsw", VCPU_STAT(instruction_lpsw) }, - { "instruction_lpswe", VCPU_STAT(instruction_lpswe) }, - { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, - { "instruction_ptff", VCPU_STAT(instruction_ptff) }, - { "instruction_stidp", VCPU_STAT(instruction_stidp) }, - { "instruction_sck", VCPU_STAT(instruction_sck) }, - { "instruction_sckpf", VCPU_STAT(instruction_sckpf) }, - { "instruction_spx", VCPU_STAT(instruction_spx) }, - { "instruction_stpx", VCPU_STAT(instruction_stpx) }, - { "instruction_stap", VCPU_STAT(instruction_stap) }, - { "instruction_iske", VCPU_STAT(instruction_iske) }, - { "instruction_ri", VCPU_STAT(instruction_ri) }, - { "instruction_rrbe", VCPU_STAT(instruction_rrbe) }, - { "instruction_sske", VCPU_STAT(instruction_sske) }, - { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, - { "instruction_essa", VCPU_STAT(instruction_essa) }, - { "instruction_stsi", VCPU_STAT(instruction_stsi) }, - { "instruction_stfl", VCPU_STAT(instruction_stfl) }, - { "instruction_tb", VCPU_STAT(instruction_tb) }, - { "instruction_tpi", VCPU_STAT(instruction_tpi) }, - { "instruction_tprot", VCPU_STAT(instruction_tprot) }, - { "instruction_tsch", VCPU_STAT(instruction_tsch) }, - { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, - { "instruction_sie", VCPU_STAT(instruction_sie) }, - { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, - { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, - { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, - { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, - { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, - { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, - { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, - { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, - { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, - { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, - { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, - { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, - { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, - { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, - { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, - { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, - { "instruction_diag_10", VCPU_STAT(diagnose_10) }, - { "instruction_diag_44", VCPU_STAT(diagnose_44) }, - { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, - { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) }, - { "instruction_diag_258", VCPU_STAT(diagnose_258) }, - { "instruction_diag_308", VCPU_STAT(diagnose_308) }, - { "instruction_diag_500", VCPU_STAT(diagnose_500) }, - { "instruction_diag_other", VCPU_STAT(diagnose_other) }, + VCPU_STAT("userspace_handled", exit_userspace), + VCPU_STAT("exit_null", exit_null), + VCPU_STAT("exit_validity", exit_validity), + VCPU_STAT("exit_stop_request", exit_stop_request), + VCPU_STAT("exit_external_request", exit_external_request), + VCPU_STAT("exit_io_request", exit_io_request), + VCPU_STAT("exit_external_interrupt", exit_external_interrupt), + VCPU_STAT("exit_instruction", exit_instruction), + VCPU_STAT("exit_pei", exit_pei), + VCPU_STAT("exit_program_interruption", exit_program_interruption), + VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program), + VCPU_STAT("exit_operation_exception", exit_operation_exception), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VCPU_STAT("instruction_lctlg", instruction_lctlg), + VCPU_STAT("instruction_lctl", instruction_lctl), + VCPU_STAT("instruction_stctl", instruction_stctl), + VCPU_STAT("instruction_stctg", instruction_stctg), + VCPU_STAT("deliver_ckc", deliver_ckc), + VCPU_STAT("deliver_cputm", deliver_cputm), + VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal), + VCPU_STAT("deliver_external_call", deliver_external_call), + VCPU_STAT("deliver_service_signal", deliver_service_signal), + VCPU_STAT("deliver_virtio", deliver_virtio), + VCPU_STAT("deliver_stop_signal", deliver_stop_signal), + VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal), + VCPU_STAT("deliver_restart_signal", deliver_restart_signal), + VCPU_STAT("deliver_program", deliver_program), + VCPU_STAT("deliver_io", deliver_io), + VCPU_STAT("deliver_machine_check", deliver_machine_check), + VCPU_STAT("exit_wait_state", exit_wait_state), + VCPU_STAT("inject_ckc", inject_ckc), + VCPU_STAT("inject_cputm", inject_cputm), + VCPU_STAT("inject_external_call", inject_external_call), + VM_STAT("inject_float_mchk", inject_float_mchk), + VCPU_STAT("inject_emergency_signal", inject_emergency_signal), + VM_STAT("inject_io", inject_io), + VCPU_STAT("inject_mchk", inject_mchk), + VM_STAT("inject_pfault_done", inject_pfault_done), + VCPU_STAT("inject_program", inject_program), + VCPU_STAT("inject_restart", inject_restart), + VM_STAT("inject_service_signal", inject_service_signal), + VCPU_STAT("inject_set_prefix", inject_set_prefix), + VCPU_STAT("inject_stop_signal", inject_stop_signal), + VCPU_STAT("inject_pfault_init", inject_pfault_init), + VM_STAT("inject_virtio", inject_virtio), + VCPU_STAT("instruction_epsw", instruction_epsw), + VCPU_STAT("instruction_gs", instruction_gs), + VCPU_STAT("instruction_io_other", instruction_io_other), + VCPU_STAT("instruction_lpsw", instruction_lpsw), + VCPU_STAT("instruction_lpswe", instruction_lpswe), + VCPU_STAT("instruction_pfmf", instruction_pfmf), + VCPU_STAT("instruction_ptff", instruction_ptff), + VCPU_STAT("instruction_stidp", instruction_stidp), + VCPU_STAT("instruction_sck", instruction_sck), + VCPU_STAT("instruction_sckpf", instruction_sckpf), + VCPU_STAT("instruction_spx", instruction_spx), + VCPU_STAT("instruction_stpx", instruction_stpx), + VCPU_STAT("instruction_stap", instruction_stap), + VCPU_STAT("instruction_iske", instruction_iske), + VCPU_STAT("instruction_ri", instruction_ri), + VCPU_STAT("instruction_rrbe", instruction_rrbe), + VCPU_STAT("instruction_sske", instruction_sske), + VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock), + VCPU_STAT("instruction_essa", instruction_essa), + VCPU_STAT("instruction_stsi", instruction_stsi), + VCPU_STAT("instruction_stfl", instruction_stfl), + VCPU_STAT("instruction_tb", instruction_tb), + VCPU_STAT("instruction_tpi", instruction_tpi), + VCPU_STAT("instruction_tprot", instruction_tprot), + VCPU_STAT("instruction_tsch", instruction_tsch), + VCPU_STAT("instruction_sthyi", instruction_sthyi), + VCPU_STAT("instruction_sie", instruction_sie), + VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense), + VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running), + VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call), + VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency), + VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency), + VCPU_STAT("instruction_sigp_start", instruction_sigp_start), + VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop), + VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status), + VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status), + VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status), + VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch), + VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix), + VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart), + VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset), + VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset), + VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown), + VCPU_STAT("instruction_diag_10", diagnose_10), + VCPU_STAT("instruction_diag_44", diagnose_44), + VCPU_STAT("instruction_diag_9c", diagnose_9c), + VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored), + VCPU_STAT("instruction_diag_258", diagnose_258), + VCPU_STAT("instruction_diag_308", diagnose_308), + VCPU_STAT("instruction_diag_500", diagnose_500), + VCPU_STAT("instruction_diag_other", diagnose_other), { NULL } }; @@ -546,6 +545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_AIS_MIGRATION: case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_S390_DIAG318: r = 1; break; case KVM_CAP_S390_HPAGE_1M: @@ -764,9 +764,9 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = -EINVAL; else { r = 0; - down_write(&kvm->mm->mmap_sem); + mmap_write_lock(kvm->mm); kvm->mm->context.allow_gmap_hpage_1m = 1; - up_write(&kvm->mm->mmap_sem); + mmap_write_unlock(kvm->mm); /* * We might have to create fake 4k page * tables. To avoid that the hardware works on @@ -1816,7 +1816,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (!keys) return -ENOMEM; - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); srcu_idx = srcu_read_lock(&kvm->srcu); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); @@ -1830,7 +1830,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) break; } srcu_read_unlock(&kvm->srcu, srcu_idx); - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); if (!r) { r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, @@ -1874,7 +1874,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) goto out; i = 0; - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); srcu_idx = srcu_read_lock(&kvm->srcu); while (i < args->count) { unlocked = false; @@ -1892,7 +1892,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) r = set_guest_storage_key(current->mm, hva, keys[i], 0); if (r) { - r = fixup_user_fault(current, current->mm, hva, + r = fixup_user_fault(current->mm, hva, FAULT_FLAG_WRITE, &unlocked); if (r) break; @@ -1901,7 +1901,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) i++; } srcu_read_unlock(&kvm->srcu, srcu_idx); - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); out: kvfree(keys); return r; @@ -2090,14 +2090,14 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, if (!values) return -ENOMEM; - down_read(&kvm->mm->mmap_sem); + mmap_read_lock(kvm->mm); srcu_idx = srcu_read_lock(&kvm->srcu); if (peek) ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); else ret = kvm_s390_get_cmma(kvm, args, values, bufsize); srcu_read_unlock(&kvm->srcu, srcu_idx); - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); if (kvm->arch.migration_mode) args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); @@ -2147,7 +2147,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, goto out; } - down_read(&kvm->mm->mmap_sem); + mmap_read_lock(kvm->mm); srcu_idx = srcu_read_lock(&kvm->srcu); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); @@ -2162,12 +2162,12 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, set_pgste_bits(kvm->mm, hva, mask, pgstev); } srcu_read_unlock(&kvm->srcu, srcu_idx); - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); if (!kvm->mm->context.uses_cmm) { - down_write(&kvm->mm->mmap_sem); + mmap_write_lock(kvm->mm); kvm->mm->context.uses_cmm = 1; - up_write(&kvm->mm->mmap_sem); + mmap_write_unlock(kvm->mm); } out: vfree(bits); @@ -2240,9 +2240,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) if (r) break; - down_write(¤t->mm->mmap_sem); + mmap_write_lock(current->mm); r = gmap_mark_unmergeable(); - up_write(¤t->mm->mmap_sem); + mmap_write_unlock(current->mm); if (r) break; @@ -3268,7 +3268,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) KVM_SYNC_ACRS | KVM_SYNC_CRS | KVM_SYNC_ARCH0 | - KVM_SYNC_PFAULT; + KVM_SYNC_PFAULT | + KVM_SYNC_DIAG318; kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; @@ -3563,6 +3564,7 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->pp = 0; vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.sie_block->todpr = 0; + vcpu->arch.sie_block->cpnc = 0; } } @@ -3580,6 +3582,7 @@ static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) regs->etoken = 0; regs->etoken_extension = 0; + regs->diag318 = 0; } int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) @@ -3924,11 +3927,13 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, } } -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); + + return true; } void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, @@ -3944,7 +3949,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, /* s390 will always inject the page directly */ } -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) { /* * s390 will always inject the page directly, @@ -3953,33 +3958,31 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) return true; } -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) +static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) { hva_t hva; struct kvm_arch_async_pf arch; - int rc; if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) - return 0; + return false; if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != vcpu->arch.pfault_compare) - return 0; + return false; if (psw_extint_disabled(vcpu)) - return 0; + return false; if (kvm_s390_vcpu_has_irq(vcpu, 0)) - return 0; + return false; if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) - return 0; + return false; if (!vcpu->arch.gmap->pfault_enabled) - return 0; + return false; hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)); hva += current->thread.gmap_addr & ~PAGE_MASK; if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) - return 0; + return false; - rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); - return rc; + return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); } static int vcpu_pre_run(struct kvm_vcpu *vcpu) @@ -3999,9 +4002,6 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) if (need_resched()) schedule(); - if (test_cpu_flag(CIF_MCCK_PENDING)) - s390_handle_mcck(); - if (!kvm_is_ucontrol(vcpu->kvm)) { rc = kvm_s390_deliver_pending_interrupts(vcpu); if (rc) @@ -4177,8 +4177,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return rc; } -static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void sync_regs_fmt2(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; struct runtime_instr_cb *riccb; struct gs_cb *gscb; @@ -4198,6 +4199,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) kvm_clear_async_pf_completion_queue(vcpu); } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { + vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; + vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; + } /* * If userspace sets the riccb (e.g. after migration) to a valid state, * we should enable RI here instead of doing the lazy enablement. @@ -4244,8 +4249,10 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* SIE will load etoken directly from SDNX and therefore kvm_run */ } -static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void sync_regs(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { @@ -4274,7 +4281,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* Sync fmt2 only data */ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { - sync_regs_fmt2(vcpu, kvm_run); + sync_regs_fmt2(vcpu); } else { /* * In several places we have to modify our internal view to @@ -4293,12 +4300,15 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->kvm_dirty_regs = 0; } -static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void store_regs_fmt2(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; + kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; + kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; if (MACHINE_HAS_GS) { __ctl_set_bit(2, 4); if (vcpu->arch.gs_enabled) @@ -4314,8 +4324,10 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* SIE will save etoken directly into SDNX and therefore kvm_run */ } -static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void store_regs(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; + kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); @@ -4334,11 +4346,12 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) - store_regs_fmt2(vcpu, kvm_run); + store_regs_fmt2(vcpu); } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; int rc; if (kvm_run->immediate_exit) @@ -4371,7 +4384,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } - sync_regs(vcpu, kvm_run); + sync_regs(vcpu); enable_cpu_timer_accounting(vcpu); might_fault(); @@ -4393,7 +4406,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } disable_cpu_timer_accounting(vcpu); - store_regs(vcpu, kvm_run); + store_regs(vcpu); kvm_sigset_deactivate(vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 893893642415..cd74989ce0b0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -13,6 +13,7 @@ #include <linux/errno.h> #include <linux/compat.h> #include <linux/mm_types.h> +#include <linux/pgtable.h> #include <asm/asm-offsets.h> #include <asm/facility.h> @@ -20,9 +21,7 @@ #include <asm/debug.h> #include <asm/ebcdic.h> #include <asm/sysinfo.h> -#include <asm/pgtable.h> #include <asm/page-states.h> -#include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/io.h> #include <asm/ptrace.h> @@ -270,18 +269,18 @@ static int handle_iske(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); retry: unlocked = false; - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); rc = get_guest_storage_key(current->mm, vmaddr, &key); if (rc) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); goto retry; } } - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (rc < 0) @@ -317,17 +316,17 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); retry: unlocked = false; - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); rc = reset_guest_reference_bit(current->mm, vmaddr); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); goto retry; } } - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (rc < 0) @@ -385,17 +384,17 @@ static int handle_sske(struct kvm_vcpu *vcpu) if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, m3 & SSKE_NQ, m3 & SSKE_MR, m3 & SSKE_MC); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (rc < 0) @@ -1091,15 +1090,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (rc == -EAGAIN) @@ -1122,7 +1121,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } /* - * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu) + * Must be called with relevant read locks held (kvm->mm->mmap_lock, kvm->srcu) */ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc) { @@ -1220,9 +1219,9 @@ static int handle_essa(struct kvm_vcpu *vcpu) * already correct, we do nothing and avoid the lock. */ if (vcpu->kvm->mm->context.uses_cmm == 0) { - down_write(&vcpu->kvm->mm->mmap_sem); + mmap_write_lock(vcpu->kvm->mm); vcpu->kvm->mm->context.uses_cmm = 1; - up_write(&vcpu->kvm->mm->mmap_sem); + mmap_write_unlock(vcpu->kvm->mm); } /* * If we are here, we are supposed to have CMMA enabled in @@ -1239,11 +1238,11 @@ static int handle_essa(struct kvm_vcpu *vcpu) } else { int srcu_idx; - down_read(&vcpu->kvm->mm->mmap_sem); + mmap_read_lock(vcpu->kvm->mm); srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); i = __do_essa(vcpu, orc); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); - up_read(&vcpu->kvm->mm->mmap_sem); + mmap_read_unlock(vcpu->kvm->mm); if (i < 0) return i; /* Account for the possible extra cbrl entry */ @@ -1251,10 +1250,10 @@ static int handle_essa(struct kvm_vcpu *vcpu) } vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); for (i = 0; i < entries; ++i) __gmap_zap(gmap, cbrlo[i]); - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); return 0; } diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 63e330109b63..eb99e2f95ebe 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -9,7 +9,6 @@ #include <linux/kvm_host.h> #include <linux/pagemap.h> #include <linux/sched/signal.h> -#include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/uv.h> #include <asm/mman.h> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4f6c22d72072..4f3cbf6003a9 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -548,6 +548,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->ecd |= scb_o->ecd & ECD_ETOKENF; scb_s->hpid = HPID_VSIE; + scb_s->cpnc = scb_o->cpnc; prepare_ibc(vcpu, vsie_page); rc = shadow_crycb(vcpu, vsie_page); @@ -1000,11 +1001,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) handle_last_fault(vcpu, vsie_page); - if (need_resched()) - schedule(); - if (test_cpu_flag(CIF_MCCK_PENDING)) - s390_handle_mcck(); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); /* save current guest state of bp isolation override */ @@ -1185,6 +1181,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) break; + cond_resched(); } if (rc == -EFAULT) { diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 28fd66d558ff..678333936f78 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -14,3 +14,5 @@ KASAN_SANITIZE_uaccess.o := n obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o CFLAGS_test_unwind.o += -fno-optimize-sibling-calls + +lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index d4aa10795605..daca7bad66de 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay); static void __udelay_disabled(unsigned long long usecs) { - unsigned long cr0, cr0_new, psw_mask; + unsigned long cr0, cr0_new, psw_mask, flags; struct s390_idle_data idle; u64 end; @@ -45,7 +45,9 @@ static void __udelay_disabled(unsigned long long usecs) psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; set_clock_comparator(end); set_cpu_flag(CIF_IGNORE_IRQ); + local_irq_save(flags); psw_idle(&idle, psw_mask); + local_irq_restore(flags); clear_cpu_flag(CIF_IGNORE_IRQ); set_clock_comparator(S390_lowcore.clock_comparator); __ctl_load(cr0, 0, 0); diff --git a/arch/s390/lib/error-inject.c b/arch/s390/lib/error-inject.c new file mode 100644 index 000000000000..8c9d4da87eef --- /dev/null +++ b/arch/s390/lib/error-inject.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include <asm/ptrace.h> +#include <linux/error-injection.h> +#include <linux/kprobes.h> + +void override_function_with_return(struct pt_regs *regs) +{ + /* + * Emulate 'br 14'. 'regs' is captured by kprobes on entry to some + * kernel function. + */ + regs->psw.addr = regs->gprs[14]; +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 32b7a30b2485..7c988994931f 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -9,6 +9,7 @@ #include <linux/kallsyms.h> #include <linux/kthread.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/string.h> #include <linux/kprobes.h> #include <linux/wait.h> @@ -63,6 +64,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, break; if (state.reliable && !addr) { pr_err("unwind state reliable but addr is 0\n"); + kfree(bt); return -EINVAL; } sprint_symbol(sym, addr); diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index ae989b740376..1141c8d5c0d0 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -21,7 +21,6 @@ #include <linux/oom.h> #include <linux/uaccess.h> -#include <asm/pgalloc.h> #include <asm/diag.h> #ifdef CONFIG_CMM_IUCV @@ -189,7 +188,7 @@ static void cmm_set_timer(void) del_timer(&cmm_timer); return; } - mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ); + mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC)); } static void cmm_timer_fn(struct timer_list *unused) @@ -245,7 +244,7 @@ static int cmm_skip_blanks(char *cp, char **endp) } static int cmm_pages_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { long nr = cmm_get_pages(); struct ctl_table ctl_entry = { @@ -264,7 +263,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write, } static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { long nr = cmm_get_timed_pages(); @@ -284,7 +283,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, } static int cmm_timeout_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; long nr, seconds; @@ -297,8 +296,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, if (write) { len = min(*lenp, sizeof(buf)); - if (copy_from_user(buf, buffer, len)) - return -EFAULT; + memcpy(buf, buffer, len); buf[len - 1] = '\0'; cmm_skip_blanks(buf, &p); nr = simple_strtoul(p, &p, 0); @@ -311,8 +309,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, cmm_timeout_pages, cmm_timeout_seconds); if (len > *lenp) len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; + memcpy(buffer, buf, len); *lenp = len; *ppos += len; } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 5d67b81c704a..c2ac9b8ae612 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -6,7 +6,6 @@ #include <linux/kasan.h> #include <asm/kasan.h> #include <asm/sections.h> -#include <asm/pgtable.h> static unsigned long max_addr; diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index fd0dae9d10f4..5060956b8e7d 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -20,9 +20,9 @@ #include <linux/ctype.h> #include <linux/ioport.h> #include <linux/refcount.h> +#include <linux/pgtable.h> #include <asm/diag.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/ebcdic.h> #include <asm/errno.h> #include <asm/extmem.h> @@ -313,15 +313,10 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long goto out_free; } - rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); - - if (rc) - goto out_free; - seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (seg->res == NULL) { rc = -ENOMEM; - goto out_shared; + goto out_free; } seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; seg->res->start = seg->start_addr; @@ -335,12 +330,17 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long if (rc == SEG_TYPE_SC || ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared)) seg->res->flags |= IORESOURCE_READONLY; + + /* Check for overlapping resources before adding the mapping. */ if (request_resource(&iomem_resource, seg->res)) { rc = -EBUSY; - kfree(seg->res); - goto out_shared; + goto out_free_resource; } + rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); + if (rc) + goto out_resource; + if (do_nonshared) diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, &start_addr, &end_addr); @@ -351,14 +351,14 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); rc = diag_cc; - goto out_resource; + goto out_mapping; } if (diag_cc > 1) { pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr); rc = dcss_diag_translate_rc(end_addr); dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); - goto out_resource; + goto out_mapping; } seg->start_addr = start_addr; seg->end = end_addr; @@ -377,11 +377,12 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long (void*) seg->end, segtype_string[seg->vm_segtype]); } goto out; + out_mapping: + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); out_resource: release_resource(seg->res); + out_free_resource: kfree(seg->res); - out_shared: - vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); out_free: kfree(seg); out: @@ -400,8 +401,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long * -EIO : could not perform query or load diagnose * -ENOENT : no such segment * -EOPNOTSUPP: multi-part segment cannot be used with linux - * -ENOSPC : segment cannot be used (overlaps with storage) - * -EBUSY : segment can temporarily not be used (overlaps with dcss) + * -EBUSY : segment cannot be used (overlaps with dcss or storage) * -ERANGE : segment cannot be used (exceeds kernel mapping range) * -EPERM : segment is currently loaded with incompatible permissions * -ENOMEM : out of memory @@ -626,10 +626,6 @@ void segment_warning(int rc, char *seg_name) pr_err("DCSS %s has multiple page ranges and cannot be " "loaded or queried\n", seg_name); break; - case -ENOSPC: - pr_err("DCSS %s overlaps with used storage and cannot " - "be loaded\n", seg_name); - break; case -EBUSY: pr_err("%s needs used memory resources and cannot be " "loaded or queried\n", seg_name); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index dedc28be27ab..996884dcc9fd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -33,7 +33,6 @@ #include <linux/hugetlb.h> #include <asm/asm-offsets.h> #include <asm/diag.h> -#include <asm/pgtable.h> #include <asm/gmap.h> #include <asm/irq.h> #include <asm/mmu_context.h> @@ -106,7 +105,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long asce, unsigned long address) @@ -256,10 +255,8 @@ static noinline void do_no_context(struct pt_regs *regs) /* Are we prepared to handle this kernel fault? */ fixup = s390_search_extables(regs->psw.addr); - if (fixup) { - regs->psw.addr = extable_fixup(fixup); + if (fixup && ex_handle(fixup, regs)) return; - } /* * Oops. The kernel tried to access some bad page. We'll have to @@ -377,7 +374,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, * routines. * * interruption code (int_code): - * 04 Protection -> Write-Protection (suprression) + * 04 Protection -> Write-Protection (suppression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) @@ -434,7 +431,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) flags |= FAULT_FLAG_USER; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); gmap = NULL; if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) { @@ -479,7 +476,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) { fault = VM_FAULT_SIGNAL; if (flags & FAULT_FLAG_RETRY_NOWAIT) @@ -489,33 +486,19 @@ retry: if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } if (fault & VM_FAULT_RETRY) { if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { /* FAULT_FLAG_RETRY_NOWAIT has been set, - * mmap_sem has not been released */ + * mmap_lock has not been released */ current->thread.gmap_pfault = 1; fault = VM_FAULT_PFAULT; goto out_up; } flags &= ~FAULT_FLAG_RETRY_NOWAIT; flags |= FAULT_FLAG_TRIED; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); goto retry; } } @@ -533,7 +516,7 @@ retry: } fault = 0; out_up: - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out: return fault; } @@ -825,22 +808,22 @@ void do_secure_storage_access(struct pt_regs *regs) switch (get_fault_type(regs)) { case USER_FAULT: mm = current->mm; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); vma = find_vma(mm, addr); if (!vma) { - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); break; } page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET); if (IS_ERR_OR_NULL(page)) { - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); break; } if (arch_make_page_accessible(page)) send_sig(SIGSEGV, current, 0); put_page(page); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); break; case KERNEL_FAULT: page = phys_to_page(addr); @@ -876,6 +859,21 @@ void do_non_secure_storage_access(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_non_secure_storage_access); +void do_secure_storage_violation(struct pt_regs *regs) +{ + /* + * Either KVM messed up the secure guest mapping or the same + * page is mapped into multiple secure guests. + * + * This exception is only triggered when a guest 2 is running + * and can therefore never occur in kernel context. + */ + printk_ratelimited(KERN_WARNING + "Secure storage violation in task: %s, pid %d\n", + current->comm, current->pid); + send_sig(SIGSEGV, current, 0); +} + #else void do_secure_storage_access(struct pt_regs *regs) { @@ -886,4 +884,9 @@ void do_non_secure_storage_access(struct pt_regs *regs) { default_trap_handler(regs); } + +void do_secure_storage_violation(struct pt_regs *regs) +{ + default_trap_handler(regs); +} #endif diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 1a95d8809cc3..373542ca1113 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -17,8 +17,8 @@ #include <linux/swapops.h> #include <linux/ksm.h> #include <linux/mman.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/tlb.h> @@ -300,7 +300,7 @@ struct gmap *gmap_get_enabled(void) EXPORT_SYMBOL_GPL(gmap_get_enabled); /* - * gmap_alloc_table is assumed to be called with mmap_sem held + * gmap_alloc_table is assumed to be called with mmap_lock held */ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long init, unsigned long gaddr) @@ -405,10 +405,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) return -EINVAL; flush = 0; - down_write(&gmap->mm->mmap_sem); + mmap_write_lock(gmap->mm); for (off = 0; off < len; off += PMD_SIZE) flush |= __gmap_unmap_by_gaddr(gmap, to + off); - up_write(&gmap->mm->mmap_sem); + mmap_write_unlock(gmap->mm); if (flush) gmap_flush_tlb(gmap); return 0; @@ -438,7 +438,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, return -EINVAL; flush = 0; - down_write(&gmap->mm->mmap_sem); + mmap_write_lock(gmap->mm); for (off = 0; off < len; off += PMD_SIZE) { /* Remove old translation */ flush |= __gmap_unmap_by_gaddr(gmap, to + off); @@ -448,7 +448,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, (void *) from + off)) break; } - up_write(&gmap->mm->mmap_sem); + mmap_write_unlock(gmap->mm); if (flush) gmap_flush_tlb(gmap); if (off >= len) @@ -466,7 +466,7 @@ EXPORT_SYMBOL_GPL(gmap_map_segment); * Returns user space address which corresponds to the guest address or * -EFAULT if no such mapping exists. * This function does not establish potentially missing page table entries. - * The mmap_sem of the mm that belongs to the address space must be held + * The mmap_lock of the mm that belongs to the address space must be held * when this function gets called. * * Note: Can also be called for shadow gmaps. @@ -495,9 +495,9 @@ unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) { unsigned long rc; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); rc = __gmap_translate(gmap, gaddr); - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); return rc; } EXPORT_SYMBOL_GPL(gmap_translate); @@ -534,7 +534,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, * * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT * if the vm address is already mapped to a different guest segment. - * The mmap_sem of the mm that belongs to the address space must be held + * The mmap_lock of the mm that belongs to the address space must be held * when this function gets called. */ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) @@ -640,7 +640,7 @@ int gmap_fault(struct gmap *gmap, unsigned long gaddr, int rc; bool unlocked; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); retry: unlocked = false; @@ -649,13 +649,13 @@ retry: rc = vmaddr; goto out_up; } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + if (fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked)) { rc = -EFAULT; goto out_up; } /* - * In the case that fixup_user_fault unlocked the mmap_sem during + * In the case that fixup_user_fault unlocked the mmap_lock during * faultin redo __gmap_translate to not race with a map/unmap_segment. */ if (unlocked) @@ -663,13 +663,13 @@ retry: rc = __gmap_link(gmap, gaddr, vmaddr); out_up: - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); return rc; } EXPORT_SYMBOL_GPL(gmap_fault); /* - * this function is assumed to be called with mmap_sem held + * this function is assumed to be called with mmap_lock held */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { @@ -696,7 +696,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) unsigned long gaddr, vmaddr, size; struct vm_area_struct *vma; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); for (gaddr = from; gaddr < to; gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { /* Find the vm address for the guest address */ @@ -719,7 +719,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); } - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); } EXPORT_SYMBOL_GPL(gmap_discard); @@ -788,19 +788,19 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { const int asce_type = gmap->asce & _ASCE_TYPE_MASK; - unsigned long *table; + unsigned long *table = gmap->table; - if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) - return NULL; if (gmap_is_shadow(gmap) && gmap->removed) return NULL; + if (WARN_ON_ONCE(level > (asce_type >> 2) + 1)) + return NULL; + if (asce_type != _ASCE_TYPE_REGION1 && gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) return NULL; - table = gmap->table; - switch (gmap->asce & _ASCE_TYPE_MASK) { + switch (asce_type) { case _ASCE_TYPE_REGION1: table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; if (level == 4) @@ -879,10 +879,10 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, BUG_ON(gmap_is_shadow(gmap)); fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0; - if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked)) + if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked)) return -EFAULT; if (unlocked) - /* lost mmap_sem, caller has to retry __gmap_translate */ + /* lost mmap_lock, caller has to retry __gmap_translate */ return 0; /* Connect the page tables */ return __gmap_link(gmap, gaddr, vmaddr); @@ -953,7 +953,7 @@ static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) * -EAGAIN if a fixup is needed * -EINVAL if unsupported notifier bits have been specified * - * Expected to be called with sg->mm->mmap_sem in read and + * Expected to be called with sg->mm->mmap_lock in read and * guest_table_lock held. */ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, @@ -999,7 +999,7 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, * Returns 0 if successfully protected, -ENOMEM if out of memory and * -EAGAIN if a fixup is needed. * - * Expected to be called with sg->mm->mmap_sem in read + * Expected to be called with sg->mm->mmap_lock in read */ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, pmd_t *pmdp, int prot, unsigned long bits) @@ -1035,7 +1035,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, * Returns 0 if successfully protected, -ENOMEM if out of memory and * -EFAULT if gaddr is invalid (or mapping for shadows is missing). * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, unsigned long len, int prot, unsigned long bits) @@ -1106,9 +1106,9 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, return -EINVAL; if (!MACHINE_HAS_ESOP && prot == PROT_READ) return -EINVAL; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); return rc; } EXPORT_SYMBOL_GPL(gmap_mprotect_notify); @@ -1124,7 +1124,7 @@ EXPORT_SYMBOL_GPL(gmap_mprotect_notify); * if reading using the virtual address failed. -EINVAL if called on a gmap * shadow. * - * Called with gmap->mm->mmap_sem in read. + * Called with gmap->mm->mmap_lock in read. */ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) { @@ -1696,11 +1696,11 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, } spin_unlock(&parent->shadow_lock); /* protect after insertion, so it will get properly invalidated */ - down_read(&parent->mm->mmap_sem); + mmap_read_lock(parent->mm); rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, PROT_READ, GMAP_NOTIFY_SHADOW); - up_read(&parent->mm->mmap_sem); + mmap_read_unlock(parent->mm); spin_lock(&parent->shadow_lock); new->initialized = true; if (rc) { @@ -1729,7 +1729,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow); * shadow table structure is incomplete, -ENOMEM if out of memory and * -EFAULT if an address in the parent gmap could not be resolved. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake) @@ -1813,7 +1813,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_r2t); * shadow table structure is incomplete, -ENOMEM if out of memory and * -EFAULT if an address in the parent gmap could not be resolved. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, int fake) @@ -1897,7 +1897,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_r3t); * shadow table structure is incomplete, -ENOMEM if out of memory and * -EFAULT if an address in the parent gmap could not be resolved. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake) @@ -1981,7 +1981,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_sgt); * Returns 0 if the shadow page table was found and -EAGAIN if the page * table was not found. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt, int *dat_protection, @@ -2021,7 +2021,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup); * shadow table structure is incomplete, -ENOMEM if out of memory, * -EFAULT if an address in the parent gmap could not be resolved and * - * Called with gmap->mm->mmap_sem in read + * Called with gmap->mm->mmap_lock in read */ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake) @@ -2100,7 +2100,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_pgt); * shadow table structure is incomplete, -ENOMEM if out of memory and * -EFAULT if an address in the parent gmap could not be resolved. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) { @@ -2485,23 +2485,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], } EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + + split_huge_pmd(vma, pmd, addr); + return 0; +} + +static const struct mm_walk_ops thp_split_walk_ops = { + .pmd_entry = thp_split_walk_pmd_entry, +}; + static inline void thp_split_mm(struct mm_struct *mm) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE struct vm_area_struct *vma; - unsigned long addr; for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - for (addr = vma->vm_start; - addr < vma->vm_end; - addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; + walk_page_vma(vma, &thp_split_walk_ops, NULL); } mm->def_flags |= VM_NOHUGEPAGE; -#endif } +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * Remove all empty zero pages from the mapping for lazy refaulting @@ -2543,12 +2556,12 @@ int s390_enable_sie(void) /* Fail if the page tables are 2K */ if (!mm_alloc_pgste(mm)) return -EINVAL; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL); - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return 0; } EXPORT_SYMBOL_GPL(s390_enable_sie); @@ -2617,7 +2630,7 @@ int s390_enable_skey(void) struct mm_struct *mm = current->mm; int rc = 0; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); if (mm_uses_skeys(mm)) goto out_up; @@ -2630,7 +2643,7 @@ int s390_enable_skey(void) walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL); out_up: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return rc; } EXPORT_SYMBOL_GPL(s390_enable_skey); @@ -2651,9 +2664,9 @@ static const struct mm_walk_ops reset_cmma_walk_ops = { void s390_reset_cmma(struct mm_struct *mm) { - down_write(&mm->mmap_sem); + mmap_write_lock(mm); walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL); - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); } EXPORT_SYMBOL_GPL(s390_reset_cmma); @@ -2685,9 +2698,9 @@ void s390_reset_acc(struct mm_struct *mm) */ if (!mmget_not_zero(mm)) return; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); } EXPORT_SYMBOL_GPL(s390_reset_acc); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 4632d4e26b66..3b5a4d25ca9b 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -117,7 +117,7 @@ static inline pte_t __rste_to_pte(unsigned long rste) _PAGE_YOUNG); #ifdef CONFIG_MEM_SOFT_DIRTY pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, - _PAGE_DIRTY); + _PAGE_SOFT_DIRTY); #endif pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); @@ -254,25 +254,15 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); } -static __init int setup_hugepagesz(char *opt) +bool __init arch_hugetlb_valid_size(unsigned long size) { - unsigned long size; - char *string = opt; - - size = memparse(opt, &opt); - if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { - hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else { - hugetlb_bad_size(); - pr_err("hugepagesz= specifies an unsupported page size %s\n", - string); - return 0; - } - return 1; + if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) + return true; + else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) + return true; + else + return false; } -__setup("hugepagesz=", setup_hugepagesz); static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, unsigned long len, diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 87b2d024e75a..0d282081dc1f 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -33,7 +33,6 @@ #include <linux/dma-direct.h> #include <asm/processor.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/dma.h> #include <asm/lowcore.h> @@ -116,13 +115,12 @@ void __init paging_init(void) __load_psw_mask(psw.mask); kasan_free_early_identity(); - sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); zone_dma_bits = 31; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } void mark_rodata_ro(void) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 06345616a646..99dd1c63a065 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -2,8 +2,8 @@ #include <linux/kasan.h> #include <linux/sched/task.h> #include <linux/memblock.h> +#include <linux/pgtable.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> #include <asm/kasan.h> #include <asm/mem_detect.h> #include <asm/processor.h> diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index de7ca4b6718f..1d17413b319a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -55,19 +55,26 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz */ static DEFINE_SPINLOCK(s390_kernel_write_lock); -void notrace s390_kernel_write(void *dst, const void *src, size_t size) +notrace void *s390_kernel_write(void *dst, const void *src, size_t size) { + void *tmp = dst; unsigned long flags; long copied; spin_lock_irqsave(&s390_kernel_write_lock, flags); - while (size) { - copied = s390_kernel_write_odd(dst, src, size); - dst += copied; - src += copied; - size -= copied; + if (!(flags & PSW_MASK_DAT)) { + memcpy(dst, src, size); + } else { + while (size) { + copied = s390_kernel_write_odd(tmp, src, size); + tmp += copied; + src += copied; + size -= copied; + } } spin_unlock_irqrestore(&s390_kernel_write_lock, flags); + + return dst; } static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 1b78f630a9ca..e54f928503c5 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -17,7 +17,6 @@ #include <linux/random.h> #include <linux/compat.h> #include <linux/security.h> -#include <asm/pgalloc.h> #include <asm/elf.h> static unsigned long stack_maxrandom_size(void) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index e22c06d5f206..c5c52ec2b46f 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -7,7 +7,6 @@ #include <linux/mm.h> #include <asm/cacheflush.h> #include <asm/facility.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/page.h> #include <asm/set_memory.h> @@ -86,7 +85,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, { pte_t *ptep, new; - ptep = pte_offset(pmdp, addr); + ptep = pte_offset_kernel(pmdp, addr); do { new = *ptep; if (pte_none(new)) @@ -338,19 +337,11 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long address; int nr, i, j; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; pte_t *pte; for (i = 0; i < numpages;) { address = page_to_phys(page + i); - pgd = pgd_offset_k(address); - p4d = p4d_offset(pgd, address); - pud = pud_offset(p4d, address); - pmd = pmd_offset(pud, address); - pte = pte_offset_kernel(pmd, address); + pte = virt_to_kpte(address); nr = (unsigned long)pte >> ilog2(sizeof(long)); nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); nr = min(numpages - i, nr); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index fff169d64711..11d2c8395e2a 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -114,7 +114,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) spin_lock_bh(&mm->page_table_lock); /* - * This routine gets called with mmap_sem lock held and there is + * This routine gets called with mmap_lock lock held and there is * no reason to optimize for the case of otherwise. However, if * that would ever change, the below check will let us know. */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9ebd01219812..0d25f743b270 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -19,8 +19,6 @@ #include <linux/ksm.h> #include <linux/mman.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index f810930aff42..eddf71c22875 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -13,7 +13,6 @@ #include <linux/slab.h> #include <asm/cacheflush.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> #include <asm/setup.h> #include <asm/tlbflush.h> #include <asm/sections.h> @@ -21,14 +20,6 @@ static DEFINE_MUTEX(vmem_mutex); -struct memory_segment { - struct list_head list; - unsigned long start; - unsigned long size; -}; - -static LIST_HEAD(mem_segs); - static void __ref *vmem_alloc_pages(unsigned int order) { unsigned long size = PAGE_SIZE << order; @@ -38,6 +29,15 @@ static void __ref *vmem_alloc_pages(unsigned int order) return (void *) memblock_phys_alloc(size, size); } +static void vmem_free_pages(unsigned long addr, int order) +{ + /* We don't expect boot memory to be removed ever. */ + if (!slab_is_available() || + WARN_ON_ONCE(PageReserved(phys_to_page(addr)))) + return; + free_pages(addr, order); +} + void *vmem_crst_alloc(unsigned long val) { unsigned long *table; @@ -63,332 +63,487 @@ pte_t __ref *vmem_pte_alloc(void) return pte; } +static void vmem_pte_free(unsigned long *table) +{ + /* We don't expect boot memory to be removed ever. */ + if (!slab_is_available() || + WARN_ON_ONCE(PageReserved(virt_to_page(table)))) + return; + page_table_free(&init_mm, table); +} + +#define PAGE_UNUSED 0xFD + /* - * Add a physical memory range to the 1:1 mapping. + * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges + * from unused_pmd_start to next PMD_SIZE boundary. */ -static int vmem_add_mem(unsigned long start, unsigned long size) +static unsigned long unused_pmd_start; + +static void vmemmap_flush_unused_pmd(void) { - unsigned long pgt_prot, sgt_prot, r3_prot; - unsigned long pages4k, pages1m, pages2g; - unsigned long end = start + size; - unsigned long address = start; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - int ret = -ENOMEM; + if (!unused_pmd_start) + return; + memset(__va(unused_pmd_start), PAGE_UNUSED, + ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start); + unused_pmd_start = 0; +} + +static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +{ + /* + * As we expect to add in the same granularity as we remove, it's + * sufficient to mark only some piece used to block the memmap page from + * getting removed (just in case the memmap never gets initialized, + * e.g., because the memory block never gets onlined). + */ + memset(__va(start), 0, sizeof(struct page)); +} - pgt_prot = pgprot_val(PAGE_KERNEL); - sgt_prot = pgprot_val(SEGMENT_KERNEL); - r3_prot = pgprot_val(REGION3_KERNEL); - if (!MACHINE_HAS_NX) { - pgt_prot &= ~_PAGE_NOEXEC; - sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; - r3_prot &= ~_REGION_ENTRY_NOEXEC; +static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +{ + /* + * We only optimize if the new used range directly follows the + * previously unused range (esp., when populating consecutive sections). + */ + if (unused_pmd_start == start) { + unused_pmd_start = end; + if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE))) + unused_pmd_start = 0; + return; } - pages4k = pages1m = pages2g = 0; - while (address < end) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); - if (!p4_dir) - goto out; - pgd_populate(&init_mm, pg_dir, p4_dir); - } - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); - if (!pu_dir) - goto out; - p4d_populate(&init_mm, p4_dir, pu_dir); - } - pu_dir = pud_offset(p4_dir, address); - if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && - !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && - !debug_pagealloc_enabled()) { - pud_val(*pu_dir) = address | r3_prot; - address += PUD_SIZE; - pages2g++; - continue; - } - if (pud_none(*pu_dir)) { - pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); - if (!pm_dir) - goto out; - pud_populate(&init_mm, pu_dir, pm_dir); - } - pm_dir = pmd_offset(pu_dir, address); - if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && - !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && - !debug_pagealloc_enabled()) { - pmd_val(*pm_dir) = address | sgt_prot; - address += PMD_SIZE; - pages1m++; + vmemmap_flush_unused_pmd(); + __vmemmap_use_sub_pmd(start, end); +} + +static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) +{ + void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + + vmemmap_flush_unused_pmd(); + + /* Could be our memmap page is filled with PAGE_UNUSED already ... */ + __vmemmap_use_sub_pmd(start, end); + + /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ + if (!IS_ALIGNED(start, PMD_SIZE)) + memset(page, PAGE_UNUSED, start - __pa(page)); + /* + * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of + * consecutive sections. Remember for the last added PMD the last + * unused range in the populated PMD. + */ + if (!IS_ALIGNED(end, PMD_SIZE)) + unused_pmd_start = end; +} + +/* Returns true if the PMD is completely unused and can be freed. */ +static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) +{ + void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + + vmemmap_flush_unused_pmd(); + memset(__va(start), PAGE_UNUSED, end - start); + return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); +} + +/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ +static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, + unsigned long end, bool add, bool direct) +{ + unsigned long prot, pages = 0; + int ret = -ENOMEM; + pte_t *pte; + + prot = pgprot_val(PAGE_KERNEL); + if (!MACHINE_HAS_NX) + prot &= ~_PAGE_NOEXEC; + + pte = pte_offset_kernel(pmd, addr); + for (; addr < end; addr += PAGE_SIZE, pte++) { + if (!add) { + if (pte_none(*pte)) + continue; + if (!direct) + vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0); + pte_clear(&init_mm, addr, pte); + } else if (pte_none(*pte)) { + if (!direct) { + void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + + if (!new_page) + goto out; + pte_val(*pte) = __pa(new_page) | prot; + } else { + pte_val(*pte) = addr | prot; + } + } else { continue; } - if (pmd_none(*pm_dir)) { - pt_dir = vmem_pte_alloc(); - if (!pt_dir) - goto out; - pmd_populate(&init_mm, pm_dir, pt_dir); - } - - pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = address | pgt_prot; - address += PAGE_SIZE; - pages4k++; + pages++; } ret = 0; out: - update_page_count(PG_DIRECT_MAP_4K, pages4k); - update_page_count(PG_DIRECT_MAP_1M, pages1m); - update_page_count(PG_DIRECT_MAP_2G, pages2g); + if (direct) + update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages); return ret; } -/* - * Remove a physical memory range from the 1:1 mapping. - * Currently only invalidates page table entries. - */ -static void vmem_remove_range(unsigned long start, unsigned long size) +static void try_free_pte_table(pmd_t *pmd, unsigned long start) { - unsigned long pages4k, pages1m, pages2g; - unsigned long end = start + size; - unsigned long address = start; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - - pages4k = pages1m = pages2g = 0; - while (address < end) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - address += PGDIR_SIZE; - continue; - } - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - address += P4D_SIZE; - continue; - } - pu_dir = pud_offset(p4_dir, address); - if (pud_none(*pu_dir)) { - address += PUD_SIZE; - continue; - } - if (pud_large(*pu_dir)) { - pud_clear(pu_dir); - address += PUD_SIZE; - pages2g++; - continue; - } - pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) { - address += PMD_SIZE; - continue; - } - if (pmd_large(*pm_dir)) { - pmd_clear(pm_dir); - address += PMD_SIZE; - pages1m++; - continue; - } - pt_dir = pte_offset_kernel(pm_dir, address); - pte_clear(&init_mm, address, pt_dir); - address += PAGE_SIZE; - pages4k++; + pte_t *pte; + int i; + + /* We can safely assume this is fully in 1:1 mapping & vmemmap area */ + pte = pte_offset_kernel(pmd, start); + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + if (!pte_none(*pte)) + return; } - flush_tlb_kernel_range(start, end); - update_page_count(PG_DIRECT_MAP_4K, -pages4k); - update_page_count(PG_DIRECT_MAP_1M, -pages1m); - update_page_count(PG_DIRECT_MAP_2G, -pages2g); + vmem_pte_free(__va(pmd_deref(*pmd))); + pmd_clear(pmd); } -/* - * Add a backed mem_map array to the virtual mem_map array. - */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, - struct vmem_altmap *altmap) +/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ +static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, + unsigned long end, bool add, bool direct) { - unsigned long pgt_prot, sgt_prot; - unsigned long address = start; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; + unsigned long next, prot, pages = 0; int ret = -ENOMEM; + pmd_t *pmd; + pte_t *pte; - pgt_prot = pgprot_val(PAGE_KERNEL); - sgt_prot = pgprot_val(SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) { - pgt_prot &= ~_PAGE_NOEXEC; - sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; - } - for (address = start; address < end;) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); - if (!p4_dir) - goto out; - pgd_populate(&init_mm, pg_dir, p4_dir); - } + prot = pgprot_val(SEGMENT_KERNEL); + if (!MACHINE_HAS_NX) + prot &= ~_SEGMENT_ENTRY_NOEXEC; - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); - if (!pu_dir) - goto out; - p4d_populate(&init_mm, p4_dir, pu_dir); - } + pmd = pmd_offset(pud, addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + if (!add) { + if (pmd_none(*pmd)) + continue; + if (pmd_large(*pmd) && !add) { + if (IS_ALIGNED(addr, PMD_SIZE) && + IS_ALIGNED(next, PMD_SIZE)) { + if (!direct) + vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE)); + pmd_clear(pmd); + pages++; + } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) { + vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE)); + pmd_clear(pmd); + } + continue; + } + } else if (pmd_none(*pmd)) { + if (IS_ALIGNED(addr, PMD_SIZE) && + IS_ALIGNED(next, PMD_SIZE) && + MACHINE_HAS_EDAT1 && addr && direct && + !debug_pagealloc_enabled()) { + pmd_val(*pmd) = addr | prot; + pages++; + continue; + } else if (!direct && MACHINE_HAS_EDAT1) { + void *new_page; - pu_dir = pud_offset(p4_dir, address); - if (pud_none(*pu_dir)) { - pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); - if (!pm_dir) + /* + * Use 1MB frames for vmemmap if available. We + * always use large frames even if they are only + * partially used. Otherwise we would have also + * page tables since vmemmap_populate gets + * called for each section separately. + */ + new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE); + if (new_page) { + pmd_val(*pmd) = __pa(new_page) | prot; + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) { + vmemmap_use_new_sub_pmd(addr, next); + } + continue; + } + } + pte = vmem_pte_alloc(); + if (!pte) goto out; - pud_populate(&init_mm, pu_dir, pm_dir); + pmd_populate(&init_mm, pmd, pte); + } else if (pmd_large(*pmd)) { + if (!direct) + vmemmap_use_sub_pmd(addr, next); + continue; } + ret = modify_pte_table(pmd, addr, next, add, direct); + if (ret) + goto out; + if (!add) + try_free_pte_table(pmd, addr & PMD_MASK); + } + ret = 0; +out: + if (direct) + update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages); + return ret; +} - pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) { - /* Use 1MB frames for vmemmap if available. We always - * use large frames even if they are only partially - * used. - * Otherwise we would have also page tables since - * vmemmap_populate gets called for each section - * separately. */ - if (MACHINE_HAS_EDAT1) { - void *new_page; +static void try_free_pmd_table(pud_t *pud, unsigned long start) +{ + const unsigned long end = start + PUD_SIZE; + pmd_t *pmd; + int i; + + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (end > VMALLOC_START) + return; +#ifdef CONFIG_KASAN + if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) + return; +#endif + pmd = pmd_offset(pud, start); + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) + if (!pmd_none(*pmd)) + return; + vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER); + pud_clear(pud); +} - new_page = vmemmap_alloc_block(PMD_SIZE, node); - if (!new_page) - goto out; - pmd_val(*pm_dir) = __pa(new_page) | sgt_prot; - address = (address + PMD_SIZE) & PMD_MASK; +static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, + bool add, bool direct) +{ + unsigned long next, prot, pages = 0; + int ret = -ENOMEM; + pud_t *pud; + pmd_t *pmd; + + prot = pgprot_val(REGION3_KERNEL); + if (!MACHINE_HAS_NX) + prot &= ~_REGION_ENTRY_NOEXEC; + pud = pud_offset(p4d, addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + if (!add) { + if (pud_none(*pud)) + continue; + if (pud_large(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE)) { + pud_clear(pud); + pages++; + } + continue; + } + } else if (pud_none(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE) && + MACHINE_HAS_EDAT2 && addr && direct && + !debug_pagealloc_enabled()) { + pud_val(*pud) = addr | prot; + pages++; continue; } - pt_dir = vmem_pte_alloc(); - if (!pt_dir) + pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); + if (!pmd) goto out; - pmd_populate(&init_mm, pm_dir, pt_dir); - } else if (pmd_large(*pm_dir)) { - address = (address + PMD_SIZE) & PMD_MASK; + pud_populate(&init_mm, pud, pmd); + } else if (pud_large(*pud)) { continue; } + ret = modify_pmd_table(pud, addr, next, add, direct); + if (ret) + goto out; + if (!add) + try_free_pmd_table(pud, addr & PUD_MASK); + } + ret = 0; +out: + if (direct) + update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages); + return ret; +} - pt_dir = pte_offset_kernel(pm_dir, address); - if (pte_none(*pt_dir)) { - void *new_page; +static void try_free_pud_table(p4d_t *p4d, unsigned long start) +{ + const unsigned long end = start + P4D_SIZE; + pud_t *pud; + int i; + + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (end > VMALLOC_START) + return; +#ifdef CONFIG_KASAN + if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) + return; +#endif + + pud = pud_offset(p4d, start); + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + if (!pud_none(*pud)) + return; + } + vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER); + p4d_clear(p4d); +} - new_page = vmemmap_alloc_block(PAGE_SIZE, node); - if (!new_page) +static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, + bool add, bool direct) +{ + unsigned long next; + int ret = -ENOMEM; + p4d_t *p4d; + pud_t *pud; + + p4d = p4d_offset(pgd, addr); + for (; addr < end; addr = next, p4d++) { + next = p4d_addr_end(addr, end); + if (!add) { + if (p4d_none(*p4d)) + continue; + } else if (p4d_none(*p4d)) { + pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); + if (!pud) goto out; - pte_val(*pt_dir) = __pa(new_page) | pgt_prot; + p4d_populate(&init_mm, p4d, pud); } - address += PAGE_SIZE; + ret = modify_pud_table(p4d, addr, next, add, direct); + if (ret) + goto out; + if (!add) + try_free_pud_table(p4d, addr & P4D_MASK); } ret = 0; out: return ret; } -void vmemmap_free(unsigned long start, unsigned long end, - struct vmem_altmap *altmap) +static void try_free_p4d_table(pgd_t *pgd, unsigned long start) { + const unsigned long end = start + PGDIR_SIZE; + p4d_t *p4d; + int i; + + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (end > VMALLOC_START) + return; +#ifdef CONFIG_KASAN + if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) + return; +#endif + + p4d = p4d_offset(pgd, start); + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { + if (!p4d_none(*p4d)) + return; + } + vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER); + pgd_clear(pgd); } -/* - * Add memory segment to the segment list if it doesn't overlap with - * an already present segment. - */ -static int insert_memory_segment(struct memory_segment *seg) +static int modify_pagetable(unsigned long start, unsigned long end, bool add, + bool direct) { - struct memory_segment *tmp; + unsigned long addr, next; + int ret = -ENOMEM; + pgd_t *pgd; + p4d_t *p4d; - if (seg->start + seg->size > VMEM_MAX_PHYS || - seg->start + seg->size < seg->start) - return -ERANGE; + if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) + return -EINVAL; + for (addr = start; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + pgd = pgd_offset_k(addr); - list_for_each_entry(tmp, &mem_segs, list) { - if (seg->start >= tmp->start + tmp->size) - continue; - if (seg->start + seg->size <= tmp->start) - continue; - return -ENOSPC; + if (!add) { + if (pgd_none(*pgd)) + continue; + } else if (pgd_none(*pgd)) { + p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); + if (!p4d) + goto out; + pgd_populate(&init_mm, pgd, p4d); + } + ret = modify_p4d_table(pgd, addr, next, add, direct); + if (ret) + goto out; + if (!add) + try_free_p4d_table(pgd, addr & PGDIR_MASK); } - list_add(&seg->list, &mem_segs); - return 0; + ret = 0; +out: + if (!add) + flush_tlb_kernel_range(start, end); + return ret; +} + +static int add_pagetable(unsigned long start, unsigned long end, bool direct) +{ + return modify_pagetable(start, end, true, direct); +} + +static int remove_pagetable(unsigned long start, unsigned long end, bool direct) +{ + return modify_pagetable(start, end, false, direct); } /* - * Remove memory segment from the segment list. + * Add a physical memory range to the 1:1 mapping. */ -static void remove_memory_segment(struct memory_segment *seg) +static int vmem_add_range(unsigned long start, unsigned long size) { - list_del(&seg->list); + return add_pagetable(start, start + size, true); } -static void __remove_shared_memory(struct memory_segment *seg) +/* + * Remove a physical memory range from the 1:1 mapping. + */ +static void vmem_remove_range(unsigned long start, unsigned long size) { - remove_memory_segment(seg); - vmem_remove_range(seg->start, seg->size); + remove_pagetable(start, start + size, true); } -int vmem_remove_mapping(unsigned long start, unsigned long size) +/* + * Add a backed mem_map array to the virtual mem_map array. + */ +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { - struct memory_segment *seg; int ret; mutex_lock(&vmem_mutex); + /* We don't care about the node, just use NUMA_NO_NODE on allocations */ + ret = add_pagetable(start, end, false); + if (ret) + remove_pagetable(start, end, false); + mutex_unlock(&vmem_mutex); + return ret; +} - ret = -ENOENT; - list_for_each_entry(seg, &mem_segs, list) { - if (seg->start == start && seg->size == size) - break; - } - - if (seg->start != start || seg->size != size) - goto out; +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) +{ + mutex_lock(&vmem_mutex); + remove_pagetable(start, end, false); + mutex_unlock(&vmem_mutex); +} - ret = 0; - __remove_shared_memory(seg); - kfree(seg); -out: +void vmem_remove_mapping(unsigned long start, unsigned long size) +{ + mutex_lock(&vmem_mutex); + vmem_remove_range(start, size); mutex_unlock(&vmem_mutex); - return ret; } int vmem_add_mapping(unsigned long start, unsigned long size) { - struct memory_segment *seg; int ret; - mutex_lock(&vmem_mutex); - ret = -ENOMEM; - seg = kzalloc(sizeof(*seg), GFP_KERNEL); - if (!seg) - goto out; - seg->start = start; - seg->size = size; - - ret = insert_memory_segment(seg); - if (ret) - goto out_free; + if (start + size > VMEM_MAX_PHYS || + start + size < start) + return -ERANGE; - ret = vmem_add_mem(start, size); + mutex_lock(&vmem_mutex); + ret = vmem_add_range(start, size); if (ret) - goto out_remove; - goto out; - -out_remove: - __remove_shared_memory(seg); -out_free: - kfree(seg); -out: + vmem_remove_range(start, size); mutex_unlock(&vmem_mutex); return ret; } @@ -403,7 +558,7 @@ void __init vmem_map_init(void) struct memblock_region *reg; for_each_memblock(memory, reg) - vmem_add_mem(reg->base, reg->size); + vmem_add_range(reg->base, reg->size); __set_memory((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); @@ -422,27 +577,3 @@ void __init vmem_map_init(void) pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } - -/* - * Convert memblock.memory to a memory segment list so there is a single - * list that contains all memory segments. - */ -static int __init vmem_convert_memory_chunk(void) -{ - struct memblock_region *reg; - struct memory_segment *seg; - - mutex_lock(&vmem_mutex); - for_each_memblock(memory, reg) { - seg = kzalloc(sizeof(*seg), GFP_KERNEL); - if (!seg) - panic("Out of memory...\n"); - seg->start = reg->base; - seg->size = reg->size; - insert_memory_segment(seg); - } - mutex_unlock(&vmem_mutex); - return 0; -} - -core_initcall(vmem_convert_memory_chunk); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 8d2134136290..be4b8532dd3c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -49,6 +49,7 @@ struct bpf_jit { int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ + int excnt; /* Number of exception table entries */ int labels[1]; /* Labels for local jumps */ }; @@ -489,6 +490,24 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) } while (re <= last); } +static void bpf_skip(struct bpf_jit *jit, int size) +{ + if (size >= 6 && !is_valid_rel(size)) { + /* brcl 0xf,size */ + EMIT6_PCREL_RIL(0xc0f4000000, size); + size -= 6; + } else if (size >= 4 && is_valid_rel(size)) { + /* brc 0xf,size */ + EMIT4_PCREL(0xa7f40000, size); + size -= 4; + } + while (size >= 2) { + /* bcr 0,%0 */ + _EMIT2(0x0700); + size -= 2; + } +} + /* * Emit function prologue * @@ -501,9 +520,11 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT); } else { - /* j tail_call_start: NOP if no tail calls are used */ - EMIT4_PCREL(0xa7f40000, 6); - _EMIT2(0); + /* + * There are no tail calls. Insert nops in order to have + * tail_call_start at a predictable offset. + */ + bpf_skip(jit, 6); } /* Tail calls have to skip above initialization */ jit->tail_call_start = jit->prg; @@ -587,6 +608,84 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) } } +static int get_probe_mem_regno(const u8 *insn) +{ + /* + * insn must point to llgc, llgh, llgf or lg, which have destination + * register at the same position. + */ + if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */ + return -1; + if (insn[5] != 0x90 && /* llgc */ + insn[5] != 0x91 && /* llgh */ + insn[5] != 0x16 && /* llgf */ + insn[5] != 0x04) /* lg */ + return -1; + return insn[1] >> 4; +} + +static bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs) +{ + int regno; + u8 *insn; + + regs->psw.addr = extable_fixup(x); + insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16); + regno = get_probe_mem_regno(insn); + if (WARN_ON_ONCE(regno < 0)) + /* JIT bug - unexpected instruction. */ + return false; + regs->gprs[regno] = 0; + return true; +} + +static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, + int probe_prg, int nop_prg) +{ + struct exception_table_entry *ex; + s64 delta; + u8 *insn; + int prg; + int i; + + if (!fp->aux->extable) + /* Do nothing during early JIT passes. */ + return 0; + insn = jit->prg_buf + probe_prg; + if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0)) + /* JIT bug - unexpected probe instruction. */ + return -1; + if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg)) + /* JIT bug - gap between probe and nop instructions. */ + return -1; + for (i = 0; i < 2; i++) { + if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries)) + /* Verifier bug - not enough entries. */ + return -1; + ex = &fp->aux->extable[jit->excnt]; + /* Add extable entries for probe and nop instructions. */ + prg = i == 0 ? probe_prg : nop_prg; + delta = jit->prg_buf + prg - (u8 *)&ex->insn; + if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX)) + /* JIT bug - code and extable must be close. */ + return -1; + ex->insn = delta; + /* + * Always land on the nop. Note that extable infrastructure + * ignores fixup field, it is handled by ex_handler_bpf(). + */ + delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup; + if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX)) + /* JIT bug - landing pad and extable must be close. */ + return -1; + ex->fixup = delta; + ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; + jit->excnt++; + } + return 0; +} + /* * Compile one eBPF instruction into s390x code * @@ -594,7 +693,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) * stack space for the large switch statement. */ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, - int i, bool extra_pass) + int i, bool extra_pass, u32 stack_depth) { struct bpf_insn *insn = &fp->insnsi[i]; u32 dst_reg = insn->dst_reg; @@ -603,7 +702,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, u32 *addrs = jit->addrs; s32 imm = insn->imm; s16 off = insn->off; + int probe_prg = -1; unsigned int mask; + int nop_prg; + int err; + + if (BPF_CLASS(insn->code) == BPF_LDX && + BPF_MODE(insn->code) == BPF_PROBE_MEM) + probe_prg = jit->prg; switch (insn->code) { /* @@ -1118,6 +1224,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, * BPF_LDX */ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_B: /* llgc %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; @@ -1125,6 +1232,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* llgh %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; @@ -1132,6 +1240,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* llgf %dst,off(%src) */ jit->seen |= SEEN_MEM; EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off); @@ -1139,6 +1248,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* lg %dst,0(off,%src) */ jit->seen |= SEEN_MEM; EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off); @@ -1207,7 +1317,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, */ if (jit->seen & SEEN_STACK) - off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth; + off = STK_OFF_TCCNT + STK_OFF + stack_depth; else off = STK_OFF_TCCNT; /* lhi %w0,1 */ @@ -1249,7 +1359,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* * Restore registers before calling function */ - save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth); + save_restore_regs(jit, REGS_RESTORE, stack_depth); /* * goto *(prog->bpf_func + tail_call_start); @@ -1267,8 +1377,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, last = (i == fp->len - 1) ? 1 : 0; if (last) break; - /* j <exit> */ - EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip)) + /* brc 0xf, <exit> */ + EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip); + else + /* brcl 0xf, <exit> */ + EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip); break; /* * Branch relative (number of skipped instructions) to offset on @@ -1416,21 +1530,10 @@ branch_ks: } break; branch_ku: - is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* clfi or clgfi %dst,imm */ - EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000, - dst_reg, imm); - if (!is_first_pass(jit) && - can_use_rel(jit, addrs[i + off + 1])) { - /* brc mask,off */ - EMIT4_PCREL_RIC(0xa7040000, - mask >> 12, addrs[i + off + 1]); - } else { - /* brcl mask,off */ - EMIT6_PCREL_RILC(0xc0040000, - mask >> 12, addrs[i + off + 1]); - } - break; + /* lgfi %w1,imm (load sign extend imm) */ + src_reg = REG_1; + EMIT6_IMM(0xc0010000, src_reg, imm); + goto branch_xu; branch_xs: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; if (!is_first_pass(jit) && @@ -1484,6 +1587,23 @@ branch_oc: pr_err("Unknown opcode %02x\n", insn->code); return -1; } + + if (probe_prg != -1) { + /* + * Handlers of certain exceptions leave psw.addr pointing to + * the instruction directly after the failing one. Therefore, + * create two exception table entries and also add a nop in + * case two probing instructions come directly after each + * other. + */ + nop_prg = jit->prg; + /* bcr 0,%0 */ + _EMIT2(0x0700); + err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg); + if (err < 0) + return err; + } + return insn_count; } @@ -1509,7 +1629,14 @@ static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i) */ static int bpf_set_addr(struct bpf_jit *jit, int i) { - if (!bpf_is_new_addr_sane(jit, i)) + int delta; + + if (is_codegen_pass(jit)) { + delta = jit->prg - jit->addrs[i]; + if (delta < 0) + bpf_skip(jit, -delta); + } + if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i))) return -1; jit->addrs[i] = jit->prg; return 0; @@ -1519,26 +1646,27 @@ static int bpf_set_addr(struct bpf_jit *jit, int i) * Compile eBPF program into s390x code */ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, - bool extra_pass) + bool extra_pass, u32 stack_depth) { int i, insn_count, lit32_size, lit64_size; jit->lit32 = jit->lit32_start; jit->lit64 = jit->lit64_start; jit->prg = 0; + jit->excnt = 0; - bpf_jit_prologue(jit, fp->aux->stack_depth); + bpf_jit_prologue(jit, stack_depth); if (bpf_set_addr(jit, 0) < 0) return -1; for (i = 0; i < fp->len; i += insn_count) { - insn_count = bpf_jit_insn(jit, fp, i, extra_pass); + insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth); if (insn_count < 0) return -1; /* Next instruction address */ if (bpf_set_addr(jit, i + insn_count) < 0) return -1; } - bpf_jit_epilogue(jit, fp->aux->stack_depth); + bpf_jit_epilogue(jit, stack_depth); lit32_size = jit->lit32 - jit->lit32_start; lit64_size = jit->lit64 - jit->lit64_start; @@ -1550,6 +1678,12 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, jit->lit64_start = ALIGN(jit->lit64_start, 8); jit->size = jit->lit64_start + lit64_size; jit->size_prg = jit->prg; + + if (WARN_ON_ONCE(fp->aux->extable && + jit->excnt != fp->aux->num_exentries)) + /* Verifier bug - too many entries. */ + return -1; + return 0; } @@ -1564,11 +1698,35 @@ struct s390_jit_data { int pass; }; +static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit, + struct bpf_prog *fp) +{ + struct bpf_binary_header *header; + u32 extable_size; + u32 code_size; + + /* We need two entries per insn. */ + fp->aux->num_exentries *= 2; + + code_size = roundup(jit->size, + __alignof__(struct exception_table_entry)); + extable_size = fp->aux->num_exentries * + sizeof(struct exception_table_entry); + header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf, + 8, jit_fill_hole); + if (!header) + return NULL; + fp->aux->extable = (struct exception_table_entry *) + (jit->prg_buf + code_size); + return header; +} + /* * Compile eBPF program "fp" */ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { + u32 stack_depth = round_up(fp->aux->stack_depth, 8); struct bpf_prog *tmp, *orig_fp = fp; struct bpf_binary_header *header; struct s390_jit_data *jit_data; @@ -1621,7 +1779,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * - 3: Calculate program size and addrs arrray */ for (pass = 1; pass <= 3; pass++) { - if (bpf_jit_prog(&jit, fp, extra_pass)) { + if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { fp = orig_fp; goto free_addrs; } @@ -1629,13 +1787,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* * Final pass: Allocate and generate program */ - header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole); + header = bpf_jit_alloc(&jit, fp); if (!header) { fp = orig_fp; goto free_addrs; } skip_init_ctx: - if (bpf_jit_prog(&jit, fp, extra_pass)) { + if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { bpf_jit_binary_free(header); fp = orig_fp; goto free_addrs; diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile deleted file mode 100644 index c89d26f4f77d..000000000000 --- a/arch/s390/numa/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-y += numa.o diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 748626a33028..b4e3c84772a1 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -4,4 +4,5 @@ # obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ - pci_event.o pci_debug.o pci_insn.o pci_mmio.o + pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ + pci_bus.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 94ca121933de..1804230dd8d8 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -36,18 +36,21 @@ #include <asm/pci_clp.h> #include <asm/pci_dma.h> +#include "pci_bus.h" + /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); -static unsigned int zpci_num_domains_allocated; #define ZPCI_IOMAP_ENTRIES \ min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2), \ ZPCI_IOMAP_MAX_ENTRIES) +unsigned int s390_pci_no_rid; + static DEFINE_SPINLOCK(zpci_iomap_lock); static unsigned long *zpci_iomap_bitmap; struct zpci_iomap_entry *zpci_iomap_start; @@ -88,17 +91,12 @@ void zpci_remove_reserved_devices(void) spin_unlock(&zpci_list_lock); list_for_each_entry_safe(zdev, tmp, &remove, entry) - zpci_remove_device(zdev); -} - -static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) -{ - return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; + zpci_zdev_put(zdev); } int pci_domain_nr(struct pci_bus *bus) { - return ((struct zpci_dev *) bus->sysdata)->domain; + return ((struct zpci_bus *) bus->sysdata)->domain_nr; } EXPORT_SYMBOL_GPL(pci_domain_nr); @@ -228,28 +226,29 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) zpci_memcpy_toio(to, from, count); } -void __iomem *ioremap(unsigned long ioaddr, unsigned long size) +void __iomem *ioremap(phys_addr_t addr, size_t size) { + unsigned long offset, vaddr; struct vm_struct *area; - unsigned long offset; + phys_addr_t last_addr; - if (!size) + last_addr = addr + size - 1; + if (!size || last_addr < addr) return NULL; if (!static_branch_unlikely(&have_mio)) - return (void __iomem *) ioaddr; + return (void __iomem *) addr; - offset = ioaddr & ~PAGE_MASK; - ioaddr &= PAGE_MASK; + offset = addr & ~PAGE_MASK; + addr &= PAGE_MASK; size = PAGE_ALIGN(size + offset); area = get_vm_area(size, VM_IOREMAP); if (!area) return NULL; - if (ioremap_page_range((unsigned long) area->addr, - (unsigned long) area->addr + size, - ioaddr, PAGE_KERNEL)) { - vunmap(area->addr); + vaddr = (unsigned long) area->addr; + if (ioremap_page_range(vaddr, vaddr + size, addr, PAGE_KERNEL)) { + free_vm_area(area); return NULL; } return (void __iomem *) ((unsigned long) area->addr + offset); @@ -373,29 +372,17 @@ EXPORT_SYMBOL(pci_iounmap); static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - struct zpci_dev *zdev = get_zdev_by_bus(bus); - int ret; + struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn); - if (!zdev || devfn != ZPCI_DEVFN) - ret = -ENODEV; - else - ret = zpci_cfg_load(zdev, where, val, size); - - return ret; + return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV; } static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - struct zpci_dev *zdev = get_zdev_by_bus(bus); - int ret; + struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn); - if (!zdev || devfn != ZPCI_DEVFN) - ret = -ENODEV; - else - ret = zpci_cfg_store(zdev, where, val, size); - - return ret; + return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV; } static struct pci_ops pci_root_ops = { @@ -506,15 +493,15 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start, return r; } -static int zpci_setup_bus_resources(struct zpci_dev *zdev, - struct list_head *resources) +int zpci_setup_bus_resources(struct zpci_dev *zdev, + struct list_head *resources) { unsigned long addr, size, flags; struct resource *res; int i, entry; snprintf(zdev->res_name, sizeof(zdev->res_name), - "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR); + "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR); for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (!zdev->bars[i].size) @@ -608,98 +595,53 @@ void pcibios_disable_device(struct pci_dev *pdev) zpci_debug_exit_device(zdev); } -static int zpci_alloc_domain(struct zpci_dev *zdev) +static int __zpci_register_domain(int domain) { spin_lock(&zpci_domain_lock); - if (zpci_num_domains_allocated > (ZPCI_NR_DEVICES - 1)) { + if (test_bit(domain, zpci_domain)) { spin_unlock(&zpci_domain_lock); - pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n", - zdev->fid, ZPCI_NR_DEVICES); - return -ENOSPC; + pr_err("Domain %04x is already assigned\n", domain); + return -EEXIST; } + set_bit(domain, zpci_domain); + spin_unlock(&zpci_domain_lock); + return domain; +} - if (zpci_unique_uid) { - zdev->domain = (u16) zdev->uid; - if (zdev->domain == 0) { - pr_warn("UID checking is active but no UID is set for PCI function %08x, so automatic domain allocation is used instead\n", - zdev->fid); - update_uid_checking(false); - goto auto_allocate; - } +static int __zpci_alloc_domain(void) +{ + int domain; - if (test_bit(zdev->domain, zpci_domain)) { - spin_unlock(&zpci_domain_lock); - pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n", - zdev->fid, zdev->domain); - return -EEXIST; - } - set_bit(zdev->domain, zpci_domain); - zpci_num_domains_allocated++; - spin_unlock(&zpci_domain_lock); - return 0; - } -auto_allocate: + spin_lock(&zpci_domain_lock); /* * We can always auto allocate domains below ZPCI_NR_DEVICES. * There is either a free domain or we have reached the maximum in * which case we would have bailed earlier. */ - zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); - set_bit(zdev->domain, zpci_domain); - zpci_num_domains_allocated++; + domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); + set_bit(domain, zpci_domain); spin_unlock(&zpci_domain_lock); - return 0; + return domain; } -static void zpci_free_domain(struct zpci_dev *zdev) +int zpci_alloc_domain(int domain) { - spin_lock(&zpci_domain_lock); - clear_bit(zdev->domain, zpci_domain); - zpci_num_domains_allocated--; - spin_unlock(&zpci_domain_lock); + if (zpci_unique_uid) { + if (domain) + return __zpci_register_domain(domain); + pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n"); + update_uid_checking(false); + } + return __zpci_alloc_domain(); } -void pcibios_remove_bus(struct pci_bus *bus) +void zpci_free_domain(int domain) { - struct zpci_dev *zdev = get_zdev_by_bus(bus); - - zpci_exit_slot(zdev); - zpci_cleanup_bus_resources(zdev); - zpci_destroy_iommu(zdev); - zpci_free_domain(zdev); - - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); - - zpci_dbg(3, "rem fid:%x\n", zdev->fid); - kfree(zdev); + spin_lock(&zpci_domain_lock); + clear_bit(domain, zpci_domain); + spin_unlock(&zpci_domain_lock); } -static int zpci_scan_bus(struct zpci_dev *zdev) -{ - LIST_HEAD(resources); - int ret; - - ret = zpci_setup_bus_resources(zdev, &resources); - if (ret) - goto error; - - zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops, - zdev, &resources); - if (!zdev->bus) { - ret = -EIO; - goto error; - } - zdev->bus->max_bus_speed = zdev->max_bus_speed; - pci_bus_add_devices(zdev->bus); - return 0; - -error: - zpci_cleanup_bus_resources(zdev); - pci_free_resource_list(&resources); - return ret; -} int zpci_enable_device(struct zpci_dev *zdev) { @@ -726,21 +668,40 @@ EXPORT_SYMBOL_GPL(zpci_enable_device); int zpci_disable_device(struct zpci_dev *zdev) { zpci_dma_exit_device(zdev); + /* + * The zPCI function may already be disabled by the platform, this is + * detected in clp_disable_fh() which becomes a no-op. + */ return clp_disable_fh(zdev); } EXPORT_SYMBOL_GPL(zpci_disable_device); +void zpci_remove_device(struct zpci_dev *zdev) +{ + struct zpci_bus *zbus = zdev->zbus; + struct pci_dev *pdev; + + pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (pdev) { + if (pdev->is_virtfn) + return zpci_remove_virtfn(pdev, zdev->vfn); + pci_stop_and_remove_bus_device_locked(pdev); + } +} + int zpci_create_device(struct zpci_dev *zdev) { int rc; - rc = zpci_alloc_domain(zdev); - if (rc) - goto out; + kref_init(&zdev->kref); + + spin_lock(&zpci_list_lock); + list_add_tail(&zdev->entry, &zpci_list); + spin_unlock(&zpci_list_lock); rc = zpci_init_iommu(zdev); if (rc) - goto out_free; + goto out; mutex_init(&zdev->lock); if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { @@ -748,36 +709,54 @@ int zpci_create_device(struct zpci_dev *zdev) if (rc) goto out_destroy_iommu; } - rc = zpci_scan_bus(zdev); + + rc = zpci_bus_device_register(zdev, &pci_root_ops); if (rc) goto out_disable; - spin_lock(&zpci_list_lock); - list_add_tail(&zdev->entry, &zpci_list); - spin_unlock(&zpci_list_lock); - - zpci_init_slot(zdev); - return 0; out_disable: if (zdev->state == ZPCI_FN_STATE_ONLINE) zpci_disable_device(zdev); + out_destroy_iommu: zpci_destroy_iommu(zdev); -out_free: - zpci_free_domain(zdev); out: + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); return rc; } -void zpci_remove_device(struct zpci_dev *zdev) +void zpci_release_device(struct kref *kref) { - if (!zdev->bus) - return; + struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + + if (zdev->zbus->bus) + zpci_remove_device(zdev); - pci_stop_root_bus(zdev->bus); - pci_remove_root_bus(zdev->bus); + switch (zdev->state) { + case ZPCI_FN_STATE_ONLINE: + case ZPCI_FN_STATE_CONFIGURED: + zpci_disable_device(zdev); + fallthrough; + case ZPCI_FN_STATE_STANDBY: + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + zpci_cleanup_bus_resources(zdev); + zpci_bus_device_unregister(zdev); + zpci_destroy_iommu(zdev); + fallthrough; + default: + break; + } + + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zpci_dbg(3, "rem fid:%x\n", zdev->fid); + kfree(zdev); } int zpci_report_error(struct pci_dev *pdev, @@ -844,6 +823,10 @@ char * __init pcibios_setup(char *str) s390_pci_force_floating = 1; return NULL; } + if (!strcmp(str, "norid")) { + s390_pci_no_rid = 1; + return NULL; + } return str; } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c new file mode 100644 index 000000000000..5967f3014156 --- /dev/null +++ b/arch/s390/pci/pci_bus.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Pierre Morel <pmorel@linux.ibm.com> + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/jump_label.h> +#include <linux/pci.h> +#include <linux/printk.h> + +#include <asm/pci_clp.h> +#include <asm/pci_dma.h> + +#include "pci_bus.h" + +static LIST_HEAD(zbus_list); +static DEFINE_SPINLOCK(zbus_list_lock); +static int zpci_nb_devices; + +/* zpci_bus_scan + * @zbus: the zbus holding the zdevices + * @ops: the pci operations + * + * The domain number must be set before pci_scan_root_bus is called. + * This function can be called once the domain is known, hence + * when the function_0 is dicovered. + */ +static int zpci_bus_scan(struct zpci_bus *zbus, int domain, struct pci_ops *ops) +{ + struct pci_bus *bus; + int rc; + + rc = zpci_alloc_domain(domain); + if (rc < 0) + return rc; + zbus->domain_nr = rc; + + bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources); + if (!bus) { + zpci_free_domain(zbus->domain_nr); + return -EFAULT; + } + + zbus->bus = bus; + pci_bus_add_devices(bus); + return 0; +} + +static void zpci_bus_release(struct kref *kref) +{ + struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref); + + if (zbus->bus) { + pci_lock_rescan_remove(); + pci_stop_root_bus(zbus->bus); + + zpci_free_domain(zbus->domain_nr); + pci_free_resource_list(&zbus->resources); + + pci_remove_root_bus(zbus->bus); + pci_unlock_rescan_remove(); + } + + spin_lock(&zbus_list_lock); + list_del(&zbus->bus_next); + spin_unlock(&zbus_list_lock); + kfree(zbus); +} + +static void zpci_bus_put(struct zpci_bus *zbus) +{ + kref_put(&zbus->kref, zpci_bus_release); +} + +static struct zpci_bus *zpci_bus_get(int pchid) +{ + struct zpci_bus *zbus; + + spin_lock(&zbus_list_lock); + list_for_each_entry(zbus, &zbus_list, bus_next) { + if (pchid == zbus->pchid) { + kref_get(&zbus->kref); + goto out_unlock; + } + } + zbus = NULL; +out_unlock: + spin_unlock(&zbus_list_lock); + return zbus; +} + +static struct zpci_bus *zpci_bus_alloc(int pchid) +{ + struct zpci_bus *zbus; + + zbus = kzalloc(sizeof(*zbus), GFP_KERNEL); + if (!zbus) + return NULL; + + zbus->pchid = pchid; + INIT_LIST_HEAD(&zbus->bus_next); + spin_lock(&zbus_list_lock); + list_add_tail(&zbus->bus_next, &zbus_list); + spin_unlock(&zbus_list_lock); + + kref_init(&zbus->kref); + INIT_LIST_HEAD(&zbus->resources); + + zbus->bus_resource.start = 0; + zbus->bus_resource.end = ZPCI_BUS_NR; + zbus->bus_resource.flags = IORESOURCE_BUS; + pci_add_resource(&zbus->resources, &zbus->bus_resource); + + return zbus; +} + +#ifdef CONFIG_PCI_IOV +static int zpci_bus_link_virtfn(struct pci_dev *pdev, + struct pci_dev *virtfn, int vfid) +{ + int rc; + + rc = pci_iov_sysfs_link(pdev, virtfn, vfid); + if (rc) + return rc; + + virtfn->is_virtfn = 1; + virtfn->multifunction = 0; + virtfn->physfn = pci_dev_get(pdev); + + return 0; +} + +static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, + struct pci_dev *virtfn, int vfn) +{ + int i, cand_devfn; + struct zpci_dev *zdev; + struct pci_dev *pdev; + int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ + int rc = 0; + + if (!zbus->multifunction) + return 0; + + /* If the parent PF for the given VF is also configured in the + * instance, it must be on the same zbus. + * We can then identify the parent PF by checking what + * devfn the VF would have if it belonged to that PF using the PF's + * stride and offset. Only if this candidate devfn matches the + * actual devfn will we link both functions. + */ + for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) { + zdev = zbus->function[i]; + if (zdev && zdev->is_physfn) { + pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (!pdev) + continue; + cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); + if (cand_devfn == virtfn->devfn) { + rc = zpci_bus_link_virtfn(pdev, virtfn, vfid); + /* balance pci_get_slot() */ + pci_dev_put(pdev); + break; + } + /* balance pci_get_slot() */ + pci_dev_put(pdev); + } + } + return rc; +} +#else +static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus, + struct pci_dev *virtfn, int vfn) +{ + return 0; +} +#endif + +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + + /* + * With pdev->no_vf_scan the common PCI probing code does not + * perform PF/VF linking. + */ + if (zdev->vfn) + zpci_bus_setup_virtfn(zdev->zbus, pdev, zdev->vfn); + +} + +static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + struct pci_bus *bus; + struct resource_entry *window, *n; + struct resource *res; + struct pci_dev *pdev; + int rc; + + bus = zbus->bus; + if (!bus) + return -EINVAL; + + pdev = pci_get_slot(bus, zdev->devfn); + if (pdev) { + /* Device is already known. */ + pci_dev_put(pdev); + return 0; + } + + rc = zpci_init_slot(zdev); + if (rc) + return rc; + zdev->has_hp_slot = 1; + + resource_list_for_each_entry_safe(window, n, &zbus->resources) { + res = window->res; + pci_bus_add_resource(bus, res, 0); + } + + pdev = pci_scan_single_device(bus, zdev->devfn); + if (pdev) + pci_bus_add_device(pdev); + + return 0; +} + +static void zpci_bus_add_devices(struct zpci_bus *zbus) +{ + int i; + + for (i = 1; i < ZPCI_FUNCTIONS_PER_BUS; i++) + if (zbus->function[i]) + zpci_bus_add_device(zbus, zbus->function[i]); + + pci_lock_rescan_remove(); + pci_bus_add_devices(zbus->bus); + pci_unlock_rescan_remove(); +} + +int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) +{ + struct zpci_bus *zbus = NULL; + int rc = -EBADF; + + if (zpci_nb_devices == ZPCI_NR_DEVICES) { + pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n", + zdev->fid, ZPCI_NR_DEVICES); + return -ENOSPC; + } + zpci_nb_devices++; + + if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS) + return -EINVAL; + + if (!s390_pci_no_rid && zdev->rid_available) + zbus = zpci_bus_get(zdev->pchid); + + if (!zbus) { + zbus = zpci_bus_alloc(zdev->pchid); + if (!zbus) + return -ENOMEM; + } + + zdev->zbus = zbus; + if (zbus->function[zdev->devfn]) { + pr_err("devfn %04x is already assigned\n", zdev->devfn); + goto error; /* rc already set */ + } + zbus->function[zdev->devfn] = zdev; + + zpci_setup_bus_resources(zdev, &zbus->resources); + + if (zbus->bus) { + if (!zbus->multifunction) { + WARN_ONCE(1, "zbus is not multifunction\n"); + goto error_bus; + } + if (!zdev->rid_available) { + WARN_ONCE(1, "rid_available not set for multifunction\n"); + goto error_bus; + } + rc = zpci_bus_add_device(zbus, zdev); + if (rc) + goto error_bus; + } else if (zdev->devfn == 0) { + if (zbus->multifunction && !zdev->rid_available) { + WARN_ONCE(1, "rid_available not set on function 0 for multifunction\n"); + goto error_bus; + } + rc = zpci_bus_scan(zbus, (u16)zdev->uid, ops); + if (rc) + goto error_bus; + zpci_bus_add_devices(zbus); + rc = zpci_init_slot(zdev); + if (rc) + goto error_bus; + zdev->has_hp_slot = 1; + zbus->multifunction = zdev->rid_available; + zbus->max_bus_speed = zdev->max_bus_speed; + } else { + zbus->multifunction = 1; + } + + return 0; + +error_bus: + zpci_nb_devices--; + zbus->function[zdev->devfn] = NULL; +error: + pr_err("Adding PCI function %08x failed\n", zdev->fid); + zpci_bus_put(zbus); + return rc; +} + +void zpci_bus_device_unregister(struct zpci_dev *zdev) +{ + struct zpci_bus *zbus = zdev->zbus; + + zpci_nb_devices--; + zbus->function[zdev->devfn] = NULL; + zpci_bus_put(zbus); +} diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h new file mode 100644 index 000000000000..4972433df458 --- /dev/null +++ b/arch/s390/pci/pci_bus.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Pierre Morel <pmorel@linux.ibm.com> + * + */ + +int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops); +void zpci_bus_device_unregister(struct zpci_dev *zdev); +int zpci_bus_init(void); + +void zpci_release_device(struct kref *kref); +static inline void zpci_zdev_put(struct zpci_dev *zdev) +{ + kref_put(&zdev->kref, zpci_release_device); +} + +int zpci_alloc_domain(int domain); +void zpci_free_domain(int domain); +int zpci_setup_bus_resources(struct zpci_dev *zdev, + struct list_head *resources); + +static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus, + unsigned int devfn) +{ + struct zpci_bus *zbus = bus->sysdata; + + return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn]; +} + +#ifdef CONFIG_PCI_IOV +static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) +{ + + pci_lock_rescan_remove(); + /* Linux' vfid's start at 0 vfn at 1 */ + pci_iov_remove_virtfn(pdev->physfn, vfn - 1); + pci_unlock_rescan_remove(); +} +#else /* CONFIG_PCI_IOV */ +static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) {} +#endif /* CONFIG_PCI_IOV */ diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index ea794ae755ae..7e735f41a0a6 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -155,8 +155,13 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, zdev->pfgid = response->pfgid; zdev->pft = response->pft; zdev->vfn = response->vfn; + zdev->port = response->port; zdev->uid = response->uid; zdev->fmb_length = sizeof(u32) * response->fmb_len; + zdev->rid_available = response->rid_avail; + zdev->is_physfn = response->is_physfn; + if (!s390_pci_no_rid && zdev->rid_available) + zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN; memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip)); if (response->util_str_avail) { @@ -309,14 +314,13 @@ out: int clp_disable_fh(struct zpci_dev *zdev) { - u32 fh = zdev->fh; int rc; if (!zdev_enabled(zdev)) return 0; rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN); - zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 8d6ee4af4230..d9ae7456dd4c 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -14,6 +14,8 @@ #include <asm/pci_debug.h> #include <asm/sclp.h> +#include "pci_bus.h" + /* Content Code Description for PCI Function Error */ struct zpci_ccdf_err { u32 reserved1; @@ -53,7 +55,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) zpci_err_hex(ccdf, sizeof(*ccdf)); if (zdev) - pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); @@ -78,42 +80,48 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) enum zpci_state state; int ret; - if (zdev) - pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + if (zdev && zdev->zbus && zdev->zbus->bus) + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); - pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n", - pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); zpci_err("avail CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); switch (ccdf->pec) { case 0x0301: /* Reserved|Standby -> Configured */ if (!zdev) { - ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0); - if (ret) - break; - zdev = get_zdev_by_fid(ccdf->fid); + ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 1); + break; } - if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY) + /* the configuration request may be stale */ + if (zdev->state != ZPCI_FN_STATE_STANDBY) break; - zdev->state = ZPCI_FN_STATE_CONFIGURED; zdev->fh = ccdf->fh; + zdev->state = ZPCI_FN_STATE_CONFIGURED; ret = zpci_enable_device(zdev); if (ret) break; + + pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn); + if (!pdev) + break; + + pci_bus_add_device(pdev); pci_lock_rescan_remove(); - pci_rescan_bus(zdev->bus); + pci_bus_add_devices(zdev->zbus->bus); pci_unlock_rescan_remove(); break; case 0x0302: /* Reserved -> Standby */ - if (!zdev) + if (!zdev) { clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + break; + } + zdev->fh = ccdf->fh; break; case 0x0303: /* Deconfiguration requested */ if (!zdev) break; if (pdev) - pci_stop_and_remove_bus_device_locked(pdev); + zpci_remove_device(zdev); ret = zpci_disable_device(zdev); if (ret) @@ -132,7 +140,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) /* Give the driver a hint that the function is * already unusable. */ pdev->error_state = pci_channel_io_perm_failure; - pci_stop_and_remove_bus_device_locked(pdev); + zpci_remove_device(zdev); } zdev->fh = ccdf->fh; @@ -140,7 +148,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev->state = ZPCI_FN_STATE_STANDBY; if (!clp_get_state(ccdf->fid, &state) && state == ZPCI_FN_STATE_RESERVED) { - zpci_remove_device(zdev); + zpci_zdev_put(zdev); } break; case 0x0306: /* 0x308 or 0x302 for multiple devices */ @@ -149,12 +157,11 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0308: /* Standby -> Reserved */ if (!zdev) break; - zpci_remove_device(zdev); + zpci_zdev_put(zdev); break; default: break; } - pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 020a2c514d96..401cf670a243 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -125,7 +125,7 @@ static long get_pfn(unsigned long user_addr, unsigned long access, struct vm_area_struct *vma; long ret; - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); ret = -EINVAL; vma = find_vma(current->mm, user_addr); if (!vma) @@ -135,7 +135,7 @@ static long get_pfn(unsigned long user_addr, unsigned long access, goto out; ret = follow_pfn(vma, user_addr, pfn); out: - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); return ret; } @@ -155,10 +155,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, return -EINVAL; /* - * Only support read access to MIO capable devices on a MIO enabled - * system. Otherwise we would have to check for every address if it is - * a special ZPCI_ADDR and we would have to do a get_pfn() which we - * don't need for MIO capable devices. + * We only support write access to MIO capable devices if we are on + * a MIO enabled system. Otherwise we would have to check for every + * address if it is a special ZPCI_ADDR and would have to do + * a get_pfn() which we don't need for MIO capable devices. Currently + * ISM devices are the only devices without MIO support and there is no + * known need for accessing these from userspace. */ if (static_branch_likely(&have_mio)) { ret = __memcpy_toio_inuser((void __iomem *) mmio_addr, @@ -282,10 +284,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, return -EINVAL; /* - * Only support write access to MIO capable devices on a MIO enabled - * system. Otherwise we would have to check for every address if it is - * a special ZPCI_ADDR and we would have to do a get_pfn() which we - * don't need for MIO capable devices. + * We only support read access to MIO capable devices if we are on + * a MIO enabled system. Otherwise we would have to check for every + * address if it is a special ZPCI_ADDR and would have to do + * a get_pfn() which we don't need for MIO capable devices. Currently + * ISM devices are the only devices without MIO support and there is no + * known need for accessing these from userspace. */ if (static_branch_likely(&have_mio)) { ret = __memcpy_fromio_inuser( diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 215f17437a4f..5c028bee91b9 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -33,6 +33,7 @@ zpci_attr(pchid, "0x%04x\n", pchid); zpci_attr(pfgid, "0x%02x\n", pfgid); zpci_attr(vfn, "0x%04x\n", vfn); zpci_attr(pft, "0x%02x\n", pft); +zpci_attr(port, "%d\n", port); zpci_attr(uid, "0x%x\n", uid); zpci_attr(segment0, "0x%02x\n", pfip[0]); zpci_attr(segment1, "0x%02x\n", pfip[1]); @@ -88,7 +89,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, ret = zpci_enable_device(zdev); if (ret) goto out; - pci_rescan_bus(zdev->bus); + pci_rescan_bus(zdev->zbus->bus); } out: pci_unlock_rescan_remove(); @@ -142,6 +143,7 @@ static struct attribute *zpci_dev_attrs[] = { &dev_attr_pchid.attr, &dev_attr_pfgid.attr, &dev_attr_pft.attr, + &dev_attr_port.attr, &dev_attr_vfn.attr, &dev_attr_uid.attr, &dev_attr_recover.attr, |