diff options
Diffstat (limited to 'arch/s390')
110 files changed, 1872 insertions, 1360 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3a55f493c7da..3be9c832dec1 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -10,9 +10,6 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT def_bool y -config HAVE_LATENCYTOP_SUPPORT - def_bool y - config RWSEM_GENERIC_SPINLOCK bool @@ -66,6 +63,7 @@ config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SG_CHAIN @@ -166,8 +164,7 @@ config SCHED_OMIT_FRAME_POINTER config PGTABLE_LEVELS int - default 4 if 64BIT - default 2 + default 4 source "init/Kconfig" @@ -390,9 +387,6 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. -config SCHED_SMT - def_bool n - # Some NUMA nodes have memory ranges that span # other nodes. Even though a pfn is valid and # between a node's start and end pfns, it may not @@ -403,7 +397,7 @@ config NODES_SPAN_OTHER_NODES config NUMA bool "NUMA support" - depends on SMP && 64BIT && SCHED_TOPOLOGY + depends on SMP && SCHED_TOPOLOGY default n help Enable NUMA support @@ -463,6 +457,9 @@ config EMU_SIZE endmenu +config SCHED_SMT + def_bool n + config SCHED_MC def_bool n @@ -582,7 +579,6 @@ config QDIO menuconfig PCI bool "PCI support" - select HAVE_DMA_ATTRS select PCI_MSI select IOMMU_SUPPORT help diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index c56878e1245f..26c5d5beb4be 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -5,18 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" -config STRICT_DEVMEM - def_bool y - prompt "Filter access to /dev/mem" - ---help--- - This option restricts access to /dev/mem. If this option is - disabled, you allow userspace access to all memory, including - kernel and userspace memory. Accidental memory access is likely - to be disastrous. 
- Memory access is required for experts who want to debug the kernel. - - If you are unsure, say Y. - config S390_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e8d4423e4f85..224b42734f0d 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -106,6 +106,7 @@ drivers-y += drivers/s390/ drivers-$(CONFIG_OPROFILE) += arch/s390/oprofile/ boot := arch/s390/boot +tools := arch/s390/tools all: image bzImage @@ -124,9 +125,17 @@ vdso_install: archclean: $(Q)$(MAKE) $(clean)=$(boot) + $(Q)$(MAKE) $(clean)=$(tools) + +archprepare: + $(Q)$(MAKE) $(build)=$(tools) include/generated/facilities.h # Don't use tabs in echo arguments define archhelp echo '* image - Kernel image for IPL ($(boot)/image)' echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)' + echo ' install - Install kernel using' + echo ' (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) /sbin/$(INSTALLKERNEL) or' + echo ' install to $$(INSTALL_PATH)' endef diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index ed7da281df66..0ac42cc4f880 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -10,28 +10,35 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_HUGETLB=y CONFIG_CGROUP_PERF=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y 
+CONFIG_STATIC_KEYS_SELFTEST=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -64,7 +71,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_CRASH_DUMP=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y CONFIG_NET=y @@ -106,7 +112,6 @@ CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m @@ -457,19 +462,9 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_VIRTIO_BALLOON=m -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_JBD_DEBUG=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -490,7 +485,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -542,10 +537,11 @@ CONFIG_DLM=m CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=1024 CONFIG_READABLE_ASM=y CONFIG_UNUSED_SYMBOLS=y +CONFIG_HEADERS_CHECK=y +CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_DEBUG_OBJECTS=y @@ -588,6 +584,7 @@ CONFIG_FAILSLAB=y CONFIG_FAIL_PAGE_ALLOC=y CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y +CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LATENCYTOP=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 9858b14cde1e..a31dcd56f7c0 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -10,21 +10,27 @@ 
CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_HUGETLB=y CONFIG_CGROUP_PERF=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m @@ -61,7 +67,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_CRASH_DUMP=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y CONFIG_NET=y @@ -103,7 +108,6 @@ CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m @@ -453,19 +457,9 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_VIRTIO_BALLOON=m -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_JBD_DEBUG=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -485,7 +479,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -550,6 +544,7 @@ CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_CPU_NOTIFIER_ERROR_INJECT=m CONFIG_PM_NOTIFIER_ERROR_INJECT=m CONFIG_LATENCYTOP=y +CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_KPROBE_EVENT is not set 
CONFIG_LKDTM=m @@ -557,6 +552,7 @@ CONFIG_RBTREE_TEST=m CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_BPF=m # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 7f14f80717d4..7b73bf353345 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -10,22 +10,28 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y # CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_HUGETLB=y CONFIG_CGROUP_PERF=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m @@ -61,7 +67,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_CRASH_DUMP=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y CONFIG_NET=y @@ -103,7 +108,6 @@ CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m @@ -453,19 +457,9 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_VIRTIO_BALLOON=m -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y 
CONFIG_EXT4_FS_SECURITY=y -CONFIG_JBD_DEBUG=y CONFIG_JBD2_DEBUG=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y @@ -485,7 +479,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m +CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_FSCACHE=m CONFIG_CACHEFILES=m @@ -546,6 +540,7 @@ CONFIG_TIMER_STATS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y +CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y @@ -554,6 +549,7 @@ CONFIG_UPROBE_EVENT=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_BPF=m # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 92805d604173..1719843a55a2 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -23,8 +23,6 @@ CONFIG_CRASH_DUMP=y # CONFIG_SECCOMP is not set CONFIG_NET=y # CONFIG_IUCV is not set -CONFIG_ATM=y -CONFIG_ATM_LANE=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y # CONFIG_FIRMWARE_IN_KERNEL is not set @@ -54,14 +52,10 @@ CONFIG_RAW_DRIVER=y # CONFIG_S390_VMUR is not set # CONFIG_HID is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y +# CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set CONFIG_CONFIGFS_FS=y +# CONFIG_MISC_FILESYSTEMS is not set CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 9256b48e7e43..e24f2af4c73b 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -11,22 +11,31 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y 
CONFIG_CGROUP_CPUACCT=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_PERF=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y +CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y +CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y +CONFIG_STATIC_KEYS_SELFTEST=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y @@ -37,6 +46,7 @@ CONFIG_DEFAULT_DEADLINE=y CONFIG_LIVEPATCH=y CONFIG_MARCH_Z196=y CONFIG_NR_CPUS=256 +CONFIG_NUMA=y CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y @@ -52,7 +62,6 @@ CONFIG_NET_KEY=y CONFIG_INET=y CONFIG_IP_MULTICAST=y # CONFIG_INET_LRO is not set -CONFIG_IPV6=y CONFIG_L2TP=m CONFIG_L2TP_DEBUGFS=m CONFIG_VLAN_8021Q=y @@ -89,10 +98,26 @@ CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SCAN_ASYNC=y CONFIG_SCSI_FC_ATTRS=y CONFIG_ZFCP=y CONFIG_SCSI_VIRTIO=y +CONFIG_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=m +CONFIG_DM_SWITCH=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m @@ -137,7 +162,6 @@ CONFIG_DEBUG_PI_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_CPU_STALL_INFO is not set CONFIG_RCU_TRACE=y CONFIG_LATENCYTOP=y CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index b2e5902bd8f4..0f3da2cb2bd6 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -67,7 +67,7 @@ static void hypfs_remove(struct dentry *dentry) struct dentry *parent; parent = dentry->d_parent; - 
mutex_lock(&d_inode(parent)->i_mutex); + inode_lock(d_inode(parent)); if (simple_positive(dentry)) { if (d_is_dir(dentry)) simple_rmdir(d_inode(parent), dentry); @@ -76,7 +76,7 @@ static void hypfs_remove(struct dentry *dentry) } d_delete(dentry); dput(dentry); - mutex_unlock(&d_inode(parent)->i_mutex); + inode_unlock(d_inode(parent)); } static void hypfs_delete_tree(struct dentry *root) @@ -331,7 +331,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, struct dentry *dentry; struct inode *inode; - mutex_lock(&d_inode(parent)->i_mutex); + inode_lock(d_inode(parent)); dentry = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(dentry)) { dentry = ERR_PTR(-ENOMEM); @@ -359,7 +359,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, d_instantiate(dentry, inode); dget(dentry); fail: - mutex_unlock(&d_inode(parent)->i_mutex); + inode_unlock(d_inode(parent)); return dentry; } diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index d68e11e0df5e..5c8db3ce61c8 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -26,26 +26,18 @@ #define wmb() barrier() #define dma_rmb() mb() #define dma_wmb() mb() -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() +#define __smp_mb() mb() +#define __smp_rmb() rmb() +#define __smp_wmb() wmb() -#define read_barrier_depends() do { } while (0) -#define smp_read_barrier_depends() do { } while (0) - -#define smp_mb__before_atomic() smp_mb() -#define smp_mb__after_atomic() smp_mb() - -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) - -#define smp_store_release(p, v) \ +#define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ WRITE_ONCE(*p, v); \ } while (0) -#define smp_load_acquire(p) \ +#define __smp_load_acquire(p) \ ({ \ typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ @@ -53,4 +45,9 @@ do { \ 
___p1; \ }) +#define __smp_mb__before_atomic() barrier() +#define __smp_mb__after_atomic() barrier() + +#include <asm-generic/barrier.h> + #endif /* __ASM_BARRIER_H */ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index d350ed9d0fbb..352f7bdaf11f 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -284,7 +284,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) static inline int is_compat_task(void) { - return is_32bit_task(); + return test_thread_flag(TIF_31BIT); } static inline void __user *arch_compat_alloc_user_space(long len) diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h index 7c31d3e25cd1..bcb9cd2a730a 100644 --- a/arch/s390/include/asm/crw.h +++ b/arch/s390/include/asm/crw.h @@ -52,18 +52,4 @@ void crw_wait_for_channel_report(void); #define CRW_ERC_PERRI 0x07 /* perm. error, facility init */ #define CRW_ERC_PMOD 0x08 /* installed parameters modified */ -static inline int stcrw(struct crw *pcrw) -{ - int ccode; - - asm volatile( - " stcrw 0(%2)\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "=m" (*pcrw) - : "a" (pcrw) - : "cc" ); - return ccode; -} - #endif /* _ASM_S390_CRW_H */ diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index b3fd54d93dd2..e64bfcb9702f 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -23,8 +23,6 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, { } -#include <asm-generic/dma-mapping-common.h> - static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { if (!dev->dma_mask) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index bab6739a1154..563ab9f44874 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -104,6 +104,9 @@ #define HWCAP_S390_TE 1024 #define HWCAP_S390_VXRS 2048 +/* Internal bits, not exposed via elf */ +#define HWCAP_INT_SIE 1UL + /* 
* These are used to set parameters in the core dumps. */ @@ -126,6 +129,7 @@ typedef s390_regs elf_gregset_t; typedef s390_fp_regs compat_elf_fpregset_t; typedef s390_compat_regs compat_elf_gregset_t; +#include <linux/compat.h> #include <linux/sched.h> /* for task_struct */ #include <asm/mmu_context.h> @@ -159,7 +163,7 @@ extern unsigned int vdso_enabled; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. 64-bit tasks are aligned to 4GB. */ -#define ELF_ET_DYN_BASE (is_32bit_task() ? \ +#define ELF_ET_DYN_BASE (is_compat_task() ? \ (STACK_TOP / 3 * 2) : \ (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) @@ -169,6 +173,10 @@ extern unsigned int vdso_enabled; extern unsigned long elf_hwcap; #define ELF_HWCAP (elf_hwcap) +/* Internal hardware capabilities, not exposed via elf */ + +extern unsigned long int_hwcap; + /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in intent than poking at uname or /proc/cpuinfo. @@ -212,9 +220,9 @@ do { \ * of up to 1GB. For 31-bit processes the virtual address space is limited, * use no alignment and limit the randomization to 8MB. */ -#define BRK_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ffffUL) -#define MMAP_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ff80UL) -#define MMAP_ALIGN_MASK (is_32bit_task() ? 0 : 0x7fUL) +#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ffffUL) +#define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL) +#define MMAP_ALIGN_MASK (is_compat_task() ? 
0 : 0x7fUL) #define STACK_RND_MASK MMAP_RND_MASK #define ARCH_DLINFO \ @@ -229,6 +237,4 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 int arch_setup_additional_pages(struct linux_binprm *, int); -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); - #endif diff --git a/arch/s390/include/asm/facilities_src.h b/arch/s390/include/asm/facilities_src.h new file mode 100644 index 000000000000..4917728e5828 --- /dev/null +++ b/arch/s390/include/asm/facilities_src.h @@ -0,0 +1,58 @@ +/* + * Copyright IBM Corp. 2015 + */ + +#ifndef S390_GEN_FACILITIES_C +#error "This file can only be included by gen_facilities.c" +#endif + +#include <linux/kconfig.h> + +struct facility_def { + char *name; + int *bits; +}; + +static struct facility_def facility_defs[] = { + { + /* + * FACILITIES_ALS contains the list of facilities that are + * required to run a kernel that is compiled e.g. with + * -march=<machine>. + */ + .name = "FACILITIES_ALS", + .bits = (int[]){ +#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES + 0, /* N3 instructions */ + 1, /* z/Arch mode installed */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES + 18, /* long displacement facility */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES + 7, /* stfle */ + 17, /* message security assist */ + 21, /* extended-immediate facility */ + 25, /* store clock fast */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + 27, /* mvcos */ + 32, /* compare and swap and store */ + 33, /* compare and swap and store 2 */ + 34, /* general extension facility */ + 35, /* execute extensions */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + 45, /* fast-BCR, etc. */ +#endif +#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES + 49, /* misc-instruction-extensions */ + 52, /* interlocked facility 2 */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES + 53, /* load-and-zero-rightmost-byte, etc. 
*/ +#endif + -1 /* END */ + } + }, +}; diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index 0aa6a7ed95a3..09b406db7529 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -7,6 +7,10 @@ #ifndef __ASM_FACILITY_H #define __ASM_FACILITY_H +#include <generated/facilities.h> + +#ifndef __ASSEMBLY__ + #include <linux/string.h> #include <linux/preempt.h> #include <asm/lowcore.h> @@ -30,6 +34,12 @@ static inline int __test_facility(unsigned long nr, void *facilities) */ static inline int test_facility(unsigned long nr) { + unsigned long facilities_als[] = { FACILITIES_ALS }; + + if (__builtin_constant_p(nr) && nr < sizeof(facilities_als) * 8) { + if (__test_facility(nr, &facilities_als)) + return 1; + } return __test_facility(nr, &S390_lowcore.stfle_fac_list); } @@ -44,10 +54,8 @@ static inline void stfle(u64 *stfle_fac_list, int size) preempt_disable(); asm volatile( - " .insn s,0xb2b10000,0(0)\n" /* stfl */ - "0:\n" - EX_TABLE(0b, 0b) - : "+m" (S390_lowcore.stfl_fac_list)); + " stfl 0(0)\n" + : "=m" (S390_lowcore.stfl_fac_list)); nr = 4; /* bytes stored by stfl */ memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); if (S390_lowcore.stfl_fac_list & 0x01000000) { @@ -64,4 +72,5 @@ static inline void stfle(u64 *stfle_fac_list, int size) preempt_enable(); } +#endif /* __ASSEMBLY__ */ #endif /* __ASM_FACILITY_H */ diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h index 2559b16da525..629c90865a07 100644 --- a/arch/s390/include/asm/fpu/internal.h +++ b/arch/s390/include/asm/fpu/internal.h @@ -12,21 +12,13 @@ #include <asm/ctl_reg.h> #include <asm/fpu/types.h> -static inline void save_vx_regs_safe(__vector128 *vxrs) +static inline void save_vx_regs(__vector128 *vxrs) { - unsigned long cr0, flags; - - flags = arch_local_irq_save(); - __ctl_store(cr0, 0, 0); - __ctl_set_bit(0, 17); - __ctl_set_bit(0, 18); asm volatile( " la 1,%0\n" " .word 0xe70f,0x1000,0x003e\n" /* vstm 
0,15,0(1) */ " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ : "=Q" (*(struct vx_array *) vxrs) : : "1"); - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); } static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) @@ -48,6 +40,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) { fpregs->pad = 0; + fpregs->fpc = fpu->fpc; if (MACHINE_HAS_VX) convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); else @@ -57,6 +50,7 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) { + fpu->fpc = fpregs->fpc; if (MACHINE_HAS_VX) convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); else diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 86634e71b69f..6fc44dca193e 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -87,14 +87,12 @@ struct ipl_parameter_block { * IPL validity flags */ extern u32 ipl_flags; -extern u32 dump_prefix_page; -struct dump_save_areas { - struct save_area_ext **areas; - int count; -}; - -extern struct dump_save_areas dump_save_areas; +struct save_area; +struct save_area * __init save_area_alloc(bool is_boot_cpu); +struct save_area * __init save_area_boot_cpu(void); +void __init save_area_add_regs(struct save_area *, void *regs); +void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs); extern void do_reipl(void); extern void do_halt(void); @@ -176,7 +174,7 @@ enum diag308_rc { extern int diag308(unsigned long subcode, void *addr); extern void diag308_reset(void); -extern void store_status(void); +extern void store_status(void (*fn)(void *), void *data); extern void lgr_info_log(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h index 16aa0c779e07..595a275c36f8 100644 --- a/arch/s390/include/asm/irqflags.h +++ 
b/arch/s390/include/asm/irqflags.h @@ -8,6 +8,8 @@ #include <linux/types.h> +#define ARCH_IRQ_ENABLED (3UL << (BITS_PER_LONG - 8)) + /* store then OR system mask. */ #define __arch_local_irq_stosm(__or) \ ({ \ @@ -54,14 +56,17 @@ static inline notrace void arch_local_irq_enable(void) __arch_local_irq_stosm(0x03); } +/* This only restores external and I/O interrupt state */ static inline notrace void arch_local_irq_restore(unsigned long flags) { - __arch_local_irq_ssm(flags); + /* only disabled->disabled and disabled->enabled is valid */ + if (flags & ARCH_IRQ_ENABLED) + arch_local_irq_enable(); } static inline notrace bool arch_irqs_disabled_flags(unsigned long flags) { - return !(flags & (3UL << (BITS_PER_LONG - 8))); + return !(flags & ARCH_IRQ_ENABLED); } static inline notrace bool arch_irqs_disabled(void) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index efaac2c3bb77..8959ebb6d2c9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -25,7 +25,9 @@ #include <asm/fpu/api.h> #include <asm/isc.h> -#define KVM_MAX_VCPUS 64 +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 +#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS #define KVM_USER_MEM_SLOTS 32 /* @@ -37,12 +39,41 @@ #define KVM_IRQCHIP_NUM_PINS 4096 #define KVM_HALT_POLL_NS_DEFAULT 0 +/* s390-specific vcpu->requests bit members */ +#define KVM_REQ_ENABLE_IBS 8 +#define KVM_REQ_DISABLE_IBS 9 + #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f -struct sca_entry { +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +} __packed; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +} __packed; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +} __packed; + +struct bsca_entry { __u8 reserved0; - __u8 sigp_ctrl; + union bsca_sigp_ctrl sigp_ctrl; __u16 
reserved[3]; __u64 sda; __u64 reserved2[2]; @@ -57,14 +88,22 @@ union ipte_control { }; }; -struct sca_block { +struct bsca_block { union ipte_control ipte_control; __u64 reserved[5]; __u64 mcn; __u64 reserved2; - struct sca_entry cpu[64]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; } __attribute__((packed)); +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[7]; + __u64 mcn[4]; + __u64 reserved2[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +} __packed; + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -182,7 +221,8 @@ struct kvm_s390_sie_block { __u64 pp; /* 0x01de */ __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ - __u8 reserved1f0[16]; /* 0x01f0 */ + __u64 riccbd; /* 0x01f0 */ + __u8 reserved1f8[8]; /* 0x01f8 */ } __attribute__((packed)); struct kvm_s390_itdb { @@ -506,7 +546,6 @@ struct kvm_vcpu_arch { struct kvm_s390_sie_block *sie_block; unsigned int host_acrs[NUM_ACRS]; struct fpu host_fpregs; - struct fpu guest_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct kvm_s390_pgm_info pgm; @@ -585,11 +624,14 @@ struct kvm_s390_crypto_cb { }; struct kvm_arch{ - struct sca_block *sca; + void *sca; + int use_esca; + rwlock_t sca_lock; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; struct gmap *gmap; + unsigned long mem_limit; int css_support; int use_irqchip; int use_cmma; diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 7aa799134a11..a52b6cca873d 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -37,7 +37,7 @@ static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) regs->psw.addr = ip; } #else -#error Live patching support is disabled; check CONFIG_LIVEPATCH +#error Include linux/livepatch.h, not asm/livepatch.h #endif #endif diff --git a/arch/s390/include/asm/lowcore.h 
b/arch/s390/include/asm/lowcore.h index afe1cfebf1a4..d79ba7cf75b0 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -16,28 +16,7 @@ #define LC_ORDER 1 #define LC_PAGES 2 -struct save_area { - u64 fp_regs[16]; - u64 gp_regs[16]; - u8 psw[16]; - u8 pad1[8]; - u32 pref_reg; - u32 fp_ctrl_reg; - u8 pad2[4]; - u32 tod_reg; - u64 timer; - u64 clk_cmp; - u8 pad3[8]; - u32 acc_regs[16]; - u64 ctrl_regs[16]; -} __packed; - -struct save_area_ext { - struct save_area sa; - __vector128 vx_regs[32]; -}; - -struct _lowcore { +struct lowcore { __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */ __u32 ipl_parmblock_ptr; /* 0x0014 */ __u8 pad_0x0018[0x0080-0x0018]; /* 0x0018 */ @@ -204,9 +183,9 @@ struct _lowcore { __u8 vector_save_area[1024]; /* 0x1c00 */ } __packed; -#define S390_lowcore (*((struct _lowcore *) 0)) +#define S390_lowcore (*((struct lowcore *) 0)) -extern struct _lowcore *lowcore_ptr[]; +extern struct lowcore *lowcore_ptr[]; static inline void set_prefix(__u32 address) { diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index fb1b93ea3e3f..e485817f7b1a 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -15,17 +15,25 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + spin_lock_init(&mm->context.list_lock); + INIT_LIST_HEAD(&mm->context.pgtable_list); + INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.attach_count, 0); mm->context.flush_mm = 0; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; - mm->context.asce_bits |= _ASCE_TYPE_REGION3; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; #endif - mm->context.asce_limit = STACK_TOP_MAX; + if (mm->context.asce_limit == 0) { + /* context created by exec, set asce limit to 4TB */ + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + 
_ASCE_USER_BITS | _ASCE_TYPE_REGION3; + mm->context.asce_limit = STACK_TOP_MAX; + } else if (mm->context.asce_limit == (1UL << 31)) { + mm_inc_nr_pmds(mm); + } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; } @@ -111,8 +119,6 @@ static inline void activate_mm(struct mm_struct *prev, static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - if (oldmm->context.asce_limit < mm->context.asce_limit) - crst_table_downgrade(mm, oldmm->context.asce_limit); } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 295f2c4f1c96..943475382d51 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -38,7 +38,7 @@ u32 os_info_csum(struct os_info *os_info); #ifdef CONFIG_CRASH_DUMP void *os_info_old_entry(int nr, unsigned long *size); -int copy_from_oldmem(void *dest, void *src, size_t count); +int copy_oldmem_kernel(void *dst, void *src, size_t count); #else static inline void *os_info_old_entry(int nr, unsigned long *size) { diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 1aac41e83ea1..92df3eb8d14e 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -23,6 +23,8 @@ enum zpci_ioat_dtype { #define ZPCI_IOTA_FS_2G 2 #define ZPCI_KEY (PAGE_DEFAULT_KEY << 5) +#define ZPCI_TABLE_SIZE_RT (1UL << 42) + #define ZPCI_IOTA_STO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST) #define ZPCI_IOTA_RTTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT) #define ZPCI_IOTA_RSTO_FLAG (ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS) diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 1a9a98de5bde..69aa18be61af 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -8,10 +8,13 @@ #include <asm/pci_insn.h> /* I/O Map */ -#define ZPCI_IOMAP_MAX_ENTRIES 0x7fff -#define ZPCI_IOMAP_ADDR_BASE 
0x8000000000000000ULL -#define ZPCI_IOMAP_ADDR_IDX_MASK 0x7fff000000000000ULL -#define ZPCI_IOMAP_ADDR_OFF_MASK 0x0000ffffffffffffULL +#define ZPCI_IOMAP_SHIFT 48 +#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL +#define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1) +#define ZPCI_IOMAP_MAX_ENTRIES \ + ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT)) +#define ZPCI_IOMAP_ADDR_IDX_MASK \ + (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE) struct zpci_iomap_entry { u32 fh; @@ -21,8 +24,9 @@ struct zpci_iomap_entry { extern struct zpci_iomap_entry *zpci_iomap_start; +#define ZPCI_ADDR(idx) (ZPCI_IOMAP_ADDR_BASE | ((u64) idx << ZPCI_IOMAP_SHIFT)) #define ZPCI_IDX(addr) \ - (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48) + (((__force u64) addr & ZPCI_IOMAP_ADDR_IDX_MASK) >> ZPCI_IOMAP_SHIFT) #define ZPCI_OFFSET(addr) \ ((__force u64) addr & ZPCI_IOMAP_ADDR_OFF_MASK) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 7b7858f158b4..d7cc79fb6191 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -100,12 +100,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - spin_lock_init(&mm->context.list_lock); - INIT_LIST_HEAD(&mm->context.pgtable_list); - INIT_LIST_HEAD(&mm->context.gmap_list); - return (pgd_t *) crst_table_alloc(mm); + unsigned long *table = crst_table_alloc(mm); + + if (!table) + return NULL; + if (mm->context.asce_limit == (1UL << 31)) { + /* Forking a compat process with 2 page table levels */ + if (!pgtable_pmd_page_ctor(virt_to_page(table))) { + crst_table_free(mm, table); + return NULL; + } + } + return (pgd_t *) table; +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + if (mm->context.asce_limit == (1UL << 31)) + pgtable_pmd_page_dtor(virt_to_page(pgd)); + crst_table_free(mm, (unsigned long *) pgd); } -#define pgd_free(mm, pgd) 
crst_table_free(mm, (unsigned long *) pgd) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 024f85f947ae..64ead8091248 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -286,7 +286,6 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ -#define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ #define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ @@ -318,8 +317,6 @@ static inline int is_module_addr(void *addr) * SW-bits: y young, d dirty, r read, w write */ -#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ - /* Page status table bits for virtualization */ #define PGSTE_ACC_BITS 0xf000000000000000UL #define PGSTE_FP_BIT 0x0800000000000000UL @@ -523,10 +520,6 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -extern void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long addr, pmd_t *pmdp); - #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, @@ -1424,8 +1417,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) if (pmd_large(pmd)) { pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | - _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT | - _SEGMENT_ENTRY_SOFT_DIRTY; + _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY; pmd_val(pmd) |= massage_pgprot_pmd(newprot); if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; @@ -1533,12 +1525,6 @@ extern void pgtable_trans_huge_deposit(struct 
mm_struct *mm, pmd_t *pmdp, #define __HAVE_ARCH_PGTABLE_WITHDRAW extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); -static inline int pmd_trans_splitting(pmd_t pmd) -{ - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) && - (pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT); -} - static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b16c3d0a1b9f..1c4fe129486d 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -18,12 +18,14 @@ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_FPU 3 /* restore FPU registers */ #define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */ +#define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING) #define _CIF_ASCE _BITUL(CIF_ASCE) #define _CIF_NOHZ_DELAY _BITUL(CIF_NOHZ_DELAY) #define _CIF_FPU _BITUL(CIF_FPU) #define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ) +#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT) #ifndef __ASSEMBLY__ @@ -52,6 +54,16 @@ static inline int test_cpu_flag(int flag) return !!(S390_lowcore.cpu_flags & (1UL << flag)); } +/* + * Test CIF flag of another CPU. The caller needs to ensure that + * CPU hotplug can not happen, e.g. by disabling preemption. 
+ */ +static inline int test_cpu_flag_of(int flag, int cpu) +{ + struct lowcore *lc = lowcore_ptr[cpu]; + return !!(lc->cpu_flags & (1UL << flag)); +} + #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) /* @@ -154,14 +166,14 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS]; */ #define start_thread(regs, new_psw, new_stackp) do { \ regs->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->psw.addr = new_psw; \ regs->gprs[15] = new_stackp; \ execve_tail(); \ } while (0) #define start_thread31(regs, new_psw, new_stackp) do { \ regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->psw.addr = new_psw; \ regs->gprs[15] = new_stackp; \ crst_table_downgrade(current->mm, 1UL << 31); \ execve_tail(); \ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 37cbc50947f2..99bc456cc26a 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -24,25 +24,25 @@ PSW_MASK_PSTATE | PSW_ASC_PRIMARY) struct psw_bits { - unsigned long long : 1; - unsigned long long r : 1; /* PER-Mask */ - unsigned long long : 3; - unsigned long long t : 1; /* DAT Mode */ - unsigned long long i : 1; /* Input/Output Mask */ - unsigned long long e : 1; /* External Mask */ - unsigned long long key : 4; /* PSW Key */ - unsigned long long : 1; - unsigned long long m : 1; /* Machine-Check Mask */ - unsigned long long w : 1; /* Wait State */ - unsigned long long p : 1; /* Problem State */ - unsigned long long as : 2; /* Address Space Control */ - unsigned long long cc : 2; /* Condition Code */ - unsigned long long pm : 4; /* Program Mask */ - unsigned long long ri : 1; /* Runtime Instrumentation */ - unsigned long long : 6; - unsigned long long eaba : 2; /* Addressing Mode */ - unsigned long long : 31; - unsigned long long ia : 64;/* Instruction Address */ + unsigned long : 1; + unsigned long r : 1; /* PER-Mask */ + unsigned long : 3; + 
unsigned long t : 1; /* DAT Mode */ + unsigned long i : 1; /* Input/Output Mask */ + unsigned long e : 1; /* External Mask */ + unsigned long key : 4; /* PSW Key */ + unsigned long : 1; + unsigned long m : 1; /* Machine-Check Mask */ + unsigned long w : 1; /* Wait State */ + unsigned long p : 1; /* Problem State */ + unsigned long as : 2; /* Address Space Control */ + unsigned long cc : 2; /* Condition Code */ + unsigned long pm : 4; /* Program Mask */ + unsigned long ri : 1; /* Runtime Instrumentation */ + unsigned long : 6; + unsigned long eaba : 2; /* Addressing Mode */ + unsigned long : 31; + unsigned long ia : 64; /* Instruction Address */ }; enum { @@ -149,7 +149,7 @@ static inline int test_pt_regs_flag(struct pt_regs *regs, int flag) #define arch_has_block_step() (1) #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) -#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) +#define instruction_pointer(regs) ((regs)->psw.addr) #define user_stack_pointer(regs)((regs)->gprs[15]) #define profile_pc(regs) instruction_pointer(regs) @@ -161,7 +161,7 @@ static inline long regs_return_value(struct pt_regs *regs) static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { - regs->psw.addr = val | PSW_ADDR_AMODE; + regs->psw.addr = val; } int regs_query_register_offset(const char *name); @@ -171,7 +171,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { - return regs->gprs[15] & PSW_ADDR_INSN; + return regs->gprs[15]; } #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h index 72786067b300..fe11fa88a0e0 100644 --- a/arch/s390/include/asm/reset.h +++ b/arch/s390/include/asm/reset.h @@ -15,6 +15,5 @@ struct reset_call { extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); -extern void 
s390_reset_system(void (*fn_pre)(void), - void (*fn_post)(void *), void *data); +extern void s390_reset_system(void); #endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 821dde5f425d..bab456be9a4f 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -29,7 +29,10 @@ struct sclp_ipl_info { struct sclp_core_entry { u8 core_id; - u8 reserved0[2]; + u8 reserved0; + u8 : 4; + u8 sief2 : 1; + u8 : 3; u8 : 3; u8 siif : 1; u8 sigpif : 1; @@ -53,16 +56,19 @@ struct sclp_info { unsigned char has_sigpif : 1; unsigned char has_core_type : 1; unsigned char has_sprp : 1; + unsigned char has_hvs : 1; + unsigned char has_esca : 1; + unsigned char has_sief2 : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; unsigned int mtid_prev; - unsigned long long rzm; - unsigned long long rnmax; - unsigned long long hamax; + unsigned long rzm; + unsigned long rnmax; + unsigned long hamax; unsigned int max_cores; unsigned long hsa_size; - unsigned long long facilities; + unsigned long facilities; }; extern struct sclp_info sclp; @@ -77,8 +83,9 @@ int sclp_chp_read_info(struct sclp_chp_info *info); void sclp_get_ipl_info(struct sclp_ipl_info *info); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); -int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); +int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); +int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); void sclp_early_detect(void); -int _sclp_print_early(const char *); +void _sclp_print_early(const char *); #endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 23537661da0e..69837225119e 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -12,27 +12,24 @@ #define PARMAREA 0x10400 /* - * Machine features detected in head.S + * Machine features detected in early.c */ #define MACHINE_FLAG_VM 
_BITUL(0) -#define MACHINE_FLAG_IEEE _BITUL(1) -#define MACHINE_FLAG_CSP _BITUL(2) -#define MACHINE_FLAG_MVPG _BITUL(3) -#define MACHINE_FLAG_DIAG44 _BITUL(4) +#define MACHINE_FLAG_KVM _BITUL(1) +#define MACHINE_FLAG_LPAR _BITUL(2) +#define MACHINE_FLAG_DIAG9C _BITUL(3) +#define MACHINE_FLAG_ESOP _BITUL(4) #define MACHINE_FLAG_IDTE _BITUL(5) -#define MACHINE_FLAG_DIAG9C _BITUL(6) -#define MACHINE_FLAG_KVM _BITUL(8) -#define MACHINE_FLAG_ESOP _BITUL(9) -#define MACHINE_FLAG_EDAT1 _BITUL(10) -#define MACHINE_FLAG_EDAT2 _BITUL(11) -#define MACHINE_FLAG_LPAR _BITUL(12) -#define MACHINE_FLAG_LPP _BITUL(13) -#define MACHINE_FLAG_TOPOLOGY _BITUL(14) -#define MACHINE_FLAG_TE _BITUL(15) -#define MACHINE_FLAG_TLB_LC _BITUL(17) -#define MACHINE_FLAG_VX _BITUL(18) -#define MACHINE_FLAG_CAD _BITUL(19) +#define MACHINE_FLAG_DIAG44 _BITUL(6) +#define MACHINE_FLAG_EDAT1 _BITUL(7) +#define MACHINE_FLAG_EDAT2 _BITUL(8) +#define MACHINE_FLAG_LPP _BITUL(9) +#define MACHINE_FLAG_TOPOLOGY _BITUL(10) +#define MACHINE_FLAG_TE _BITUL(11) +#define MACHINE_FLAG_TLB_LC _BITUL(12) +#define MACHINE_FLAG_VX _BITUL(13) +#define MACHINE_FLAG_CAD _BITUL(14) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 5df26b11cf47..0cc383b9be7f 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -18,6 +18,7 @@ extern struct mutex smp_cpu_state_mutex; extern unsigned int smp_cpu_mt_shift; extern unsigned int smp_cpu_mtid; +extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); @@ -55,7 +56,6 @@ static inline int smp_store_status(int cpu) { return 0; } static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } static inline void smp_fill_possible_mask(void) { } -static inline void smp_save_dump_cpus(void) { } #endif /* CONFIG_SMP */ diff --git 
a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index f7054a892d9e..2728114d5484 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -56,7 +56,12 @@ struct sysinfo_1_2_2 { char format; char reserved_0[1]; unsigned short acc_offset; - char reserved_1[20]; + unsigned char mt_installed :1; + unsigned char :2; + unsigned char mt_stid :5; + unsigned char :3; + unsigned char mt_gtid :5; + char reserved_1[18]; unsigned int nominal_cap; unsigned int secondary_cap; unsigned int capability; @@ -92,9 +97,13 @@ struct sysinfo_2_2_2 { char name[8]; unsigned int caf; char reserved_2[8]; - unsigned char mt_installed; - unsigned char mt_general; - unsigned char mt_psmtid; + unsigned char mt_installed :1; + unsigned char :2; + unsigned char mt_stid :5; + unsigned char :3; + unsigned char mt_gtid :5; + unsigned char :3; + unsigned char mt_psmtid :5; char reserved_3[5]; unsigned short cpus_dedicated; unsigned short cpus_shared; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 692b9247c019..2fffc2c27581 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -96,6 +96,4 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_31BIT _BITUL(TIF_31BIT) #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) -#define is_32bit_task() (test_thread_flag(TIF_31BIT)) - #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 94fc55fc72ce..6b53962e807e 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -7,7 +7,7 @@ struct sysinfo_15_1_x; struct cpu; -#ifdef CONFIG_SCHED_BOOK +#ifdef CONFIG_SCHED_TOPOLOGY struct cpu_topology_s390 { unsigned short thread_id; @@ -40,13 +40,13 @@ void store_topology(struct sysinfo_15_1_x *info); void topology_expect_change(void); const struct cpumask *cpu_coregroup_mask(int cpu); -#else /* CONFIG_SCHED_BOOK */ +#else /* 
CONFIG_SCHED_TOPOLOGY */ static inline void topology_schedule_update(void) { } static inline int topology_cpu_init(struct cpu *cpu) { return 0; } static inline void topology_expect_change(void) { } -#endif /* CONFIG_SCHED_BOOK */ +#endif /* CONFIG_SCHED_TOPOLOGY */ #define POLARIZATION_UNKNOWN (-1) #define POLARIZATION_HRZ (0) diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 787acd4f9668..d0a2dbf2433d 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -38,12 +38,14 @@ struct vdso_data { struct vdso_per_cpu_data { __u64 ectg_timer_base; __u64 ectg_user_time; + __u32 cpu_nr; + __u32 node_id; }; extern struct vdso_data *vdso_data; -int vdso_alloc_per_cpu(struct _lowcore *lowcore); -void vdso_free_per_cpu(struct _lowcore *lowcore); +int vdso_alloc_per_cpu(struct lowcore *lowcore); +void vdso_free_per_cpu(struct lowcore *lowcore); #endif /* __ASSEMBLY__ */ #endif /* __S390_VDSO_H__ */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index ef1a5fcc6c66..fe84bd5fe7ce 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -66,6 +66,8 @@ struct kvm_s390_io_adapter_req { #define KVM_S390_VM_MEM_CLR_CMMA 1 #define KVM_S390_VM_MEM_LIMIT_SIZE 2 +#define KVM_S390_NO_MEM_LIMIT U64_MAX + /* kvm attributes for KVM_S390_VM_TOD */ #define KVM_S390_VM_TOD_LOW 0 #define KVM_S390_VM_TOD_HIGH 1 @@ -151,6 +153,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_ARCH0 (1UL << 4) #define KVM_SYNC_PFAULT (1UL << 5) #define KVM_SYNC_VRS (1UL << 6) +#define KVM_SYNC_RICCB (1UL << 7) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -168,6 +171,8 @@ struct kvm_sync_regs { __u64 vrs[32][2]; /* vector registers */ __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* only valid with vector registers */ + __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 riccb[64]; /* runtime instrumentation 
controls block */ }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 296942d56e6a..d02e89d14fef 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -91,4 +91,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 34ec202472c6..ab3aa6875a59 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -310,7 +310,8 @@ #define __NR_recvmsg 372 #define __NR_shutdown 373 #define __NR_mlock2 374 -#define NR_syscalls 375 +#define __NR_copy_file_range 375 +#define NR_syscalls 376 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index dc167a23b920..2f5586ab8a6a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -34,8 +34,10 @@ CFLAGS_sysinfo.o += -w # CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE) ifneq ($(CC_FLAGS_MARCH),-march=z900) -CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH) -CFLAGS_sclp.o += -march=z900 +CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH) +CFLAGS_sclp.o += -march=z900 +AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) +AFLAGS_head.o += -march=z900 endif GCOV_PROFILE_sclp.o := n @@ -50,7 +52,7 @@ extra-y += head.o head64.o vmlinux.lds obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SCHED_BOOK) += topology.o +obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 9cd248f637c7..53bbc9e8b281 100644 --- a/arch/s390/kernel/asm-offsets.c +++ 
b/arch/s390/kernel/asm-offsets.c @@ -80,6 +80,8 @@ int main(void) OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); + OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr); + OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id); BLANK(); /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); @@ -97,95 +99,96 @@ int main(void) OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit); BLANK(); /* hardware defined lowcore locations 0x000 - 0x1ff */ - OFFSET(__LC_EXT_PARAMS, _lowcore, ext_params); - OFFSET(__LC_EXT_CPU_ADDR, _lowcore, ext_cpu_addr); - OFFSET(__LC_EXT_INT_CODE, _lowcore, ext_int_code); - OFFSET(__LC_SVC_ILC, _lowcore, svc_ilc); - OFFSET(__LC_SVC_INT_CODE, _lowcore, svc_code); - OFFSET(__LC_PGM_ILC, _lowcore, pgm_ilc); - OFFSET(__LC_PGM_INT_CODE, _lowcore, pgm_code); - OFFSET(__LC_DATA_EXC_CODE, _lowcore, data_exc_code); - OFFSET(__LC_MON_CLASS_NR, _lowcore, mon_class_num); - OFFSET(__LC_PER_CODE, _lowcore, per_code); - OFFSET(__LC_PER_ATMID, _lowcore, per_atmid); - OFFSET(__LC_PER_ADDRESS, _lowcore, per_address); - OFFSET(__LC_EXC_ACCESS_ID, _lowcore, exc_access_id); - OFFSET(__LC_PER_ACCESS_ID, _lowcore, per_access_id); - OFFSET(__LC_OP_ACCESS_ID, _lowcore, op_access_id); - OFFSET(__LC_AR_MODE_ID, _lowcore, ar_mode_id); - OFFSET(__LC_TRANS_EXC_CODE, _lowcore, trans_exc_code); - OFFSET(__LC_MON_CODE, _lowcore, monitor_code); - OFFSET(__LC_SUBCHANNEL_ID, _lowcore, subchannel_id); - OFFSET(__LC_SUBCHANNEL_NR, _lowcore, subchannel_nr); - OFFSET(__LC_IO_INT_PARM, _lowcore, io_int_parm); - OFFSET(__LC_IO_INT_WORD, _lowcore, io_int_word); - OFFSET(__LC_STFL_FAC_LIST, _lowcore, stfl_fac_list); - OFFSET(__LC_MCCK_CODE, _lowcore, mcck_interruption_code); - OFFSET(__LC_MCCK_FAIL_STOR_ADDR, _lowcore, failing_storage_address); - OFFSET(__LC_LAST_BREAK, _lowcore, breaking_event_addr); - OFFSET(__LC_RST_OLD_PSW, _lowcore, 
restart_old_psw); - OFFSET(__LC_EXT_OLD_PSW, _lowcore, external_old_psw); - OFFSET(__LC_SVC_OLD_PSW, _lowcore, svc_old_psw); - OFFSET(__LC_PGM_OLD_PSW, _lowcore, program_old_psw); - OFFSET(__LC_MCK_OLD_PSW, _lowcore, mcck_old_psw); - OFFSET(__LC_IO_OLD_PSW, _lowcore, io_old_psw); - OFFSET(__LC_RST_NEW_PSW, _lowcore, restart_psw); - OFFSET(__LC_EXT_NEW_PSW, _lowcore, external_new_psw); - OFFSET(__LC_SVC_NEW_PSW, _lowcore, svc_new_psw); - OFFSET(__LC_PGM_NEW_PSW, _lowcore, program_new_psw); - OFFSET(__LC_MCK_NEW_PSW, _lowcore, mcck_new_psw); - OFFSET(__LC_IO_NEW_PSW, _lowcore, io_new_psw); + OFFSET(__LC_EXT_PARAMS, lowcore, ext_params); + OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); + OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); + OFFSET(__LC_SVC_ILC, lowcore, svc_ilc); + OFFSET(__LC_SVC_INT_CODE, lowcore, svc_code); + OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); + OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); + OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); + OFFSET(__LC_PER_CODE, lowcore, per_code); + OFFSET(__LC_PER_ATMID, lowcore, per_atmid); + OFFSET(__LC_PER_ADDRESS, lowcore, per_address); + OFFSET(__LC_EXC_ACCESS_ID, lowcore, exc_access_id); + OFFSET(__LC_PER_ACCESS_ID, lowcore, per_access_id); + OFFSET(__LC_OP_ACCESS_ID, lowcore, op_access_id); + OFFSET(__LC_AR_MODE_ID, lowcore, ar_mode_id); + OFFSET(__LC_TRANS_EXC_CODE, lowcore, trans_exc_code); + OFFSET(__LC_MON_CODE, lowcore, monitor_code); + OFFSET(__LC_SUBCHANNEL_ID, lowcore, subchannel_id); + OFFSET(__LC_SUBCHANNEL_NR, lowcore, subchannel_nr); + OFFSET(__LC_IO_INT_PARM, lowcore, io_int_parm); + OFFSET(__LC_IO_INT_WORD, lowcore, io_int_word); + OFFSET(__LC_STFL_FAC_LIST, lowcore, stfl_fac_list); + OFFSET(__LC_STFLE_FAC_LIST, lowcore, stfle_fac_list); + OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code); + OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); + OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr); + 
OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); + OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw); + OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw); + OFFSET(__LC_PGM_OLD_PSW, lowcore, program_old_psw); + OFFSET(__LC_MCK_OLD_PSW, lowcore, mcck_old_psw); + OFFSET(__LC_IO_OLD_PSW, lowcore, io_old_psw); + OFFSET(__LC_RST_NEW_PSW, lowcore, restart_psw); + OFFSET(__LC_EXT_NEW_PSW, lowcore, external_new_psw); + OFFSET(__LC_SVC_NEW_PSW, lowcore, svc_new_psw); + OFFSET(__LC_PGM_NEW_PSW, lowcore, program_new_psw); + OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw); + OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw); /* software defined lowcore locations 0x200 - 0xdff*/ - OFFSET(__LC_SAVE_AREA_SYNC, _lowcore, save_area_sync); - OFFSET(__LC_SAVE_AREA_ASYNC, _lowcore, save_area_async); - OFFSET(__LC_SAVE_AREA_RESTART, _lowcore, save_area_restart); - OFFSET(__LC_CPU_FLAGS, _lowcore, cpu_flags); - OFFSET(__LC_RETURN_PSW, _lowcore, return_psw); - OFFSET(__LC_RETURN_MCCK_PSW, _lowcore, return_mcck_psw); - OFFSET(__LC_SYNC_ENTER_TIMER, _lowcore, sync_enter_timer); - OFFSET(__LC_ASYNC_ENTER_TIMER, _lowcore, async_enter_timer); - OFFSET(__LC_MCCK_ENTER_TIMER, _lowcore, mcck_enter_timer); - OFFSET(__LC_EXIT_TIMER, _lowcore, exit_timer); - OFFSET(__LC_USER_TIMER, _lowcore, user_timer); - OFFSET(__LC_SYSTEM_TIMER, _lowcore, system_timer); - OFFSET(__LC_STEAL_TIMER, _lowcore, steal_timer); - OFFSET(__LC_LAST_UPDATE_TIMER, _lowcore, last_update_timer); - OFFSET(__LC_LAST_UPDATE_CLOCK, _lowcore, last_update_clock); - OFFSET(__LC_INT_CLOCK, _lowcore, int_clock); - OFFSET(__LC_MCCK_CLOCK, _lowcore, mcck_clock); - OFFSET(__LC_CURRENT, _lowcore, current_task); - OFFSET(__LC_THREAD_INFO, _lowcore, thread_info); - OFFSET(__LC_KERNEL_STACK, _lowcore, kernel_stack); - OFFSET(__LC_ASYNC_STACK, _lowcore, async_stack); - OFFSET(__LC_PANIC_STACK, _lowcore, panic_stack); - OFFSET(__LC_RESTART_STACK, _lowcore, restart_stack); - OFFSET(__LC_RESTART_FN, _lowcore, restart_fn); - 
OFFSET(__LC_RESTART_DATA, _lowcore, restart_data); - OFFSET(__LC_RESTART_SOURCE, _lowcore, restart_source); - OFFSET(__LC_USER_ASCE, _lowcore, user_asce); - OFFSET(__LC_LPP, _lowcore, lpp); - OFFSET(__LC_CURRENT_PID, _lowcore, current_pid); - OFFSET(__LC_PERCPU_OFFSET, _lowcore, percpu_offset); - OFFSET(__LC_VDSO_PER_CPU, _lowcore, vdso_per_cpu_data); - OFFSET(__LC_MACHINE_FLAGS, _lowcore, machine_flags); - OFFSET(__LC_GMAP, _lowcore, gmap); - OFFSET(__LC_PASTE, _lowcore, paste); + OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync); + OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async); + OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart); + OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); + OFFSET(__LC_RETURN_PSW, lowcore, return_psw); + OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); + OFFSET(__LC_SYNC_ENTER_TIMER, lowcore, sync_enter_timer); + OFFSET(__LC_ASYNC_ENTER_TIMER, lowcore, async_enter_timer); + OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer); + OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer); + OFFSET(__LC_USER_TIMER, lowcore, user_timer); + OFFSET(__LC_SYSTEM_TIMER, lowcore, system_timer); + OFFSET(__LC_STEAL_TIMER, lowcore, steal_timer); + OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); + OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); + OFFSET(__LC_INT_CLOCK, lowcore, int_clock); + OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock); + OFFSET(__LC_CURRENT, lowcore, current_task); + OFFSET(__LC_THREAD_INFO, lowcore, thread_info); + OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); + OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); + OFFSET(__LC_PANIC_STACK, lowcore, panic_stack); + OFFSET(__LC_RESTART_STACK, lowcore, restart_stack); + OFFSET(__LC_RESTART_FN, lowcore, restart_fn); + OFFSET(__LC_RESTART_DATA, lowcore, restart_data); + OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_USER_ASCE, lowcore, user_asce); + OFFSET(__LC_LPP, lowcore, lpp); + OFFSET(__LC_CURRENT_PID, lowcore, current_pid); 
+ OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); + OFFSET(__LC_VDSO_PER_CPU, lowcore, vdso_per_cpu_data); + OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); + OFFSET(__LC_GMAP, lowcore, gmap); + OFFSET(__LC_PASTE, lowcore, paste); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ - OFFSET(__LC_DUMP_REIPL, _lowcore, ipib); + OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ - OFFSET(__LC_VX_SAVE_AREA_ADDR, _lowcore, vector_save_area_addr); - OFFSET(__LC_EXT_PARAMS2, _lowcore, ext_params2); - OFFSET(SAVE_AREA_BASE, _lowcore, floating_pt_save_area); - OFFSET(__LC_FPREGS_SAVE_AREA, _lowcore, floating_pt_save_area); - OFFSET(__LC_GPREGS_SAVE_AREA, _lowcore, gpregs_save_area); - OFFSET(__LC_PSW_SAVE_AREA, _lowcore, psw_save_area); - OFFSET(__LC_PREFIX_SAVE_AREA, _lowcore, prefixreg_save_area); - OFFSET(__LC_FP_CREG_SAVE_AREA, _lowcore, fpt_creg_save_area); - OFFSET(__LC_CPU_TIMER_SAVE_AREA, _lowcore, cpu_timer_save_area); - OFFSET(__LC_CLOCK_COMP_SAVE_AREA, _lowcore, clock_comp_save_area); - OFFSET(__LC_AREGS_SAVE_AREA, _lowcore, access_regs_save_area); - OFFSET(__LC_CREGS_SAVE_AREA, _lowcore, cregs_save_area); - OFFSET(__LC_PGM_TDB, _lowcore, pgm_tdb); + OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); + OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); + OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); + OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); + OFFSET(__LC_PSW_SAVE_AREA, lowcore, psw_save_area); + OFFSET(__LC_PREFIX_SAVE_AREA, lowcore, prefixreg_save_area); + OFFSET(__LC_FP_CREG_SAVE_AREA, lowcore, fpt_creg_save_area); + OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area); + OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area); + OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area); + OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area); + OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area); + 
OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb); BLANK(); /* gmap/sie offsets */ OFFSET(__GMAP_ASCE, gmap, asce); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 66c94417c0ba..4af60374eba0 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -271,7 +271,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs, /* Restore high gprs from signal stack */ if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, - sizeof(&sregs_ext->gprs_high))) + sizeof(sregs_ext->gprs_high))) return -EFAULT; for (i = 0; i < NUM_GPRS; i++) *(__u32 *)&regs->gprs[i] = gprs_high[i]; diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index fac4eeddef91..ae2cda5eee5a 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -177,3 +177,4 @@ COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len); COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); +COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 171e09bb8ea2..3986c9f62191 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/bootmem.h> #include <linux/elf.h> +#include <asm/asm-offsets.h> #include <linux/memblock.h> #include <asm/os_info.h> #include <asm/elf.h> @@ -32,7 +33,82 @@ static struct memblock_type oldmem_type = { .regions = &oldmem_region, }; -struct dump_save_areas dump_save_areas; +struct save_area { + struct list_head list; + u64 psw[2]; + u64
ctrs[16]; + u64 gprs[16]; + u32 acrs[16]; + u64 fprs[16]; + u32 fpc; + u32 prefix; + u64 todpreg; + u64 timer; + u64 todcmp; + u64 vxrs_low[16]; + __vector128 vxrs_high[16]; +}; + +static LIST_HEAD(dump_save_areas); + +/* + * Allocate a save area + */ +struct save_area * __init save_area_alloc(bool is_boot_cpu) +{ + struct save_area *sa; + + sa = (void *) memblock_alloc(sizeof(*sa), 8); + if (is_boot_cpu) + list_add(&sa->list, &dump_save_areas); + else + list_add_tail(&sa->list, &dump_save_areas); + return sa; +} + +/* + * Return the address of the save area for the boot CPU + */ +struct save_area * __init save_area_boot_cpu(void) +{ + if (list_empty(&dump_save_areas)) + return NULL; + return list_first_entry(&dump_save_areas, struct save_area, list); +} + +/* + * Copy CPU registers into the save area + */ +void __init save_area_add_regs(struct save_area *sa, void *regs) +{ + struct lowcore *lc; + + lc = (struct lowcore *)(regs - __LC_FPREGS_SAVE_AREA); + memcpy(&sa->psw, &lc->psw_save_area, sizeof(sa->psw)); + memcpy(&sa->ctrs, &lc->cregs_save_area, sizeof(sa->ctrs)); + memcpy(&sa->gprs, &lc->gpregs_save_area, sizeof(sa->gprs)); + memcpy(&sa->acrs, &lc->access_regs_save_area, sizeof(sa->acrs)); + memcpy(&sa->fprs, &lc->floating_pt_save_area, sizeof(sa->fprs)); + memcpy(&sa->fpc, &lc->fpt_creg_save_area, sizeof(sa->fpc)); + memcpy(&sa->prefix, &lc->prefixreg_save_area, sizeof(sa->prefix)); + memcpy(&sa->todpreg, &lc->tod_progreg_save_area, sizeof(sa->todpreg)); + memcpy(&sa->timer, &lc->cpu_timer_save_area, sizeof(sa->timer)); + memcpy(&sa->todcmp, &lc->clock_comp_save_area, sizeof(sa->todcmp)); +} + +/* + * Copy vector registers into the save area + */ +void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs) +{ + int i; + + /* Copy lower halves of vector registers 0-15 */ + for (i = 0; i < 16; i++) + memcpy(&sa->vxrs_low[i], &vxrs[i].u[2], 8); + /* Copy vector registers 16-31 */ + memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); +} /* * 
Return physical address for virtual address @@ -51,79 +127,85 @@ static inline void *load_real_addr(void *addr) } /* - * Copy real to virtual or real memory - */ -static int copy_from_realmem(void *dest, void *src, size_t count) -{ - unsigned long size; - - if (!count) - return 0; - if (!is_vmalloc_or_module_addr(dest)) - return memcpy_real(dest, src, count); - do { - size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK)); - if (memcpy_real(load_real_addr(dest), src, size)) - return -EFAULT; - count -= size; - dest += size; - src += size; - } while (count); - return 0; -} - -/* - * Pointer to ELF header in new kernel + * Copy memory of the old, dumped system to a kernel space virtual address */ -static void *elfcorehdr_newmem; - -/* - * Copy one page from zfcpdump "oldmem" - * - * For pages below HSA size memory from the HSA is copied. Otherwise - * real memory copy is used. - */ -static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, - unsigned long src, int userbuf) +int copy_oldmem_kernel(void *dst, void *src, size_t count) { + unsigned long from, len; + void *ra; int rc; - if (src < sclp.hsa_size) { - rc = memcpy_hsa(buf, src, csize, userbuf); - } else { - if (userbuf) - rc = copy_to_user_real((void __force __user *) buf, - (void *) src, csize); - else - rc = memcpy_real(buf, (void *) src, csize); + while (count) { + from = __pa(src); + if (!OLDMEM_BASE && from < sclp.hsa_size) { + /* Copy from zfcpdump HSA area */ + len = min(count, sclp.hsa_size - from); + rc = memcpy_hsa_kernel(dst, from, len); + if (rc) + return rc; + } else { + /* Check for swapped kdump oldmem areas */ + if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) { + from -= OLDMEM_BASE; + len = min(count, OLDMEM_SIZE - from); + } else if (OLDMEM_BASE && from < OLDMEM_SIZE) { + len = min(count, OLDMEM_SIZE - from); + from += OLDMEM_BASE; + } else { + len = count; + } + if (is_vmalloc_or_module_addr(dst)) { + ra = load_real_addr(dst); + len = min(PAGE_SIZE - offset_in_page(ra), len); 
+ } else { + ra = dst; + } + if (memcpy_real(ra, (void *) from, len)) + return -EFAULT; + } + dst += len; + src += len; + count -= len; } - return rc ? rc : csize; + return 0; } /* - * Copy one page from kdump "oldmem" - * - * For the kdump reserved memory this functions performs a swap operation: - * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. - * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + * Copy memory of the old, dumped system to a user space virtual address */ -static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, - unsigned long src, int userbuf) - +int copy_oldmem_user(void __user *dst, void *src, size_t count) { + unsigned long from, len; int rc; - if (src < OLDMEM_SIZE) - src += OLDMEM_BASE; - else if (src > OLDMEM_BASE && - src < OLDMEM_BASE + OLDMEM_SIZE) - src -= OLDMEM_BASE; - if (userbuf) - rc = copy_to_user_real((void __force __user *) buf, - (void *) src, csize); - else - rc = copy_from_realmem(buf, (void *) src, csize); - return (rc == 0) ? 
rc : csize; + while (count) { + from = __pa(src); + if (!OLDMEM_BASE && from < sclp.hsa_size) { + /* Copy from zfcpdump HSA area */ + len = min(count, sclp.hsa_size - from); + rc = memcpy_hsa_user(dst, from, len); + if (rc) + return rc; + } else { + /* Check for swapped kdump oldmem areas */ + if (OLDMEM_BASE && from - OLDMEM_BASE < OLDMEM_SIZE) { + from -= OLDMEM_BASE; + len = min(count, OLDMEM_SIZE - from); + } else if (OLDMEM_BASE && from < OLDMEM_SIZE) { + len = min(count, OLDMEM_SIZE - from); + from += OLDMEM_BASE; + } else { + len = count; + } + rc = copy_to_user_real(dst, (void *) from, count); + if (rc) + return rc; + } + dst += len; + src += len; + count -= len; + } + return 0; } /* @@ -132,15 +214,17 @@ static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { - unsigned long src; + void *src; + int rc; if (!csize) return 0; - src = (pfn << PAGE_SHIFT) + offset; - if (OLDMEM_BASE) - return copy_oldmem_page_kdump(buf, csize, src, userbuf); + src = (void *) (pfn << PAGE_SHIFT) + offset; + if (userbuf) + rc = copy_oldmem_user((void __force __user *) buf, src, csize); else - return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf); + rc = copy_oldmem_kernel((void *) buf, src, csize); + return rc; } /* @@ -209,33 +293,6 @@ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, } /* - * Copy memory from old kernel - */ -int copy_from_oldmem(void *dest, void *src, size_t count) -{ - unsigned long copied = 0; - int rc; - - if (OLDMEM_BASE) { - if ((unsigned long) src < OLDMEM_SIZE) { - copied = min(count, OLDMEM_SIZE - (unsigned long) src); - rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied); - if (rc) - return rc; - } - } else { - unsigned long hsa_end = sclp.hsa_size; - if ((unsigned long) src < hsa_end) { - copied = min(count, hsa_end - (unsigned long) src); - rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); - if (rc) 
- return rc; - } - } - return copy_from_realmem(dest + copied, src + copied, count - copied); -} - -/* * Alloc memory and panic in case of ENOMEM */ static void *kzalloc_panic(int len) @@ -251,8 +308,8 @@ static void *kzalloc_panic(int len) /* * Initialize ELF note */ -static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, - const char *name) +static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) { Elf64_Nhdr *note; u64 len; @@ -272,136 +329,42 @@ static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, return PTR_ADD(buf, len); } -/* - * Initialize prstatus note - */ -static void *nt_prstatus(void *ptr, struct save_area *sa) +static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) { - struct elf_prstatus nt_prstatus; - static int cpu_nr = 1; - - memset(&nt_prstatus, 0, sizeof(nt_prstatus)); - memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); - memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); - memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); - nt_prstatus.pr_pid = cpu_nr; - cpu_nr++; - - return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), - "CORE"); + return nt_init_name(buf, type, desc, d_len, KEXEC_CORE_NOTE_NAME); } /* - * Initialize fpregset (floating point) note + * Fill ELF notes for one CPU with save area registers */ -static void *nt_fpregset(void *ptr, struct save_area *sa) +static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) { + struct elf_prstatus nt_prstatus; elf_fpregset_t nt_fpregset; + /* Prepare prstatus note */ + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs)); + nt_prstatus.pr_pid = cpu; + /* Prepare fpregset (floating point) note */ memset(&nt_fpregset, 0, sizeof(nt_fpregset)); - 
memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); - memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); - - return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), - "CORE"); -} - -/* - * Initialize timer note - */ -static void *nt_s390_timer(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), - KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize TOD clock comparator note - */ -static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, - sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize TOD programmable register note - */ -static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, - sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize control register note - */ -static void *nt_s390_ctrs(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, - sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize prefix register note - */ -static void *nt_s390_prefix(void *ptr, struct save_area *sa) -{ - return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, - sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize vxrs high note (full 128 bit VX registers 16-31) - */ -static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs) -{ - return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16], - 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME); -} - -/* - * Initialize vxrs low note (lower halves of VX registers 0-15) - */ -static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) -{ - Elf64_Nhdr *note; - u64 len; - int i; - - note = (Elf64_Nhdr *)ptr; - note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1; - note->n_descsz = 16 * 8; - note->n_type = NT_S390_VXRS_LOW; - len = sizeof(Elf64_Nhdr); - - memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz); - len = roundup(len + note->n_namesz, 4); - - 
ptr += len; - /* Copy lower halves of SIMD registers 0-15 */ - for (i = 0; i < 16; i++) { - memcpy(ptr, &vx_regs[i].u[2], 8); - ptr += 8; - } - return ptr; -} - -/* - * Fill ELF notes for one CPU with save area registers - */ -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs) -{ - ptr = nt_prstatus(ptr, sa); - ptr = nt_fpregset(ptr, sa); - ptr = nt_s390_timer(ptr, sa); - ptr = nt_s390_tod_cmp(ptr, sa); - ptr = nt_s390_tod_preg(ptr, sa); - ptr = nt_s390_ctrs(ptr, sa); - ptr = nt_s390_prefix(ptr, sa); - if (MACHINE_HAS_VX && vx_regs) { - ptr = nt_s390_vx_low(ptr, vx_regs); - ptr = nt_s390_vx_high(ptr, vx_regs); + memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); + memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); + /* Create ELF notes for the CPU */ + ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); + ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); + ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); + ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); + ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); + ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); + ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + if (MACHINE_HAS_VX) { + ptr = nt_init(ptr, NT_S390_VXRS_HIGH, + &sa->vxrs_high, sizeof(sa->vxrs_high)); + ptr = nt_init(ptr, NT_S390_VXRS_LOW, + &sa->vxrs_low, sizeof(sa->vxrs_low)); } return ptr; } @@ -416,8 +379,7 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), - KEXEC_CORE_NOTE_NAME); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); } /* @@ -429,17 +391,18 @@ static void *get_vmcoreinfo_old(unsigned long *size) Elf64_Nhdr note; void *addr; - if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + if 
(copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) return NULL; memset(nt_name, 0, sizeof(nt_name)); - if (copy_from_oldmem(&note, addr, sizeof(note))) + if (copy_oldmem_kernel(&note, addr, sizeof(note))) return NULL; - if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + if (copy_oldmem_kernel(nt_name, addr + sizeof(note), + sizeof(nt_name) - 1)) return NULL; if (strcmp(nt_name, "VMCOREINFO") != 0) return NULL; vmcoreinfo = kzalloc_panic(note.n_descsz); - if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) return NULL; *size = note.n_descsz; return vmcoreinfo; @@ -458,7 +421,7 @@ static void *nt_vmcoreinfo(void *ptr) vmcoreinfo = get_vmcoreinfo_old(&size); if (!vmcoreinfo) return ptr; - return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); + return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } /* @@ -487,13 +450,12 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) */ static int get_cpu_cnt(void) { - int i, cpus = 0; + struct save_area *sa; + int cpus = 0; - for (i = 0; i < dump_save_areas.count; i++) { - if (dump_save_areas.areas[i]->sa.pref_reg == 0) - continue; - cpus++; - } + list_for_each_entry(sa, &dump_save_areas, list) + if (sa->prefix != 0) + cpus++; return cpus; } @@ -538,18 +500,16 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) */ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) { - struct save_area_ext *sa_ext; + struct save_area *sa; void *ptr_start = ptr; - int i; + int cpu; ptr = nt_prpsinfo(ptr); - for (i = 0; i < dump_save_areas.count; i++) { - sa_ext = dump_save_areas.areas[i]; - if (sa_ext->sa.pref_reg == 0) - continue; - ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs); - } + cpu = 1; + list_for_each_entry(sa, &dump_save_areas, list) + if (sa->prefix != 0) + ptr = fill_cpu_elf_notes(ptr, cpu++, sa); ptr = nt_vmcoreinfo(ptr); memset(phdr, 0, sizeof(*phdr)); phdr->p_type 
= PT_NOTE; @@ -573,9 +533,6 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) /* If we are not in kdump or zfcpdump mode return */ if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) return 0; - /* If elfcorehdr= has been passed via cmdline, we use that one */ - if (elfcorehdr_addr != ELFCORE_ADDR_MAX) - return 0; /* If we cannot get HSA size for zfcpdump return error */ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size) return -ENODEV; @@ -606,7 +563,6 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) hdr_off = PTR_DIFF(ptr, hdr); loads_init(phdr_loads, hdr_off); *addr = (unsigned long long) hdr; - elfcorehdr_newmem = hdr; *size = (unsigned long long) hdr_off; BUG_ON(elfcorehdr_size > alloc_size); return 0; @@ -617,8 +573,6 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) */ void elfcorehdr_free(unsigned long long addr) { - if (!elfcorehdr_newmem) - return; kfree((void *)(unsigned long)addr); } @@ -629,7 +583,6 @@ ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) { void *src = (void *)(unsigned long)*ppos; - src = elfcorehdr_newmem ? 
src : src - OLDMEM_BASE; memcpy(buf, src, count); *ppos += count; return count; @@ -641,15 +594,8 @@ ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) { void *src = (void *)(unsigned long)*ppos; - int rc; - if (elfcorehdr_newmem) { - memcpy(buf, src, count); - } else { - rc = copy_from_oldmem(buf, src, count); - if (rc) - return rc; - } + memcpy(buf, src, count); *ppos += count; return count; } diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6fca0e46464e..c890a5589e59 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1470,7 +1470,7 @@ debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, except_str = "*"; else except_str = "-"; - caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN; + caller = (unsigned long) entry->caller; rc += sprintf(out_buf, "%02i %011lld:%06lu %1u %1s %02i %p ", area, (long long)time_spec.tv_sec, time_spec.tv_nsec / 1000, level, except_str, diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 6e72961608f0..62973efd214a 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -2022,7 +2022,7 @@ void show_code(struct pt_regs *regs) *ptr++ = '\t'; ptr += print_insn(ptr, code + start, addr); start += opsize; - printk(buffer); + printk("%s", buffer); ptr = buffer; ptr += sprintf(ptr, "\n "); hops++; @@ -2049,7 +2049,7 @@ void print_fn_code(unsigned char *code, unsigned long len) ptr += print_insn(ptr, code, (unsigned long) code); *ptr++ = '\n'; *ptr++ = 0; - printk(buffer); + printk("%s", buffer); code += opsize; len -= opsize; } diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index dc8e20473484..02bd02ff648b 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -34,22 +34,21 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) unsigned long addr; while (1) { - sp = sp & PSW_ADDR_INSN; if (sp < low || sp > high - 
sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8] & PSW_ADDR_INSN; + addr = sf->gprs[8]; printk("([<%016lx>] %pSR)\n", addr, (void *)addr); /* Follow the backchain. */ while (1) { low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; + sp = sf->back_chain; if (!sp) break; if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8] & PSW_ADDR_INSN; + addr = sf->gprs[8]; printk(" [<%016lx>] %pSR\n", addr, (void *)addr); } /* Zero backchain detected, check for interrupt frame. */ @@ -57,7 +56,7 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) if (sp <= low || sp > high - sizeof(*regs)) return sp; regs = (struct pt_regs *) sp; - addr = regs->psw.addr & PSW_ADDR_INSN; + addr = regs->psw.addr; printk(" [<%016lx>] %pSR\n", addr, (void *)addr); low = sp; sp = regs->gprs[15]; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3c31609df959..c55576bbaa1f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -252,14 +252,14 @@ static void early_pgm_check_handler(void) unsigned long addr; addr = S390_lowcore.program_old_psw.addr; - fixup = search_exception_tables(addr & PSW_ADDR_INSN); + fixup = search_exception_tables(addr); if (!fixup) disabled_wait(0); /* Disable low address protection before storing into lowcore. 
*/ __ctl_store(cr0, 0, 0); cr0_new = cr0 & ~(1UL << 28); __ctl_load(cr0_new, 0, 0); - S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE; + S390_lowcore.program_old_psw.addr = extable_fixup(fixup); __ctl_load(cr0, 0, 0); } @@ -268,9 +268,9 @@ static noinline __init void setup_lowcore_early(void) psw_t psw; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; - psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; + psw.addr = (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; - psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + psw.addr = (unsigned long) s390_base_pgm_handler; S390_lowcore.program_new_psw = psw; s390_base_pgm_handler_fn = early_pgm_check_handler; } @@ -335,6 +335,14 @@ static __init void detect_machine_facilities(void) } } +static inline void save_vector_registers(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (test_facility(129)) + save_vx_regs(boot_cpu_vector_save_area); +#endif +} + static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; @@ -451,6 +459,7 @@ void __init startup_init(void) detect_diag9c(); detect_diag44(); detect_machine_facilities(); + save_vector_registers(); setup_topology(); sclp_early_detect(); lockdep_on(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 857b6526d298..cd5a191381b9 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -764,6 +764,7 @@ ENTRY(psw_idle) .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) .Lpsw_idle_stcctm: #endif + oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: @@ -1146,6 +1147,7 @@ cleanup_critical: .quad .Lio_done - 4 .Lcleanup_idle: + ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT # copy interrupt clock & cpu timer mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER diff --git a/arch/s390/kernel/ftrace.c 
b/arch/s390/kernel/ftrace.c index e0eaf11134b4..0f7bfeba6da6 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -203,7 +203,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) goto out; if (unlikely(atomic_read(&current->tracing_graph_pause))) goto out; - ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE; + ip -= MCOUNT_INSN_SIZE; trace.func = ip; trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to. */ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 301ee9c70688..fcaefb041364 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -25,6 +25,7 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> +#include <asm/facility.h> #include <asm/page.h> #include <asm/ptrace.h> @@ -300,27 +301,27 @@ ENTRY(startup_kdump) xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 xc 0xe00(256),0xe00 + xc 0xf00(256),0xf00 lctlg %c0,%c15,0x200(%r0) # initialize control registers stck __LC_LAST_UPDATE_CLOCK spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) - xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST - # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} - .insn s,0xb2b10000,0 # store facilities @ __LC_STFL_FAC_LIST - tm __LC_STFL_FAC_LIST,0x01 # stfle available ? + stfl 0(%r0) # store facilities @ __LC_STFL_FAC_LIST + mvc __LC_STFLE_FAC_LIST(4),__LC_STFL_FAC_LIST + tm __LC_STFLE_FAC_LIST,0x01 # stfle available ? 
jz 0f - la %r0,1 - .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended + lghi %r0,FACILITIES_ALS_DWORDS-1 + .insn s,0xb2b00000,__LC_STFLE_FAC_LIST # store facility list extended # verify if all required facilities are supported by the machine -0: la %r1,__LC_STFL_FAC_LIST +0: la %r1,__LC_STFLE_FAC_LIST la %r2,3f+8-.LPG0(%r13) - l %r3,0(%r2) -1: l %r0,0(%r1) - n %r0,4(%r2) - cl %r0,4(%r2) + lhi %r3,FACILITIES_ALS_DWORDS +1: lg %r0,0(%r1) + ng %r0,0(%r2) + clg %r0,0(%r2) jne 2f - la %r1,4(%r1) - la %r2,4(%r2) + la %r1,8(%r1) + la %r2,8(%r2) ahi %r3,-1 jnz 1b j 4f @@ -340,24 +341,10 @@ ENTRY(startup_kdump) 3: .long 0x000a0000,0x8badcccc # List of facilities that are required. If not all facilities are present -# the kernel will crash. Format is number of facility words with bits set, -# followed by the facility words. +# the kernel will crash. + + .quad FACILITIES_ALS -#if defined(CONFIG_MARCH_Z13) - .long 2, 0xc100eff2, 0xf46cc800 -#elif defined(CONFIG_MARCH_ZEC12) - .long 2, 0xc100eff2, 0xf46cc800 -#elif defined(CONFIG_MARCH_Z196) - .long 2, 0xc100eff2, 0xf46c0000 -#elif defined(CONFIG_MARCH_Z10) - .long 2, 0xc100eff2, 0xf0680000 -#elif defined(CONFIG_MARCH_Z9_109) - .long 1, 0xc100efc2 -#elif defined(CONFIG_MARCH_Z990) - .long 1, 0xc0002000 -#elif defined(CONFIG_MARCH_Z900) - .long 1, 0xc0000000 -#endif 4: /* Continue with startup code in head64.S */ jg startup_continue diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 58b719fa8067..03c2b469c472 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,7 +16,7 @@ __HEAD ENTRY(startup_continue) - tm __LC_STFL_FAC_LIST+6,0x80 # LPP available ? + tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ? 
jz 0f xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid mvi __LC_LPP,0x80 # and set LPP_MAGIC diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index b1f0a90f933b..f20abdb5630a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2039,21 +2039,15 @@ static void do_reset_calls(void) reset->fn(); } -u32 dump_prefix_page; - -void s390_reset_system(void (*fn_pre)(void), - void (*fn_post)(void *), void *data) +void s390_reset_system(void) { - struct _lowcore *lc; + struct lowcore *lc; - lc = (struct _lowcore *)(unsigned long) store_prefix(); + lc = (struct lowcore *)(unsigned long) store_prefix(); /* Stack for interrupt/machine check handler */ lc->panic_stack = S390_lowcore.panic_stack; - /* Save prefix page address for dump case */ - dump_prefix_page = (u32)(unsigned long) lc; - /* Disable prefixing */ set_prefix(0); @@ -2063,12 +2057,12 @@ void s390_reset_system(void (*fn_pre)(void), /* Set new machine check handler */ S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT; S390_lowcore.mcck_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; + (unsigned long) s390_base_mcck_handler; /* Set new program check handler */ S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT; S390_lowcore.program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + (unsigned long) s390_base_pgm_handler; /* * Clear subchannel ID and number to signal new kernel that no CCW or @@ -2077,14 +2071,5 @@ void s390_reset_system(void (*fn_pre)(void), S390_lowcore.subchannel_id = 0; S390_lowcore.subchannel_nr = 0; - /* Store status at absolute zero */ - store_status(); - - /* Call function before reset */ - if (fn_pre) - fn_pre(); do_reset_calls(); - /* Call function after reset */ - if (fn_post) - fn_post(data); } diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 389db56a2208..250f5972536a 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -226,7 
+226,7 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb, __ctl_load(per_kprobe, 9, 11); regs->psw.mask |= PSW_MASK_PER; regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); - regs->psw.addr = ip | PSW_ADDR_AMODE; + regs->psw.addr = ip; } NOKPROBE_SYMBOL(enable_singlestep); @@ -238,7 +238,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb, __ctl_load(kcb->kprobe_saved_ctl, 9, 11); regs->psw.mask &= ~PSW_MASK_PER; regs->psw.mask |= kcb->kprobe_saved_imask; - regs->psw.addr = ip | PSW_ADDR_AMODE; + regs->psw.addr = ip; } NOKPROBE_SYMBOL(disable_singlestep); @@ -310,7 +310,7 @@ static int kprobe_handler(struct pt_regs *regs) */ preempt_disable(); kcb = get_kprobe_ctlblk(); - p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2)); + p = get_kprobe((void *)(regs->psw.addr - 2)); if (p) { if (kprobe_running()) { @@ -460,7 +460,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) break; } - regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; + regs->psw.addr = orig_ret_address; pop_kprobe(get_kprobe_ctlblk()); kretprobe_hash_unlock(current, &flags); @@ -490,7 +490,7 @@ NOKPROBE_SYMBOL(trampoline_probe_handler); static void resume_execution(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; + unsigned long ip = regs->psw.addr; int fixup = probe_get_fixup_type(p->ainsn.insn); /* Check if the kprobes location is an enabled ftrace caller */ @@ -605,9 +605,9 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) * In case the user-specified fault handler returned * zero, try to fix up. 
*/ - entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); + entry = search_exception_tables(regs->psw.addr); if (entry) { - regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(entry); return 1; } @@ -683,7 +683,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); /* setup return addr to the jprobe handler routine */ - regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE; + regs->psw.addr = (unsigned long) jp->entry; regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); /* r15 is the stack pointer */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index fb0901ec4306..2f1b7217c25c 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -35,46 +35,6 @@ extern const unsigned long long relocate_kernel_len; #ifdef CONFIG_CRASH_DUMP /* - * Create ELF notes for one CPU - */ -static void add_elf_notes(int cpu) -{ - struct save_area *sa = (void *) 4608 + store_prefix(); - void *ptr; - - memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); - ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); - ptr = fill_cpu_elf_notes(ptr, sa, NULL); - memset(ptr, 0, sizeof(struct elf_note)); -} - -/* - * Initialize CPU ELF notes - */ -static void setup_regs(void) -{ - unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; - struct _lowcore *lc; - int cpu, this_cpu; - - /* Get lowcore pointer from store status of this CPU (absolute zero) */ - lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area; - this_cpu = smp_find_processor_id(stap()); - add_elf_notes(this_cpu); - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - if (smp_store_status(cpu)) - continue; - add_elf_notes(cpu); - } - if (MACHINE_HAS_VX) - save_vx_regs_safe((void *) lc->vector_save_area_addr); - /* Copy dump CPU store status info to absolute zero */ - memcpy((void *) SAVE_AREA_BASE, (void *) 
sa, sizeof(struct save_area)); -} - -/* * PM notifier callback for kdump */ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, @@ -105,14 +65,66 @@ static int __init machine_kdump_pm_init(void) arch_initcall(machine_kdump_pm_init); /* - * Start kdump: We expect here that a store status has been done on our CPU + * Reset the system, copy boot CPU registers to absolute zero, + * and jump to the kdump image */ static void __do_machine_kdump(void *image) { - int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + int (*start_kdump)(int); + unsigned long prefix; + + /* store_status() saved the prefix register to lowcore */ + prefix = (unsigned long) S390_lowcore.prefixreg_save_area; + + /* Now do the reset */ + s390_reset_system(); + + /* + * Copy dump CPU store status info to absolute zero. + * This need to be done *after* s390_reset_system set the + * prefix register of this CPU to zero + */ + memcpy((void *) __LC_FPREGS_SAVE_AREA, + (void *)(prefix + __LC_FPREGS_SAVE_AREA), 512); __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); + start_kdump = (void *)((struct kimage *) image)->start; start_kdump(1); + + /* Die if start_kdump returns */ + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Start kdump: create a LGR log entry, store status of all CPUs and + * branch to __do_machine_kdump. + */ +static noinline void __machine_kdump(void *image) +{ + int this_cpu, cpu; + + lgr_info_log(); + /* Get status of the other CPUs */ + this_cpu = smp_find_processor_id(stap()); + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + if (smp_store_status(cpu)) + continue; + } + /* Store status of the boot CPU */ + if (MACHINE_HAS_VX) + save_vx_regs((void *) &S390_lowcore.vector_save_area); + /* + * To create a good backchain for this CPU in the dump store_status + * is passed the address of a function. 
The address is saved into + * the PSW save area of the boot CPU and the function is invoked as + * a tail call of store_status. The backchain in the dump will look + * like this: + * restart_int_handler -> __machine_kexec -> __do_machine_kdump + * The call to store_status() will not return. + */ + store_status(__do_machine_kdump, image); } #endif @@ -235,10 +247,14 @@ static void __do_machine_kexec(void *data) relocate_kernel_t data_mover; struct kimage *image = data; + s390_reset_system(); data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ (*data_mover)(&image->head, image->start); + + /* Die if kexec returns */ + disabled_wait((unsigned long) __builtin_return_address(0)); } /* @@ -251,14 +267,10 @@ static void __machine_kexec(void *data) tracing_off(); debug_locks_off(); #ifdef CONFIG_CRASH_DUMP - if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) { - - lgr_info_log(); - s390_reset_system(setup_regs, __do_machine_kdump, data); - } else + if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) + __machine_kdump(data); #endif - s390_reset_system(NULL, __do_machine_kexec, data); - disabled_wait((unsigned long) __builtin_return_address(0)); + __do_machine_kexec(data); } /* diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 0c1a679314dd..7873e171457c 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -159,11 +159,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, /* Increase core size by size of got & plt and set start offsets for got and plt. 
*/ - me->core_size = ALIGN(me->core_size, 4); - me->arch.got_offset = me->core_size; - me->core_size += me->arch.got_size; - me->arch.plt_offset = me->core_size; - me->core_size += me->arch.plt_size; + me->core_layout.size = ALIGN(me->core_layout.size, 4); + me->arch.got_offset = me->core_layout.size; + me->core_layout.size += me->arch.got_size; + me->arch.plt_offset = me->core_layout.size; + me->core_layout.size += me->arch.plt_size; return 0; } @@ -279,7 +279,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, if (info->got_initialized == 0) { Elf_Addr *gotent; - gotent = me->module_core + me->arch.got_offset + + gotent = me->core_layout.base + me->arch.got_offset + info->got_offset; *gotent = val; info->got_initialized = 1; @@ -302,7 +302,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, rc = apply_rela_bits(loc, val, 0, 64, 0); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { - val += (Elf_Addr) me->module_core - loc; + val += (Elf_Addr) me->core_layout.base - loc; rc = apply_rela_bits(loc, val, 1, 32, 1); } break; @@ -315,7 +315,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { unsigned int *ip; - ip = me->module_core + me->arch.plt_offset + + ip = me->core_layout.base + me->arch.plt_offset + info->plt_offset; ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ ip[1] = 0x100a0004; @@ -334,7 +334,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val - loc + 0xffffUL < 0x1ffffeUL) || (r_type == R_390_PLT32DBL && val - loc + 0xffffffffULL < 0x1fffffffeULL))) - val = (Elf_Addr) me->module_core + + val = (Elf_Addr) me->core_layout.base + me->arch.plt_offset + info->plt_offset; val += rela->r_addend - loc; @@ -356,7 +356,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_GOTOFF32: /* 32 bit offset to GOT. 
*/ case R_390_GOTOFF64: /* 64 bit offset to GOT. */ val = val + rela->r_addend - - ((Elf_Addr) me->module_core + me->arch.got_offset); + ((Elf_Addr) me->core_layout.base + me->arch.got_offset); if (r_type == R_390_GOTOFF16) rc = apply_rela_bits(loc, val, 0, 16, 0); else if (r_type == R_390_GOTOFF32) @@ -366,7 +366,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ - val = (Elf_Addr) me->module_core + me->arch.got_offset + + val = (Elf_Addr) me->core_layout.base + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) rc = apply_rela_bits(loc, val, 1, 32, 0); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index d112fc66f993..87f05e475ae8 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -89,7 +89,7 @@ static void os_info_old_alloc(int nr, int align) goto fail; } buf_align = PTR_ALIGN(buf, align); - if (copy_from_oldmem(buf_align, (void *) addr, size)) { + if (copy_oldmem_kernel(buf_align, (void *) addr, size)) { msg = "copy failed"; goto fail_free; } @@ -122,14 +122,15 @@ static void os_info_old_init(void) return; if (!OLDMEM_BASE) goto fail; - if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr))) goto fail; if (addr == 0 || addr % PAGE_SIZE) goto fail; os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); if (!os_info_old) goto fail; - if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old))) + if (copy_oldmem_kernel(os_info_old, (void *) addr, + sizeof(*os_info_old))) goto fail_free; if (os_info_old->magic != OS_INFO_MAGIC) goto fail_free; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 61595c1f0a0f..0943b11a2f6e 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -74,7 +74,7 @@ static 
unsigned long guest_is_user_mode(struct pt_regs *regs) static unsigned long instruction_pointer_guest(struct pt_regs *regs) { - return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN; + return sie_block(regs)->gpsw.addr; } unsigned long perf_instruction_pointer(struct pt_regs *regs) @@ -231,29 +231,27 @@ static unsigned long __store_trace(struct perf_callchain_entry *entry, struct pt_regs *regs; while (1) { - sp = sp & PSW_ADDR_INSN; if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + perf_callchain_store(entry, sf->gprs[8]); /* Follow the backchain. */ while (1) { low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; + sp = sf->back_chain; if (!sp) break; if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - perf_callchain_store(entry, - sf->gprs[8] & PSW_ADDR_INSN); + perf_callchain_store(entry, sf->gprs[8]); } /* Zero backchain detected, check for interrupt frame. */ sp = (unsigned long) (sf + 1); if (sp <= low || sp > high - sizeof(*regs)) return sp; regs = (struct pt_regs *) sp; - perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN); + perf_callchain_store(entry, sf->gprs[8]); low = sp; sp = regs->gprs[15]; } @@ -262,12 +260,13 @@ static unsigned long __store_trace(struct perf_callchain_entry *entry, void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - unsigned long head; + unsigned long head, frame_size; struct stack_frame *head_sf; if (user_mode(regs)) return; + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); head = regs->gprs[15]; head_sf = (struct stack_frame *) head; @@ -275,8 +274,9 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, return; head = head_sf->back_chain; - head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); + head = __store_trace(entry, head, + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + 
S390_lowcore.async_stack + frame_size); __store_trace(entry, head, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 114ee8b96f17..2bba7df4ac51 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -56,10 +56,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return 0; low = task_stack_page(tsk); high = (struct stack_frame *) task_pt_regs(tsk); - sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN); + sf = (struct stack_frame *) tsk->thread.ksp; if (sf <= low || sf > high) return 0; - sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN); + sf = (struct stack_frame *) sf->back_chain; if (sf <= low || sf > high) return 0; return sf->gprs[8]; @@ -154,7 +154,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, memset(&frame->childregs, 0, sizeof(struct pt_regs)); frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - frame->childregs.psw.addr = PSW_ADDR_AMODE | + frame->childregs.psw.addr = (unsigned long) kernel_thread_starter; frame->childregs.gprs[9] = new_stackp; /* function */ frame->childregs.gprs[10] = arg; @@ -220,14 +220,14 @@ unsigned long get_wchan(struct task_struct *p) return 0; low = task_stack_page(p); high = (struct stack_frame *) task_pt_regs(p); - sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN); + sf = (struct stack_frame *) p->thread.ksp; if (sf <= low || sf > high) return 0; for (count = 0; count < 16; count++) { - sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN); + sf = (struct stack_frame *) sf->back_chain; if (sf <= low || sf > high) return 0; - return_address = sf->gprs[8] & PSW_ADDR_INSN; + return_address = sf->gprs[8]; if (!in_sched_functions(return_address)) return return_address; } diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 7ce00e7a709a..647128d5b983 100644 --- 
a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -61,6 +61,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh", "highgprs", "te", "vx" }; + static const char * const int_hwcap_str[] = { + "sie" + }; unsigned long n = (unsigned long) v - 1; int i; @@ -75,6 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); show_cacheinfo(m); } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 01c37b36caf9..49b1c13bf6c9 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -84,7 +84,7 @@ void update_cr_regs(struct task_struct *task) if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) new.control |= PER_EVENT_IFETCH; new.start = 0; - new.end = PSW_ADDR_INSN; + new.end = -1UL; } /* Take care of the PER enablement bit in the PSW. */ @@ -148,7 +148,7 @@ static inline unsigned long __peek_user_per(struct task_struct *child, else if (addr == (addr_t) &dummy->cr11) /* End address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? - PSW_ADDR_INSN : child->thread.per_user.end; + -1UL : child->thread.per_user.end; else if (addr == (addr_t) &dummy->bits) /* Single-step bit. */ return test_thread_flag(TIF_SINGLE_STEP) ? @@ -495,8 +495,6 @@ long arch_ptrace(struct task_struct *child, long request, } return 0; default: - /* Removing high order bit from addr (only for 31 bit). 
*/ - addr &= PSW_ADDR_INSN; return ptrace_request(child, request, addr, data); } } diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 52aab0bd84f8..89ea8c213d82 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -9,60 +9,66 @@ #include <asm/sigp.h> # -# store_status +# Issue "store status" for the current CPU to its prefix page +# and call passed function afterwards # -# Prerequisites to run this function: -# - Prefix register is set to zero -# - Original prefix register is stored in "dump_prefix_page" -# - Lowcore protection is off +# r2 = Function to be called after store status +# r3 = Parameter for function # ENTRY(store_status) /* Save register one and load save area base */ stg %r1,__LC_SAVE_AREA_RESTART - lghi %r1,SAVE_AREA_BASE /* General purpose registers */ - stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - lg %r2,__LC_SAVE_AREA_RESTART - stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) + lghi %r1,__LC_GPREGS_SAVE_AREA + stmg %r0,%r15,0(%r1) + mvc 8(8,%r1),__LC_SAVE_AREA_RESTART /* Control registers */ - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lghi %r1,__LC_CREGS_SAVE_AREA + stctg %c0,%c15,0(%r1) /* Access registers */ - stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lghi %r1,__LC_AREGS_SAVE_AREA + stam %a0,%a15,0(%r1) /* Floating point registers */ - std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f10,0x50 + 
__LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lghi %r1,__LC_FPREGS_SAVE_AREA + std %f0, 0x00(%r1) + std %f1, 0x08(%r1) + std %f2, 0x10(%r1) + std %f3, 0x18(%r1) + std %f4, 0x20(%r1) + std %f5, 0x28(%r1) + std %f6, 0x30(%r1) + std %f7, 0x38(%r1) + std %f8, 0x40(%r1) + std %f9, 0x48(%r1) + std %f10,0x50(%r1) + std %f11,0x58(%r1) + std %f12,0x60(%r1) + std %f13,0x68(%r1) + std %f14,0x70(%r1) + std %f15,0x78(%r1) /* Floating point control register */ - stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1) + lghi %r1,__LC_FP_CREG_SAVE_AREA + stfpc 0(%r1) /* CPU timer */ - stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1) - /* Saved prefix register */ - larl %r2,dump_prefix_page - mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2) + lghi %r1,__LC_CPU_TIMER_SAVE_AREA + stpt 0(%r1) + /* Store prefix register */ + lghi %r1,__LC_PREFIX_SAVE_AREA + stpx 0(%r1) /* Clock comparator - seven bytes */ - larl %r2,.Lclkcmp - stckc 0(%r2) - mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2) + lghi %r1,__LC_CLOCK_COMP_SAVE_AREA + larl %r4,.Lclkcmp + stckc 0(%r4) + mvc 1(7,%r1),1(%r4) /* Program status word */ - epsw %r2,%r3 - st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1) - st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1) - larl %r2,store_status - stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) - br %r14 + lghi %r1,__LC_PSW_SAVE_AREA + epsw %r4,%r5 + st %r4,0(%r1) + st %r5,4(%r1) + stg %r2,8(%r1) + lgr %r1,%r2 + lgr %r2,%r3 + br %r1 .section .bss .align 8 @@ -77,9 +83,11 @@ ENTRY(store_status) ENTRY(do_reipl_asm) basr %r13,0 .Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: brasl %r14,store_status +.Lpg1: lgr %r3,%r2 + larl %r2,.Lstatus + brasl %r14,store_status - lctlg 
%c6,%c6,.Lall-.Lpg0(%r13) +.Lstatus: lctlg %c6,%c6,.Lall-.Lpg0(%r13) lgr %r1,%r2 mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13) stsch .Lschib-.Lpg0(%r13) diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c index 9fe7781a45cd..d88db40bdf15 100644 --- a/arch/s390/kernel/sclp.c +++ b/arch/s390/kernel/sclp.c @@ -9,7 +9,11 @@ #include <asm/processor.h> #include <asm/sclp.h> +#define EVTYP_VT220MSG_MASK 0x00000040 +#define EVTYP_MSG_MASK 0x40000000 + static char _sclp_work_area[4096] __aligned(PAGE_SIZE); +static bool have_vt220, have_linemode; static void _sclp_wait_int(void) { @@ -68,7 +72,7 @@ static int _sclp_setup(int disable) 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, - 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; unsigned int *masks; @@ -82,13 +86,13 @@ static int _sclp_setup(int disable) rc = _sclp_servc(0x00780005, _sclp_work_area); if (rc) return rc; - if ((masks[0] & masks[3]) != masks[0] || - (masks[1] & masks[2]) != masks[1]) - return -EIO; + have_vt220 = masks[2] & EVTYP_VT220MSG_MASK; + have_linemode = masks[2] & EVTYP_MSG_MASK; return 0; } -static int _sclp_print(const char *str) +/* Output multi-line text using SCLP Message interface. */ +static void _sclp_print_lm(const char *str) { static unsigned char write_head[] = { /* sccb header */ @@ -143,18 +147,49 @@ static int _sclp_print(const char *str) } while (ch != 0); /* SCLP write data */ - return _sclp_servc(0x00760005, _sclp_work_area); + _sclp_servc(0x00760005, _sclp_work_area); } -int _sclp_print_early(const char *str) +/* Output multi-line text (plus a newline) using SCLP VT220 + * interface. 
+ */ +static void _sclp_print_vt220(const char *str) { - int rc; + static unsigned char const write_head[] = { + /* sccb header */ + 0x00, 0x0e, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* evbuf header */ + 0x00, 0x06, + 0x1a, 0x00, 0x00, 0x00, + }; + size_t len = strlen(str); - rc = _sclp_setup(0); - if (rc) - return rc; - rc = _sclp_print(str); - if (rc) - return rc; - return _sclp_setup(1); + if (sizeof(write_head) + len >= sizeof(_sclp_work_area)) + len = sizeof(_sclp_work_area) - sizeof(write_head) - 1; + + memcpy(_sclp_work_area, write_head, sizeof(write_head)); + memcpy(_sclp_work_area + sizeof(write_head), str, len); + _sclp_work_area[sizeof(write_head) + len] = '\n'; + + /* Update length fields in evbuf and sccb headers */ + *(unsigned short *)(_sclp_work_area + 8) += len + 1; + *(unsigned short *)(_sclp_work_area + 0) += len + 1; + + /* SCLP write data */ + (void)_sclp_servc(0x00760005, _sclp_work_area); +} + +/* Output one or more lines of text on the SCLP console (VT220 and / + * or line-mode). All lines get terminated; no need for a trailing LF. + */ +void _sclp_print_early(const char *str) +{ + if (_sclp_setup(0) != 0) + return; + if (have_linemode) + _sclp_print_lm(str); + if (have_vt220) + _sclp_print_vt220(str); + _sclp_setup(1); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c837bcacf218..9220db5c996a 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -80,6 +80,8 @@ EXPORT_SYMBOL(console_irq); unsigned long elf_hwcap __read_mostly = 0; char elf_platform[ELF_PLATFORM_SIZE]; +unsigned long int_hwcap = 0; + int __initdata memory_end_set; unsigned long __initdata memory_end; unsigned long __initdata max_physmem_end; @@ -97,7 +99,7 @@ unsigned long MODULES_VADDR; unsigned long MODULES_END; /* An array with a pointer to the lowcore of every CPU. 
*/ -struct _lowcore *lowcore_ptr[NR_CPUS]; +struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); /* @@ -291,33 +293,29 @@ void *restart_stack __attribute__((__section__(".data"))); static void __init setup_lowcore(void) { - struct _lowcore *lc; + struct lowcore *lc; /* * Setup lowcore for boot cpu */ - BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); + BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096); lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); lc->restart_psw.mask = PSW_KERNEL_BITS; - lc->restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) restart_int_handler; + lc->restart_psw.addr = (unsigned long) restart_int_handler; lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK; - lc->external_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) ext_int_handler; + lc->external_new_psw.addr = (unsigned long) ext_int_handler; lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; + lc->svc_new_psw.addr = (unsigned long) system_call; lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK; - lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->program_new_psw.addr = (unsigned long) pgm_check_handler; lc->mcck_new_psw.mask = PSW_KERNEL_BITS; - lc->mcck_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; + lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK; - lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; + lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); @@ -661,15 +659,6 @@ static void __init reserve_kernel(void) #endif } -static void __init 
reserve_elfcorehdr(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (is_kdump_kernel()) - memblock_reserve(elfcorehdr_addr - OLDMEM_BASE, - PAGE_ALIGN(elfcorehdr_size)); -#endif -} - static void __init setup_memory(void) { struct memblock_region *reg; @@ -793,6 +782,13 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; } + + /* + * Virtualization support HWCAP_INT_SIE is bit 0. + */ + if (sclp.has_sief2) + int_hwcap |= HWCAP_INT_SIE; + return 0; } arch_initcall(setup_hwcaps); @@ -841,6 +837,11 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = (unsigned long) &_end; parse_early_param(); +#ifdef CONFIG_CRASH_DUMP + /* Deactivate elfcorehdr= kernel parameter */ + elfcorehdr_addr = ELFCORE_ADDR_MAX; +#endif + os_info_init(); setup_ipl(); @@ -849,7 +850,6 @@ void __init setup_arch(char **cmdline_p) reserve_oldmem(); reserve_kernel(); reserve_initrd(); - reserve_elfcorehdr(); memblock_allow_resize(); /* Get information about *all* installed memory */ @@ -870,11 +870,13 @@ void __init setup_arch(char **cmdline_p) check_initrd(); reserve_crashkernel(); +#ifdef CONFIG_CRASH_DUMP /* * Be aware that smp_save_dump_cpus() triggers a system reset. * Therefore CPU and device initialization should be done afterwards. */ smp_save_dump_cpus(); +#endif setup_resources(); setup_vmcoreinfo(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 028cc46cb82a..d82562cf0a0e 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -331,13 +331,13 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE; + restorer = (unsigned long) ka->sa.sa_restorer; } else { /* Signal frame without vector registers are short ! 
*/ __u16 __user *svc = (void __user *) frame + frame_size - 2; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) return -EFAULT; - restorer = (unsigned long) svc | PSW_ADDR_AMODE; + restorer = (unsigned long) svc; } /* Set up registers for signal handler */ @@ -347,7 +347,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | (PSW_USER_BITS & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; + regs->psw.addr = (unsigned long) ka->sa.sa_handler; regs->gprs[2] = sig; regs->gprs[3] = (unsigned long) &frame->sc; @@ -394,13 +394,12 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - restorer = (unsigned long) - ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE; + restorer = (unsigned long) ksig->ka.sa.sa_restorer; } else { __u16 __user *svc = &frame->svc_insn; if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) return -EFAULT; - restorer = (unsigned long) svc | PSW_ADDR_AMODE; + restorer = (unsigned long) svc; } /* Create siginfo on the signal stack */ @@ -426,7 +425,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | (PSW_USER_BITS & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); - regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE; + regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler; regs->gprs[2] = ksig->sig; regs->gprs[3] = (unsigned long) &frame->info; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 9062df575afe..3c65a8eae34d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -64,8 +64,9 @@ enum { static DEFINE_PER_CPU(struct cpu *, cpu_device); struct pcpu { - struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ + struct lowcore *lowcore; /* lowcore page(s) for 
the cpu */ unsigned long ec_mask; /* bit mask for ec_xxx functions */ + unsigned long ec_clk; /* sigp timestamp for ec_xxx */ signed char state; /* physical cpu state */ signed char polarization; /* physical polarization */ u16 address; /* physical cpu address */ @@ -80,6 +81,10 @@ EXPORT_SYMBOL(smp_cpu_mt_shift); unsigned int smp_cpu_mtid; EXPORT_SYMBOL(smp_cpu_mtid); +#ifdef CONFIG_CRASH_DUMP +__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; +#endif + static unsigned int smp_max_threads __initdata = -1U; static int __init early_nosmt(char *s) @@ -105,8 +110,7 @@ DEFINE_MUTEX(smp_cpu_state_mutex); /* * Signal processor helper functions. */ -static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm, - u32 *status) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm) { int cc; @@ -171,6 +175,7 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) return; order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; + pcpu->ec_clk = get_tod_clock_fast(); pcpu_sigp_retry(pcpu, order, 0); } @@ -180,10 +185,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, panic_stack; - struct _lowcore *lc; + struct lowcore *lc; if (pcpu != &pcpu_devices[0]) { - pcpu->lowcore = (struct _lowcore *) + pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); panic_stack = __get_free_page(GFP_KERNEL); @@ -235,7 +240,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { - struct _lowcore *lc = pcpu->lowcore; + struct lowcore *lc = pcpu->lowcore; if (MACHINE_HAS_TLB_LC) cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); @@ -255,7 +260,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) static void 
pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) { - struct _lowcore *lc = pcpu->lowcore; + struct lowcore *lc = pcpu->lowcore; struct thread_info *ti = task_thread_info(tsk); lc->kernel_stack = (unsigned long) task_stack_page(tsk) @@ -271,7 +276,7 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) { - struct _lowcore *lc = pcpu->lowcore; + struct lowcore *lc = pcpu->lowcore; lc->restart_stack = lc->kernel_stack; lc->restart_fn = (unsigned long) func; @@ -286,7 +291,7 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), void *data, unsigned long stack) { - struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; + struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices]; unsigned long source_cpu = stap(); __load_psw_mask(PSW_KERNEL_BITS); @@ -538,53 +543,24 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); #ifdef CONFIG_CRASH_DUMP -static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext, - u16 address, int is_boot_cpu) -{ - void *lc = (void *)(unsigned long) store_prefix(); - unsigned long vx_sa; - - if (is_boot_cpu) { - /* Copy the registers of the boot CPU. */ - copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa), - SAVE_AREA_BASE - PAGE_SIZE, 0); - if (MACHINE_HAS_VX) - save_vx_regs_safe(sa_ext->vx_regs); - return; - } - /* Get the registers of a non-boot cpu. 
*/ - __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); - memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa)); - if (!MACHINE_HAS_VX) - return; - /* Get the VX registers */ - vx_sa = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!vx_sa) - panic("could not allocate memory for VX save area\n"); - __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL); - memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs)); - memblock_free(vx_sa, PAGE_SIZE); -} - int smp_store_status(int cpu) { - unsigned long vx_sa; - struct pcpu *pcpu; + struct pcpu *pcpu = pcpu_devices + cpu; + unsigned long pa; - pcpu = pcpu_devices + cpu; - if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, - 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) + pa = __pa(&pcpu->lowcore->floating_pt_save_area); + if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, + pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; if (!MACHINE_HAS_VX) return 0; - vx_sa = __pa(pcpu->lowcore->vector_save_area_addr); - __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, - vx_sa, NULL); + pa = __pa(pcpu->lowcore->vector_save_area_addr); + if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, + pa) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; return 0; } -#endif /* CONFIG_CRASH_DUMP */ - /* * Collect CPU state of the previous, crashed system. * There are four cases: @@ -593,7 +569,7 @@ int smp_store_status(int cpu) * The state for all CPUs except the boot CPU needs to be collected * with sigp stop-and-store-status. The boot CPU state is located in * the absolute lowcore of the memory stored in the HSA. The zcore code - * will allocate the save area and copy the boot CPU state from the HSA. + * will copy the boot CPU state from the HSA. 
* 2) stand-alone kdump for SCSI (zfcp dump with swapped memory) * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP * The state for all CPUs except the boot CPU needs to be collected @@ -608,55 +584,76 @@ int smp_store_status(int cpu) * stored the registers of the boot CPU in the memory of the old system. * 4) kdump and the old kernel stored the CPU state * condition: OLDMEM_BASE != NULL && is_kdump_kernel() - * The state of all CPUs is stored in ELF sections in the memory of the - * old system. The ELF sections are picked up by the crash_dump code - * via elfcorehdr_addr. + * This case does not exist for s390 anymore, setup_arch explicitly + * deactivates the elfcorehdr= kernel parameter */ +static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr, + bool is_boot_cpu, unsigned long page) +{ + __vector128 *vxrs = (__vector128 *) page; + + if (is_boot_cpu) + vxrs = boot_cpu_vector_save_area; + else + __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, page); + save_area_add_vxrs(sa, vxrs); +} + +static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr, + bool is_boot_cpu, unsigned long page) +{ + void *regs = (void *) page; + + if (is_boot_cpu) + copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512); + else + __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, page); + save_area_add_regs(sa, regs); +} + void __init smp_save_dump_cpus(void) { -#ifdef CONFIG_CRASH_DUMP - int addr, cpu, boot_cpu_addr, max_cpu_addr; - struct save_area_ext *sa_ext; + int addr, boot_cpu_addr, max_cpu_addr; + struct save_area *sa; + unsigned long page; bool is_boot_cpu; - if (is_kdump_kernel()) - /* Previous system stored the CPU states. Nothing to do. */ - return; if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP)) /* No previous system present, normal boot. 
*/ return; + /* Allocate a page as dumping area for the store status sigps */ + page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31); /* Set multi-threading state to the previous system. */ pcpu_set_smt(sclp.mtid_prev); - max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; - for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) { - if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) == - SIGP_CC_NOT_OPERATIONAL) - continue; - cpu += 1; - } - dump_save_areas.areas = (void *)memblock_alloc(sizeof(void *) * cpu, 8); - dump_save_areas.count = cpu; boot_cpu_addr = stap(); - for (cpu = 0, addr = 0; addr <= max_cpu_addr; addr++) { - if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0, NULL) == + max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; + for (addr = 0; addr <= max_cpu_addr; addr++) { + if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) == SIGP_CC_NOT_OPERATIONAL) continue; - sa_ext = (void *) memblock_alloc(sizeof(*sa_ext), 8); - dump_save_areas.areas[cpu] = sa_ext; - if (!sa_ext) - panic("could not allocate memory for save area\n"); is_boot_cpu = (addr == boot_cpu_addr); - cpu += 1; - if (is_boot_cpu && !OLDMEM_BASE) - /* Skip boot CPU for standard zfcp dump. */ - continue; - /* Get state for this CPU. */ - __smp_store_cpu_state(sa_ext, addr, is_boot_cpu); + /* Allocate save area */ + sa = save_area_alloc(is_boot_cpu); + if (!sa) + panic("could not allocate memory for save area\n"); + if (MACHINE_HAS_VX) + /* Get the vector registers */ + smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page); + /* + * For a zfcp dump OLDMEM_BASE == NULL and the registers + * of the boot CPU are stored in the HSA. 
To retrieve + * these registers an SCLP request is required which is + * done by drivers/s390/char/zcore.c:init_cpu_info() + */ + if (!is_boot_cpu || OLDMEM_BASE) + /* Get the CPU registers */ + smp_save_cpu_regs(sa, addr, is_boot_cpu, page); } + memblock_free(page, PAGE_SIZE); diag308_reset(); pcpu_set_smt(0); -#endif /* CONFIG_CRASH_DUMP */ } +#endif /* CONFIG_CRASH_DUMP */ void smp_cpu_set_polarization(int cpu, int val) { @@ -680,7 +677,7 @@ static struct sclp_core_info *smp_get_core_info(void) for (address = 0; address < (SCLP_MAX_CORES << smp_cpu_mt_shift); address += (1U << smp_cpu_mt_shift)) { - if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == + if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) == SIGP_CC_NOT_OPERATIONAL) continue; info->core[info->configured].core_id = @@ -924,7 +921,7 @@ void __init smp_prepare_boot_cpu(void) pcpu->state = CPU_STATE_CONFIGURED; pcpu->address = stap(); - pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); + pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix(); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); set_cpu_present(0, true); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 1785cd82253c..8f64ebd63767 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -21,12 +21,11 @@ static unsigned long save_context_stack(struct stack_trace *trace, unsigned long addr; while(1) { - sp &= PSW_ADDR_INSN; if (sp < low || sp > high) return sp; sf = (struct stack_frame *)sp; while(1) { - addr = sf->gprs[8] & PSW_ADDR_INSN; + addr = sf->gprs[8]; if (!trace->skip) trace->entries[trace->nr_entries++] = addr; else @@ -34,7 +33,7 @@ static unsigned long save_context_stack(struct stack_trace *trace, if (trace->nr_entries >= trace->max_entries) return sp; low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; + sp = sf->back_chain; if (!sp) break; if (sp <= low || sp > high - sizeof(*sf)) @@ -46,7 +45,7 @@ 
static unsigned long save_context_stack(struct stack_trace *trace, if (sp <= low || sp > high - sizeof(*regs)) return sp; regs = (struct pt_regs *)sp; - addr = regs->psw.addr & PSW_ADDR_INSN; + addr = regs->psw.addr; if (savesched || !in_sched_functions(addr)) { if (!trace->skip) trace->entries[trace->nr_entries++] = addr; @@ -60,33 +59,43 @@ static unsigned long save_context_stack(struct stack_trace *trace, } } -void save_stack_trace(struct stack_trace *trace) +static void __save_stack_trace(struct stack_trace *trace, unsigned long sp) { - register unsigned long sp asm ("15"); - unsigned long orig_sp, new_sp; + unsigned long new_sp, frame_size; - orig_sp = sp & PSW_ADDR_INSN; - new_sp = save_context_stack(trace, orig_sp, - S390_lowcore.panic_stack - PAGE_SIZE, - S390_lowcore.panic_stack, 1); - if (new_sp != orig_sp) - return; + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + new_sp = save_context_stack(trace, sp, + S390_lowcore.panic_stack + frame_size - PAGE_SIZE, + S390_lowcore.panic_stack + frame_size, 1); new_sp = save_context_stack(trace, new_sp, - S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack, 1); - if (new_sp != orig_sp) - return; + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size, 1); save_context_stack(trace, new_sp, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE, 1); } + +void save_stack_trace(struct stack_trace *trace) +{ + register unsigned long r15 asm ("15"); + unsigned long sp; + + sp = r15; + __save_stack_trace(trace, sp); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { unsigned long sp, low, high; - sp = tsk->thread.ksp & PSW_ADDR_INSN; + sp = tsk->thread.ksp; + if (tsk == current) { + /* Get current stack pointer. 
*/ + asm volatile("la %0,0(15)" : "=a" (sp)); + } low = (unsigned long) task_stack_page(tsk); high = (unsigned long) task_pt_regs(tsk); save_context_stack(trace, sp, low, high, 0); @@ -94,3 +103,14 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + unsigned long sp; + + sp = kernel_stack_pointer(regs); + __save_stack_trace(trace, sp); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} +EXPORT_SYMBOL_GPL(save_stack_trace_regs); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 5378c3ea1b98..293d8b98fd52 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -383,3 +383,4 @@ SYSCALL(sys_recvfrom,compat_sys_recvfrom) SYSCALL(sys_recvmsg,compat_sys_recvmsg) SYSCALL(sys_shutdown,sys_shutdown) SYSCALL(sys_mlock2,compat_sys_mlock2) +SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 99babea026ca..f7dba3887a54 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -111,8 +111,7 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) { - static int max_mnest; - int i, rc; + int i; seq_putc(m, '\n'); if (!MACHINE_HAS_TOPOLOGY) @@ -123,7 +122,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) for (i = 0; i < TOPOLOGY_NR_MAG; i++) seq_printf(m, " %d", info->mag[i]); seq_putc(m, '\n'); -#ifdef CONFIG_SCHED_MC +#ifdef CONFIG_SCHED_TOPOLOGY store_topology(info); seq_printf(m, "CPU Topology SW: "); for (i = 0; i < TOPOLOGY_NR_MAG; i++) @@ -145,6 +144,10 @@ static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info) seq_printf(m, "CPUs Configured: 
%d\n", info->cpus_configured); seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby); seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved); + if (info->mt_installed) { + seq_printf(m, "CPUs G-MTID: %d\n", info->mt_gtid); + seq_printf(m, "CPUs S-MTID: %d\n", info->mt_stid); + } /* * Sigh 2. According to the specification the alternate * capability field is a 32 bit floating point number @@ -194,13 +197,10 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved); seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated); seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared); - if (info->mt_installed & 0x80) { - seq_printf(m, "LPAR CPUs G-MTID: %d\n", - info->mt_general & 0x1f); - seq_printf(m, "LPAR CPUs S-MTID: %d\n", - info->mt_installed & 0x1f); - seq_printf(m, "LPAR CPUs PS-MTID: %d\n", - info->mt_psmtid & 0x1f); + if (info->mt_installed) { + seq_printf(m, "LPAR CPUs G-MTID: %d\n", info->mt_gtid); + seq_printf(m, "LPAR CPUs S-MTID: %d\n", info->mt_stid); + seq_printf(m, "LPAR CPUs PS-MTID: %d\n", info->mt_psmtid); } } diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c index 21a5df99552b..dde7654f5c68 100644 --- a/arch/s390/kernel/trace.c +++ b/arch/s390/kernel/trace.c @@ -18,6 +18,9 @@ void trace_s390_diagnose_norecursion(int diag_nr) unsigned long flags; unsigned int *depth; + /* Avoid lockdep recursion. 
*/ + if (IS_ENABLED(CONFIG_LOCKDEP)) + return; local_irq_save(flags); depth = this_cpu_ptr(&diagnose_trace_depth); if (*depth == 0) { diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 1b18118bbc06..017eb03daee2 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -32,8 +32,7 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) address = *(unsigned long *)(current->thread.trap_tdb + 24); else address = regs->psw.addr; - return (void __user *) - ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN); + return (void __user *) (address - (regs->int_code >> 16)); } static inline void report_user_fault(struct pt_regs *regs, int signr) @@ -46,7 +45,7 @@ static inline void report_user_fault(struct pt_regs *regs, int signr) return; printk("User process fault: interruption code %04x ilc:%d ", regs->int_code & 0xffff, regs->int_code >> 17); - print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN); + print_vma_addr("in ", regs->psw.addr); printk("\n"); show_regs(regs); } @@ -69,13 +68,13 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) report_user_fault(regs, si_signo); } else { const struct exception_table_entry *fixup; - fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); + fixup = search_exception_tables(regs->psw.addr); if (fixup) - regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup); else { enum bug_trap_type btt; - btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); + btt = report_bug(regs->psw.addr, regs); if (btt == BUG_TRAP_TYPE_WARN) return; die(regs, str); @@ -260,11 +259,8 @@ void vector_exception(struct pt_regs *regs) void data_exception(struct pt_regs *regs) { - __u16 __user *location; int signal = 0; - location = get_trap_ip(regs); - save_fpu_regs(); if (current->thread.fpu.fpc & FPC_DXC_MASK) signal = SIGFPE; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 59eddb0e1a3e..94495cac8be3 100644 --- 
a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -80,7 +80,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data; /* * Setup vdso data page. */ -static void vdso_init_data(struct vdso_data *vd) +static void __init vdso_init_data(struct vdso_data *vd) { vd->ectg_available = test_facility(31); } @@ -90,9 +90,10 @@ static void vdso_init_data(struct vdso_data *vd) */ #define SEGMENT_ORDER 2 -int vdso_alloc_per_cpu(struct _lowcore *lowcore) +int vdso_alloc_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; + struct vdso_per_cpu_data *vd; u32 *psal, *aste; int i; @@ -107,6 +108,12 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore) if (!segment_table || !page_table || !page_frame) goto out; + /* Initialize per-cpu vdso data page */ + vd = (struct vdso_per_cpu_data *) page_frame; + vd->cpu_nr = lowcore->cpu_nr; + vd->node_id = cpu_to_node(vd->cpu_nr); + + /* Set up access register mode page table */ clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE << SEGMENT_ORDER); clear_table((unsigned long *) page_table, _PAGE_INVALID, @@ -138,7 +145,7 @@ out: return -ENOMEM; } -void vdso_free_per_cpu(struct _lowcore *lowcore) +void vdso_free_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; @@ -163,7 +170,7 @@ static void vdso_init_cr5(void) if (!vdso_enabled) return; - cr5 = offsetof(struct _lowcore, paste); + cr5 = offsetof(struct lowcore, paste); __ctl_load(cr5, 5, 5); } @@ -299,8 +306,6 @@ static int __init vdso_init(void) get_page(virt_to_page(vdso_data)); - smp_mb(); - return 0; } early_initcall(vdso_init); diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index ee8a18e50a25..f9c459586649 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -1,6 +1,6 @@ # List of files in the vdso, has to be asm only for now -obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o 
+obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S new file mode 100644 index 000000000000..c1ed0b72030f --- /dev/null +++ b/arch/s390/kernel/vdso32/getcpu.S @@ -0,0 +1,43 @@ +/* + * Userland implementation of getcpu() for 32 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> + + .text + .align 4 + .globl __kernel_getcpu + .type __kernel_getcpu,@function +__kernel_getcpu: + .cfi_startproc + ear %r1,%a4 + lhi %r4,1 + sll %r4,24 + sar %a4,%r4 + la %r4,0 + epsw %r0,0 + sacf 512 + l %r5,__VDSO_CPU_NR(%r4) + l %r4,__VDSO_NODE_ID(%r4) + tml %r0,0x4000 + jo 1f + tml %r0,0x8000 + jno 0f + sacf 256 + j 1f +0: sacf 0 +1: sar %a4,%r1 + ltr %r2,%r2 + jz 2f + st %r5,0(%r2) +2: ltr %r3,%r3 + jz 3f + st %r4,0(%r3) +3: lhi %r2,0 + br %r14 + .cfi_endproc + .size __kernel_getcpu,.-__kernel_getcpu diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index a8c379fa1247..8f048c2d6d13 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -132,6 +132,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_getcpu; local: *; }; diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index c4b03f9ed228..058659c1b8cf 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -1,6 +1,6 @@ # List of files in the vdso, has to be asm only for now -obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o +obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S new file mode 100644 index 000000000000..4cbe98291931 --- /dev/null +++ 
b/arch/s390/kernel/vdso64/getcpu.S @@ -0,0 +1,42 @@ +/* + * Userland implementation of getcpu() for 64 bits processes in a + * s390 kernel for use in the vDSO + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#include <asm/vdso.h> +#include <asm/asm-offsets.h> + + .text + .align 4 + .globl __kernel_getcpu + .type __kernel_getcpu,@function +__kernel_getcpu: + .cfi_startproc + ear %r1,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + la %r4,0 + epsw %r0,0 + sacf 512 + l %r5,__VDSO_CPU_NR(%r4) + l %r4,__VDSO_NODE_ID(%r4) + tml %r0,0x4000 + jo 1f + tml %r0,0x8000 + jno 0f + sacf 256 + j 1f +0: sacf 0 +1: sar %a4,%r1 + ltgr %r2,%r2 + jz 2f + st %r5,0(%r2) +2: ltgr %r3,%r3 + jz 3f + st %r4,0(%r3) +3: lghi %r2,0 + br %r14 + .cfi_endproc + .size __kernel_getcpu,.-__kernel_getcpu diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 9f5979d102a9..f35455d497fe 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -132,6 +132,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_getcpu; local: *; }; diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 5fce52cf0e57..5ea5af3c7db7 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -29,6 +29,7 @@ config KVM select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING select SRCU + select KVM_VFIO ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index b3b553469650..d42fa38c2429 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -7,7 +7,7 @@ # as published by the Free Software Foundation. 
KVM := ../../../virt/kvm -common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5fbfb88f8477..05f7de9869a9 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -155,10 +155,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tcpu; int tid; - int i; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; vcpu->stat.diagnose_9c++; @@ -167,12 +165,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) if (tid == vcpu->vcpu_id) return 0; - kvm_for_each_vcpu(i, tcpu, kvm) - if (tcpu->vcpu_id == tid) { - kvm_vcpu_yield_to(tcpu); - break; - } - + tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); + if (tcpu) + kvm_vcpu_yield_to(tcpu); return 0; } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a7559f7207df..d30db40437dc 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -259,10 +259,14 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; + if (vcpu->arch.sie_block->eca & 1) { + int rc; - if (vcpu->arch.sie_block->eca & 1) - return ic->kh != 0; + read_lock(&vcpu->kvm->arch.sca_lock); + rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; + read_unlock(&vcpu->kvm->arch.sca_lock); + return rc; + } return vcpu->kvm->arch.ipte_lock_count != 0; } @@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count++; if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.k) { + if 
(old.k) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } @@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count--; if (vcpu->kvm->arch.ipte_lock_count) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); wake_up(&vcpu->kvm->arch.ipte_wq); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); @@ -311,24 +321,29 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.kg) { + if (old.kg) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; new.kh++; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); } static void ipte_unlock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; + read_lock(&vcpu->kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; @@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) if (!new.kh) new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); if (!new.kh) wake_up(&vcpu->kvm->arch.ipte_wq); } diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 47518a324d75..d697312ce9ee 100644 --- a/arch/s390/kvm/guestdbg.c +++ 
b/arch/s390/kvm/guestdbg.c @@ -116,7 +116,7 @@ static void enable_all_hw_wp(struct kvm_vcpu *vcpu) if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) { *cr9 &= ~PER_CONTROL_ALTERATION; *cr10 = 0; - *cr11 = PSW_ADDR_INSN; + *cr11 = -1UL; } else { *cr9 &= ~PER_CONTROL_ALTERATION; *cr9 |= PER_EVENT_STORE; @@ -159,7 +159,7 @@ void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gcr[0] &= ~0x800ul; vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH; vcpu->arch.sie_block->gcr[10] = 0; - vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN; + vcpu->arch.sie_block->gcr[11] = -1UL; } if (guestdbg_hw_bp_enabled(vcpu)) { diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b4a5aa110cec..d53c10753c46 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -54,9 +54,6 @@ void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { - case 0x0: - vcpu->stat.exit_null++; - break; case 0x10: vcpu->stat.exit_external_request++; break; @@ -338,8 +335,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { + if (kvm_is_ucontrol(vcpu->kvm)) + return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->icptcode) { - case 0x00: case 0x10: case 0x18: return handle_noop(vcpu); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 6a75352f453c..f88ca72c3a77 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -34,6 +34,106 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 +/* handle external calls via sigp interpretation facility */ +static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) +{ + int c, scn; + + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + return 0; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union 
esca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (src_id) + *src_id = scn; + + return c; +} + +static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) +{ + int expect, rc; + + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + + if (rc != expect) { + /* another external call is pending */ + return -EBUSY; + } + atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + return 0; +} + +static void sca_clear_ext_call(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc, expect; + + atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } else { + struct bsca_block *sca = 
vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } + read_unlock(&vcpu->kvm->arch.sca_lock); + WARN_ON(rc != expect); /* cannot clear? */ +} + int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -399,9 +499,9 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); rc = write_guest_lc(vcpu, - offsetof(struct _lowcore, restart_old_psw), + offsetof(struct lowcore, restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw), + rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); clear_bit(IRQ_PEND_RESTART, &li->pending_irqs); return rc ? -EFAULT : 0; @@ -792,13 +892,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; if (!sclp.has_sigpif) return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); - return (sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); + return sca_ext_call_pending(vcpu, NULL); } int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) @@ -909,9 +1007,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); - /* clear pending external calls set by sigp interpretation facility */ - atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; + sca_clear_ext_call(vcpu); } int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) @@ -1003,21 +1099,6 @@ 
static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) -{ - unsigned char new_val, old_val; - uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; - - new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); - old_val = *sigp_ctrl & ~SIGP_CTRL_C; - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { - /* another external call is pending */ - return -EBUSY; - } - atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); - return 0; -} - static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -1034,7 +1115,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return -EINVAL; if (sclp.has_sigpif) - return __inject_extcall_sigpif(vcpu, src_id); + return sca_inject_ext_call(vcpu, src_id); if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; @@ -2203,7 +2284,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) { - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + int scn; unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; unsigned long pending_irqs; @@ -2243,14 +2324,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) } } - if ((sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & - CPUSTAT_ECALL_PEND)) { + if (sca_ext_call_pending(vcpu, &scn)) { if (n + sizeof(irq) > len) return -ENOBUFS; memset(&irq, 0, sizeof(irq)); irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + irq.u.extcall.code = scn; if (copy_to_user(&buf[n], &irq, sizeof(irq))) return -EFAULT; n += sizeof(irq); diff --git 
a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 846589281b04..03dfe9c667f4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -246,7 +246,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = KVM_MAX_VCPUS; + r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS + : KVM_S390_BSCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; @@ -257,6 +258,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VECTOR_REGISTERS: r = MACHINE_HAS_VX; break; + case KVM_CAP_S390_RI: + r = test_facility(64); + break; default: r = 0; } @@ -283,6 +287,8 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, } /* Section: vm related */ +static void sca_del_vcpu(struct kvm_vcpu *vcpu); + /* * Get (and clear) the dirty memory log for a memory slot. */ @@ -355,6 +361,20 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_RI: + r = -EINVAL; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (test_facility(64)) { + set_kvm_facility(kvm->arch.model.fac->mask, 64); + set_kvm_facility(kvm->arch.model.fac->list, 64); + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", + r ? 
"(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -375,8 +395,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", - kvm->arch.gmap->asce_end); - if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) + kvm->arch.mem_limit); + if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) ret = -EFAULT; break; default: @@ -428,9 +448,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (get_user(new_limit, (u64 __user *)attr->addr)) return -EFAULT; - if (new_limit > kvm->arch.gmap->asce_end) + if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && + new_limit > kvm->arch.mem_limit) return -E2BIG; + if (!new_limit) + return -EINVAL; + + /* gmap_alloc takes last usable address */ + if (new_limit != KVM_S390_NO_MEM_LIMIT) + new_limit -= 1; + ret = -EBUSY; mutex_lock(&kvm->lock); if (atomic_read(&kvm->online_vcpus) == 0) { @@ -447,7 +475,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } } mutex_unlock(&kvm->lock); - VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); + VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); + VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + (void *) kvm->arch.gmap->asce); break; } default: @@ -1024,7 +1054,7 @@ static int kvm_s390_apxa_installed(void) u8 config[128]; int cc; - if (test_facility(2) && test_facility(12)) { + if (test_facility(12)) { cc = kvm_s390_query_ap_config(config); if (cc) @@ -1075,6 +1105,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm) return 0; } +static void sca_dispose(struct kvm *kvm) +{ + if (kvm->arch.use_esca) + free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); + else + free_page((unsigned long)(kvm->arch.sca)); + kvm->arch.sca = NULL; +} + int kvm_arch_init_vm(struct 
kvm *kvm, unsigned long type) { int i, rc; @@ -1098,14 +1137,17 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; - kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.use_esca = 0; /* start with basic SCA */ + rwlock_init(&kvm->arch.sca_lock); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); sca_offset += 16; - if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; - kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + kvm->arch.sca = (struct bsca_block *) + ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -1157,8 +1199,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; + kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { - kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); + if (sclp.hamax == U64_MAX) + kvm->arch.mem_limit = TASK_MAX_SIZE; + else + kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, + sclp.hamax + 1); + kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; kvm->arch.gmap->private = kvm; @@ -1170,14 +1218,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); - KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; out_err: kfree(kvm->arch.crypto.crycb); free_page((unsigned long)kvm->arch.model.fac); debug_unregister(kvm->arch.dbf); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); return rc; } @@ -1188,14 +1236,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 
kvm_s390_clear_local_irqs(vcpu); kvm_clear_async_pf_completion_queue(vcpu); - if (!kvm_is_ucontrol(vcpu->kvm)) { - clear_bit(63 - vcpu->vcpu_id, - (unsigned long *) &vcpu->kvm->arch.sca->mcn); - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == - (__u64) vcpu->arch.sie_block) - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; - } - smp_mb(); + if (!kvm_is_ucontrol(vcpu->kvm)) + sca_del_vcpu(vcpu); if (kvm_is_ucontrol(vcpu->kvm)) gmap_free(vcpu->arch.gmap); @@ -1228,14 +1270,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); free_page((unsigned long)kvm->arch.model.fac); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); debug_unregister(kvm->arch.dbf); kfree(kvm->arch.crypto.crycb); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); - KVM_EVENT(3, "vm 0x%p destroyed", kvm); + KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } /* Section: vcpu related */ @@ -1249,6 +1291,117 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +static void sca_del_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + sca->cpu[vcpu->vcpu_id].sda = 0; + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +static void sca_add_vcpu(struct kvm_vcpu *vcpu) +{ + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + vcpu->arch.sie_block->ecb2 |= 0x04U; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + } else 
{ + struct bsca_block *sca = vcpu->kvm->arch.sca; + + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + } + read_unlock(&vcpu->kvm->arch.sca_lock); +} + +/* Basic SCA to Extended SCA data copy routines */ +static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) +{ + d->sda = s->sda; + d->sigp_ctrl.c = s->sigp_ctrl.c; + d->sigp_ctrl.scn = s->sigp_ctrl.scn; +} + +static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) +{ + int i; + + d->ipte_control = s->ipte_control; + d->mcn[0] = s->mcn; + for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) + sca_copy_entry(&d->cpu[i], &s->cpu[i]); +} + +static int sca_switch_to_extended(struct kvm *kvm) +{ + struct bsca_block *old_sca = kvm->arch.sca; + struct esca_block *new_sca; + struct kvm_vcpu *vcpu; + unsigned int vcpu_idx; + u32 scaol, scaoh; + + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + if (!new_sca) + return -ENOMEM; + + scaoh = (u32)((u64)(new_sca) >> 32); + scaol = (u32)(u64)(new_sca) & ~0x3fU; + + kvm_s390_vcpu_block_all(kvm); + write_lock(&kvm->arch.sca_lock); + + sca_copy_b_to_e(new_sca, old_sca); + + kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { + vcpu->arch.sie_block->scaoh = scaoh; + vcpu->arch.sie_block->scaol = scaol; + vcpu->arch.sie_block->ecb2 |= 0x04U; + } + kvm->arch.sca = new_sca; + kvm->arch.use_esca = 1; + + write_unlock(&kvm->arch.sca_lock); + kvm_s390_vcpu_unblock_all(kvm); + + free_page((unsigned long)old_sca); + + VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + old_sca, kvm->arch.sca); + return 0; +} + +static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) +{ + int rc; + + if (id < KVM_S390_BSCA_CPU_SLOTS) + return true; + if (!sclp.has_esca) + return false; + + mutex_lock(&kvm->lock); + rc = kvm->arch.use_esca ? 
0 : sca_switch_to_extended(kvm); + mutex_unlock(&kvm->lock); + + return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1259,6 +1412,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; if (test_kvm_facility(vcpu->kvm, 129)) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; @@ -1268,44 +1423,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -/* - * Backs up the current FP/VX register save area on a particular - * destination. Used to switch between different register save - * areas. - */ -static inline void save_fpu_to(struct fpu *dst) -{ - dst->fpc = current->thread.fpu.fpc; - dst->regs = current->thread.fpu.regs; -} - -/* - * Switches the FP/VX register save area from which to lazy - * restore register contents. - */ -static inline void load_fpu_from(struct fpu *from) -{ - current->thread.fpu.fpc = from->fpc; - current->thread.fpu.regs = from->regs; -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Save host register state */ save_fpu_regs(); - save_fpu_to(&vcpu->arch.host_fpregs); - - if (test_kvm_facility(vcpu->kvm, 129)) { - current->thread.fpu.fpc = vcpu->run->s.regs.fpc; - /* - * Use the register save area in the SIE-control block - * for register restore and save in kvm_arch_vcpu_put() - */ - current->thread.fpu.vxrs = - (__vector128 *)&vcpu->run->s.regs.vrs; - } else - load_fpu_from(&vcpu->arch.guest_fpregs); + vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; + vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; + /* Depending on MACHINE_HAS_VX, data stored to vrs either + * has vector register or floating point register format. 
+ */ + current->thread.fpu.regs = vcpu->run->s.regs.vrs; + current->thread.fpu.fpc = vcpu->run->s.regs.fpc; if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ current->thread.fpu.fpc = 0; @@ -1321,19 +1450,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); + /* Save guest register state */ save_fpu_regs(); + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; - if (test_kvm_facility(vcpu->kvm, 129)) - /* - * kvm_arch_vcpu_load() set up the register save area to - * the &vcpu->run->s.regs.vrs and, thus, the vector registers - * are already saved. Only the floating-point control must be - * copied. - */ - vcpu->run->s.regs.fpc = current->thread.fpu.fpc; - else - save_fpu_to(&vcpu->arch.guest_fpregs); - load_fpu_from(&vcpu->arch.host_fpregs); + /* Restore host register state */ + current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; + current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); @@ -1351,8 +1474,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); vcpu->arch.sie_block->gcr[0] = 0xE0UL; vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; - vcpu->arch.guest_fpregs.fpc = 0; - asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); + /* make sure the new fpc will be lazily loaded */ + save_fpu_regs(); + current->thread.fpu.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1369,8 +1493,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; preempt_enable(); mutex_unlock(&vcpu->kvm->lock); - if (!kvm_is_ucontrol(vcpu->kvm)) + if (!kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = vcpu->kvm->arch.gmap; + sca_add_vcpu(vcpu); + } + } 
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) @@ -1439,10 +1566,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp.has_sigpif) vcpu->arch.sie_block->eca |= 0x10000000U; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->arch.sie_block->ecb3 |= 0x01; if (test_kvm_facility(vcpu->kvm, 129)) { vcpu->arch.sie_block->eca |= 0x00020000; vcpu->arch.sie_block->ecd |= 0x20000000; } + vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (vcpu->kvm->arch.use_cmma) { @@ -1465,7 +1595,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct sie_page *sie_page; int rc = -EINVAL; - if (id >= KVM_MAX_VCPUS) + if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) goto out; rc = -ENOMEM; @@ -1482,42 +1612,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; vcpu->arch.sie_block->icpua = id; - if (!kvm_is_ucontrol(kvm)) { - if (!kvm->arch.sca) { - WARN_ON_ONCE(1); - goto out_free_cpu; - } - if (!kvm->arch.sca->cpu[id].sda) - kvm->arch.sca->cpu[id].sda = - (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = - (__u32)(((__u64)kvm->arch.sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); - } - spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; - /* - * Allocate a save area for floating-point registers. If the vector - * extension is available, register contents are saved in the SIE - * control block. The allocated save area is still required in - * particular places, for example, in kvm_s390_vcpu_store_status(). 
- */ - vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, - GFP_KERNEL); - if (!vcpu->arch.guest_fpregs.fprs) { - rc = -ENOMEM; - goto out_free_sie_block; - } - rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; - VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); @@ -1734,19 +1837,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { + /* make sure the new values will be lazily loaded */ + save_fpu_regs(); if (test_fp_ctl(fpu->fpc)) return -EINVAL; - memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); - vcpu->arch.guest_fpregs.fpc = fpu->fpc; - save_fpu_regs(); - load_fpu_from(&vcpu->arch.guest_fpregs); + current->thread.fpu.fpc = fpu->fpc; + if (MACHINE_HAS_VX) + convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs); + else + memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs)); return 0; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); - fpu->fpc = vcpu->arch.guest_fpregs.fpc; + /* make sure we have the latest values */ + save_fpu_regs(); + if (MACHINE_HAS_VX) + convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs); + else + memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs)); + fpu->fpc = current->thread.fpu.fpc; return 0; } @@ -2013,7 +2124,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) */ kvm_check_async_pf_completion(vcpu); - memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); + vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; + vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; if (need_resched()) schedule(); @@ -2071,8 +2183,6 @@ static int 
vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { - int rc = -1; - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); @@ -2080,40 +2190,36 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (guestdbg_enabled(vcpu)) kvm_s390_restore_guest_per_regs(vcpu); - if (exit_reason >= 0) { - rc = 0; + vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; + vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; + + if (vcpu->arch.sie_block->icptcode > 0) { + int rc = kvm_handle_sie_intercept(vcpu); + + if (rc != -EOPNOTSUPP) + return rc; + vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; + vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + return -EREMOTE; + } else if (exit_reason != -EFAULT) { + vcpu->stat.exit_null++; + return 0; } else if (kvm_is_ucontrol(vcpu->kvm)) { vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; vcpu->run->s390_ucontrol.trans_exc_code = current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; - rc = -EREMOTE; - + return -EREMOTE; } else if (current->thread.gmap_pfault) { trace_kvm_s390_major_guest_pfault(vcpu); current->thread.gmap_pfault = 0; - if (kvm_arch_setup_async_pf(vcpu)) { - rc = 0; - } else { - gpa_t gpa = current->thread.gmap_addr; - rc = kvm_arch_fault_in_page(vcpu, gpa, 1); - } - } - - if (rc == -1) - rc = vcpu_post_run_fault_in_sie(vcpu); - - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); - - if (rc == 0) { - if (kvm_is_ucontrol(vcpu->kvm)) - /* Don't exit for host interrupts. */ - rc = vcpu->arch.sie_block->icptcode ? 
-EOPNOTSUPP : 0; - else - rc = kvm_handle_sie_intercept(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } - - return rc; + return vcpu_post_run_fault_in_sie(vcpu); } static int __vcpu_run(struct kvm_vcpu *vcpu) @@ -2233,18 +2339,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } - if (rc == -EOPNOTSUPP) { - /* intercept cannot be handled in-kernel, prepare kvm-run */ - kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; - kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; - kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; - rc = 0; - } - if (rc == -EREMOTE) { - /* intercept was handled, but userspace support is needed - * kvm_run has been prepared by the handler */ + /* userspace support is needed, kvm_run has been prepared */ rc = 0; } @@ -2266,41 +2362,50 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) { unsigned char archmode = 1; + freg_t fprs[NUM_FPRS]; unsigned int px; u64 clkcomp; int rc; + px = kvm_s390_get_prefix(vcpu); if (gpa == KVM_S390_STORE_STATUS_NOADDR) { if (write_guest_abs(vcpu, 163, &archmode, 1)) return -EFAULT; - gpa = SAVE_AREA_BASE; + gpa = 0; } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { if (write_guest_real(vcpu, 163, &archmode, 1)) return -EFAULT; - gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE); + gpa = px; + } else + gpa -= __LC_FPREGS_SAVE_AREA; + + /* manually convert vector registers if necessary */ + if (MACHINE_HAS_VX) { + convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); + rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, + fprs, 128); + } else { + rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, + vcpu->run->s.regs.vrs, 128); } - rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs), - 
vcpu->arch.guest_fpregs.fprs, 128); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw), + rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, &vcpu->arch.sie_block->gpsw, 16); - px = kvm_s390_get_prefix(vcpu); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, &px, 4); - rc |= write_guest_abs(vcpu, - gpa + offsetof(struct save_area, fp_ctrl_reg), - &vcpu->arch.guest_fpregs.fpc, 4); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, + &vcpu->run->s.regs.fpc, 4); + rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, &vcpu->arch.sie_block->todpr, 4); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer), + rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, &vcpu->arch.sie_block->cputm, 8); clkcomp = vcpu->arch.sie_block->ckc >> 8; - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp), + rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, &clkcomp, 8); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, &vcpu->run->s.regs.acrs, 64); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, &vcpu->arch.sie_block->gcr, 128); return rc ? -EFAULT : 0; } @@ -2313,19 +2418,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) * it into the save area */ save_fpu_regs(); - if (test_kvm_facility(vcpu->kvm, 129)) { - /* - * If the vector extension is available, the vector registers - * which overlaps with floating-point registers are saved in - * the SIE-control block. 
Hence, extract the floating-point - * registers and the FPC value and store them in the - * guest_fpregs structure. - */ - vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc; - convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs, - current->thread.fpu.vxrs); - } else - save_fpu_to(&vcpu->arch.guest_fpregs); + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; save_access_regs(vcpu->run->s.regs.acrs); return kvm_s390_store_status_unloaded(vcpu, addr); @@ -2736,6 +2829,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; + if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) + return -EINVAL; + return 0; } @@ -2767,6 +2863,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, static int __init kvm_s390_init(void) { + if (!sclp.has_sief2) { + pr_info("SIE not available\n"); + return -ENODEV; + } + return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1e70e00d3c5e..df1abada1f36 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -340,4 +340,11 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); +/* support for Basic/Extended SCA handling */ +static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) +{ + struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */ + + return &sca->ipte_control; +} #endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d76b51cb4b62..ed74e86d9b9e 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -355,7 +355,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) * into a u32 memory representation. They will remain bits 0-31. 
*/ fac = *vcpu->kvm->arch.model.fac->list >> 32; - rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), + rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list), &fac, sizeof(fac)); if (rc) return rc; diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index cc1d6c68356f..396485bca191 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at %p, sie block at %p", __entry->id, - __entry->vcpu, __entry->sie_block) + TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + __entry->id, __entry->vcpu, __entry->sie_block) ); TRACE_EVENT(kvm_s390_destroy_vcpu, @@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %p)\n", + TP_printk("enabling channel I/O support (kvm @ %pK)\n", __entry->kvm) ); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 427aa44b3505..d4549c964589 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -37,12 +37,22 @@ static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old) asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock)); } +static inline int cpu_is_preempted(int cpu) +{ + if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) + return 0; + if (smp_vcpu_scheduled(cpu)) + return 0; + return 1; +} + void arch_spin_lock_wait(arch_spinlock_t *lp) { unsigned int cpu = SPINLOCK_LOCKVAL; unsigned int owner; - int count; + int count, first_diag; + first_diag = 1; while (1) { owner = ACCESS_ONCE(lp->lock); /* Try to get the lock if it is free. */ @@ -51,9 +61,10 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) return; continue; } - /* Check if the lock owner is running. */ - if (!smp_vcpu_scheduled(~owner)) { + /* First iteration: check if the lock owner is running. 
*/ + if (first_diag && cpu_is_preempted(~owner)) { smp_yield_cpu(~owner); + first_diag = 0; continue; } /* Loop for a while on the lock value. */ @@ -67,10 +78,13 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) continue; /* * For multiple layers of hypervisors, e.g. z/VM + LPAR - * yield the CPU if the lock is still unavailable. + * yield the CPU unconditionally. For LPAR rely on the + * sense running status. */ - if (!MACHINE_IS_LPAR) + if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) { smp_yield_cpu(~owner); + first_diag = 0; + } } } EXPORT_SYMBOL(arch_spin_lock_wait); @@ -79,9 +93,10 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) { unsigned int cpu = SPINLOCK_LOCKVAL; unsigned int owner; - int count; + int count, first_diag; local_irq_restore(flags); + first_diag = 1; while (1) { owner = ACCESS_ONCE(lp->lock); /* Try to get the lock if it is free. */ @@ -92,8 +107,9 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) local_irq_restore(flags); } /* Check if the lock owner is running. */ - if (!smp_vcpu_scheduled(~owner)) { + if (first_diag && cpu_is_preempted(~owner)) { smp_yield_cpu(~owner); + first_diag = 0; continue; } /* Loop for a while on the lock value. */ @@ -107,10 +123,13 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) continue; /* * For multiple layers of hypervisors, e.g. z/VM + LPAR - * yield the CPU if the lock is still unavailable. + * yield the CPU unconditionally. For LPAR rely on the + * sense running status. 
*/ - if (!MACHINE_IS_LPAR) + if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) { smp_yield_cpu(~owner); + first_diag = 0; + } } } EXPORT_SYMBOL(arch_spin_lock_wait_flags); @@ -145,7 +164,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) owner = 0; while (1) { if (count-- <= 0) { - if (owner && !smp_vcpu_scheduled(~owner)) + if (owner && cpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -191,7 +210,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) owner = 0; while (1) { if (count-- <= 0) { - if (owner && !smp_vcpu_scheduled(~owner)) + if (owner && cpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -221,7 +240,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) owner = 0; while (1) { if (count-- <= 0) { - if (owner && !smp_vcpu_scheduled(~owner)) + if (owner && cpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -265,7 +284,7 @@ void arch_lock_relax(unsigned int cpu) { if (!cpu) return; - if (MACHINE_IS_LPAR && smp_vcpu_scheduled(~cpu)) + if (MACHINE_IS_LPAR && !cpu_is_preempted(~cpu)) return; smp_yield_cpu(~cpu); } diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 4d1ee88864e8..18c8b819b0aa 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -52,12 +52,16 @@ void sort_extable(struct exception_table_entry *start, int i; /* Normalize entries to being relative to the start of the section */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn += i; + p->fixup += i + 4; + } sort(start, finish - start, sizeof(*start), cmp_ex, NULL); /* Denormalize all entries */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn -= i; + p->fixup -= i + 4; + } } #ifdef CONFIG_MODULES diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 18fccc303db7..a1bf4ad8925d 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -94,7 +94,7 @@ 
static DEFINE_MUTEX(dcss_lock); static LIST_HEAD(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; -static int loadshr_scode, loadnsr_scode, findseg_scode; +static int loadshr_scode, loadnsr_scode; static int segext_scode, purgeseg_scode; static int scode_set; @@ -130,7 +130,6 @@ dcss_set_subcodes(void) loadshr_scode = DCSS_LOADSHRX; loadnsr_scode = DCSS_LOADNSRX; purgeseg_scode = DCSS_PURGESEG; - findseg_scode = DCSS_FINDSEGX; segext_scode = DCSS_SEGEXTX; return 0; } @@ -138,7 +137,6 @@ dcss_set_subcodes(void) loadshr_scode = DCSS_LOADNOLY; loadnsr_scode = DCSS_LOADNSR; purgeseg_scode = DCSS_PURGESEG; - findseg_scode = DCSS_FINDSEG; segext_scode = DCSS_SEGEXT; return 0; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ec1a30d0d11a..791a4146052c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -228,7 +228,7 @@ static inline void report_user_fault(struct pt_regs *regs, long signr) return; printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ", regs->int_code & 0xffff, regs->int_code >> 17); - print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); + print_vma_addr(KERN_CONT "in ", regs->psw.addr); printk(KERN_CONT "\n"); printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); @@ -254,12 +254,11 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) static noinline void do_no_context(struct pt_regs *regs) { const struct exception_table_entry *fixup; - unsigned long address; /* Are we prepared to handle this kernel fault? */ - fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); + fixup = search_exception_tables(regs->psw.addr); if (fixup) { - regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; + regs->psw.addr = extable_fixup(fixup); return; } @@ -267,7 +266,6 @@ static noinline void do_no_context(struct pt_regs *regs) * Oops. 
The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - address = regs->int_parm_long & __FAIL_ADDR_MASK; if (!user_space_fault(regs)) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " in virtual kernel address space\n"); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 12bbf0e8478f..13dab0c1645c 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -55,7 +55,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; - struct page *head, *page, *tail; + struct page *head, *page; int refs; result = write ? 0 : _SEGMENT_ENTRY_PROTECT; @@ -67,7 +67,6 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -88,16 +87,6 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, return 0; } - /* - * Any tail page need their mapcount reference taken before we - * return. - */ - while (refs--) { - if (PageTail(tail)) - get_huge_page_tail(tail); - tail++; - } - return 1; } @@ -116,16 +105,7 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, pmd = *pmdp; barrier(); next = pmd_addr_end(addr, end); - /* - * The pmd_trans_splitting() check below explains why - * pmdp_splitting_flush() has to serialize with - * smp_call_function() against our disabled IRQs, to stop - * this gup-fast code from running while we set the - * splitting bit in the pmd. Returning zero will take - * the slow path that will call wait_split_huge_page() - * if the pmd is still in splitting state. 
- */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + if (pmd_none(pmd)) return 0; if (unlikely(pmd_large(pmd))) { /* @@ -233,6 +213,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct mm_struct *mm = current->mm; int nr, ret; + might_sleep(); start &= PAGE_MASK; nr = __get_user_pages_fast(start, nr_pages, write, pages); if (nr == nr_pages) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index c722400c7697..73e290337092 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -98,7 +98,7 @@ void __init paging_init(void) __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); __ctl_load(S390_lowcore.kernel_asce, 13, 13); - arch_local_irq_restore(4UL << (BITS_PER_LONG - 8)); + __arch_local_irq_stosm(0x04); sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 8a993a53fcd6..792f9c63fbca 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -93,15 +93,19 @@ static int __memcpy_real(void *dest, void *src, size_t count) */ int memcpy_real(void *dest, void *src, size_t count) { + int irqs_disabled, rc; unsigned long flags; - int rc; if (!count) return 0; - local_irq_save(flags); - __arch_local_irq_stnsm(0xfbUL); + flags = __arch_local_irq_stnsm(0xf8UL); + irqs_disabled = arch_irqs_disabled_flags(flags); + if (!irqs_disabled) + trace_hardirqs_off(); rc = __memcpy_real(dest, src, count); - local_irq_restore(flags); + if (!irqs_disabled) + trace_hardirqs_on(); + __arch_local_irq_ssm(flags); return rc; } @@ -163,11 +167,11 @@ static int is_swapped(unsigned long addr) unsigned long lc; int cpu; - if (addr < sizeof(struct _lowcore)) + if (addr < sizeof(struct lowcore)) return 1; for_each_online_cpu(cpu) { lc = (unsigned long) lowcore_ptr[cpu]; - if (addr > lc + sizeof(struct _lowcore) - 1 || addr < lc) + if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; return 1; } diff --git 
a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c index e00f0d5d296d..d612cc3eec6a 100644 --- a/arch/s390/mm/mem_detect.c +++ b/arch/s390/mm/mem_detect.c @@ -14,8 +14,6 @@ #include <asm/sclp.h> #include <asm/setup.h> -#define ADDR2G (1ULL << 31) - #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 @@ -27,15 +25,14 @@ static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size) void __init detect_memory_memblock(void) { - unsigned long long memsize, rnmax, rzm; - unsigned long addr, size; + unsigned long memsize, rnmax, rzm, addr, size; int type; rzm = sclp.rzm; rnmax = sclp.rnmax; memsize = rzm * rnmax; if (!rzm) - rzm = 1ULL << 17; + rzm = 1UL << 17; max_physmem_end = memsize; addr = 0; /* keep memblock lists close to the kernel */ diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index ea01477b4aa6..45c4daa49930 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -169,12 +169,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) { - if (is_compat_task() || (TASK_SIZE >= (1UL << 53))) + if (is_compat_task() || TASK_SIZE >= TASK_MAX_SIZE) return 0; if (!(flags & MAP_FIXED)) addr = 0; if ((addr + len) >= TASK_SIZE) - return crst_table_upgrade(current->mm, 1UL << 53); + return crst_table_upgrade(current->mm, TASK_MAX_SIZE); return 0; } @@ -189,9 +189,9 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr, area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); if (!(area & ~PAGE_MASK)) return area; - if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) { + if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { /* Upgrade the page table to 4 levels and retry. 
*/ - rc = crst_table_upgrade(mm, 1UL << 53); + rc = crst_table_upgrade(mm, TASK_MAX_SIZE); if (rc) return (unsigned long) rc; area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); @@ -211,9 +211,9 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags); if (!(area & ~PAGE_MASK)) return area; - if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) { + if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { /* Upgrade the page table to 4 levels and retry. */ - rc = crst_table_upgrade(mm, 1UL << 53); + rc = crst_table_upgrade(mm, TASK_MAX_SIZE); if (rc) return (unsigned long) rc; area = arch_get_unmapped_area_topdown(filp, addr, len, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 54ef3bc01b43..5109827883ac 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -55,7 +55,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) unsigned long entry; int flush; - BUG_ON(limit > (1UL << 53)); + BUG_ON(limit > TASK_MAX_SIZE); flush = 0; repeat: table = crst_table_alloc(mm); @@ -133,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) /** * gmap_alloc - allocate a guest address space * @mm: pointer to the parent mm_struct - * @limit: maximum size of the gmap address space + * @limit: maximum address of the gmap address space * * Returns a guest address space structure. 
*/ @@ -402,7 +402,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || - from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) + from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) return -EINVAL; flush = 0; @@ -578,17 +578,29 @@ int gmap_fault(struct gmap *gmap, unsigned long gaddr, { unsigned long vmaddr; int rc; + bool unlocked; down_read(&gmap->mm->mmap_sem); + +retry: + unlocked = false; vmaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(vmaddr)) { rc = vmaddr; goto out_up; } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) { + if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + &unlocked)) { rc = -EFAULT; goto out_up; } + /* + * In the case that fixup_user_fault unlocked the mmap_sem during + * faultin redo __gmap_translate to not race with a map/unmap_segment. + */ + if (unlocked) + goto retry; + rc = __gmap_link(gmap, gaddr, vmaddr); out_up: up_read(&gmap->mm->mmap_sem); @@ -603,10 +615,7 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) else if (is_migration_entry(entry)) { struct page *page = migration_entry_to_page(entry); - if (PageAnon(page)) - dec_mm_counter(mm, MM_ANONPAGES); - else - dec_mm_counter(mm, MM_FILEPAGES); + dec_mm_counter(mm, mm_counter(page)); } free_swap_and_cache(entry); } @@ -717,12 +726,14 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) spinlock_t *ptl; pte_t *ptep, entry; pgste_t pgste; + bool unlocked; int rc = 0; if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) return -EINVAL; down_read(&gmap->mm->mmap_sem); while (len) { + unlocked = false; /* Convert gmap address and connect the page tables */ addr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(addr)) { @@ -730,10 +741,14 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) break; } /* Get the page mapped */ - if 
(fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) { + if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, + &unlocked)) { rc = -EFAULT; break; } + /* While trying to map mmap_sem got unlocked. Let us retry */ + if (unlocked) + continue; rc = __gmap_link(gmap, gaddr, addr); if (rc) break; @@ -794,9 +809,11 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, spinlock_t *ptl; pgste_t old, new; pte_t *ptep; + bool unlocked; down_read(&mm->mmap_sem); retry: + unlocked = false; ptep = get_locked_pte(mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); @@ -805,7 +822,12 @@ retry: if (!(pte_val(*ptep) & _PAGE_INVALID) && (pte_val(*ptep) & _PAGE_PROTECT)) { pte_unmap_unlock(ptep, ptl); - if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { + /* + * We do not really care about unlocked. We will retry either + * way. But this allows fixup_user_fault to enable userfaultfd. + */ + if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE, + &unlocked)) { up_read(&mm->mmap_sem); return -EFAULT; } @@ -1308,22 +1330,6 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, return 1; } -static void pmdp_splitting_flush_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT, - (unsigned long *) pmdp)) { - /* need to serialize against gup-fast (IRQ disabled) */ - smp_call_function(pmdp_splitting_flush_sync, NULL, 1); - } -} - void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9a0c4c22e536..3c0bfc1f2694 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -408,7 +408,7 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit) * Save registers and create stack frame if necessary. 
* See stack frame layout desription in "bpf_jit.h"! */ -static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) +static void bpf_jit_prologue(struct bpf_jit *jit) { if (jit->seen & SEEN_TAIL_CALL) { /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ @@ -448,15 +448,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_SKBP); - /* Clear A (%b0) and X (%b7) registers for converted BPF programs */ - if (is_classic) { - if (REG_SEEN(BPF_REG_A)) - /* lghi %ba,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_A, 0); - if (REG_SEEN(BPF_REG_X)) - /* lghi %bx,0 */ - EMIT4_IMM(0xa7090000, BPF_REG_X, 0); - } } /* @@ -1245,7 +1236,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) jit->lit = jit->lit_start; jit->prg = 0; - bpf_jit_prologue(jit, bpf_prog_was_classic(fp)); + bpf_jit_prologue(jit); for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 43f32ce60aa3..2794845061c6 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -57,9 +57,7 @@ static __init pg_data_t *alloc_node_data(void) { pg_data_t *res; - res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 1); - if (!res) - panic("Could not allocate memory for node data!\n"); + res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8); memset(res, 0, sizeof(pg_data_t)); return res; } @@ -162,7 +160,7 @@ static int __init numa_init_late(void) register_one_node(nid); return 0; } -device_initcall(numa_init_late); +arch_initcall(numa_init_late); static int __init parse_debug(char *parm) { diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c index 8a6811b2cdb9..1884e1759529 100644 --- a/arch/s390/oprofile/backtrace.c +++ b/arch/s390/oprofile/backtrace.c @@ -16,24 +16,23 @@ __show_trace(unsigned int *depth, unsigned long sp, struct pt_regs *regs; 
while (*depth) { - sp = sp & PSW_ADDR_INSN; if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; (*depth)--; - oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN); + oprofile_add_trace(sf->gprs[8]); /* Follow the backchain. */ while (*depth) { low = sp; - sp = sf->back_chain & PSW_ADDR_INSN; + sp = sf->back_chain; if (!sp) break; if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; (*depth)--; - oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN); + oprofile_add_trace(sf->gprs[8]); } @@ -46,7 +45,7 @@ __show_trace(unsigned int *depth, unsigned long sp, return sp; regs = (struct pt_regs *) sp; (*depth)--; - oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN); + oprofile_add_trace(sf->gprs[8]); low = sp; sp = regs->gprs[15]; } @@ -55,12 +54,13 @@ __show_trace(unsigned int *depth, unsigned long sp, void s390_backtrace(struct pt_regs * const regs, unsigned int depth) { - unsigned long head; + unsigned long head, frame_size; struct stack_frame* head_sf; if (user_mode(regs)) return; + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); head = regs->gprs[15]; head_sf = (struct stack_frame*)head; @@ -69,8 +69,9 @@ void s390_backtrace(struct pt_regs * const regs, unsigned int depth) head = head_sf->back_chain; - head = __show_trace(&depth, head, S390_lowcore.async_stack - ASYNC_SIZE, - S390_lowcore.async_stack); + head = __show_trace(&depth, head, + S390_lowcore.async_stack + frame_size - ASYNC_SIZE, + S390_lowcore.async_stack + frame_size); __show_trace(&depth, head, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 7ef12a3ace3a..8f19c8f9d660 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -68,9 +68,12 @@ static struct airq_struct zpci_airq = { .isc = PCI_ISC, }; -/* I/O Map */ +#define ZPCI_IOMAP_ENTRIES \ + min(((unsigned long) CONFIG_PCI_NR_FUNCTIONS * PCI_BAR_COUNT), \ + ZPCI_IOMAP_MAX_ENTRIES) + static 
DEFINE_SPINLOCK(zpci_iomap_lock); -static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); +static unsigned long *zpci_iomap_bitmap; struct zpci_iomap_entry *zpci_iomap_start; EXPORT_SYMBOL_GPL(zpci_iomap_start); @@ -265,27 +268,20 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev, unsigned long max) { struct zpci_dev *zdev = to_zpci(pdev); - u64 addr; int idx; - if ((bar & 7) != bar) + if (!pci_resource_len(pdev, bar)) return NULL; idx = zdev->bars[bar].map_idx; spin_lock(&zpci_iomap_lock); - if (zpci_iomap_start[idx].count++) { - BUG_ON(zpci_iomap_start[idx].fh != zdev->fh || - zpci_iomap_start[idx].bar != bar); - } else { - zpci_iomap_start[idx].fh = zdev->fh; - zpci_iomap_start[idx].bar = bar; - } /* Detect overrun */ - BUG_ON(!zpci_iomap_start[idx].count); + WARN_ON(!++zpci_iomap_start[idx].count); + zpci_iomap_start[idx].fh = zdev->fh; + zpci_iomap_start[idx].bar = bar; spin_unlock(&zpci_iomap_lock); - addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48); - return (void __iomem *) addr + offset; + return (void __iomem *) ZPCI_ADDR(idx) + offset; } EXPORT_SYMBOL(pci_iomap_range); @@ -297,12 +293,11 @@ EXPORT_SYMBOL(pci_iomap); void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) { - unsigned int idx; + unsigned int idx = ZPCI_IDX(addr); - idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48; spin_lock(&zpci_iomap_lock); /* Detect underrun */ - BUG_ON(!zpci_iomap_start[idx].count); + WARN_ON(!zpci_iomap_start[idx].count); if (!--zpci_iomap_start[idx].count) { zpci_iomap_start[idx].fh = 0; zpci_iomap_start[idx].bar = 0; @@ -544,15 +539,15 @@ static void zpci_irq_exit(void) static int zpci_alloc_iomap(struct zpci_dev *zdev) { - int entry; + unsigned long entry; spin_lock(&zpci_iomap_lock); - entry = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); - if (entry == ZPCI_IOMAP_MAX_ENTRIES) { + entry = find_first_zero_bit(zpci_iomap_bitmap, ZPCI_IOMAP_ENTRIES); + if (entry == ZPCI_IOMAP_ENTRIES) { spin_unlock(&zpci_iomap_lock); return -ENOSPC; } 
- set_bit(entry, zpci_iomap); + set_bit(entry, zpci_iomap_bitmap); spin_unlock(&zpci_iomap_lock); return entry; } @@ -561,7 +556,7 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry) { spin_lock(&zpci_iomap_lock); memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry)); - clear_bit(entry, zpci_iomap); + clear_bit(entry, zpci_iomap_bitmap); spin_unlock(&zpci_iomap_lock); } @@ -611,8 +606,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev, if (zdev->bars[i].val & 4) flags |= IORESOURCE_MEM_64; - addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48); - + addr = ZPCI_ADDR(entry); size = 1UL << zdev->bars[i].size; res = __alloc_res(zdev, addr, size, flags); @@ -701,8 +695,7 @@ static int zpci_restore(struct device *dev) goto out; zpci_map_resources(pdev); - zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, - zdev->start_dma + zdev->iommu_size - 1, + zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); out: @@ -874,23 +867,30 @@ static int zpci_mem_init(void) zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), 16, 0, NULL); if (!zdev_fmb_cache) - goto error_zdev; + goto error_fmb; - /* TODO: use realloc */ - zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start), - GFP_KERNEL); + zpci_iomap_start = kcalloc(ZPCI_IOMAP_ENTRIES, + sizeof(*zpci_iomap_start), GFP_KERNEL); if (!zpci_iomap_start) goto error_iomap; - return 0; + zpci_iomap_bitmap = kcalloc(BITS_TO_LONGS(ZPCI_IOMAP_ENTRIES), + sizeof(*zpci_iomap_bitmap), GFP_KERNEL); + if (!zpci_iomap_bitmap) + goto error_iomap_bitmap; + + return 0; +error_iomap_bitmap: + kfree(zpci_iomap_start); error_iomap: kmem_cache_destroy(zdev_fmb_cache); -error_zdev: +error_fmb: return -ENOMEM; } static void zpci_mem_exit(void) { + kfree(zpci_iomap_bitmap); kfree(zpci_iomap_start); kmem_cache_destroy(zdev_fmb_cache); } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index d348f2c09a1e..4638b93c7632 100644 
--- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -366,8 +366,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, pa = page_to_phys(page); memset((void *) pa, 0, size); - map = s390_dma_map_pages(dev, page, pa % PAGE_SIZE, - size, DMA_BIDIRECTIONAL, NULL); + map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL); if (dma_mapping_error(dev, map)) { free_pages(pa, get_order(size)); return NULL; @@ -458,7 +457,19 @@ int zpci_dma_init_device(struct zpci_dev *zdev) goto out_clean; } - zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; + /* + * Restrict the iommu bitmap size to the minimum of the following: + * - main memory size + * - 3-level pagetable address limit minus start_dma offset + * - DMA address range allowed by the hardware (clp query pci fn) + * + * Also set zdev->end_dma to the actual end address of the usable + * range, instead of the theoretical maximum as reported by hardware. + */ + zdev->iommu_size = min3((u64) high_memory, + ZPCI_TABLE_SIZE_RT - zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); + zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { @@ -466,10 +477,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) goto out_reg; } - rc = zpci_register_ioat(zdev, - 0, - zdev->start_dma + PAGE_OFFSET, - zdev->start_dma + zdev->iommu_size - 1, + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); if (rc) goto out_reg; diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 369a3e05d468..b0e04751c5d5 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -53,6 +53,11 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", pdev ? 
pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); + + if (!pdev) + return; + + pdev->error_state = pci_channel_io_perm_failure; } void zpci_event_error(void *data) diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore new file mode 100644 index 000000000000..72a4b2cf1365 --- /dev/null +++ b/arch/s390/tools/.gitignore @@ -0,0 +1 @@ +gen_facilities diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile new file mode 100644 index 000000000000..6d9814c9df2b --- /dev/null +++ b/arch/s390/tools/Makefile @@ -0,0 +1,15 @@ +# +# Makefile for s390 specific build tools +# + +hostprogs-y += gen_facilities +HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE) + +define filechk_facilities.h + $(obj)/gen_facilities +endef + +$(obj)/gen_facilities.o: $(srctree)/arch/s390/tools/gen_facilities.c + +include/generated/facilities.h: $(obj)/gen_facilities FORCE + $(call filechk,facilities.h) diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c new file mode 100644 index 000000000000..e2660d27889b --- /dev/null +++ b/arch/s390/tools/gen_facilities.c @@ -0,0 +1,67 @@ +/* + * Simple program to generate defines out of facility lists that use the bit + * numbering scheme from the Princples of Operations: most significant bit + * has bit number 0. + * + * Copyright IBM Corp. 
2015 + * + */ + +#define S390_GEN_FACILITIES_C + +#include <strings.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <asm/facilities_src.h> + +static void print_facility_list(struct facility_def *def) +{ + unsigned int high, bit, dword, i; + unsigned long long *array; + + array = calloc(1, 8); + if (!array) + exit(EXIT_FAILURE); + high = 0; + for (i = 0; def->bits[i] != -1; i++) { + bit = 63 - (def->bits[i] & 63); + dword = def->bits[i] / 64; + if (dword > high) { + array = realloc(array, (dword + 1) * 8); + if (!array) + exit(EXIT_FAILURE); + memset(array + high + 1, 0, (dword - high) * 8); + high = dword; + } + array[dword] |= 1ULL << bit; + } + printf("#define %s ", def->name); + for (i = 0; i <= high; i++) + printf("_AC(0x%016llx,UL)%c", array[i], i < high ? ',' : '\n'); + printf("#define %s_DWORDS %d\n", def->name, high + 1); + free(array); +} + +static void print_facility_lists(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(facility_defs) / sizeof(facility_defs[0]); i++) + print_facility_list(&facility_defs[i]); +} + +int main(int argc, char **argv) +{ + printf("#ifndef __ASM_S390_FACILITIES__\n"); + printf("#define __ASM_S390_FACILITIES__\n"); + printf("/*\n"); + printf(" * DO NOT MODIFY.\n"); + printf(" *\n"); + printf(" * This file was generated by %s\n", __FILE__); + printf(" */\n\n"); + printf("#include <linux/const.h>\n\n"); + print_facility_lists(); + printf("\n#endif\n"); + return 0; +} |