diff options
51 files changed, 2436 insertions, 837 deletions
diff --git a/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt b/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt new file mode 100644 index 000000000000..ea22dfe485be --- /dev/null +++ b/Documentation/devicetree/bindings/timer/nuvoton,npcm7xx-timer.txt @@ -0,0 +1,21 @@ +Nuvoton NPCM7xx timer + +Nuvoton NPCM7xx have three timer modules, each timer module provides five 24-bit +timer counters. + +Required properties: +- compatible : "nuvoton,npcm750-timer" for Poleg NPCM750. +- reg : Offset and length of the register set for the device. +- interrupts : Contain the timer interrupt with flags for + falling edge. +- clocks : phandle of timer reference clock (usually a 25 MHz clock). + +Example: + +timer@f0008000 { + compatible = "nuvoton,npcm750-timer"; + interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>; + reg = <0xf0008000 0x50>; + clocks = <&clk NPCM7XX_CLK_TIMER>; +}; + diff --git a/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt b/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt index b4aa7ddb5b13..f82087b220f4 100644 --- a/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt +++ b/Documentation/devicetree/bindings/timer/nxp,tpm-timer.txt @@ -15,7 +15,7 @@ Required properties: - interrupts : Should be the clock event device interrupt. - clocks : The clocks provided by the SoC to drive the timer, must contain an entry for each entry in clock-names. -- clock-names : Must include the following entries: "igp" and "per". +- clock-names : Must include the following entries: "ipg" and "per". Example: tpm5: tpm@40260000 { diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index eafd06ab59ef..e5de34d00b1a 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -23,7 +23,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PA11) += pci-dma.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_64BIT) += binfmt_elf32.o sys_parisc32.o signal32.o +obj-$(CONFIG_64BIT) += sys_parisc32.o signal32.o obj-$(CONFIG_STACKTRACE)+= stacktrace.o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index 9fdff3fe1a42..e63940bb57cd 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -8,3 +8,4 @@ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_NUMA) += numa/ +obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 32a0d5b958bf..199ac3e4da1d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -47,10 +47,6 @@ config PGSTE config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y -config KEXEC - def_bool y - select KEXEC_CORE - config AUDIT_ARCH def_bool y @@ -290,12 +286,12 @@ config MARCH_Z13 older machines. config MARCH_Z14 - bool "IBM z14" + bool "IBM z14 ZR1 and z14" select HAVE_MARCH_Z14_FEATURES help - Select this to enable optimizations for IBM z14 (3906 series). - The kernel will be slightly faster but will not work on older - machines. + Select this to enable optimizations for IBM z14 ZR1 and z14 (3907 + and 3906 series). The kernel will be slightly faster but will not + work on older machines. endchoice @@ -525,6 +521,26 @@ source kernel/Kconfig.preempt source kernel/Kconfig.hz +config KEXEC + def_bool y + select KEXEC_CORE + +config KEXEC_FILE + bool "kexec file based system call" + select KEXEC_CORE + select BUILD_BIN2C + depends on CRYPTO + depends on CRYPTO_SHA256 + depends on CRYPTO_SHA256_S390 + help + Enable the kexec file based system call. In contrast to the normal + kexec system call this system call takes file descriptors for the + kernel and initramfs as arguments. + +config ARCH_HAS_KEXEC_PURGATORY + def_bool y + depends on KEXEC_FILE + config ARCH_RANDOM def_bool y prompt "s390 architectural random number generation API" diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index da9dad35c28e..d1fa37fcce83 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -3,12 +3,6 @@ # Makefile for the linux s390-specific parts of the memory manager. # -COMPILE_VERSION := __linux_compile_version_id__`hostname | \ - tr -c '[0-9A-Za-z]' '_'`__`date | \ - tr -c '[0-9A-Za-z]' '_'`_t - -ccflags-y := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I. - targets := image targets += bzImage subdir- := compressed diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index ae06b9b4c02f..2088cc140629 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,3 +1,4 @@ sizes.h vmlinux vmlinux.lds +vmlinux.bin.full diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/debug_defconfig index 5af8458951cf..6176fe9795ca 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -24,13 +24,13 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y -CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -59,10 +59,11 @@ CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y CONFIG_LIVEPATCH=y CONFIG_TUNE_ZEC12=y -CONFIG_NR_CPUS=256 +CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_PREEMPT=y CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -305,7 +306,6 @@ CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -364,11 +364,11 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_OPENVSWITCH=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -CONFIG_NET_TCPPROBE=m CONFIG_DEVTMPFS=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 @@ -380,9 +380,9 @@ CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_RAM_DAX=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_RBD=m +CONFIG_BLK_DEV_NVME=m CONFIG_ENCLOSURE_SERVICES=m CONFIG_GENWQE=m CONFIG_RAID_ATTRS=m @@ -461,6 +461,7 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m +CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -474,6 +475,9 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m @@ -482,7 +486,9 @@ CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m +CONFIG_VIRTIO_INPUT=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -641,6 +647,8 @@ CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_S390_PTDUMP=y +CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y @@ -649,17 +657,20 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_INTEGRITY_SIGNATURE=y +CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y +CONFIG_IMA_DEFAULT_HASH_SHA256=y +CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -707,9 +718,8 @@ CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m -CONFIG_X509_CERTIFICATE_PARSER=m +CONFIG_PKCS7_MESSAGE_PARSER=y +CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_CRC7=m CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig deleted file mode 100644 index d52eafe57ae8..000000000000 --- a/arch/s390/configs/gcov_defconfig +++ /dev/null @@ -1,661 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_AUDIT=y -CONFIG_NO_HZ_IDLE=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_NUMA_BALANCING=y -# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_BLK_CGROUP=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_NAMESPACES=y -CONFIG_USER_NS=y -CONFIG_SCHED_AUTOGROUP=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_BPF_SYSCALL=y -CONFIG_USERFAULTFD=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_OPROFILE=m -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -CONFIG_GCOV_KERNEL=y -CONFIG_GCOV_PROFILE_ALL=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_WBT=y -CONFIG_BLK_WBT_SQ=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_IBM_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_CFQ_GROUP_IOSCHED=y -CONFIG_DEFAULT_DEADLINE=y -CONFIG_LIVEPATCH=y -CONFIG_TUNE_ZEC12=y -CONFIG_NR_CPUS=512 -CONFIG_NUMA=y -CONFIG_HZ_100=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_KSM=y -CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -CONFIG_ZBUD=m -CONFIG_ZSMALLOC=m -CONFIG_ZSMALLOC_STAT=y -CONFIG_DEFERRED_STRUCT_PAGE_INIT=y -CONFIG_IDLE_PAGE_TRACKING=y -CONFIG_PCI=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_S390=y -CONFIG_CHSC_SCH=y -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=m -CONFIG_HIBERNATION=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_PACKET_DIAG=m -CONFIG_UNIX=y -CONFIG_UNIX_DIAG=m -CONFIG_XFRM_USER=m -CONFIG_NET_KEY=m -CONFIG_SMC=m -CONFIG_SMC_DIAG=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_NET_IPVTI=m -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m -CONFIG_INET_DIAG=m -CONFIG_INET_UDP_DIAG=m -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_NETFILTER=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_TABLES=m -CONFIG_NFT_EXTHDR=m -CONFIG_NFT_META=m -CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_NAT=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NETFILTER_XT_SET=m -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_IP_SET=m -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -CONFIG_IP_VS=m -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m -CONFIG_IP_VS_FTP=m -CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NET_SCTPPROBE=m -CONFIG_RDS=m -CONFIG_RDS_RDMA=m -CONFIG_RDS_TCP=m -CONFIG_L2TP=m -CONFIG_L2TP_DEBUGFS=m -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m -CONFIG_VLAN_8021Q_GVRP=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=y -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_DNS_RESOLVER=y -CONFIG_NETLINK_DIAG=m -CONFIG_CGROUP_NET_PRIO=y -CONFIG_BPF_JIT=y -CONFIG_NET_PKTGEN=m -CONFIG_NET_TCPPROBE=m -CONFIG_DEVTMPFS=y -CONFIG_DMA_CMA=y -CONFIG_CMA_SIZE_MBYTES=0 -CONFIG_CONNECTOR=y -CONFIG_ZRAM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_DRBD=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_RAM_DAX=y -CONFIG_VIRTIO_BLK=y -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_GENWQE=m -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=y -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SRP_ATTRS=m -CONFIG_ISCSI_TCP=m -CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=y -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -CONFIG_SCSI_OSD_INITIATOR=m -CONFIG_SCSI_OSD_ULD=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_BLK_DEV_DM=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_RAID=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_DELAY=m -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=m -CONFIG_DM_VERITY=m -CONFIG_DM_SWITCH=m -CONFIG_NETDEVICES=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_EQUALIZER=m -CONFIG_IFB=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_VXLAN=m -CONFIG_TUN=m -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -# CONFIG_NET_VENDOR_ARC is not set -# CONFIG_NET_VENDOR_CHELSIO is not set -# CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MARVELL is not set -CONFIG_MLX4_EN=m -CONFIG_MLX5_CORE=m -CONFIG_MLX5_CORE_EN=y -# CONFIG_NET_VENDOR_NATSEMI is not set -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_MPPE=m -CONFIG_PPPOE=m -CONFIG_PPTP=m -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_RAW_DRIVER=m -CONFIG_HANGCHECK_TIMER=m -CONFIG_TN3270_FS=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_SOFT_WATCHDOG=m -CONFIG_DIAG288_WATCHDOG=m -# CONFIG_HID is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_MLX5_INFINIBAND=m -CONFIG_VFIO=m -CONFIG_VFIO_PCI=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=y -CONFIG_JBD2_DEBUG=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=y -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_BTRFS_FS=y -CONFIG_BTRFS_FS_POSIX_ACL=y -CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FANOTIFY=y -CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=y -CONFIG_CUSE=m -CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y -CONFIG_FSCACHE=m -CONFIG_CACHEFILES=m -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_NTFS_FS=m -CONFIG_NTFS_RW=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_HUGETLBFS=y -CONFIG_CONFIGFS_FS=m -CONFIG_ECRYPT_FS=m -CONFIG_CRAMFS=m -CONFIG_SQUASHFS=m -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_ROMFS_FS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -CONFIG_NFS_SWAP=y -CONFIG_NFSD=m -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_V4_SECURITY_LABEL=y -CONFIG_CIFS=m -CONFIG_CIFS_STATS=y -CONFIG_CIFS_STATS2=y -CONFIG_CIFS_WEAK_PW_HASH=y -CONFIG_CIFS_UPCALL=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -# CONFIG_CIFS_DEBUG is not set -CONFIG_CIFS_DFS_UPCALL=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_GDB_SCRIPTS=y -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_MEMORY_INIT=y -CONFIG_PANIC_ON_OOPS=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_HIST_TRIGGERS=y -CONFIG_LKDTM=m -CONFIG_PERCPU_TEST=m -CONFIG_ATOMIC64_SELFTEST=y -CONFIG_TEST_BPF=m -CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_S390_PTDUMP=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y -CONFIG_ENCRYPTED_KEYS=m -CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 -CONFIG_SECURITY_SELINUX_DISABLE=y -CONFIG_INTEGRITY_SIGNATURE=y -CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y -CONFIG_IMA=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_APPRAISE=y -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_MCRYPTD=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=m -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_ZCRYPT=m -CONFIG_PKEY=m -CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRC7=m -CONFIG_CRC8=m -CONFIG_CORDIC=m -CONFIG_CMM=m -CONFIG_APPLDATA_BASE=y -CONFIG_KVM=m -CONFIG_KVM_S390_UCONTROL=y -CONFIG_VHOST_NET=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 20ed149e1137..c105bcc6d7a6 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -25,13 +25,13 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y -CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -45,6 +45,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y @@ -62,6 +64,7 @@ CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -301,7 +304,6 @@ CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_NF_TABLES_BRIDGE=m -CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -359,11 +361,11 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_DNS_RESOLVER=y +CONFIG_OPENVSWITCH=m CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -CONFIG_NET_TCPPROBE=m CONFIG_DEVTMPFS=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 @@ -375,8 +377,9 @@ CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_RAM_DAX=y CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_RBD=m +CONFIG_BLK_DEV_NVME=m CONFIG_ENCLOSURE_SERVICES=m CONFIG_GENWQE=m CONFIG_RAID_ATTRS=m @@ -455,6 +458,7 @@ CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m +CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -468,6 +472,9 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m @@ -476,7 +483,9 @@ CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m CONFIG_VFIO=m CONFIG_VFIO_PCI=m +CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m +CONFIG_VIRTIO_INPUT=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -507,7 +516,6 @@ CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_OVERLAY_FS=m -CONFIG_OVERLAY_FS_REDIRECT_DIR=y CONFIG_FSCACHE=m CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y @@ -592,8 +600,10 @@ CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y CONFIG_IMA=y +CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_USER=m diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 46a3178d8bc6..f40600eb1762 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -8,6 +8,7 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y +# CONFIG_CPU_ISOLATION is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y @@ -23,12 +24,12 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y -CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set @@ -47,6 +48,7 @@ CONFIG_LIVEPATCH=y CONFIG_NR_CPUS=256 CONFIG_NUMA=y CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y @@ -129,10 +131,13 @@ CONFIG_EQUALIZER=m CONFIG_TUN=m CONFIG_VIRTIO_NET=y # CONFIG_NET_VENDOR_ALACRITECH is not set +# CONFIG_NET_VENDOR_CORTINA is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set +# CONFIG_NET_VENDOR_SOCIONEXT is not set # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set +# CONFIG_VT is not set CONFIG_DEVKMEM=y CONFIG_RAW_DRIVER=m CONFIG_VIRTIO_BALLOON=y @@ -177,13 +182,15 @@ CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_FUNCTION_PROFILER=y -CONFIG_KPROBES_SANITY_TEST=y +# CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_S390_PTDUMP=y CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -213,6 +220,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 1d708a419326..825dd0f7f221 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -46,4 +46,27 @@ static inline void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) { } +struct kimage; +struct s390_load_data { + /* Pointer to the kernel buffer. Used to register cmdline etc.. */ + void *kernel_buf; + + /* Total size of loaded segments in memory. Used as an offset. */ + size_t memsz; + + /* Load address of initrd. Used to register INITRD_START in kernel. */ + unsigned long initrd_load_addr; +}; + +int kexec_file_add_purgatory(struct kimage *image, + struct s390_load_data *data); +int kexec_file_add_initrd(struct kimage *image, + struct s390_load_data *data, + char *initrd, unsigned long initrd_len); +int *kexec_file_update_kernel(struct kimage *iamge, + struct s390_load_data *data); + +extern const struct kexec_file_ops s390_kexec_image_ops; +extern const struct kexec_file_ops s390_kexec_elf_ops; + #endif /*_S390_KEXEC_H */ diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h new file mode 100644 index 000000000000..e297bcfc476f --- /dev/null +++ b/arch/s390/include/asm/purgatory.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#ifndef _S390_PURGATORY_H_ +#define _S390_PURGATORY_H_ +#ifndef __ASSEMBLY__ + +#include <linux/purgatory.h> + +int verify_sha256_digest(void); + +#endif /* __ASSEMBLY__ */ +#endif /* _S390_PURGATORY_H_ */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 124154fdfc97..9c30ebe046f3 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * S390 version - * Copyright IBM Corp. 1999, 2010 + * Copyright IBM Corp. 1999, 2017 */ #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H @@ -37,17 +37,31 @@ #define LPP_MAGIC _BITUL(31) #define LPP_PID_MASK _AC(0xffffffff, UL) +/* Offsets to entry points in kernel/head.S */ + +#define STARTUP_NORMAL_OFFSET 0x10000 +#define STARTUP_KDUMP_OFFSET 0x10010 + +/* Offsets to parameters in kernel/head.S */ + +#define IPL_DEVICE_OFFSET 0x10400 +#define INITRD_START_OFFSET 0x10408 +#define INITRD_SIZE_OFFSET 0x10410 +#define OLDMEM_BASE_OFFSET 0x10418 +#define OLDMEM_SIZE_OFFSET 0x10420 +#define COMMAND_LINE_OFFSET 0x10480 + #ifndef __ASSEMBLY__ #include <asm/lowcore.h> #include <asm/types.h> -#define IPL_DEVICE (*(unsigned long *) (0x10400)) -#define INITRD_START (*(unsigned long *) (0x10408)) -#define INITRD_SIZE (*(unsigned long *) (0x10410)) -#define OLDMEM_BASE (*(unsigned long *) (0x10418)) -#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) -#define COMMAND_LINE ((char *) (0x10480)) +#define IPL_DEVICE (*(unsigned long *) (IPL_DEVICE_OFFSET)) +#define INITRD_START (*(unsigned long *) (INITRD_START_OFFSET)) +#define INITRD_SIZE (*(unsigned long *) (INITRD_SIZE_OFFSET)) +#define OLDMEM_BASE (*(unsigned long *) (OLDMEM_BASE_OFFSET)) +#define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET)) +#define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET)) extern int memory_end_set; extern unsigned long memory_end; @@ -121,12 +135,12 @@ extern void (*_machine_power_off)(void); #else /* __ASSEMBLY__ */ -#define IPL_DEVICE 0x10400 -#define INITRD_START 0x10408 -#define INITRD_SIZE 0x10410 -#define OLDMEM_BASE 0x10418 -#define OLDMEM_SIZE 0x10420 -#define COMMAND_LINE 0x10480 +#define IPL_DEVICE (IPL_DEVICE_OFFSET) +#define INITRD_START (INITRD_START_OFFSET) +#define INITRD_SIZE (INITRD_SIZE_OFFSET) +#define OLDMEM_BASE (OLDMEM_BASE_OFFSET) +#define OLDMEM_SIZE (OLDMEM_SIZE_OFFSET) +#define COMMAND_LINE (COMMAND_LINE_OFFSET) #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_SETUP_H */ diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h index c57f9d28d894..9a14a611ed82 100644 --- a/arch/s390/include/uapi/asm/signal.h +++ b/arch/s390/include/uapi/asm/signal.h @@ -97,22 +97,31 @@ typedef unsigned long sigset_t; #include <asm-generic/signal-defs.h> #ifndef __KERNEL__ -/* Here we must cater to libcs that poke about in kernel headers. */ +/* + * There are two system calls in regard to sigaction, sys_rt_sigaction + * and sys_sigaction. Internally the kernel uses the struct old_sigaction + * for the older sys_sigaction system call, and the kernel version of the + * struct sigaction for the newer sys_rt_sigaction. + * + * The uapi definition for struct sigaction has made a strange distinction + * between 31-bit and 64-bit in the past. For 64-bit the uapi structure + * looks like the kernel struct sigaction, but for 31-bit it used to + * look like the kernel struct old_sigaction. That practically made the + * structure unusable for either system call. To get around this problem + * the glibc always had its own definitions for the sigaction structures. + * + * The current struct sigaction uapi definition below is suitable for the + * sys_rt_sigaction system call only. + */ struct sigaction { union { __sighandler_t _sa_handler; void (*_sa_sigaction)(int, struct siginfo *, void *); } _u; -#ifndef __s390x__ /* lovely */ - sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -#else /* __s390x__ */ unsigned long sa_flags; void (*sa_restorer)(void); sigset_t sa_mask; -#endif /* __s390x__ */ }; #define sa_handler _u._sa_handler diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index b06a6f79c1ec..84ea6225efb4 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -82,6 +82,9 @@ obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o +obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o + obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index cfe2c45c5180..eb2a5c0443cd 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -10,6 +10,7 @@ #include <linux/kbuild.h> #include <linux/kvm_host.h> #include <linux/sched.h> +#include <linux/purgatory.h> #include <asm/idle.h> #include <asm/vdso.h> #include <asm/pgtable.h> @@ -204,5 +205,9 @@ int main(void) OFFSET(__GMAP_ASCE, gmap, asce); OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c); OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20); + /* kexec_sha_region */ + OFFSET(__KEXEC_SHA_REGION_START, kexec_sha_region, start); + OFFSET(__KEXEC_SHA_REGION_LEN, kexec_sha_region, len); + DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region)); return 0; } diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 11e9d8b5c1b0..607c5e9fba3d 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -182,3 +182,4 @@ COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags); +COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c new file mode 100644 index 000000000000..5a286b012043 --- /dev/null +++ b/arch/s390/kernel/kexec_elf.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ELF loader for kexec_file_load system call. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <asm/setup.h> + +static int kexec_file_add_elf_kernel(struct kimage *image, + struct s390_load_data *data, + char *kernel, unsigned long kernel_len) +{ + struct kexec_buf buf; + const Elf_Ehdr *ehdr; + const Elf_Phdr *phdr; + int i, ret; + + ehdr = (Elf_Ehdr *)kernel; + buf.image = image; + + phdr = (void *)ehdr + ehdr->e_phoff; + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type != PT_LOAD) + continue; + + buf.buffer = kernel + phdr->p_offset; + buf.bufsz = phdr->p_filesz; + + buf.mem = ALIGN(phdr->p_paddr, phdr->p_align); + buf.memsz = phdr->p_memsz; + + if (phdr->p_paddr == 0) { + data->kernel_buf = buf.buffer; + data->memsz += STARTUP_NORMAL_OFFSET; + + buf.buffer += STARTUP_NORMAL_OFFSET; + buf.bufsz -= STARTUP_NORMAL_OFFSET; + + buf.mem += STARTUP_NORMAL_OFFSET; + buf.memsz -= STARTUP_NORMAL_OFFSET; + } + + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + + ret = kexec_add_buffer(&buf); + if (ret) + return ret; + + data->memsz += buf.memsz; + } + + return 0; +} + +static void *s390_elf_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + struct s390_load_data data = {0}; + const Elf_Ehdr *ehdr; + const Elf_Phdr *phdr; + size_t size; + int i, ret; + + /* image->fobs->probe already checked for valid ELF magic number. */ + ehdr = (Elf_Ehdr *)kernel; + + if (ehdr->e_type != ET_EXEC || + ehdr->e_ident[EI_CLASS] != ELFCLASS64 || + !elf_check_arch(ehdr)) + return ERR_PTR(-EINVAL); + + if (!ehdr->e_phnum || ehdr->e_phentsize != sizeof(Elf_Phdr)) + return ERR_PTR(-EINVAL); + + size = ehdr->e_ehsize + ehdr->e_phoff; + size += ehdr->e_phentsize * ehdr->e_phnum; + if (size > kernel_len) + return ERR_PTR(-EINVAL); + + phdr = (void *)ehdr + ehdr->e_phoff; + size = ALIGN(size, phdr->p_align); + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type == PT_INTERP) + return ERR_PTR(-EINVAL); + + if (phdr->p_offset > kernel_len) + return ERR_PTR(-EINVAL); + + size += ALIGN(phdr->p_filesz, phdr->p_align); + } + + if (size > kernel_len) + return ERR_PTR(-EINVAL); + + ret = kexec_file_add_elf_kernel(image, &data, kernel, kernel_len); + if (ret) + return ERR_PTR(ret); + + if (!data.memsz) + return ERR_PTR(-EINVAL); + + if (initrd) { + ret = kexec_file_add_initrd(image, &data, initrd, initrd_len); + if (ret) + return ERR_PTR(ret); + } + + ret = kexec_file_add_purgatory(image, &data); + if (ret) + return ERR_PTR(ret); + + return kexec_file_update_kernel(image, &data); +} + +static int s390_elf_probe(const char *buf, unsigned long len) +{ + const Elf_Ehdr *ehdr; + + if (len < sizeof(Elf_Ehdr)) + return -ENOEXEC; + + ehdr = (Elf_Ehdr *)buf; + + /* Only check the ELF magic number here and do proper validity check + * in the loader. Any check here that fails would send the erroneous + * ELF file to the image loader that does not care what it gets. + * (Most likely) causing behavior not intended by the user. + */ + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) + return -ENOEXEC; + + return 0; +} + +const struct kexec_file_ops s390_kexec_elf_ops = { + .probe = s390_elf_probe, + .load = s390_elf_load, +}; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c new file mode 100644 index 000000000000..3800852595e8 --- /dev/null +++ b/arch/s390/kernel/kexec_image.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Image loader for kexec_file_load system call. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <asm/setup.h> + +static int kexec_file_add_image_kernel(struct kimage *image, + struct s390_load_data *data, + char *kernel, unsigned long kernel_len) +{ + struct kexec_buf buf; + int ret; + + buf.image = image; + + buf.buffer = kernel + STARTUP_NORMAL_OFFSET; + buf.bufsz = kernel_len - STARTUP_NORMAL_OFFSET; + + buf.mem = STARTUP_NORMAL_OFFSET; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + buf.memsz = buf.bufsz; + + ret = kexec_add_buffer(&buf); + + data->kernel_buf = kernel; + data->memsz += buf.memsz + STARTUP_NORMAL_OFFSET; + + return ret; +} + +static void *s390_image_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + struct s390_load_data data = {0}; + int ret; + + ret = kexec_file_add_image_kernel(image, &data, kernel, kernel_len); + if (ret) + return ERR_PTR(ret); + + if (initrd) { + ret = kexec_file_add_initrd(image, &data, initrd, initrd_len); + if (ret) + return ERR_PTR(ret); + } + + ret = kexec_file_add_purgatory(image, &data); + if (ret) + return ERR_PTR(ret); + + return kexec_file_update_kernel(image, &data); +} + +static int s390_image_probe(const char *buf, unsigned long len) +{ + /* Can't reliably tell if an image is valid. Therefore give the + * user whatever he wants. + */ + return 0; +} + +const struct kexec_file_ops s390_kexec_image_ops = { + .probe = s390_image_probe, + .load = s390_image_load, +}; diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c new file mode 100644 index 000000000000..f413f57f8d20 --- /dev/null +++ b/arch/s390/kernel/machine_kexec_file.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * s390 code for kexec_file_load system call + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#include <linux/elf.h> +#include <linux/kexec.h> +#include <asm/setup.h> + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &s390_kexec_elf_ops, + &s390_kexec_image_ops, + NULL, +}; + +int *kexec_file_update_kernel(struct kimage *image, + struct s390_load_data *data) +{ + unsigned long *loc; + + if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) + return ERR_PTR(-EINVAL); + + if (image->cmdline_buf_len) + memcpy(data->kernel_buf + COMMAND_LINE_OFFSET, + image->cmdline_buf, image->cmdline_buf_len); + + if (image->type == KEXEC_TYPE_CRASH) { + loc = (unsigned long *)(data->kernel_buf + OLDMEM_BASE_OFFSET); + *loc = crashk_res.start; + + loc = (unsigned long *)(data->kernel_buf + OLDMEM_SIZE_OFFSET); + *loc = crashk_res.end - crashk_res.start + 1; + } + + if (image->initrd_buf) { + loc = (unsigned long *)(data->kernel_buf + INITRD_START_OFFSET); + *loc = data->initrd_load_addr; + + loc = (unsigned long *)(data->kernel_buf + INITRD_SIZE_OFFSET); + *loc = image->initrd_buf_len; + } + + return NULL; +} + +static int kexec_file_update_purgatory(struct kimage *image) +{ + u64 entry, type; + int ret; + + if (image->type == KEXEC_TYPE_CRASH) { + entry = STARTUP_KDUMP_OFFSET; + type = KEXEC_TYPE_CRASH; + } else { + entry = STARTUP_NORMAL_OFFSET; + type = KEXEC_TYPE_DEFAULT; + } + + ret = kexec_purgatory_get_set_symbol(image, "kernel_entry", &entry, + sizeof(entry), false); + if (ret) + return ret; + + ret = kexec_purgatory_get_set_symbol(image, "kernel_type", &type, + sizeof(type), false); + if (ret) + return ret; + + if (image->type == KEXEC_TYPE_CRASH) { + u64 crash_size; + + ret = kexec_purgatory_get_set_symbol(image, "crash_start", + &crashk_res.start, + sizeof(crashk_res.start), + false); + if (ret) + return ret; + + crash_size = crashk_res.end - crashk_res.start + 1; + ret = kexec_purgatory_get_set_symbol(image, "crash_size", + &crash_size, + sizeof(crash_size), + false); + } + return ret; +} + +int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data) +{ + struct kexec_buf buf; + int ret; + + buf.image = image; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + + ret = kexec_load_purgatory(image, &buf); + if (ret) + return ret; + + ret = kexec_file_update_purgatory(image); + return ret; +} + +int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data, + char *initrd, unsigned long initrd_len) +{ + struct kexec_buf buf; + int ret; + + buf.image = image; + + buf.buffer = initrd; + buf.bufsz = initrd_len; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + buf.memsz = buf.bufsz; + + data->initrd_load_addr = buf.mem; + data->memsz += buf.memsz; + + ret = kexec_add_buffer(&buf); + return ret; +} + +/* + * The kernel is loaded to a fixed location. Turn off kexec_locate_mem_hole + * and provide kbuf->mem by hand. + */ +int arch_kexec_walk_mem(struct kexec_buf *kbuf, + int (*func)(struct resource *, void *)) +{ + return 1; +} + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + Elf_Rela *relas; + int i; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_shndx == SHN_UNDEF) + return -ENOEXEC; + + if (sym->st_shndx == SHN_COMMON) + return -ENOEXEC; + + if (sym->st_shndx >= pi->ehdr->e_shnum && + sym->st_shndx != SHN_ABS) + return -ENOEXEC; + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + val = sym->st_value; + if (sym->st_shndx != SHN_ABS) + val += pi->sechdrs[sym->st_shndx].sh_addr; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + switch (ELF64_R_TYPE(relas[i].r_info)) { + case R_390_8: /* Direct 8 bit. */ + *(u8 *)loc = val; + break; + case R_390_12: /* Direct 12 bit. */ + *(u16 *)loc &= 0xf000; + *(u16 *)loc |= val & 0xfff; + break; + case R_390_16: /* Direct 16 bit. */ + *(u16 *)loc = val; + break; + case R_390_20: /* Direct 20 bit. */ + *(u32 *)loc &= 0xf00000ff; + *(u32 *)loc |= (val & 0xfff) << 16; /* DL */ + *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */ + break; + case R_390_32: /* Direct 32 bit. */ + *(u32 *)loc = val; + break; + case R_390_64: /* Direct 64 bit. */ + *(u64 *)loc = val; + break; + case R_390_PC16: /* PC relative 16 bit. */ + *(u16 *)loc = (val - addr); + break; + case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ + *(u16 *)loc = (val - addr) >> 1; + break; + case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ + *(u32 *)loc = (val - addr) >> 1; + break; + case R_390_PC32: /* PC relative 32 bit. */ + *(u32 *)loc = (val - addr); + break; + case R_390_PC64: /* PC relative 64 bit. */ + *(u64 *)loc = (val - addr); + break; + default: + break; + } + } + return 0; +} + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + /* A kernel must be at least large enough to contain head.S. During + * load memory in head.S will be accessed, e.g. to register the next + * command line. If the next kernel were smaller the current kernel + * will panic at load. + * + * 0x11000 = sizeof(head.S) + */ + if (buf_len < 0x11000) + return -ENOEXEC; + + return kexec_image_probe_default(image, buf, buf_len); +} diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index f236ce8757e8..46d49a11663f 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/module.h> #include <linux/device.h> +#include <linux/cpu.h> #include <asm/nospec-branch.h> static int __init nobp_setup_early(char *str) diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index c5bc3f209652..5ee27dc9a10c 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -583,6 +583,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void) model = cpumcf_z13_pmu_event_attr; break; case 0x3906: + case 0x3907: model = cpumcf_z14_pmu_event_attr; break; default: diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index fc3b4aa185cc..d82a9ec64ea9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -821,6 +821,7 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; case 0x3906: + case 0x3907: strcpy(elf_platform, "z14"); break; } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index b38d48464368..8b210ead7956 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -388,3 +388,4 @@ 378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage 379 common statx sys_statx compat_sys_statx 380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore new file mode 100644 index 000000000000..e9e66f178a6d --- /dev/null +++ b/arch/s390/purgatory/.gitignore @@ -0,0 +1,2 @@ +kexec-purgatory.c +purgatory.ro diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile new file mode 100644 index 000000000000..e9525bc1b4a6 --- /dev/null +++ b/arch/s390/purgatory/Makefile @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: GPL-2.0 + +OBJECT_FILES_NON_STANDARD := y + +purgatory-y := head.o purgatory.o string.o sha256.o mem.o + +targets += $(purgatory-y) purgatory.ro kexec-purgatory.c +PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) + +$(obj)/sha256.o: $(srctree)/lib/sha256.c + $(call if_changed_rule,cc_o_c) + +$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S + $(call if_changed_rule,as_o_S) + +$(obj)/string.o: $(srctree)/arch/s390/lib/string.c + $(call if_changed_rule,cc_o_c) + +LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib +LDFLAGS_purgatory.ro += -z nodefaultlib +KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes +KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare +KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding +KBUILD_CFLAGS += -c -MD -Os -m64 +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) + +$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) + +CMD_BIN2C = $(objtree)/scripts/basic/bin2c +quiet_cmd_bin2c = BIN2C $@ + cmd_bin2c = $(CMD_BIN2C) kexec_purgatory < $< > $@ + +$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE + $(call if_changed,bin2c) + +obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S new file mode 100644 index 000000000000..660c96a05a9b --- /dev/null +++ b/arch/s390/purgatory/head.S @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Purgatory setup code + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/sigp.h> + +/* The purgatory is the code running between two kernels. It's main purpose + * is to verify that the next kernel was not corrupted after load and to + * start it. + * + * If the next kernel is a crash kernel there are some peculiarities to + * consider: + * + * First the purgatory is called twice. Once only to verify the + * sha digest. So if the crash kernel got corrupted the old kernel can try + * to trigger a stand-alone dumper. And once to actually load the crash kernel. + * + * Second the purgatory also has to swap the crash memory region with its + * destination at address 0. As the purgatory is part of crash memory this + * requires some finesse. The tactic here is that the purgatory first copies + * itself to the end of the destination and then swaps the rest of the + * memory running from there. + */ + +#define bufsz purgatory_end-stack + +.macro MEMCPY dst,src,len + lgr %r0,\dst + lgr %r1,\len + lgr %r2,\src + lgr %r3,\len + +20: mvcle %r0,%r2,0 + jo 20b +.endm + +.macro MEMSWAP dst,src,buf,len +10: cghi \len,bufsz + jh 11f + lgr %r4,\len + j 12f +11: lghi %r4,bufsz + +12: MEMCPY \buf,\dst,%r4 + MEMCPY \dst,\src,%r4 + MEMCPY \src,\buf,%r4 + + agr \dst,%r4 + agr \src,%r4 + sgr \len,%r4 + + cghi \len,0 + jh 10b +.endm + +.macro START_NEXT_KERNEL base + lg %r4,kernel_entry-\base(%r13) + lg %r5,load_psw_mask-\base(%r13) + ogr %r4,%r5 + stg %r4,0(%r0) + + xgr %r0,%r0 + diag %r0,%r0,0x308 +.endm + +.text +.align PAGE_SIZE +ENTRY(purgatory_start) + /* The purgatory might be called after a diag308 so better set + * architecture and addressing mode. + */ + lhi %r1,1 + sigp %r1,%r0,SIGP_SET_ARCHITECTURE + sam64 + + larl %r5,gprregs + stmg %r6,%r15,0(%r5) + + basr %r13,0 +.base_crash: + + /* Setup stack */ + larl %r15,purgatory_end + aghi %r15,-160 + + /* If the next kernel is KEXEC_TYPE_CRASH the purgatory is called + * directly with a flag passed in %r2 whether the purgatory shall do + * checksum verification only (%r2 = 0 -> verification only). + * + * Check now and preserve over C function call by storing in + * %r10 whith + * 1 -> checksum verification only + * 0 -> load new kernel + */ + lghi %r10,0 + lg %r11,kernel_type-.base_crash(%r13) + cghi %r11,1 /* KEXEC_TYPE_CRASH */ + jne .do_checksum_verification + cghi %r2,0 /* checksum verification only */ + jne .do_checksum_verification + lghi %r10,1 + +.do_checksum_verification: + brasl %r14,verify_sha256_digest + + cghi %r10,1 /* checksum verification only */ + je .return_old_kernel + cghi %r2,0 /* checksum match */ + jne .disabled_wait + + /* If the next kernel is a crash kernel the purgatory has to swap + * the mem regions first. + */ + cghi %r11,1 /* KEXEC_TYPE_CRASH */ + je .start_crash_kernel + + /* start normal kernel */ + START_NEXT_KERNEL .base_crash + +.return_old_kernel: + lmg %r6,%r15,gprregs-.base_crash(%r13) + br %r14 + +.disabled_wait: + lpswe disabled_wait_psw-.base_crash(%r13) + +.start_crash_kernel: + /* Location of purgatory_start in crash memory */ + lgr %r8,%r13 + aghi %r8,-(.base_crash-purgatory_start) + + /* Destination for this code i.e. end of memory to be swapped. */ + lg %r9,crash_size-.base_crash(%r13) + aghi %r9,-(purgatory_end-purgatory_start) + + /* Destination in crash memory, i.e. same as r9 but in crash memory. */ + lg %r10,crash_start-.base_crash(%r13) + agr %r10,%r9 + + /* Buffer location (in crash memory) and size. As the purgatory is + * behind the point of no return it can re-use the stack as buffer. + */ + lghi %r11,bufsz + larl %r12,stack + + MEMCPY %r12,%r9,%r11 /* dst -> (crash) buf */ + MEMCPY %r9,%r8,%r11 /* self -> dst */ + + /* Jump to new location. */ + lgr %r7,%r9 + aghi %r7,.jump_to_dst-purgatory_start + br %r7 + +.jump_to_dst: + basr %r13,0 +.base_dst: + + /* clear buffer */ + MEMCPY %r12,%r10,%r11 /* (crash) buf -> (crash) dst */ + + /* Load new buffer location after jump */ + larl %r7,stack + aghi %r10,stack-purgatory_start + MEMCPY %r10,%r7,%r11 /* (new) buf -> (crash) buf */ + + /* Now the code is set up to run from its designated location. Start + * swapping the rest of crash memory now. + * + * The registers will be used as follow: + * + * %r0-%r4 reserved for macros defined above + * %r5-%r6 tmp registers + * %r7 pointer to current struct sha region + * %r8 index to iterate over all sha regions + * %r9 pointer in crash memory + * %r10 pointer in old kernel + * %r11 total size (still) to be moved + * %r12 pointer to buffer + */ + lgr %r12,%r7 + lgr %r11,%r9 + lghi %r10,0 + lg %r9,crash_start-.base_dst(%r13) + lghi %r8,16 /* KEXEC_SEGMENTS_MAX */ + larl %r7,purgatory_sha_regions + + j .loop_first + + /* Loop over all purgatory_sha_regions. */ +.loop_next: + aghi %r8,-1 + cghi %r8,0 + je .loop_out + + aghi %r7,__KEXEC_SHA_REGION_SIZE + +.loop_first: + lg %r5,__KEXEC_SHA_REGION_START(%r7) + cghi %r5,0 + je .loop_next + + /* Copy [end last sha region, start current sha region) */ + /* Note: kexec_sha_region->start points in crash memory */ + sgr %r5,%r9 + MEMCPY %r9,%r10,%r5 + + agr %r9,%r5 + agr %r10,%r5 + sgr %r11,%r5 + + /* Swap sha region */ + lg %r6,__KEXEC_SHA_REGION_LEN(%r7) + MEMSWAP %r9,%r10,%r12,%r6 + sg %r11,__KEXEC_SHA_REGION_LEN(%r7) + j .loop_next + +.loop_out: + /* Copy rest of crash memory */ + MEMCPY %r9,%r10,%r11 + + /* start crash kernel */ + START_NEXT_KERNEL .base_dst + + +load_psw_mask: + .long 0x00080000,0x80000000 + + .align 8 +disabled_wait_psw: + .quad 0x0002000180000000 + .quad 0x0000000000000000 + .do_checksum_verification + +gprregs: + .rept 10 + .quad 0 + .endr + +purgatory_sha256_digest: + .global purgatory_sha256_digest + .rept 32 /* SHA256_DIGEST_SIZE */ + .byte 0 + .endr + +purgatory_sha_regions: + .global purgatory_sha_regions + .rept 16 * __KEXEC_SHA_REGION_SIZE /* KEXEC_SEGMENTS_MAX */ + .byte 0 + .endr + +kernel_entry: + .global kernel_entry + .quad 0 + +kernel_type: + .global kernel_type + .quad 0 + +crash_start: + .global crash_start + .quad 0 + +crash_size: + .global crash_size + .quad 0 + + .align PAGE_SIZE +stack: + /* The buffer to move this code must be as big as the code. */ + .skip stack-purgatory_start + .align PAGE_SIZE +purgatory_end: diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c new file mode 100644 index 000000000000..4e2beb3c29b7 --- /dev/null +++ b/arch/s390/purgatory/purgatory.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Purgatory code running between two kernels. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#include <linux/kexec.h> +#include <linux/sha256.h> +#include <linux/string.h> +#include <asm/purgatory.h> + +struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; +u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; + +u64 kernel_entry; +u64 kernel_type; + +u64 crash_start; +u64 crash_size; + +int verify_sha256_digest(void) +{ + struct kexec_sha_region *ptr, *end; + u8 digest[SHA256_DIGEST_SIZE]; + struct sha256_state sctx; + + sha256_init(&sctx); + end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions); + + for (ptr = purgatory_sha_regions; ptr < end; ptr++) + sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len); + + sha256_final(&sctx, digest); + + if (memcmp(digest, purgatory_sha256_digest, sizeof(digest))) + return 1; + + return 0; +} diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 949c977bc4c9..c25775fad4ed 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1013,6 +1013,7 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); + u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index d41d896481b8..c9b14020f4dd 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -166,7 +166,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) */ pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL); /* Filter out unsuppored __PAGE_KERNEL* bits: */ - pgprot_val(pte_prot) |= __supported_pte_mask; + pgprot_val(pte_prot) &= __supported_pte_mask; pte = pfn_pte(pfn, pte_prot); set_pte_at(mm, va, ptep, pte); pte_unmap_unlock(ptep, ptl); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b58787daf9f8..1fc05e428aba 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1423,12 +1423,23 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } +static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (is_guest_mode(vcpu)) + return svm->nested.hsave->control.tsc_offset; + + return vcpu->arch.tsc_offset; +} + static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); u64 g_tsc_offset = 0; if (is_guest_mode(vcpu)) { + /* Write L1's TSC offset. */ g_tsc_offset = svm->vmcb->control.tsc_offset - svm->nested.hsave->control.tsc_offset; svm->nested.hsave->control.tsc_offset = offset; @@ -3322,6 +3333,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) /* Restore the original control entries */ copy_vmcb_control_area(vmcb, hsave); + svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset; kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); @@ -3482,10 +3494,12 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, /* We don't want to see VMMCALLs from a nested guest */ clr_intercept(svm, INTERCEPT_VMMCALL); + svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset; + svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset; + svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; svm->vmcb->control.int_state = nested_vmcb->control.int_state; - svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; @@ -4035,12 +4049,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct vcpu_svm *svm = to_svm(vcpu); switch (msr_info->index) { - case MSR_IA32_TSC: { - msr_info->data = svm->vmcb->control.tsc_offset + - kvm_scale_tsc(vcpu, rdtsc()); - - break; - } case MSR_STAR: msr_info->data = svm->vmcb->save.star; break; @@ -4193,9 +4201,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->vmcb->save.g_pat = data; mark_dirty(svm->vmcb, VMCB_NPT); break; - case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr); - break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) @@ -5265,9 +5270,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, } if (!ret && svm) { - trace_kvm_pi_irte_update(svm->vcpu.vcpu_id, - host_irq, e->gsi, - vcpu_info.vector, + trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id, + e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); } @@ -7102,6 +7106,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .has_wbinvd_exit = svm_has_wbinvd_exit, + .read_l1_tsc_offset = svm_read_l1_tsc_offset, .write_tsc_offset = svm_write_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index aafcc9881e88..aa66ccd6ed6c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2880,18 +2880,15 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx_update_msr_bitmap(&vmx->vcpu); } -/* - * reads and returns guest's timestamp counter "register" - * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset - * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3 - */ -static u64 guest_read_tsc(struct kvm_vcpu *vcpu) +static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) { - u64 host_tsc, tsc_offset; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - host_tsc = rdtsc(); - tsc_offset = vmcs_read64(TSC_OFFSET); - return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset; + if (is_guest_mode(vcpu) && + (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)) + return vcpu->arch.tsc_offset - vmcs12->tsc_offset; + + return vcpu->arch.tsc_offset; } /* @@ -3524,9 +3521,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); - case MSR_IA32_TSC: - msr_info->data = guest_read_tsc(vcpu); - break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && @@ -3646,9 +3640,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; - case MSR_IA32_TSC: - kvm_write_tsc(vcpu, msr_info); - break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && @@ -10608,6 +10599,16 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, return true; } +static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && + !page_address_valid(vcpu, vmcs12->apic_access_addr)) + return -EINVAL; + else + return 0; +} + static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -11176,11 +11177,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); } - if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) - vmcs_write64(TSC_OFFSET, - vcpu->arch.tsc_offset + vmcs12->tsc_offset); - else - vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); + if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); @@ -11299,6 +11297,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_apic_access_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; @@ -11420,6 +11421,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); u32 msr_entry_idx; u32 exit_qual; + int r; enter_guest_mode(vcpu); @@ -11429,26 +11431,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); - if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { - leave_guest_mode(vcpu); - vmx_switch_vmcs(vcpu, &vmx->vmcs01); - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_INVALID_STATE, exit_qual); - return 1; - } + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + vcpu->arch.tsc_offset += vmcs12->tsc_offset; + + r = EXIT_REASON_INVALID_STATE; + if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) + goto fail; nested_get_vmcs12_pages(vcpu, vmcs12); + r = EXIT_REASON_MSR_LOAD_FAIL; msr_entry_idx = nested_vmx_load_msr(vcpu, vmcs12->vm_entry_msr_load_addr, vmcs12->vm_entry_msr_load_count); - if (msr_entry_idx) { - leave_guest_mode(vcpu); - vmx_switch_vmcs(vcpu, &vmx->vmcs01); - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx); - return 1; - } + if (msr_entry_idx) + goto fail; /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point @@ -11457,6 +11454,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) * the success flag) when L2 exits (see nested_vmx_vmexit()). */ return 0; + +fail: + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + vcpu->arch.tsc_offset -= vmcs12->tsc_offset; + leave_guest_mode(vcpu); + vmx_switch_vmcs(vcpu, &vmx->vmcs01); + nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual); + return 1; } /* @@ -12028,6 +12033,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, leave_guest_mode(vcpu); + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) + vcpu->arch.tsc_offset -= vmcs12->tsc_offset; + if (likely(!vmx->fail)) { if (exit_reason == -1) sync_vmcs12(vcpu, vmcs12); @@ -12224,10 +12232,16 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift, static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 tscl = rdtsc(); - u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl); - u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; + struct vcpu_vmx *vmx; + u64 tscl, guest_tscl, delta_tsc; + + if (kvm_mwait_in_guest(vcpu->kvm)) + return -EOPNOTSUPP; + + vmx = to_vmx(vcpu); + tscl = rdtsc(); + guest_tscl = kvm_read_l1_tsc(vcpu, tscl); + delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; /* Convert to host delta tsc if tsc scaling is enabled */ if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio && @@ -12533,7 +12547,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; - trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi, + trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); if (set) @@ -12712,6 +12726,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, + .read_l1_tsc_offset = vmx_read_l1_tsc_offset, .write_tsc_offset = vmx_write_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b2ff74b12ec4..51ecd381793b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1490,7 +1490,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) { - u64 curr_offset = vcpu->arch.tsc_offset; + u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } @@ -1532,7 +1532,9 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { - return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc); + u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); + + return tsc_offset + kvm_scale_tsc(vcpu, host_tsc); } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); @@ -2362,6 +2364,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vcpu->arch.smbase = data; break; + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr_info); + break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) return 1; @@ -2605,6 +2610,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UCODE_REV: msr_info->data = vcpu->arch.microcode_version; break; + case MSR_IA32_TSC: + msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; + break; case MSR_MTRRcap: case 0x200 ... 0x2ff: return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); @@ -2819,7 +2827,8 @@ out: static inline bool kvm_can_mwait_in_guest(void) { return boot_cpu_has(X86_FEATURE_MWAIT) && - !boot_cpu_has_bug(X86_BUG_MONITOR); + !boot_cpu_has_bug(X86_BUG_MONITOR) && + boot_cpu_has(X86_FEATURE_ARAT); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 07dc5419bd63..8e8b04cc569a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -732,6 +732,7 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) */ enum { Opt_queue_depth, + Opt_lock_timeout, Opt_last_int, /* int args above */ Opt_last_string, @@ -740,11 +741,13 @@ enum { Opt_read_write, Opt_lock_on_read, Opt_exclusive, + Opt_notrim, Opt_err }; static match_table_t rbd_opts_tokens = { {Opt_queue_depth, "queue_depth=%d"}, + {Opt_lock_timeout, "lock_timeout=%d"}, /* int args above */ /* string args above */ {Opt_read_only, "read_only"}, @@ -753,20 +756,25 @@ static match_table_t rbd_opts_tokens = { {Opt_read_write, "rw"}, /* Alternate spelling */ {Opt_lock_on_read, "lock_on_read"}, {Opt_exclusive, "exclusive"}, + {Opt_notrim, "notrim"}, {Opt_err, NULL} }; struct rbd_options { int queue_depth; + unsigned long lock_timeout; bool read_only; bool lock_on_read; bool exclusive; + bool trim; }; #define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ +#define RBD_LOCK_TIMEOUT_DEFAULT 0 /* no timeout */ #define RBD_READ_ONLY_DEFAULT false #define RBD_LOCK_ON_READ_DEFAULT false #define RBD_EXCLUSIVE_DEFAULT false +#define RBD_TRIM_DEFAULT true static int parse_rbd_opts_token(char *c, void *private) { @@ -796,6 +804,14 @@ static int parse_rbd_opts_token(char *c, void *private) } rbd_opts->queue_depth = intval; break; + case Opt_lock_timeout: + /* 0 is "wait forever" (i.e. infinite timeout) */ + if (intval < 0 || intval > INT_MAX / 1000) { + pr_err("lock_timeout out of range\n"); + return -EINVAL; + } + rbd_opts->lock_timeout = msecs_to_jiffies(intval * 1000); + break; case Opt_read_only: rbd_opts->read_only = true; break; @@ -808,6 +824,9 @@ static int parse_rbd_opts_token(char *c, void *private) case Opt_exclusive: rbd_opts->exclusive = true; break; + case Opt_notrim: + rbd_opts->trim = false; + break; default: /* libceph prints "bad option" msg */ return -EINVAL; @@ -1392,7 +1411,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req) case OBJ_OP_DISCARD: return true; default: - rbd_assert(0); + BUG(); } } @@ -2466,7 +2485,7 @@ again: } return false; default: - rbd_assert(0); + BUG(); } } @@ -2494,7 +2513,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req) } return false; default: - rbd_assert(0); + BUG(); } } @@ -3533,9 +3552,22 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, /* * lock_rwsem must be held for read */ -static void rbd_wait_state_locked(struct rbd_device *rbd_dev) +static int rbd_wait_state_locked(struct rbd_device *rbd_dev, bool may_acquire) { DEFINE_WAIT(wait); + unsigned long timeout; + int ret = 0; + + if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) + return -EBLACKLISTED; + + if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) + return 0; + + if (!may_acquire) { + rbd_warn(rbd_dev, "exclusive lock required"); + return -EROFS; + } do { /* @@ -3547,12 +3579,22 @@ static void rbd_wait_state_locked(struct rbd_device *rbd_dev) prepare_to_wait_exclusive(&rbd_dev->lock_waitq, &wait, TASK_UNINTERRUPTIBLE); up_read(&rbd_dev->lock_rwsem); - schedule(); + timeout = schedule_timeout(ceph_timeout_jiffies( + rbd_dev->opts->lock_timeout)); down_read(&rbd_dev->lock_rwsem); - } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && - !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)); + if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { + ret = -EBLACKLISTED; + break; + } + if (!timeout) { + rbd_warn(rbd_dev, "timed out waiting for lock"); + ret = -ETIMEDOUT; + break; + } + } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED); finish_wait(&rbd_dev->lock_waitq, &wait); + return ret; } static void rbd_queue_workfn(struct work_struct *work) @@ -3638,19 +3680,10 @@ static void rbd_queue_workfn(struct work_struct *work) (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read); if (must_be_locked) { down_read(&rbd_dev->lock_rwsem); - if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && - !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { - if (rbd_dev->opts->exclusive) { - rbd_warn(rbd_dev, "exclusive lock required"); - result = -EROFS; - goto err_unlock; - } - rbd_wait_state_locked(rbd_dev); - } - if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { - result = -EBLACKLISTED; + result = rbd_wait_state_locked(rbd_dev, + !rbd_dev->opts->exclusive); + if (result) goto err_unlock; - } } img_request = rbd_img_request_create(rbd_dev, op_type, snapc); @@ -3902,7 +3935,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) { struct gendisk *disk; struct request_queue *q; - u64 segment_size; + unsigned int objset_bytes = + rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; int err; /* create gendisk info */ @@ -3942,20 +3976,19 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ - /* set io sizes to object size */ - segment_size = rbd_obj_bytes(&rbd_dev->header); - blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT); q->limits.max_sectors = queue_max_hw_sectors(q); blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segment_size(q, UINT_MAX); - blk_queue_io_min(q, segment_size); - blk_queue_io_opt(q, segment_size); + blk_queue_io_min(q, objset_bytes); + blk_queue_io_opt(q, objset_bytes); - /* enable the discard support */ - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); - q->limits.discard_granularity = segment_size; - blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); - blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); + if (rbd_dev->opts->trim) { + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); + q->limits.discard_granularity = objset_bytes; + blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); + blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); + } if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; @@ -5179,8 +5212,10 @@ static int rbd_add_parse_args(const char *buf, rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; + rbd_opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT; rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT; + rbd_opts->trim = RBD_TRIM_DEFAULT; copts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, @@ -5216,6 +5251,8 @@ static void rbd_dev_image_unlock(struct rbd_device *rbd_dev) static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) { + int ret; + if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) { rbd_warn(rbd_dev, "exclusive-lock feature is not enabled"); return -EINVAL; @@ -5223,9 +5260,9 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) /* FIXME: "rbd map --exclusive" should be in interruptible */ down_read(&rbd_dev->lock_rwsem); - rbd_wait_state_locked(rbd_dev); + ret = rbd_wait_state_locked(rbd_dev, true); up_read(&rbd_dev->lock_rwsem); - if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { + if (ret) { rbd_warn(rbd_dev, "failed to acquire exclusive lock"); return -EROFS; } diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 9ee2888275c1..8e8a09755d10 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -133,6 +133,14 @@ config VT8500_TIMER help Enables support for the VT8500 driver. +config NPCM7XX_TIMER + bool "NPCM7xx timer driver" if COMPILE_TEST + depends on HAS_IOMEM + select CLKSRC_MMIO + help + Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture, + While TIMER0 serves as clockevent and TIMER1 serves as clocksource. + config CADENCE_TTC_TIMER bool "Cadence TTC timer driver" if COMPILE_TEST depends on COMMON_CLK diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index e8e76dfef00b..00caf37e52f9 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_CLKSRC_NPS) += timer-nps.o obj-$(CONFIG_OXNAS_RPS_TIMER) += timer-oxnas-rps.o obj-$(CONFIG_OWL_TIMER) += owl-timer.o obj-$(CONFIG_SPRD_TIMER) += timer-sprd.o +obj-$(CONFIG_NPCM7XX_TIMER) += timer-npcm7xx.o obj-$(CONFIG_ARC_TIMERS) += arc_timer.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 21bffdcb2f20..05d97a6871d8 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -17,9 +17,14 @@ #include <linux/of_irq.h> #include <linux/sched_clock.h> +#define TPM_PARAM 0x4 +#define TPM_PARAM_WIDTH_SHIFT 16 +#define TPM_PARAM_WIDTH_MASK (0xff << 16) #define TPM_SC 0x10 #define TPM_SC_CMOD_INC_PER_CNT (0x1 << 3) #define TPM_SC_CMOD_DIV_DEFAULT 0x3 +#define TPM_SC_CMOD_DIV_MAX 0x7 +#define TPM_SC_TOF_MASK (0x1 << 7) #define TPM_CNT 0x14 #define TPM_MOD 0x18 #define TPM_STATUS 0x1c @@ -29,8 +34,11 @@ #define TPM_C0SC_MODE_SHIFT 2 #define TPM_C0SC_MODE_MASK 0x3c #define TPM_C0SC_MODE_SW_COMPARE 0x4 +#define TPM_C0SC_CHF_MASK (0x1 << 7) #define TPM_C0V 0x24 +static int counter_width; +static int rating; static void __iomem *timer_base; static struct clock_event_device clockevent_tpm; @@ -83,10 +91,11 @@ static int __init tpm_clocksource_init(unsigned long rate) tpm_delay_timer.freq = rate; register_current_timer_delay(&tpm_delay_timer); - sched_clock_register(tpm_read_sched_clock, 32, rate); + sched_clock_register(tpm_read_sched_clock, counter_width, rate); return clocksource_mmio_init(timer_base + TPM_CNT, "imx-tpm", - rate, 200, 32, clocksource_mmio_readl_up); + rate, rating, counter_width, + clocksource_mmio_readl_up); } static int tpm_set_next_event(unsigned long delta, @@ -139,7 +148,6 @@ static struct clock_event_device clockevent_tpm = { .set_state_oneshot = tpm_set_state_oneshot, .set_next_event = tpm_set_next_event, .set_state_shutdown = tpm_set_state_shutdown, - .rating = 200, }; static int __init tpm_clockevent_init(unsigned long rate, int irq) @@ -149,10 +157,11 @@ static int __init tpm_clockevent_init(unsigned long rate, int irq) ret = request_irq(irq, tpm_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL, "i.MX7ULP TPM Timer", &clockevent_tpm); + clockevent_tpm.rating = rating; clockevent_tpm.cpumask = cpumask_of(0); clockevent_tpm.irq = irq; - clockevents_config_and_register(&clockevent_tpm, - rate, 300, 0xfffffffe); + clockevents_config_and_register(&clockevent_tpm, rate, 300, + GENMASK(counter_width - 1, 1)); return ret; } @@ -179,7 +188,7 @@ static int __init tpm_timer_init(struct device_node *np) ipg = of_clk_get_by_name(np, "ipg"); per = of_clk_get_by_name(np, "per"); if (IS_ERR(ipg) || IS_ERR(per)) { - pr_err("tpm: failed to get igp or per clk\n"); + pr_err("tpm: failed to get ipg or per clk\n"); ret = -ENODEV; goto err_clk_get; } @@ -197,6 +206,11 @@ static int __init tpm_timer_init(struct device_node *np) goto err_per_clk_enable; } + counter_width = (readl(timer_base + TPM_PARAM) & TPM_PARAM_WIDTH_MASK) + >> TPM_PARAM_WIDTH_SHIFT; + /* use rating 200 for 32-bit counter and 150 for 16-bit counter */ + rating = counter_width == 0x20 ? 200 : 150; + /* * Initialize tpm module to a known state * 1) Counter disabled @@ -205,16 +219,25 @@ static int __init tpm_timer_init(struct device_node *np) * 4) Channel0 disabled * 5) DMA transfers disabled */ + /* make sure counter is disabled */ writel(0, timer_base + TPM_SC); + /* TOF is W1C */ + writel(TPM_SC_TOF_MASK, timer_base + TPM_SC); writel(0, timer_base + TPM_CNT); - writel(0, timer_base + TPM_C0SC); + /* CHF is W1C */ + writel(TPM_C0SC_CHF_MASK, timer_base + TPM_C0SC); - /* increase per cnt, div 8 by default */ - writel(TPM_SC_CMOD_INC_PER_CNT | TPM_SC_CMOD_DIV_DEFAULT, + /* + * increase per cnt, + * div 8 for 32-bit counter and div 128 for 16-bit counter + */ + writel(TPM_SC_CMOD_INC_PER_CNT | + (counter_width == 0x20 ? + TPM_SC_CMOD_DIV_DEFAULT : TPM_SC_CMOD_DIV_MAX), timer_base + TPM_SC); /* set MOD register to maximum for free running mode */ - writel(0xffffffff, timer_base + TPM_MOD); + writel(GENMASK(counter_width - 1, 0), timer_base + TPM_MOD); rate = clk_get_rate(per) >> 3; ret = tpm_clocksource_init(rate); diff --git a/drivers/clocksource/timer-npcm7xx.c b/drivers/clocksource/timer-npcm7xx.c new file mode 100644 index 000000000000..7a9bb5532d99 --- /dev/null +++ b/drivers/clocksource/timer-npcm7xx.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2014-2018 Nuvoton Technologies tomer.maimon@nuvoton.com + * All rights reserved. + * + * Copyright 2017 Google, Inc. + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/err.h> +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/clockchips.h> +#include <linux/of_irq.h> +#include <linux/of_address.h> +#include "timer-of.h" + +/* Timers registers */ +#define NPCM7XX_REG_TCSR0 0x0 /* Timer 0 Control and Status Register */ +#define NPCM7XX_REG_TICR0 0x8 /* Timer 0 Initial Count Register */ +#define NPCM7XX_REG_TCSR1 0x4 /* Timer 1 Control and Status Register */ +#define NPCM7XX_REG_TICR1 0xc /* Timer 1 Initial Count Register */ +#define NPCM7XX_REG_TDR1 0x14 /* Timer 1 Data Register */ +#define NPCM7XX_REG_TISR 0x18 /* Timer Interrupt Status Register */ + +/* Timers control */ +#define NPCM7XX_Tx_RESETINT 0x1f +#define NPCM7XX_Tx_PERIOD BIT(27) +#define NPCM7XX_Tx_INTEN BIT(29) +#define NPCM7XX_Tx_COUNTEN BIT(30) +#define NPCM7XX_Tx_ONESHOT 0x0 +#define NPCM7XX_Tx_OPER GENMASK(3, 27) +#define NPCM7XX_Tx_MIN_PRESCALE 0x1 +#define NPCM7XX_Tx_TDR_MASK_BITS 24 +#define NPCM7XX_Tx_MAX_CNT 0xFFFFFF +#define NPCM7XX_T0_CLR_INT 0x1 +#define NPCM7XX_Tx_CLR_CSR 0x0 + +/* Timers operating mode */ +#define NPCM7XX_START_PERIODIC_Tx (NPCM7XX_Tx_PERIOD | NPCM7XX_Tx_COUNTEN | \ + NPCM7XX_Tx_INTEN | \ + NPCM7XX_Tx_MIN_PRESCALE) + +#define NPCM7XX_START_ONESHOT_Tx (NPCM7XX_Tx_ONESHOT | NPCM7XX_Tx_COUNTEN | \ + NPCM7XX_Tx_INTEN | \ + NPCM7XX_Tx_MIN_PRESCALE) + +#define NPCM7XX_START_Tx (NPCM7XX_Tx_COUNTEN | NPCM7XX_Tx_PERIOD | \ + NPCM7XX_Tx_MIN_PRESCALE) + +#define NPCM7XX_DEFAULT_CSR (NPCM7XX_Tx_CLR_CSR | NPCM7XX_Tx_MIN_PRESCALE) + +static int npcm7xx_timer_resume(struct clock_event_device *evt) +{ + struct timer_of *to = to_timer_of(evt); + u32 val; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val |= NPCM7XX_Tx_COUNTEN; + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static int npcm7xx_timer_shutdown(struct clock_event_device *evt) +{ + struct timer_of *to = to_timer_of(evt); + u32 val; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val &= ~NPCM7XX_Tx_COUNTEN; + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static int npcm7xx_timer_oneshot(struct clock_event_device *evt) +{ + struct timer_of *to = to_timer_of(evt); + u32 val; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val &= ~NPCM7XX_Tx_OPER; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val |= NPCM7XX_START_ONESHOT_Tx; + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static int npcm7xx_timer_periodic(struct clock_event_device *evt) +{ + struct timer_of *to = to_timer_of(evt); + u32 val; + + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val &= ~NPCM7XX_Tx_OPER; + + writel(timer_of_period(to), timer_of_base(to) + NPCM7XX_REG_TICR0); + val |= NPCM7XX_START_PERIODIC_Tx; + + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static int npcm7xx_clockevent_set_next_event(unsigned long evt, + struct clock_event_device *clk) +{ + struct timer_of *to = to_timer_of(clk); + u32 val; + + writel(evt, timer_of_base(to) + NPCM7XX_REG_TICR0); + val = readl(timer_of_base(to) + NPCM7XX_REG_TCSR0); + val |= NPCM7XX_START_Tx; + writel(val, timer_of_base(to) + NPCM7XX_REG_TCSR0); + + return 0; +} + +static irqreturn_t npcm7xx_timer0_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = (struct clock_event_device *)dev_id; + struct timer_of *to = to_timer_of(evt); + + writel(NPCM7XX_T0_CLR_INT, timer_of_base(to) + NPCM7XX_REG_TISR); + + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +static struct timer_of npcm7xx_to = { + .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK, + + .clkevt = { + .name = "npcm7xx-timer0", + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .set_next_event = npcm7xx_clockevent_set_next_event, + .set_state_shutdown = npcm7xx_timer_shutdown, + .set_state_periodic = npcm7xx_timer_periodic, + .set_state_oneshot = npcm7xx_timer_oneshot, + .tick_resume = npcm7xx_timer_resume, + .rating = 300, + }, + + .of_irq = { + .handler = npcm7xx_timer0_interrupt, + .flags = IRQF_TIMER | IRQF_IRQPOLL, + }, +}; + +static void __init npcm7xx_clockevents_init(void) +{ + writel(NPCM7XX_DEFAULT_CSR, + timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR0); + + writel(NPCM7XX_Tx_RESETINT, + timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TISR); + + npcm7xx_to.clkevt.cpumask = cpumask_of(0); + clockevents_config_and_register(&npcm7xx_to.clkevt, + timer_of_rate(&npcm7xx_to), + 0x1, NPCM7XX_Tx_MAX_CNT); +} + +static void __init npcm7xx_clocksource_init(void) +{ + u32 val; + + writel(NPCM7XX_DEFAULT_CSR, + timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1); + writel(NPCM7XX_Tx_MAX_CNT, + timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TICR1); + + val = readl(timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1); + val |= NPCM7XX_START_Tx; + writel(val, timer_of_base(&npcm7xx_to) + NPCM7XX_REG_TCSR1); + + clocksource_mmio_init(timer_of_base(&npcm7xx_to) + + NPCM7XX_REG_TDR1, + "npcm7xx-timer1", timer_of_rate(&npcm7xx_to), + 200, (unsigned int)NPCM7XX_Tx_TDR_MASK_BITS, + clocksource_mmio_readl_down); +} + +static int __init npcm7xx_timer_init(struct device_node *np) +{ + int ret; + + ret = timer_of_init(np, &npcm7xx_to); + if (ret) + return ret; + + /* Clock input is divided by PRESCALE + 1 before it is fed */ + /* to the counter */ + npcm7xx_to.of_clk.rate = npcm7xx_to.of_clk.rate / + (NPCM7XX_Tx_MIN_PRESCALE + 1); + + npcm7xx_clocksource_init(); + npcm7xx_clockevents_init(); + + pr_info("Enabling NPCM7xx clocksource timer base: %px, IRQ: %d ", + timer_of_base(&npcm7xx_to), timer_of_irq(&npcm7xx_to)); + + return 0; +} + +TIMER_OF_DECLARE(npcm7xx, "nuvoton,npcm750-timer", npcm7xx_timer_init); + diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index f035c2f25d35..131f1989f6f3 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -27,7 +27,6 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/vtoc.h> -#include <asm/diag.h> #include "dasd_int.h" #include "dasd_diag.h" diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 5f8d9ea69ebd..eceba3858cef 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -18,7 +18,7 @@ int sclp_init_state __section(.data) = sclp_init_state_uninitialized; * Used to keep track of the size of the event masks. Qemu until version 2.11 * only supports 4 and needs a workaround. */ -bool sclp_mask_compat_mode; +bool sclp_mask_compat_mode __section(.data); void sclp_early_wait_irq(void) { diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 50a313806dde..2ad6f12f3d49 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -21,7 +21,6 @@ #include <linux/list.h> #include <linux/hash.h> #include <linux/hashtable.h> -#include <linux/string.h> #include <asm/setup.h> #include "qeth_core.h" #include "qeth_l2.h" diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index 3b0c8b8a7634..066b5c3aaae6 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -176,7 +176,7 @@ static struct device_driver smsg_driver = { static void __exit smsg_exit(void) { - cpcmd("SET SMSG IUCV", NULL, 0, NULL); + cpcmd("SET SMSG OFF", NULL, 0, NULL); device_unregister(smsg_dev); iucv_unregister(&smsg_handler, 1); driver_unregister(&smsg_driver); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8bf60250309e..ae056927080d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -669,13 +669,15 @@ void ceph_fill_file_time(struct inode *inode, int issued, CEPH_CAP_FILE_BUFFER| CEPH_CAP_AUTH_EXCL| CEPH_CAP_XATTR_EXCL)) { - if (timespec_compare(ctime, &inode->i_ctime) > 0) { + if (ci->i_version == 0 || + timespec_compare(ctime, &inode->i_ctime) > 0) { dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ctime->tv_sec, ctime->tv_nsec); inode->i_ctime = *ctime; } - if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { + if (ci->i_version == 0 || + ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { /* the MDS did a utimes() */ dout("mtime %ld.%09ld -> %ld.%09ld " "tw %d -> %d\n", @@ -795,7 +797,6 @@ static int fill_inode(struct inode *inode, struct page *locked_page, new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ - ci->i_version = le64_to_cpu(info->version); inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; @@ -868,6 +869,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, xattr_blob = NULL; } + /* finally update i_version */ + ci->i_version = le64_to_cpu(info->version); + inode->i_mapping->a_ops = &ceph_aops; switch (inode->i_mode & S_IFMT) { diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index dc44de904797..2ddcc96ae456 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -4,17 +4,18 @@ top_srcdir = ../../../../ UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c -LIBKVM_x86_64 = lib/x86.c +LIBKVM_x86_64 = lib/x86.c lib/vmx.c TEST_GEN_PROGS_x86_64 = set_sregs_test TEST_GEN_PROGS_x86_64 += sync_regs_test +TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) +CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) # After inclusion, $(OUTPUT) is defined and # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 57974ad46373..637b7017b6ee 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -112,24 +112,27 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid); +struct kvm_cpuid2 *kvm_get_supported_cpuid(void); void vcpu_set_cpuid( struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); -struct kvm_cpuid2 *allocate_kvm_cpuid2(void); struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index); +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); static inline struct kvm_cpuid_entry2 * -find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function) +kvm_get_supported_cpuid_entry(uint32_t function) { - return find_cpuid_index_entry(cpuid, function, 0); + return kvm_get_supported_cpuid_index(function, 0); } struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); +typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr, + vm_paddr_t vmxon_paddr, + vm_vaddr_t vmcs_vaddr, + vm_paddr_t vmcs_paddr); + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h new file mode 100644 index 000000000000..6ed8499807fd --- /dev/null +++ b/tools/testing/selftests/kvm/include/vmx.h @@ -0,0 +1,494 @@ +/* + * tools/testing/selftests/kvm/include/vmx.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef SELFTEST_KVM_VMX_H +#define SELFTEST_KVM_VMX_H + +#include <stdint.h> +#include "x86.h" + +#define CPUID_VMX_BIT 5 + +#define CPUID_VMX (1 << 5) + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP 0x08000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. + */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_DESC 0x00000004 +#define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EPT_VE 0x00040000 +#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 + +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 + +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + +#define EXIT_REASON_FAILED_VMENTRY 0x80000000 +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 +#define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_PML_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 +#define LAST_EXIT_REASON 64 + +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, + GUEST_PML_INDEX = 0x00000812, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + PML_ADDRESS = 0x0000200e, + PML_ADDRESS_HIGH = 0x0000200f, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_IA32_EFER = 0x00002806, + GUEST_IA32_EFER_HIGH = 0x00002807, + GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + HOST_IA32_EFER = 0x00002c02, + HOST_IA32_EFER_HIGH = 0x00002c03, + HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +struct vmx_msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t value; +} __attribute__ ((aligned(16))); + +static inline int vmxon(uint64_t phys) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(phys) + : "cc", "memory"); + + return ret; +} + +static inline void vmxoff(void) +{ + __asm__ __volatile__("vmxoff"); +} + +static inline int vmclear(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrld(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmlaunch. + */ +static inline int vmlaunch(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. + */ +static inline int vmresume(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +static inline int vmread(uint64_t encoding, uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" + : [value]"=rm"(tmp), [ret]"=rm"(ret) + : [encoding]"r"(encoding) + : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmread that ignores errors and returns zero if the + * vmread instruction fails. + */ +static inline uint64_t vmreadz(uint64_t encoding) +{ + uint64_t value = 0; + vmread(encoding, &value); + return value; +} + +static inline int vmwrite(uint64_t encoding, uint64_t value) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" + : [ret]"=rm"(ret) + : [value]"rm"(value), [encoding]"r"(encoding) + : "cc", "memory"); + + return ret; +} + +static inline uint32_t vmcs_revision(void) +{ + return rdmsr(MSR_IA32_VMX_BASIC); +} + +void prepare_for_vmx_operation(void); +void prepare_vmcs(void *guest_rip, void *guest_rsp); +struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid, + vmx_guest_code_t guest_code); + +#endif /* !SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7ca1bb40c498..2cedfda181d4 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -378,7 +378,7 @@ int kvm_memcmp_hva_gva(void *hva, * complicated. This function uses a reasonable default length for * the array and performs the appropriate allocation. */ -struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) { struct kvm_cpuid2 *cpuid; int nent = 100; @@ -402,17 +402,21 @@ struct kvm_cpuid2 *allocate_kvm_cpuid2(void) * Input Args: None * * Output Args: - * cpuid - The supported KVM CPUID * - * Return: void + * Return: The supported KVM CPUID * * Get the guest CPUID supported by KVM. */ -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) +struct kvm_cpuid2 *kvm_get_supported_cpuid(void) { + static struct kvm_cpuid2 *cpuid; int ret; int kvm_fd; + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); kvm_fd = open(KVM_DEV_PATH, O_RDONLY); TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", KVM_DEV_PATH, kvm_fd, errno); @@ -422,6 +426,7 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) ret, errno); close(kvm_fd); + return cpuid; } /* Locate a cpuid entry. @@ -435,12 +440,13 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) * Return: A pointer to the cpuid entry. Never returns NULL. */ struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index) +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) { + struct kvm_cpuid2 *cpuid; struct kvm_cpuid_entry2 *entry = NULL; int i; + cpuid = kvm_get_supported_cpuid(); for (i = 0; i < cpuid->nent; i++) { if (cpuid->entries[i].function == function && cpuid->entries[i].index == index) { @@ -1435,7 +1441,7 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, sparsebit_idx_t pg; TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " - "not divisable by page size.\n" + "not divisible by page size.\n" " paddr_min: 0x%lx page_size: 0x%x", paddr_min, vm->page_size); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 0c5cf3e0cb6f..b132bc95d183 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -121,7 +121,7 @@ * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * - * + Node starting index is evenly divisable by the number of bits + * + Node starting index is evenly divisible by the number of bits * within a nodes mask member. * * + Nodes never represent a range of bits that wrap around the @@ -1741,7 +1741,7 @@ void sparsebit_validate_internal(struct sparsebit *s) /* Validate node index is divisible by the mask size */ if (nodep->idx % MASK_BITS) { - fprintf(stderr, "Node index not divisable by " + fprintf(stderr, "Node index not divisible by " "mask size,\n" " nodep: %p nodep->idx: 0x%lx " "MASK_BITS: %lu\n", diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c new file mode 100644 index 000000000000..0231bc0aae7b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/vmx.c @@ -0,0 +1,243 @@ +/* + * tools/testing/selftests/kvm/lib/x86.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +/* Create a default VM for VMX tests. + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm * +vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_vm *vm; + vm_vaddr_t vmxon_vaddr; + vm_paddr_t vmxon_paddr; + vm_vaddr_t vmcs_vaddr; + vm_paddr_t vmcs_paddr; + + vm = vm_create_default(vcpuid, (void *) guest_code); + + /* Enable nesting in CPUID */ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); + + /* Setup of a region of guest memory for the vmxon region. */ + vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr); + + /* Setup of a region of guest memory for a vmcs. */ + vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr); + + vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr, + vmcs_paddr); + + return vm; +} + +void prepare_for_vmx_operation(void) +{ + uint64_t feature_control; + uint64_t required; + unsigned long cr0; + unsigned long cr4; + + /* + * Ensure bits in CR0 and CR4 are valid in VMX operation: + * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. + * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. + */ + __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); + cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); + cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); + __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); + + __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); + cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); + cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); + /* Enable VMX operation */ + cr4 |= X86_CR4_VMXE; + __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); + + /* + * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: + * Bit 0: Lock bit. If clear, VMXON causes a #GP. + * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON + * outside of SMX causes a #GP. + */ + required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + required |= FEATURE_CONTROL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + if ((feature_control & required) != required) + wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); +} + +/* + * Initialize the control fields to the most basic settings possible. + */ +static inline void init_vmcs_control_fields(void) +{ + vmwrite(VIRTUAL_PROCESSOR_ID, 0); + vmwrite(POSTED_INTR_NV, 0); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS)); + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS)); + vmwrite(EXCEPTION_BITMAP, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ + vmwrite(CR3_TARGET_COUNT, 0); + vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | + VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ + vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | + VM_ENTRY_IA32E_MODE); /* 64-bit guest */ + vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + vmwrite(TPR_THRESHOLD, 0); + vmwrite(SECONDARY_VM_EXEC_CONTROL, 0); + + vmwrite(CR0_GUEST_HOST_MASK, 0); + vmwrite(CR4_GUEST_HOST_MASK, 0); + vmwrite(CR0_READ_SHADOW, get_cr0()); + vmwrite(CR4_READ_SHADOW, get_cr4()); +} + +/* + * Initialize the host state fields based on the current host state, with + * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch + * or vmresume. + */ +static inline void init_vmcs_host_state(void) +{ + uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + + vmwrite(HOST_ES_SELECTOR, get_es()); + vmwrite(HOST_CS_SELECTOR, get_cs()); + vmwrite(HOST_SS_SELECTOR, get_ss()); + vmwrite(HOST_DS_SELECTOR, get_ds()); + vmwrite(HOST_FS_SELECTOR, get_fs()); + vmwrite(HOST_GS_SELECTOR, get_gs()); + vmwrite(HOST_TR_SELECTOR, get_tr()); + + if (exit_controls & VM_EXIT_LOAD_IA32_PAT) + vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); + if (exit_controls & VM_EXIT_LOAD_IA32_EFER) + vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); + if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, + rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); + + vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); + + vmwrite(HOST_CR0, get_cr0()); + vmwrite(HOST_CR3, get_cr3()); + vmwrite(HOST_CR4, get_cr4()); + vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); + vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); + vmwrite(HOST_TR_BASE, + get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt_base()); + vmwrite(HOST_IDTR_BASE, get_idt_base()); + vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); + vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); +} + +/* + * Initialize the guest state fields essentially as a clone of + * the host state fields. Some host state fields have fixed + * values, and we set the corresponding guest state fields accordingly. + */ +static inline void init_vmcs_guest_state(void *rip, void *rsp) +{ + vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); + vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); + vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); + vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); + vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); + vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); + vmwrite(GUEST_LDTR_SELECTOR, 0); + vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); + vmwrite(GUEST_INTR_STATUS, 0); + vmwrite(GUEST_PML_INDEX, 0); + + vmwrite(VMCS_LINK_POINTER, -1ll); + vmwrite(GUEST_IA32_DEBUGCTL, 0); + vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); + vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); + vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, + vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); + + vmwrite(GUEST_ES_LIMIT, -1); + vmwrite(GUEST_CS_LIMIT, -1); + vmwrite(GUEST_SS_LIMIT, -1); + vmwrite(GUEST_DS_LIMIT, -1); + vmwrite(GUEST_FS_LIMIT, -1); + vmwrite(GUEST_GS_LIMIT, -1); + vmwrite(GUEST_LDTR_LIMIT, -1); + vmwrite(GUEST_TR_LIMIT, 0x67); + vmwrite(GUEST_GDTR_LIMIT, 0xffff); + vmwrite(GUEST_IDTR_LIMIT, 0xffff); + vmwrite(GUEST_ES_AR_BYTES, + vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + vmwrite(GUEST_SS_AR_BYTES, 0xc093); + vmwrite(GUEST_DS_AR_BYTES, + vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_FS_AR_BYTES, + vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_GS_AR_BYTES, + vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); + vmwrite(GUEST_TR_AR_BYTES, 0x8b); + vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + vmwrite(GUEST_ACTIVITY_STATE, 0); + vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); + vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); + + vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); + vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); + vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); + vmwrite(GUEST_ES_BASE, 0); + vmwrite(GUEST_CS_BASE, 0); + vmwrite(GUEST_SS_BASE, 0); + vmwrite(GUEST_DS_BASE, 0); + vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); + vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); + vmwrite(GUEST_LDTR_BASE, 0); + vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); + vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); + vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); + vmwrite(GUEST_DR7, 0x400); + vmwrite(GUEST_RSP, (uint64_t)rsp); + vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RFLAGS, 2); + vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); + vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); +} + +void prepare_vmcs(void *guest_rip, void *guest_rsp) +{ + init_vmcs_control_fields(); + init_vmcs_host_state(); + init_vmcs_guest_state(guest_rip, guest_rsp); +} diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c new file mode 100644 index 000000000000..8f7f62093add --- /dev/null +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -0,0 +1,231 @@ +/* + * gtests/tests/vmx_tsc_adjust_test.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * + * IA32_TSC_ADJUST test + * + * According to the SDM, "if an execution of WRMSR to the + * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, + * the logical processor also adds (or subtracts) value X from the + * IA32_TSC_ADJUST MSR. + * + * Note that when L1 doesn't intercept writes to IA32_TSC, a + * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC + * value. + * + * This test verifies that this unusual case is handled correctly. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +#include <string.h> +#include <sys/ioctl.h> + +#ifndef MSR_IA32_TSC_ADJUST +#define MSR_IA32_TSC_ADJUST 0x3b +#endif + +#define PAGE_SIZE 4096 +#define VCPU_ID 5 + +#define TSC_ADJUST_VALUE (1ll << 32) +#define TSC_OFFSET_VALUE -(1ll << 48) + +enum { + PORT_ABORT = 0x1000, + PORT_REPORT, + PORT_DONE, +}; + +struct vmx_page { + vm_vaddr_t virt; + vm_paddr_t phys; +}; + +enum { + VMXON_PAGE = 0, + VMCS_PAGE, + MSR_BITMAP_PAGE, + + NUM_VMX_PAGES, +}; + +struct kvm_single_msr { + struct kvm_msrs header; + struct kvm_msr_entry entry; +} __attribute__((packed)); + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +/* Array of vmx_page descriptors that is shared with the guest. */ +struct vmx_page *vmx_pages; + +#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg)) +static void do_exit_to_l0(uint16_t port, unsigned long arg) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port), "D"(arg) + : "rax"); +} + + +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \ +} while (0) + +static void check_ia32_tsc_adjust(int64_t max) +{ + int64_t adjust; + + adjust = rdmsr(MSR_IA32_TSC_ADJUST); + exit_to_l0(PORT_REPORT, adjust); + GUEST_ASSERT(adjust <= max); +} + +static void l2_guest_code(void) +{ + uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + + wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_page *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + uintptr_t save_cr3; + + GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); + wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + + prepare_for_vmx_operation(); + + /* Enter VMX root operation. */ + *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys)); + + /* Load a VMCS. */ + *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys)); + GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + /* Jump into L2. First, test failure to load guest CR3. */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + vmwrite(GUEST_CR3, save_cr3); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + exit_to_l0(PORT_DONE, 0); +} + +static void allocate_vmx_page(struct vmx_page *page) +{ + vm_vaddr_t virt; + + virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0); + memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE); + + page->virt = virt; + page->phys = addr_gva2gpa(vm, virt); +} + +static vm_vaddr_t allocate_vmx_pages(void) +{ + vm_vaddr_t vmx_pages_vaddr; + int i; + + vmx_pages_vaddr = vm_vaddr_alloc( + vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0); + + vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr); + + for (i = 0; i < NUM_VMX_PAGES; i++) + allocate_vmx_page(&vmx_pages[i]); + + return vmx_pages_vaddr; +} + +void report(int64_t val) +{ + printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", + val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_vaddr; + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + printf("nested VMX not enabled, skipping test"); + return 0; + } + + vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vmx_pages_vaddr = allocate_vmx_pages(); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_regs regs; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + vcpu_regs_get(vm, VCPU_ID, ®s); + + switch (run->io.port) { + case PORT_ABORT: + TEST_ASSERT(false, "%s", (const char *) regs.rdi); + /* NOT REACHED */ + case PORT_REPORT: + report(regs.rdi); + break; + case PORT_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); + } + } + + kvm_vm_free(vm); +done: + return 0; +} |