From faeb7833eee0d6afe0ecb6bdfa6042556c2c352e Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 1 Feb 2018 16:48:32 +0300 Subject: kvm: x86: hyperv: guest->host event signaling via eventfd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Hyper-V, the fast guest->host notification mechanism is the SIGNAL_EVENT hypercall, with a single parameter of the connection ID to signal. Currently this hypercall incurs a user exit and requires the userspace to decode the parameters and trigger the notification of the potentially different I/O context. To avoid the costly user exit, process this hypercall and signal the corresponding eventfd in KVM, similar to ioeventfd. The association between the connection id and the eventfd is established via the newly introduced KVM_HYPERV_EVENTFD ioctl, and maintained in an (srcu-protected) IDR. Signed-off-by: Roman Kagan Reviewed-by: David Hildenbrand [asm/hyperv.h changes approved by KY Srinivasan. - Radim] Signed-off-by: Radim Krčmář --- include/uapi/linux/kvm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7b26d4b0b052..2d2d926113ba 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -936,6 +936,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_GET_CPU_CHAR 151 #define KVM_CAP_S390_BPB 152 #define KVM_CAP_GET_MSR_FEATURES 153 +#define KVM_CAP_HYPERV_EVENTFD 154 #ifdef KVM_CAP_IRQ_ROUTING @@ -1375,6 +1376,10 @@ struct kvm_enc_region { #define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) #define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) +/* Available with KVM_CAP_HYPERV_EVENTFD */ +#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd) + + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1515,4 +1520,14 @@ struct kvm_assigned_msix_entry { #define KVM_ARM_DEV_EL1_PTIMER (1 << 1) #define KVM_ARM_DEV_PMU (1 << 2) +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From 7b7e39522a61f402d41dd9a67f3fa2133ef9d4e8 Mon Sep 17 00:00:00 2001 From: Ken Hofsass Date: Wed, 31 Jan 2018 16:03:35 -0800 Subject: KVM: x86: add SYNC_REGS_SIZE_BYTES #define. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace hardcoded padding size value for struct kvm_sync_regs with #define SYNC_REGS_SIZE_BYTES. Also update the value specified in api.txt from outdated hardcoded value to SYNC_REGS_SIZE_BYTES. Signed-off-by: Ken Hofsass Reviewed-by: David Hildenbrand Acked-by: Christian Borntraeger Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 2 +- include/uapi/linux/kvm.h | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index db992e036bdf..55867a2a460c 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3905,7 +3905,7 @@ in userspace. __u64 kvm_dirty_regs; union { struct kvm_sync_regs regs; - char padding[1024]; + char padding[SYNC_REGS_SIZE_BYTES]; } s; If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2d2d926113ba..088c2c92db55 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -396,6 +396,10 @@ struct kvm_run { char padding[256]; }; + /* 2048 is the size of the char array used to bound/pad the size + * of the union that holds sync regs. + */ + #define SYNC_REGS_SIZE_BYTES 2048 /* * shared registers between kvm and userspace. * kvm_valid_regs specifies the register classes set by the host @@ -407,7 +411,7 @@ struct kvm_run { __u64 kvm_dirty_regs; union { struct kvm_sync_regs regs; - char padding[2048]; + char padding[SYNC_REGS_SIZE_BYTES]; } s; }; -- cgit v1.2.3 From 4d5422cea3b61f158d58924cbb43feada456ba5c Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 12 Mar 2018 04:53:02 -0700 Subject: KVM: X86: Provide a capability to disable MWAIT intercepts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allowing a guest to execute MWAIT without interception enables a guest to put a (physical) CPU into a power saving state, where it takes longer to return from than what may be desired by the host. Don't give a guest that power over a host by default. (Especially, since nothing prevents a guest from using MWAIT even when it is not advertised via CPUID.) Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jan H. Schönherr Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 27 ++++++++++++++++++--------- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 9 +++++---- arch/x86/kvm/x86.c | 24 ++++++++++++++++++++---- arch/x86/kvm/x86.h | 10 +++++----- include/uapi/linux/kvm.h | 2 +- tools/include/uapi/linux/kvm.h | 2 +- 8 files changed, 53 insertions(+), 25 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 786c1b4ecb59..744a202cca31 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4358,6 +4358,24 @@ enables QEMU to build error log and branch to guest kernel registered machine check handling routine. Without this capability KVM will branch to guests' 0x200 interrupt vector. +7.13 KVM_CAP_X86_DISABLE_EXITS + +Architectures: x86 +Parameters: args[0] defines which exits are disabled +Returns: 0 on success, -EINVAL when args[0] contains invalid exits + +Valid bits in args[0] are + +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) + +Enabling this capability on a VM provides userspace with a way to no +longer intercept some instructions for improved latency in some +workloads, and is suggested when vCPUs are associated to dedicated +physical CPUs. More bits can be added in the future; userspace can +just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable +all such vmexits. + + 8. Other capabilities. ---------------------- @@ -4470,15 +4488,6 @@ reserved. Both registers and addresses are 64-bits wide. It will be possible to run 64-bit or 32-bit guest code. -8.8 KVM_CAP_X86_GUEST_MWAIT - -Architectures: x86 - -This capability indicates that guest using memory monotoring instructions -(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit. As such time -spent while virtual CPU is halted in this way will then be accounted for as -guest running time on the host (as opposed to e.g. HLT). - 8.9 KVM_CAP_ARM_USER_IRQ Architectures: arm, arm64 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 480a75b22b69..a85b640aee1e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -811,6 +811,8 @@ struct kvm_arch { gpa_t wall_clock; + bool mwait_in_guest; + bool ept_identity_pagetable_done; gpa_t ept_identity_map_addr; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fa1c4977e1c2..f6578cee6bb6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1398,7 +1398,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_XSETBV); set_intercept(svm, INTERCEPT_RSM); - if (!kvm_mwait_in_guest()) { + if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { set_intercept(svm, INTERCEPT_MONITOR); set_intercept(svm, INTERCEPT_MWAIT); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b4d8da6c62c8..7cef183993ba 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3746,13 +3746,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING | CPU_BASED_INVLPG_EXITING | CPU_BASED_RDPMC_EXITING; - if (!kvm_mwait_in_guest()) - min |= CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING; - opt = CPU_BASED_TPR_SHADOW | CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; @@ -5544,6 +5542,9 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) exec_control |= CPU_BASED_CR3_STORE_EXITING | CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_INVLPG_EXITING; + if (kvm_mwait_in_guest(vmx->vcpu.kvm)) + exec_control &= ~(CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING); return exec_control; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9e1496cb2345..db95d4d6f57b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2813,9 +2813,15 @@ out: return r; } +static inline bool kvm_can_mwait_in_guest(void) +{ + return boot_cpu_has(X86_FEATURE_MWAIT) && + !boot_cpu_has_bug(X86_BUG_MONITOR); +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { - int r; + int r = 0; switch (ext) { case KVM_CAP_IRQCHIP: @@ -2871,8 +2877,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ADJUST_CLOCK: r = KVM_CLOCK_TSC_STABLE; break; - case KVM_CAP_X86_GUEST_MWAIT: - r = kvm_mwait_in_guest(); + case KVM_CAP_X86_DISABLE_EXITS: + if(kvm_can_mwait_in_guest()) + r |= KVM_X86_DISABLE_EXITS_MWAIT; break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, @@ -2913,7 +2920,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_X2APIC_API_VALID_FLAGS; break; default: - r = 0; break; } return r; @@ -4218,6 +4224,16 @@ split_irqchip_unlock: r = 0; break; + case KVM_CAP_X86_DISABLE_EXITS: + r = -EINVAL; + if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) + break; + + if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && + kvm_can_mwait_in_guest()) + kvm->arch.mwait_in_guest = true; + r = 0; + break; default: r = -EINVAL; break; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 18e2e0a91edc..026b239bf058 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -2,8 +2,6 @@ #ifndef ARCH_X86_KVM_X86_H #define ARCH_X86_KVM_X86_H -#include -#include #include #include #include "kvm_cache_regs.h" @@ -266,10 +264,12 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) __rem; \ }) -static inline bool kvm_mwait_in_guest(void) +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT) + +static inline bool kvm_mwait_in_guest(struct kvm *kvm) { - return boot_cpu_has(X86_FEATURE_MWAIT) && - !boot_cpu_has_bug(X86_BUG_MONITOR); + return kvm->arch.mwait_in_guest; } #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 088c2c92db55..1065006c9bf5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -929,7 +929,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_GS 140 #define KVM_CAP_S390_AIS 141 #define KVM_CAP_SPAPR_TCE_VFIO 142 -#define KVM_CAP_X86_GUEST_MWAIT 143 +#define KVM_CAP_X86_DISABLE_EXITS 143 #define KVM_CAP_ARM_USER_IRQ 144 #define KVM_CAP_S390_CMMA_MIGRATION 145 #define KVM_CAP_PPC_FWNMI 146 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 0fb5ef939732..b13c257261af 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -924,7 +924,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_GS 140 #define KVM_CAP_S390_AIS 141 #define KVM_CAP_SPAPR_TCE_VFIO 142 -#define KVM_CAP_X86_GUEST_MWAIT 143 +#define KVM_CAP_X86_DISABLE_EXITS 143 #define KVM_CAP_ARM_USER_IRQ 144 #define KVM_CAP_S390_CMMA_MIGRATION 145 #define KVM_CAP_PPC_FWNMI 146 -- cgit v1.2.3