Diffstat (limited to 'arch/x86/kvm/vmx/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 239
1 file changed, 66 insertions, 173 deletions
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7fddb0abbeaa..aa157fe5b7b3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -75,6 +75,8 @@
 #include "vmx_onhyperv.h"
 #include "posted_intr.h"
 
+#include "mmu/spte.h"
+
 MODULE_AUTHOR("Qumranet");
 MODULE_DESCRIPTION("KVM support for VMX (Intel VT-x) extensions");
 MODULE_LICENSE("GPL");
@@ -113,8 +115,6 @@ static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, 0444);
 
 module_param(enable_apicv, bool, 0444);
-
-bool __read_mostly enable_ipiv = true;
 module_param(enable_ipiv, bool, 0444);
 
 module_param(enable_device_posted_irqs, bool, 0444);
@@ -168,31 +168,6 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 			RTIT_STATUS_BYTECNT))
 
 /*
- * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic, PT and LBR MSRs are handled specially.
- */
-static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
-	MSR_IA32_SPEC_CTRL,
-	MSR_IA32_PRED_CMD,
-	MSR_IA32_FLUSH_CMD,
-	MSR_IA32_TSC,
-#ifdef CONFIG_X86_64
-	MSR_FS_BASE,
-	MSR_GS_BASE,
-	MSR_KERNEL_GS_BASE,
-	MSR_IA32_XFD,
-	MSR_IA32_XFD_ERR,
-#endif
-	MSR_IA32_SYSENTER_CS,
-	MSR_IA32_SYSENTER_ESP,
-	MSR_IA32_SYSENTER_EIP,
-	MSR_CORE_C1_RES,
-	MSR_CORE_C3_RESIDENCY,
-	MSR_CORE_C6_RESIDENCY,
-	MSR_CORE_C7_RESIDENCY,
-};
-
-/*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
  *             executions of PAUSE in a loop. Also indicate if ple enabled.
@@ -674,40 +649,6 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 	return flexpriority_enabled && lapic_in_kernel(vcpu);
 }
 
-static int vmx_get_passthrough_msr_slot(u32 msr)
-{
-	int i;
-
-	switch (msr) {
-	case 0x800 ... 0x8ff:
-		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-		return -ENOENT;
-	case MSR_IA32_RTIT_STATUS:
-	case MSR_IA32_RTIT_OUTPUT_BASE:
-	case MSR_IA32_RTIT_OUTPUT_MASK:
-	case MSR_IA32_RTIT_CR3_MATCH:
-	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
-		/* PT MSRs. These are handled in pt_update_intercept_for_msr() */
-	case MSR_LBR_SELECT:
-	case MSR_LBR_TOS:
-	case MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31:
-	case MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31:
-	case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
-	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
-	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
-		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-		return -ENOENT;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-		if (vmx_possible_passthrough_msrs[i] == msr)
-			return i;
-	}
-
-	WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
-	return -ENOENT;
-}
-
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -963,6 +904,10 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
 	if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
 		flags |= VMX_RUN_SAVE_SPEC_CTRL;
 
+	if (static_branch_unlikely(&cpu_buf_vm_clear) &&
+	    kvm_vcpu_can_access_host_mmio(&vmx->vcpu))
+		flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO;
+
 	return flags;
 }
 
@@ -4022,76 +3967,29 @@ static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
 	vmx->nested.force_msr_bitmap_recalc = true;
 }
 
-void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
+void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-	int idx;
 
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
 
 	vmx_msr_bitmap_l01_changed(vmx);
 
-	/*
-	 * Mark the desired intercept state in shadow bitmap, this is needed
-	 * for resync when the MSR filters change.
-	 */
-	idx = vmx_get_passthrough_msr_slot(msr);
-	if (idx >= 0) {
-		if (type & MSR_TYPE_R)
-			clear_bit(idx, vmx->shadow_msr_intercept.read);
-		if (type & MSR_TYPE_W)
-			clear_bit(idx, vmx->shadow_msr_intercept.write);
-	}
-
-	if ((type & MSR_TYPE_R) &&
-	    !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
-		vmx_set_msr_bitmap_read(msr_bitmap, msr);
-		type &= ~MSR_TYPE_R;
-	}
-
-	if ((type & MSR_TYPE_W) &&
-	    !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) {
-		vmx_set_msr_bitmap_write(msr_bitmap, msr);
-		type &= ~MSR_TYPE_W;
+	if (type & MSR_TYPE_R) {
+		if (!set && kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
+			vmx_clear_msr_bitmap_read(msr_bitmap, msr);
+		else
+			vmx_set_msr_bitmap_read(msr_bitmap, msr);
 	}
 
-	if (type & MSR_TYPE_R)
-		vmx_clear_msr_bitmap_read(msr_bitmap, msr);
-
-	if (type & MSR_TYPE_W)
-		vmx_clear_msr_bitmap_write(msr_bitmap, msr);
-}
-
-void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
-	int idx;
-
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
-	vmx_msr_bitmap_l01_changed(vmx);
-
-	/*
-	 * Mark the desired intercept state in shadow bitmap, this is needed
-	 * for resync when the MSR filter changes.
-	 */
-	idx = vmx_get_passthrough_msr_slot(msr);
-	if (idx >= 0) {
-		if (type & MSR_TYPE_R)
-			set_bit(idx, vmx->shadow_msr_intercept.read);
-		if (type & MSR_TYPE_W)
-			set_bit(idx, vmx->shadow_msr_intercept.write);
+	if (type & MSR_TYPE_W) {
+		if (!set && kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE))
+			vmx_clear_msr_bitmap_write(msr_bitmap, msr);
+		else
+			vmx_set_msr_bitmap_write(msr_bitmap, msr);
 	}
-
-	if (type & MSR_TYPE_R)
-		vmx_set_msr_bitmap_read(msr_bitmap, msr);
-
-	if (type & MSR_TYPE_W)
-		vmx_set_msr_bitmap_write(msr_bitmap, msr);
 }
 
 static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
@@ -4170,35 +4068,57 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
 	}
 }
 
-void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 i;
-
 	if (!cpu_has_vmx_msr_bitmap())
 		return;
 
-	/*
-	 * Redo intercept permissions for MSRs that KVM is passing through to
-	 * the guest.  Disabling interception will check the new MSR filter and
-	 * ensure that KVM enables interception if usersepace wants to filter
-	 * the MSR.  MSRs that KVM is already intercepting don't need to be
-	 * refreshed since KVM is going to intercept them regardless of what
-	 * userspace wants.
-	 */
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-		u32 msr = vmx_possible_passthrough_msrs[i];
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.read))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.write))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
+#ifdef CONFIG_X86_64
+	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+#endif
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+	if (kvm_cstate_in_guest(vcpu->kvm)) {
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
+	}
+	if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
+		vmx_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
+		vmx_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
 	}
 
 	/* PT MSRs can be passed through iff PT is exposed to the guest. */
 	if (vmx_pt_mode_is_host_guest())
 		pt_update_intercept_for_msr(vcpu);
+
+	if (vcpu->arch.xfd_no_write_intercept)
+		vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD, MSR_TYPE_RW);
+
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
+				  !to_vmx(vcpu)->spec_ctrl);
+
+	if (kvm_cpu_cap_has(X86_FEATURE_XFD))
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
+					  !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
+
+	if (cpu_feature_enabled(X86_FEATURE_IBPB))
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+					  !guest_has_pred_cmd_msr(vcpu));
+
+	if (cpu_feature_enabled(X86_FEATURE_FLUSH_L1D))
+		vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+					  !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
+	/*
+	 * x2APIC and LBR MSR intercepts are modified on-demand and cannot be
+	 * filtered by userspace.
+	 */
 }
 
 static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
@@ -7294,7 +7214,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
 		vmx_l1d_flush(vcpu);
 	else if (static_branch_unlikely(&cpu_buf_vm_clear) &&
-		 kvm_arch_has_assigned_device(vcpu->kvm))
+		 (flags & VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO))
 		x86_clear_cpu_buffers();
 
 	vmx_disable_fb_clear(vmx);
@@ -7554,26 +7474,6 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
 		evmcs->hv_enlightenments_control.msr_bitmap = 1;
 	}
 
-	/* The MSR bitmap starts with all ones */
-	bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-	bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
-#ifdef CONFIG_X86_64
-	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
-#endif
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
-	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
-	if (kvm_cstate_in_guest(vcpu->kvm)) {
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
-		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
-	}
-
 	vmx->loaded_vmcs = &vmx->vmcs01;
 
 	if (cpu_need_virtualize_apic_accesses(vcpu)) {
@@ -7623,7 +7523,7 @@ free_vpid:
 int vmx_vm_init(struct kvm *kvm)
 {
 	if (!ple_gap)
-		kvm->arch.pause_in_guest = true;
+		kvm_disable_exits(kvm, KVM_X86_DISABLE_EXITS_PAUSE);
 
 	if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
 		switch (l1tf_mitigation) {
@@ -7860,18 +7760,6 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (kvm_cpu_cap_has(X86_FEATURE_XFD))
-		vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
-					  !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
-
-	if (boot_cpu_has(X86_FEATURE_IBPB))
-		vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
-					  !guest_has_pred_cmd_msr(vcpu));
-
-	if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
-		vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
-					  !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
-
 	set_cr4_guest_host_mask(vmx);
 
 	vmx_write_encls_bitmap(vcpu, NULL);
@@ -7887,6 +7775,9 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		vmx->msr_ia32_feature_control_valid_bits &=
 			~FEAT_CTL_SGX_LC_ENABLED;
 
+	/* Recalc MSR interception to account for feature changes. */
+	vmx_recalc_msr_intercepts(vcpu);
+
 	/* Refresh #PF interception to account for MAXPHYADDR changes. */
 	vmx_update_exception_bitmap(vcpu);
 }
@@ -8661,6 +8552,8 @@ int __init vmx_init(void)
 {
 	int r, cpu;
 
+	KVM_SANITY_CHECK_VM_STRUCT_SIZE(kvm_vmx);
+
 	if (!kvm_is_vmx_supported())
 		return -EOPNOTSUPP;
 
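Note on the consolidated helper: after this change there is a single vmx_set_intercept_for_msr(vcpu, msr, type, set) entry point, and an MSR bitmap bit ends up clear (passthrough) only when the caller requests passthrough and the userspace MSR filter permits the access; in every other case the bit is set (intercept). The standalone sketch below models just that decision; toy_vcpu and toy_set_intercept_for_msr are illustrative stand-ins, not kernel APIs.

/*
 * Standalone sketch (not kernel code): models the intercept decision made by
 * the folded vmx_set_intercept_for_msr() in this patch, using toy types.
 * A "true" flag below stands in for a set bit in the MSR bitmap (intercept);
 * it becomes false only when the caller wants passthrough (!set) AND the
 * userspace MSR filter allows the access.
 */
#include <stdbool.h>
#include <stdio.h>

#define MSR_TYPE_R  1
#define MSR_TYPE_W  2
#define MSR_TYPE_RW (MSR_TYPE_R | MSR_TYPE_W)

struct toy_vcpu {
	bool read_intercepted;    /* stand-in for the read bitmap bit */
	bool write_intercepted;   /* stand-in for the write bitmap bit */
	bool filter_allows_read;  /* stand-in for kvm_msr_allowed(..., READ) */
	bool filter_allows_write; /* stand-in for kvm_msr_allowed(..., WRITE) */
};

/* One entry point for both enable (set=true) and disable (set=false). */
static void toy_set_intercept_for_msr(struct toy_vcpu *vcpu, int type, bool set)
{
	if (type & MSR_TYPE_R)
		vcpu->read_intercepted = set || !vcpu->filter_allows_read;

	if (type & MSR_TYPE_W)
		vcpu->write_intercepted = set || !vcpu->filter_allows_write;
}

int main(void)
{
	struct toy_vcpu vcpu = {
		.read_intercepted = true,  .write_intercepted = true,
		.filter_allows_read = true, .filter_allows_write = false,
	};

	/* Request passthrough for both directions, as a disable call now does. */
	toy_set_intercept_for_msr(&vcpu, MSR_TYPE_RW, false);

	/* Reads become passthrough; writes stay intercepted (filter denies them). */
	printf("read intercepted: %d, write intercepted: %d\n",
	       vcpu.read_intercepted, vcpu.write_intercepted);
	return 0;
}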