diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2024-05-07 18:45:35 +0300 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2024-05-10 20:18:36 +0300 |
commit | 1e21b53825bd5cd388d745d8c95a2c5aef33e96f (patch) | |
tree | 672e96b1d0f573b1685a04f2ceab16c0255e9b1e /arch/x86 | |
parent | 40269c03fdbff2171af246795a4c639cb0cf1ed5 (diff) | |
parent | 8131cf5b4fd8c58f30a01b906a86a77a33b0293a (diff) | |
download | linux-1e21b53825bd5cd388d745d8c95a2c5aef33e96f.tar.xz |
Merge branch 'kvm-vmx-ve' into HEAD
Allow a non-zero value for non-present SPTE and removed SPTE,
so that TDX can set the "suppress VE" bit.
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/vmx.h | 13 | ||||
-rw-r--r-- | arch/x86/kvm/Kconfig | 13 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 21 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/paging_tmpl.h | 14 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.c | 24 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.h | 24 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.c | 18 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmcs.h | 5 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 53 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 6 |
11 files changed, 152 insertions, 41 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 01c69840647e..9f92bdb78504 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1313,6 +1313,8 @@ struct kvm_arch { */ spinlock_t mmu_unsync_pages_lock; + u64 shadow_mmio_value; + struct iommu_domain *iommu_domain; bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 4dba17363008..d77a31039f24 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -71,6 +71,7 @@ #define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING) #define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING) #define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING) +#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE) #define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX) #define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES) #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) @@ -226,6 +227,8 @@ enum vmcs_field { VMREAD_BITMAP_HIGH = 0x00002027, VMWRITE_BITMAP = 0x00002028, VMWRITE_BITMAP_HIGH = 0x00002029, + VE_INFORMATION_ADDRESS = 0x0000202A, + VE_INFORMATION_ADDRESS_HIGH = 0x0000202B, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, ENCLS_EXITING_BITMAP = 0x0000202E, @@ -514,6 +517,7 @@ enum vmcs_field { #define VMX_EPT_IPAT_BIT (1ull << 6) #define VMX_EPT_ACCESS_BIT (1ull << 8) #define VMX_EPT_DIRTY_BIT (1ull << 9) +#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63) #define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \ VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) @@ -630,4 +634,13 @@ enum vmx_l1d_flush_state { extern enum vmx_l1d_flush_state l1tf_vmx_mitigation; +struct vmx_ve_information { + u32 exit_reason; + u32 delivery; + u64 exit_qualification; + u64 guest_linear_address; + u64 guest_physical_address; + u16 eptp_index; +}; + #endif diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 0ebdd088f28b..d64fb2b3eb69 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -95,6 +95,19 @@ config KVM_INTEL To compile this as a module, choose M here: the module will be called kvm-intel. +config KVM_INTEL_PROVE_VE + bool "Check that guests do not receive #VE exceptions" + default KVM_PROVE_MMU || DEBUG_KERNEL + depends on KVM_INTEL + help + + Checks that KVM's page table management code will not incorrectly + let guests receive a virtualization exception. Virtualization + exceptions will be trapped by the hypervisor rather than injected + in the guest. + + If unsure, say N. + config X86_SGX_KVM bool "Software Guard eXtensions (SGX) Virtualization" depends on X86_SGX && KVM_INTEL diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 08900a0563f9..45b6d8f9e359 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -567,9 +567,9 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) if (!is_shadow_present_pte(old_spte) || !spte_has_volatile_bits(old_spte)) - __update_clear_spte_fast(sptep, 0ull); + __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE); else - old_spte = __update_clear_spte_slow(sptep, 0ull); + old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE); if (!is_shadow_present_pte(old_spte)) return old_spte; @@ -603,7 +603,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) */ static void mmu_spte_clear_no_track(u64 *sptep) { - __update_clear_spte_fast(sptep, 0ull); + __update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE); } static u64 mmu_spte_get_lockless(u64 *sptep) @@ -1897,7 +1897,8 @@ static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) static int kvm_sync_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i) { - if (!sp->spt[i]) + /* sp->spt[i] has initial value of shadow page table allocation */ + if (sp->spt[i] == SHADOW_NONPRESENT_VALUE) return 0; return vcpu->arch.mmu->sync_spte(vcpu, sp, i); @@ -2461,7 +2462,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, return kvm_mmu_prepare_zap_page(kvm, child, invalid_list); } - } else if (is_mmio_spte(pte)) { + } else if (is_mmio_spte(kvm, pte)) { mmu_spte_clear_no_track(spte); } return 0; @@ -4143,7 +4144,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) if (WARN_ON_ONCE(reserved)) return -EINVAL; - if (is_mmio_spte(spte)) { + if (is_mmio_spte(vcpu->kvm, spte)) { gfn_t gfn = get_mmio_spte_gfn(spte); unsigned int access = get_mmio_spte_access(spte); @@ -4759,7 +4760,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, unsigned int access) { - if (unlikely(is_mmio_spte(*sptep))) { + if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) { if (gfn != get_mmio_spte_gfn(*sptep)) { mmu_spte_clear_no_track(sptep); return true; @@ -6120,7 +6121,10 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache; vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO; - vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO; + vcpu->arch.mmu_shadow_page_cache.init_value = + SHADOW_NONPRESENT_VALUE; + if (!vcpu->arch.mmu_shadow_page_cache.init_value) + vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO; vcpu->arch.mmu = &vcpu->arch.root_mmu; vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; @@ -6263,6 +6267,7 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) void kvm_mmu_init_vm(struct kvm *kvm) { + kvm->arch.shadow_mmio_value = shadow_mmio_value; INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 4d4e98fe4f35..9aac3aa93d88 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -911,7 +911,7 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int gpa_t pte_gpa; gfn_t gfn; - if (WARN_ON_ONCE(!sp->spt[i])) + if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE)) return 0; first_pte_gpa = FNAME(get_level1_sp_gpa)(sp); @@ -933,13 +933,13 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int return 0; /* - * Drop the SPTE if the new protections would result in a RWX=0 - * SPTE or if the gfn is changing. The RWX=0 case only affects - * EPT with execute-only support, i.e. EPT without an effective - * "present" bit, as all other paging modes will create a - * read-only SPTE if pte_access is zero. + * Drop the SPTE if the new protections result in no effective + * "present" bit or if the gfn is changing. The former case + * only affects EPT with execute-only support with pte_access==0; + * all other paging modes will create a read-only SPTE if + * pte_access is zero. */ - if ((!pte_access && !shadow_present_mask) || + if ((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE || gfn != kvm_mmu_page_get_gfn(sp, i)) { drop_spte(vcpu->kvm, &sp->spt[i]); return 1; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 6c7ab3aa6aa7..a5e014d7bc62 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -74,10 +74,10 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access) u64 spte = generation_mmio_spte_mask(gen); u64 gpa = gfn << PAGE_SHIFT; - WARN_ON_ONCE(!shadow_mmio_value); + WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value); access &= shadow_mmio_access_mask; - spte |= shadow_mmio_value | access; + spte |= vcpu->kvm->arch.shadow_mmio_value | access; spte |= gpa | shadow_nonpresent_or_rsvd_mask; spte |= (gpa & shadow_nonpresent_or_rsvd_mask) << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; @@ -144,19 +144,19 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 spte = SPTE_MMU_PRESENT_MASK; bool wrprot = false; - WARN_ON_ONCE(!pte_access && !shadow_present_mask); + /* + * For the EPT case, shadow_present_mask has no RWX bits set if + * exec-only page table entries are supported. In that case, + * ACC_USER_MASK and shadow_user_mask are used to represent + * read access. See FNAME(gpte_access) in paging_tmpl.h. + */ + WARN_ON_ONCE((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE); if (sp->role.ad_disabled) spte |= SPTE_TDP_AD_DISABLED; else if (kvm_mmu_page_ad_need_write_protect(sp)) spte |= SPTE_TDP_AD_WRPROT_ONLY; - /* - * For the EPT case, shadow_present_mask is 0 if hardware - * supports exec-only page table entries. In that case, - * ACC_USER_MASK and shadow_user_mask are used to represent - * read access. See FNAME(gpte_access) in paging_tmpl.h. - */ spte |= shadow_present_mask; if (!prefetch) spte |= spte_shadow_accessed_mask(spte); @@ -413,7 +413,9 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) shadow_dirty_mask = has_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull; shadow_nx_mask = 0ull; shadow_x_mask = VMX_EPT_EXECUTABLE_MASK; - shadow_present_mask = has_exec_only ? 0ull : VMX_EPT_READABLE_MASK; + /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */ + shadow_present_mask = + (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT; /* * EPT overrides the host MTRRs, and so KVM must program the desired * memtype directly into the SPTEs. Note, this mask is just the mask @@ -430,7 +432,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only) * of an EPT paging-structure entry is 110b (write/execute). */ kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, - VMX_EPT_RWX_MASK, 0); + VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT, 0); } EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks); diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index f5c600c52f83..5dd5405fa07a 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -149,6 +149,22 @@ static_assert(MMIO_SPTE_GEN_LOW_BITS == 8 && MMIO_SPTE_GEN_HIGH_BITS == 11); #define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0) +/* + * Non-present SPTE value needs to set bit 63 for TDX, in order to suppress + * #VE and get EPT violations on non-present PTEs. We can use the + * same value also without TDX for both VMX and SVM: + * + * For SVM NPT, for non-present spte (bit 0 = 0), other bits are ignored. + * For VMX EPT, bit 63 is ignored if #VE is disabled. (EPT_VIOLATION_VE=0) + * bit 63 is #VE suppress if #VE is enabled. (EPT_VIOLATION_VE=1) + */ +#ifdef CONFIG_X86_64 +#define SHADOW_NONPRESENT_VALUE BIT_ULL(63) +static_assert(!(SHADOW_NONPRESENT_VALUE & SPTE_MMU_PRESENT_MASK)); +#else +#define SHADOW_NONPRESENT_VALUE 0ULL +#endif + extern u64 __read_mostly shadow_host_writable_mask; extern u64 __read_mostly shadow_mmu_writable_mask; extern u64 __read_mostly shadow_nx_mask; @@ -190,11 +206,11 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; * * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on * both AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF - * vulnerability. Use only low bits to avoid 64-bit immediates. + * vulnerability. * * Only used by the TDP MMU. */ -#define REMOVED_SPTE 0x5a0ULL +#define REMOVED_SPTE (SHADOW_NONPRESENT_VALUE | 0x5a0ULL) /* Removed SPTEs must not be misconstrued as shadow present PTEs. */ static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK)); @@ -249,9 +265,9 @@ static inline struct kvm_mmu_page *root_to_sp(hpa_t root) return spte_to_child_sp(root); } -static inline bool is_mmio_spte(u64 spte) +static inline bool is_mmio_spte(struct kvm *kvm, u64 spte) { - return (spte & shadow_mmio_mask) == shadow_mmio_value && + return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value && likely(enable_mmio_caching); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index c6192a52bd31..5fd618abc243 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -495,8 +495,8 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, * impact the guest since both the former and current SPTEs * are nonpresent. */ - if (WARN_ON_ONCE(!is_mmio_spte(old_spte) && - !is_mmio_spte(new_spte) && + if (WARN_ON_ONCE(!is_mmio_spte(kvm, old_spte) && + !is_mmio_spte(kvm, new_spte) && !is_removed_spte(new_spte))) pr_err("Unexpected SPTE change! Nonpresent SPTEs\n" "should not be replaced with another,\n" @@ -603,7 +603,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm, * here since the SPTE is going from non-present to non-present. Use * the raw write helper to avoid an unnecessary check on volatile bits. */ - __kvm_tdp_mmu_write_spte(iter->sptep, 0); + __kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE); return 0; } @@ -740,8 +740,8 @@ retry: continue; if (!shared) - tdp_mmu_iter_set_spte(kvm, &iter, 0); - else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0)) + tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE); + else if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE)) goto retry; } } @@ -808,8 +808,8 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp) if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte))) return false; - tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0, - sp->gfn, sp->role.level + 1); + tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, + SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1); return true; } @@ -843,7 +843,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root, !is_last_spte(iter.old_spte, iter.level)) continue; - tdp_mmu_iter_set_spte(kvm, &iter, 0); + tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE); /* * Zappings SPTEs in invalid roots doesn't require a TLB flush, @@ -1028,7 +1028,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, } /* If a MMIO SPTE is installed, the MMIO will need to be emulated. */ - if (unlikely(is_mmio_spte(new_spte))) { + if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) { vcpu->stat.pf_mmio_spte_created++; trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, new_spte); diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 7c1996b433e2..b25625314658 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -140,6 +140,11 @@ static inline bool is_nm_fault(u32 intr_info) return is_exception_n(intr_info, NM_VECTOR); } +static inline bool is_ve_fault(u32 intr_info) +{ + return is_exception_n(intr_info, VE_VECTOR); +} + /* Undocumented: icebp/int1 */ static inline bool is_icebp(u32 intr_info) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6780313914f8..f4644f61d770 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -870,6 +870,12 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu) eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); /* + * #VE isn't used for VMX. To test against unexpected changes + * related to #VE for VMX, intercept unexpected #VE and warn on it. + */ + if (IS_ENABLED(CONFIG_KVM_INTEL_PROVE_VE)) + eb |= 1u << VE_VECTOR; + /* * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway @@ -2602,6 +2608,9 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, &_cpu_based_2nd_exec_control)) return -EIO; } + if (!IS_ENABLED(CONFIG_KVM_INTEL_PROVE_VE)) + _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE; + #ifndef CONFIG_X86_64 if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) @@ -2626,6 +2635,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, return -EIO; vmx_cap->ept = 0; + _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE; } if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) && vmx_cap->vpid) { @@ -4588,6 +4598,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; if (!enable_ept) { exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; + exec_control &= ~SECONDARY_EXEC_EPT_VIOLATION_VE; enable_unrestricted_guest = 0; } if (!enable_unrestricted_guest) @@ -4711,8 +4722,12 @@ static void init_vmcs(struct vcpu_vmx *vmx) exec_controls_set(vmx, vmx_exec_control(vmx)); - if (cpu_has_secondary_exec_ctrls()) + if (cpu_has_secondary_exec_ctrls()) { secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx)); + if (vmx->ve_info) + vmcs_write64(VE_INFORMATION_ADDRESS, + __pa(vmx->ve_info)); + } if (cpu_has_tertiary_exec_ctrls()) tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx)); @@ -5200,6 +5215,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) if (is_invalid_opcode(intr_info)) return handle_ud(vcpu); + if (KVM_BUG_ON(is_ve_fault(intr_info), vcpu->kvm)) + return -EIO; + error_code = 0; if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); @@ -6408,6 +6426,24 @@ void dump_vmcs(struct kvm_vcpu *vcpu) if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) pr_err("Virtual processor ID = 0x%04x\n", vmcs_read16(VIRTUAL_PROCESSOR_ID)); + if (secondary_exec_control & SECONDARY_EXEC_EPT_VIOLATION_VE) { + struct vmx_ve_information *ve_info = vmx->ve_info; + u64 ve_info_pa = vmcs_read64(VE_INFORMATION_ADDRESS); + + /* + * If KVM is dumping the VMCS, then something has gone wrong + * already. Derefencing an address from the VMCS, which could + * very well be corrupted, is a terrible idea. The virtual + * address is known so use it. + */ + pr_err("VE info address = 0x%016llx%s\n", ve_info_pa, + ve_info_pa == __pa(ve_info) ? "" : "(corrupted!)"); + pr_err("ve_info: 0x%08x 0x%08x 0x%016llx 0x%016llx 0x%016llx 0x%04x\n", + ve_info->exit_reason, ve_info->delivery, + ve_info->exit_qualification, + ve_info->guest_linear_address, + ve_info->guest_physical_address, ve_info->eptp_index); + } } /* @@ -7462,6 +7498,7 @@ void vmx_vcpu_free(struct kvm_vcpu *vcpu) free_vpid(vmx->vpid); nested_vmx_free_vcpu(vcpu); free_loaded_vmcs(vmx->loaded_vmcs); + free_page((unsigned long)vmx->ve_info); } int vmx_vcpu_create(struct kvm_vcpu *vcpu) @@ -7555,6 +7592,20 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu) goto free_vmcs; } + err = -ENOMEM; + if (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_EPT_VIOLATION_VE) { + struct page *page; + + BUILD_BUG_ON(sizeof(*vmx->ve_info) > PAGE_SIZE); + + /* ve_info must be page aligned. */ + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!page) + goto free_vmcs; + + vmx->ve_info = page_to_virt(page); + } + if (vmx_can_use_ipiv(vcpu)) WRITE_ONCE(to_kvm_vmx(vcpu->kvm)->pid_table[vcpu->vcpu_id], __pa(&vmx->pi_desc) | PID_TABLE_ENTRY_VALID); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 65786dbe7d60..0da79a386825 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -362,6 +362,9 @@ struct vcpu_vmx { DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS); DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS); } shadow_msr_intercept; + + /* ve_info must be page aligned. */ + struct vmx_ve_information *ve_info; }; struct kvm_vmx { @@ -574,7 +577,8 @@ static inline u8 vmx_get_rvi(void) SECONDARY_EXEC_ENABLE_VMFUNC | \ SECONDARY_EXEC_BUS_LOCK_DETECTION | \ SECONDARY_EXEC_NOTIFY_VM_EXITING | \ - SECONDARY_EXEC_ENCLS_EXITING) + SECONDARY_EXEC_ENCLS_EXITING | \ + SECONDARY_EXEC_EPT_VIOLATION_VE) #define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0 #define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \ |