diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2023-07-01 14:20:04 +0300 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2023-07-01 14:20:04 +0300 |
commit | 255006adb3da71bb75c334453786df781b415f54 (patch) | |
tree | dc9030af6576d97f8b7489310f76e7406dfb4afb | |
parent | 24975ce8b2f863547d0b79a5a4552d5ea3522a3c (diff) | |
parent | 0a3869e14d4a5e1016aad6bc6c5b70f82bc0dbbe (diff) | |
download | linux-255006adb3da71bb75c334453786df781b415f54.tar.xz |
Merge tag 'kvm-x86-vmx-6.5' of https://github.com/kvm-x86/linux into HEAD
KVM VMX changes for 6.5:
- Fix missing/incorrect #GP checks on ENCLS
- Use standard mmu_notifier hooks for handling APIC access page
- Misc cleanups
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/capabilities.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/pmu_intel.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/sgx.c | 15 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmenter.S | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 66 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 14 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 3 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 18 |
10 files changed, 73 insertions, 58 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 03ff06cd65b3..ec169f5c7dce 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1603,6 +1603,10 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) if (tdp_mmu_enabled) flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); + if (kvm_x86_ops.set_apic_access_page_addr && + range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); + return flush; } diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 45162c1bcd8f..d0abee35d7ba 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -152,8 +152,8 @@ static inline bool cpu_has_vmx_ept(void) static inline bool vmx_umip_emulated(void) { - return vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_DESC; + return !boot_cpu_has(X86_FEATURE_UMIP) && + (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_DESC); } static inline bool cpu_has_vmx_rdtscp(void) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ba2ed6d87364..516391cc0d64 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2328,8 +2328,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4() * will not have to rewrite the controls just for this bit. */ - if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() && - (vmcs12->guest_cr4 & X86_CR4_UMIP)) + if (vmx_umip_emulated() && (vmcs12->guest_cr4 & X86_CR4_UMIP)) exec_control |= SECONDARY_EXEC_DESC; if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 30ec9ccdea47..80c769c58a87 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -385,8 +385,6 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_IA32_DS_AREA: - if (msr_info->host_initiated && data && !guest_cpuid_has(vcpu, X86_FEATURE_DS)) - return 1; if (is_noncanonical_address(data, vcpu)) return 1; diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 2261b684a7d4..3e822e582497 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -357,11 +357,12 @@ static int handle_encls_einit(struct kvm_vcpu *vcpu) static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf) { - if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX)) - return false; - + /* + * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will + * be reached if and only if the SGX1 leafs are enabled. + */ if (leaf >= ECREATE && leaf <= ETRACK) - return guest_cpuid_has(vcpu, X86_FEATURE_SGX1); + return true; if (leaf >= EAUG && leaf <= EMODT) return guest_cpuid_has(vcpu, X86_FEATURE_SGX2); @@ -380,9 +381,11 @@ int handle_encls(struct kvm_vcpu *vcpu) { u32 leaf = (u32)kvm_rax_read(vcpu); - if (!encls_leaf_enabled_in_guest(vcpu, leaf)) { + if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) || + !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) { kvm_queue_exception(vcpu, UD_VECTOR); - } else if (!sgx_enabled_in_guest_bios(vcpu)) { + } else if (!encls_leaf_enabled_in_guest(vcpu, leaf) || + !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) { kvm_inject_gp(vcpu, 0); } else { if (leaf == ECREATE) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 631fd7da2bc3..07e927d4d099 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -187,7 +187,7 @@ SYM_FUNC_START(__vmx_vcpu_run) _ASM_EXTABLE(.Lvmresume, .Lfixup) _ASM_EXTABLE(.Lvmlaunch, .Lfixup) -SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) +SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ UNWIND_HINT_RESTORE diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2d9d155691a7..0ecf4be2c6af 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3384,15 +3384,15 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long old_cr4 = vcpu->arch.cr4; + unsigned long old_cr4 = kvm_read_cr4(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long hw_cr4; + /* * Pass through host's Machine Check Enable value to hw_cr4, which * is in force while we are in guest mode. Do not let guests control * this bit, even if host CR4.MCE == 0. */ - unsigned long hw_cr4; - hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); if (is_unrestricted_guest(vcpu)) hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; @@ -3401,7 +3401,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) else hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; - if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { + if (vmx_umip_emulated()) { if (cr4 & X86_CR4_UMIP) { secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC); hw_cr4 &= ~X86_CR4_UMIP; @@ -5399,7 +5399,13 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) static int handle_desc(struct kvm_vcpu *vcpu) { - WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); + /* + * UMIP emulation relies on intercepting writes to CR4.UMIP, i.e. this + * and other code needs to be updated if UMIP can be guest owned. + */ + BUILD_BUG_ON(KVM_POSSIBLE_CR4_GUEST_BITS & X86_CR4_UMIP); + + WARN_ON_ONCE(!kvm_is_cr4_bit_set(vcpu, X86_CR4_UMIP)); return kvm_emulate_instruction(vcpu, 0); } @@ -6705,7 +6711,12 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) { - struct page *page; + const gfn_t gfn = APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT; + struct kvm *kvm = vcpu->kvm; + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *slot; + unsigned long mmu_seq; + kvm_pfn_t pfn; /* Defer reload until vmcs01 is the current VMCS. */ if (is_guest_mode(vcpu)) { @@ -6717,18 +6728,53 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) return; - page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - if (is_error_page(page)) + /* + * Grab the memslot so that the hva lookup for the mmu_notifier retry + * is guaranteed to use the same memslot as the pfn lookup, i.e. rely + * on the pfn lookup's validation of the memslot to ensure a valid hva + * is used for the retry check. + */ + slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT); + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return; - vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page)); + /* + * Ensure that the mmu_notifier sequence count is read before KVM + * retrieves the pfn from the primary MMU. Note, the memslot is + * protected by SRCU, not the mmu_notifier. Pairs with the smp_wmb() + * in kvm_mmu_invalidate_end(). + */ + mmu_seq = kvm->mmu_invalidate_seq; + smp_rmb(); + + /* + * No need to retry if the memslot does not exist or is invalid. KVM + * controls the APIC-access page memslot, and only deletes the memslot + * if APICv is permanently inhibited, i.e. the memslot won't reappear. + */ + pfn = gfn_to_pfn_memslot(slot, gfn); + if (is_error_noslot_pfn(pfn)) + return; + + read_lock(&vcpu->kvm->mmu_lock); + if (mmu_invalidate_retry_hva(kvm, mmu_seq, + gfn_to_hva_memslot(slot, gfn))) { + kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); + read_unlock(&vcpu->kvm->mmu_lock); + goto out; + } + + vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn)); + read_unlock(&vcpu->kvm->mmu_lock); + vmx_flush_tlb_current(vcpu); +out: /* * Do not pin apic access page in memory, the MMU notifier * will call us again if it is migrated or swapped out. */ - put_page(page); + kvm_release_pfn_clean(pfn); } static void vmx_hwapic_isr_update(int max_isr) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d6e04450448..8bca4d2405f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10449,20 +10449,6 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors); } -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end) -{ - unsigned long apic_address; - - /* - * The physical address of apic access page is stored in the VMCS. - * Update it when it becomes invalid. - */ - apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - if (start <= apic_address && apic_address < end) - kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); -} - void kvm_arch_guest_memory_reclaimed(struct kvm *kvm) { static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 84ba21c8093f..9d3ac7720da9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2239,9 +2239,6 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, } #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end); - void kvm_arch_guest_memory_reclaimed(struct kvm *kvm); #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ab8c8eb9fd62..b838c8f71349 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -154,11 +154,6 @@ static unsigned long long kvm_active_vms; static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask); -__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end) -{ -} - __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm) { } @@ -521,18 +516,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) return container_of(mn, struct kvm, mmu_notifier); } -static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct kvm *kvm = mmu_notifier_to_kvm(mn); - int idx; - - idx = srcu_read_lock(&kvm->srcu); - kvm_arch_mmu_notifier_invalidate_range(kvm, start, end); - srcu_read_unlock(&kvm->srcu, idx); -} - typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range); typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start, @@ -910,7 +893,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, } static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { - .invalidate_range = kvm_mmu_notifier_invalidate_range, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, |