diff options
Diffstat (limited to 'arch/x86/kvm/mmu')
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 320 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu_internal.h | 22 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/page_track.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/paging_tmpl.h | 25 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.c | 10 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.h | 20 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_iter.c | 12 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.c | 20 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.h | 25 |
9 files changed, 258 insertions, 197 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 835426254e76..c8ebe542c565 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -14,6 +14,7 @@ * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "irq.h" #include "ioapic.h" @@ -43,6 +44,7 @@ #include <linux/uaccess.h> #include <linux/hash.h> #include <linux/kern_levels.h> +#include <linux/kstrtox.h> #include <linux/kthread.h> #include <asm/page.h> @@ -99,6 +101,13 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644); */ bool tdp_enabled = false; +static bool __ro_after_init tdp_mmu_allowed; + +#ifdef CONFIG_X86_64 +bool __read_mostly tdp_mmu_enabled = true; +module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0444); +#endif + static int max_huge_page_level __read_mostly; static int tdp_root_level __read_mostly; static int max_tdp_level __read_mostly; @@ -261,6 +270,17 @@ void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, kvm_flush_remote_tlbs_with_range(kvm, &range); } +static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index); + +/* Flush the range of guest memory mapped by the given SPTE. */ +static void kvm_flush_remote_tlbs_sptep(struct kvm *kvm, u64 *sptep) +{ + struct kvm_mmu_page *sp = sptep_to_sp(sptep); + gfn_t gfn = kvm_mmu_page_get_gfn(sp, spte_index(sptep)); + + kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level); +} + static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, unsigned int access) { @@ -609,9 +629,14 @@ static bool mmu_spte_age(u64 *sptep) return true; } +static inline bool is_tdp_mmu_active(struct kvm_vcpu *vcpu) +{ + return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct; +} + static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) { - if (is_tdp_mmu(vcpu->arch.mmu)) { + if (is_tdp_mmu_active(vcpu)) { kvm_tdp_mmu_walk_lockless_begin(); } else { /* @@ -630,7 +655,7 @@ static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) { - if (is_tdp_mmu(vcpu->arch.mmu)) { + if (is_tdp_mmu_active(vcpu)) { kvm_tdp_mmu_walk_lockless_end(); } else { /* @@ -800,7 +825,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_disallow_lpage(slot, gfn); if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) - kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); + kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K); } void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -1174,8 +1199,7 @@ static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush) drop_spte(kvm, sptep); if (flush) - kvm_flush_remote_tlbs_with_address(kvm, sp->gfn, - KVM_PAGES_PER_HPAGE(sp->role.level)); + kvm_flush_remote_tlbs_sptep(kvm, sptep); } /* @@ -1279,7 +1303,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, { struct kvm_rmap_head *rmap_head; - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, slot->base_gfn + gfn_offset, mask, true); @@ -1312,7 +1336,7 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, { struct kvm_rmap_head *rmap_head; - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, slot->base_gfn + gfn_offset, mask, false); @@ -1395,7 +1419,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, } } - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) write_protected |= kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn, min_level); @@ -1456,7 +1480,7 @@ restart: } if (need_flush && kvm_available_flush_tlb_with_range()) { - kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); + kvm_flush_remote_tlbs_gfn(kvm, gfn, level); return false; } @@ -1558,7 +1582,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) if (kvm_memslots_have_rmaps(kvm)) flush = kvm_handle_gfn_range(kvm, range, kvm_zap_rmap); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); return flush; @@ -1571,7 +1595,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (kvm_memslots_have_rmaps(kvm)) flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range); return flush; @@ -1626,8 +1650,7 @@ static void __rmap_add(struct kvm *kvm, kvm->stat.max_mmu_rmap_size = rmap_count; if (rmap_count > RMAP_RECYCLE_THRESHOLD) { kvm_zap_all_rmap_sptes(kvm, rmap_head); - kvm_flush_remote_tlbs_with_address( - kvm, sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level)); + kvm_flush_remote_tlbs_gfn(kvm, gfn, sp->role.level); } } @@ -1646,7 +1669,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (kvm_memslots_have_rmaps(kvm)) young = kvm_handle_gfn_range(kvm, range, kvm_age_rmap); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) young |= kvm_tdp_mmu_age_gfn_range(kvm, range); return young; @@ -1659,7 +1682,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (kvm_memslots_have_rmaps(kvm)) young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmap); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) young |= kvm_tdp_mmu_test_age_gfn(kvm, range); return young; @@ -1921,7 +1944,7 @@ static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) return true; /* TDP MMU pages do not use the MMU generation. */ - return !sp->tdp_mmu_page && + return !is_tdp_mmu_page(sp) && unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } @@ -2355,7 +2378,16 @@ static void __link_shadow_page(struct kvm *kvm, mmu_page_add_parent_pte(cache, sp, sptep); - if (sp->unsync_children || sp->unsync) + /* + * The non-direct sub-pagetable must be updated before linking. For + * L1 sp, the pagetable is updated via kvm_sync_page() in + * kvm_mmu_find_shadow_page() without write-protecting the gfn, + * so sp->unsync can be true or false. For higher level non-direct + * sp, the pagetable is updated/synced via mmu_sync_children() in + * FNAME(fetch)(), so sp->unsync_children can only be false. + * WARN_ON_ONCE() if anything happens unexpectedly. + */ + if (WARN_ON_ONCE(sp->unsync_children) || sp->unsync) mark_unsync(sptep); } @@ -2383,7 +2415,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, return; drop_parent_pte(child, sptep); - kvm_flush_remote_tlbs_with_address(vcpu->kvm, child->gfn, 1); + kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep); } } @@ -2867,8 +2899,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, } if (flush) - kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, - KVM_PAGES_PER_HPAGE(level)); + kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level); pgprintk("%s: setting spte %llx\n", __func__, *sptep); @@ -3116,11 +3147,11 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ !is_large_pte(spte) && spte_to_child_sp(spte)->nx_huge_page_disallowed) { /* - * A small SPTE exists for this pfn, but FNAME(fetch) - * and __direct_map would like to create a large PTE - * instead: just force them to go down another level, - * patching back for them into pfn the next 9 bits of - * the address. + * A small SPTE exists for this pfn, but FNAME(fetch), + * direct_map(), or kvm_tdp_mmu_map() would like to create a + * large PTE instead: just force them to go down another level, + * patching back for them into pfn the next 9 bits of the + * address. */ u64 page_mask = KVM_PAGES_PER_HPAGE(cur_level) - KVM_PAGES_PER_HPAGE(cur_level - 1); @@ -3129,7 +3160,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ } } -static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) +static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; @@ -3147,7 +3178,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, *it.sptep, it.level); - base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + base_gfn = gfn_round_for_level(fault->gfn, it.level); if (it.level == fault->goal_level) break; @@ -3173,14 +3204,16 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) return ret; } -static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) +static void kvm_send_hwpoison_signal(struct kvm_memory_slot *slot, gfn_t gfn) { - send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk); + unsigned long hva = gfn_to_hva_memslot(slot, gfn); + + send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current); } -static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) +static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - if (is_sigpending_pfn(pfn)) { + if (is_sigpending_pfn(fault->pfn)) { kvm_handle_signal_exit(vcpu); return -EINTR; } @@ -3190,43 +3223,43 @@ static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) * into the spte otherwise read access on readonly gfn also can * caused mmio page fault and treat it as mmio access. */ - if (pfn == KVM_PFN_ERR_RO_FAULT) + if (fault->pfn == KVM_PFN_ERR_RO_FAULT) return RET_PF_EMULATE; - if (pfn == KVM_PFN_ERR_HWPOISON) { - kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current); + if (fault->pfn == KVM_PFN_ERR_HWPOISON) { + kvm_send_hwpoison_signal(fault->slot, fault->gfn); return RET_PF_RETRY; } return -EFAULT; } -static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, - unsigned int access) +static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault, + unsigned int access) { - /* The pfn is invalid, report the error! */ - if (unlikely(is_error_pfn(fault->pfn))) - return kvm_handle_error_pfn(vcpu, fault->gfn, fault->pfn); + gva_t gva = fault->is_tdp ? 0 : fault->addr; - if (unlikely(!fault->slot)) { - gva_t gva = fault->is_tdp ? 0 : fault->addr; + vcpu_cache_mmio_info(vcpu, gva, fault->gfn, + access & shadow_mmio_access_mask); - vcpu_cache_mmio_info(vcpu, gva, fault->gfn, - access & shadow_mmio_access_mask); - /* - * If MMIO caching is disabled, emulate immediately without - * touching the shadow page tables as attempting to install an - * MMIO SPTE will just be an expensive nop. Do not cache MMIO - * whose gfn is greater than host.MAXPHYADDR, any guest that - * generates such gfns is running nested and is being tricked - * by L0 userspace (you can observe gfn > L1.MAXPHYADDR if - * and only if L1's MAXPHYADDR is inaccurate with respect to - * the hardware's). - */ - if (unlikely(!enable_mmio_caching) || - unlikely(fault->gfn > kvm_mmu_max_gfn())) - return RET_PF_EMULATE; - } + /* + * If MMIO caching is disabled, emulate immediately without + * touching the shadow page tables as attempting to install an + * MMIO SPTE will just be an expensive nop. + */ + if (unlikely(!enable_mmio_caching)) + return RET_PF_EMULATE; + + /* + * Do not create an MMIO SPTE for a gfn greater than host.MAXPHYADDR, + * any guest that generates such gfns is running nested and is being + * tricked by L0 userspace (you can observe gfn > L1.MAXPHYADDR if and + * only if L1's MAXPHYADDR is inaccurate with respect to the + * hardware's). + */ + if (unlikely(fault->gfn > kvm_mmu_max_gfn())) + return RET_PF_EMULATE; return RET_PF_CONTINUE; } @@ -3350,7 +3383,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) do { u64 new_spte; - if (is_tdp_mmu(vcpu->arch.mmu)) + if (tdp_mmu_enabled) sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, fault->addr, &spte); else sptep = fast_pf_get_last_sptep(vcpu, fault->addr, &spte); @@ -3433,8 +3466,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } if (++retry_count > 4) { - printk_once(KERN_WARNING - "kvm: Fast #PF retrying more than 4 times.\n"); + pr_warn_once("Fast #PF retrying more than 4 times.\n"); break; } @@ -3596,7 +3628,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (r < 0) goto out_unlock; - if (is_tdp_mmu_enabled(vcpu->kvm)) { + if (tdp_mmu_enabled) { root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu); mmu->root.hpa = root; } else if (shadow_root_level >= PT64_ROOT_4LEVEL) { @@ -4026,7 +4058,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) walk_shadow_page_lockless_begin(vcpu); - if (is_tdp_mmu(vcpu->arch.mmu)) + if (is_tdp_mmu_active(vcpu)) leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root); else leaf = get_walk(vcpu, addr, sptes, &root); @@ -4174,7 +4206,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true); } -static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) +static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_memory_slot *slot = fault->slot; bool async; @@ -4235,12 +4267,33 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) return RET_PF_CONTINUE; } +static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, + unsigned int access) +{ + int ret; + + fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq; + smp_rmb(); + + ret = __kvm_faultin_pfn(vcpu, fault); + if (ret != RET_PF_CONTINUE) + return ret; + + if (unlikely(is_error_pfn(fault->pfn))) + return kvm_handle_error_pfn(vcpu, fault); + + if (unlikely(!fault->slot)) + return kvm_handle_noslot_fault(vcpu, fault, access); + + return RET_PF_CONTINUE; +} + /* * Returns true if the page fault is stale and needs to be retried, i.e. if the * root was invalidated by a memslot update or a relevant mmu_notifier fired. */ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, - struct kvm_page_fault *fault, int mmu_seq) + struct kvm_page_fault *fault) { struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa); @@ -4260,19 +4313,13 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, return true; return fault->slot && - mmu_invalidate_retry_hva(vcpu->kvm, mmu_seq, fault->hva); + mmu_invalidate_retry_hva(vcpu->kvm, fault->mmu_seq, fault->hva); } static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu); - - unsigned long mmu_seq; int r; - fault->gfn = fault->addr >> PAGE_SHIFT; - fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn); - if (page_fault_handle_page_track(vcpu, fault)) return RET_PF_EMULATE; @@ -4284,41 +4331,24 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (r) return r; - mmu_seq = vcpu->kvm->mmu_invalidate_seq; - smp_rmb(); - - r = kvm_faultin_pfn(vcpu, fault); - if (r != RET_PF_CONTINUE) - return r; - - r = handle_abnormal_pfn(vcpu, fault, ACC_ALL); + r = kvm_faultin_pfn(vcpu, fault, ACC_ALL); if (r != RET_PF_CONTINUE) return r; r = RET_PF_RETRY; + write_lock(&vcpu->kvm->mmu_lock); - if (is_tdp_mmu_fault) - read_lock(&vcpu->kvm->mmu_lock); - else - write_lock(&vcpu->kvm->mmu_lock); + if (is_page_fault_stale(vcpu, fault)) + goto out_unlock; - if (is_page_fault_stale(vcpu, fault, mmu_seq)) + r = make_mmu_pages_available(vcpu); + if (r) goto out_unlock; - if (is_tdp_mmu_fault) { - r = kvm_tdp_mmu_map(vcpu, fault); - } else { - r = make_mmu_pages_available(vcpu); - if (r) - goto out_unlock; - r = __direct_map(vcpu, fault); - } + r = direct_map(vcpu, fault); out_unlock: - if (is_tdp_mmu_fault) - read_unlock(&vcpu->kvm->mmu_lock); - else - write_unlock(&vcpu->kvm->mmu_lock); + write_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(fault->pfn); return r; } @@ -4366,6 +4396,42 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, } EXPORT_SYMBOL_GPL(kvm_handle_page_fault); +#ifdef CONFIG_X86_64 +static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) +{ + int r; + + if (page_fault_handle_page_track(vcpu, fault)) + return RET_PF_EMULATE; + + r = fast_page_fault(vcpu, fault); + if (r != RET_PF_INVALID) + return r; + + r = mmu_topup_memory_caches(vcpu, false); + if (r) + return r; + + r = kvm_faultin_pfn(vcpu, fault, ACC_ALL); + if (r != RET_PF_CONTINUE) + return r; + + r = RET_PF_RETRY; + read_lock(&vcpu->kvm->mmu_lock); + + if (is_page_fault_stale(vcpu, fault)) + goto out_unlock; + + r = kvm_tdp_mmu_map(vcpu, fault); + +out_unlock: + read_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(fault->pfn); + return r; +} +#endif + int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { /* @@ -4383,13 +4449,19 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) { for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) { int page_num = KVM_PAGES_PER_HPAGE(fault->max_level); - gfn_t base = (fault->addr >> PAGE_SHIFT) & ~(page_num - 1); + gfn_t base = gfn_round_for_level(fault->gfn, + fault->max_level); if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num)) break; } } +#ifdef CONFIG_X86_64 + if (tdp_mmu_enabled) + return kvm_tdp_mmu_page_fault(vcpu, fault); +#endif + return direct_page_fault(vcpu, fault); } @@ -4494,10 +4566,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd) struct kvm_mmu *mmu = vcpu->arch.mmu; union kvm_mmu_page_role new_role = mmu->root_role; - if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) { - /* kvm_mmu_ensure_valid_pgd will set up a new root. */ + /* + * Return immediately if no usable root was found, kvm_mmu_reload() + * will establish a valid root prior to the next VM-Enter. + */ + if (!fast_pgd_switch(vcpu->kvm, mmu, new_pgd, new_role)) return; - } /* * It's possible that the cached previous root page is obsolete because @@ -5719,6 +5793,9 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, tdp_root_level = tdp_forced_root_level; max_tdp_level = tdp_max_root_level; +#ifdef CONFIG_X86_64 + tdp_mmu_enabled = tdp_mmu_allowed && tdp_enabled; +#endif /* * max_huge_page_level reflects KVM's MMU capabilities irrespective * of kernel support, e.g. KVM may be capable of using 1GB pages when @@ -5966,7 +6043,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) * write and in the same critical section as making the reload request, * e.g. before kvm_zap_obsolete_pages() could drop mmu_lock and yield. */ - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_invalidate_all_roots(kvm); /* @@ -5991,7 +6068,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) * Deferring the zap until the final reference to the root is put would * lead to use-after-free. */ - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_zap_invalidated_roots(kvm); } @@ -6017,9 +6094,11 @@ int kvm_mmu_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); - r = kvm_mmu_init_tdp_mmu(kvm); - if (r < 0) - return r; + if (tdp_mmu_enabled) { + r = kvm_mmu_init_tdp_mmu(kvm); + if (r < 0) + return r; + } node->track_write = kvm_mmu_pte_write; node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; @@ -6049,7 +6128,8 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) kvm_page_track_unregister_notifier(kvm, node); - kvm_mmu_uninit_tdp_mmu(kvm); + if (tdp_mmu_enabled) + kvm_mmu_uninit_tdp_mmu(kvm); mmu_free_vm_memory_caches(kvm); } @@ -6103,7 +6183,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); - if (is_tdp_mmu_enabled(kvm)) { + if (tdp_mmu_enabled) { for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start, gfn_end, true, flush); @@ -6136,7 +6216,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, write_unlock(&kvm->mmu_lock); } - if (is_tdp_mmu_enabled(kvm)) { + if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); read_unlock(&kvm->mmu_lock); @@ -6379,7 +6459,7 @@ void kvm_mmu_try_split_huge_pages(struct kvm *kvm, u64 start, u64 end, int target_level) { - if (!is_tdp_mmu_enabled(kvm)) + if (!tdp_mmu_enabled) return; if (kvm_memslots_have_rmaps(kvm)) @@ -6400,7 +6480,7 @@ void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, u64 start = memslot->base_gfn; u64 end = start + memslot->npages; - if (!is_tdp_mmu_enabled(kvm)) + if (!tdp_mmu_enabled) return; if (kvm_memslots_have_rmaps(kvm)) { @@ -6450,8 +6530,7 @@ restart: kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); if (kvm_available_flush_tlb_with_range()) - kvm_flush_remote_tlbs_with_address(kvm, sp->gfn, - KVM_PAGES_PER_HPAGE(sp->role.level)); + kvm_flush_remote_tlbs_sptep(kvm, sptep); else need_tlb_flush = 1; @@ -6483,7 +6562,7 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, write_unlock(&kvm->mmu_lock); } - if (is_tdp_mmu_enabled(kvm)) { + if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot); read_unlock(&kvm->mmu_lock); @@ -6518,7 +6597,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, write_unlock(&kvm->mmu_lock); } - if (is_tdp_mmu_enabled(kvm)) { + if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); read_unlock(&kvm->mmu_lock); @@ -6553,7 +6632,7 @@ restart: kvm_mmu_commit_zap_page(kvm, &invalid_list); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_zap_all(kvm); write_unlock(&kvm->mmu_lock); @@ -6579,7 +6658,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) * zap all shadow pages. */ if (unlikely(gen == 0)) { - kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); + kvm_debug_ratelimited("zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_zap_all_fast(kvm); } } @@ -6684,7 +6763,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) new_val = 1; else if (sysfs_streq(val, "auto")) new_val = get_nx_auto_mode(); - else if (strtobool(val, &new_val) < 0) + else if (kstrtobool(val, &new_val) < 0) return -EINVAL; __set_nx_huge_pages(new_val); @@ -6718,6 +6797,13 @@ void __init kvm_mmu_x86_module_init(void) if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); + /* + * Snapshot userspace's desire to enable the TDP MMU. Whether or not the + * TDP MMU is actually enabled is determined in kvm_configure_mmu() + * when the vendor module is loaded. + */ + tdp_mmu_allowed = tdp_mmu_enabled; + kvm_mmu_spte_module_init(); } diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index dbaf6755c5a7..cc58631e2336 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -156,6 +156,11 @@ static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp) return kvm_x86_ops.cpu_dirty_log_size && sp->role.guest_mode; } +static inline gfn_t gfn_round_for_level(gfn_t gfn, int level) +{ + return gfn & -KVM_PAGES_PER_HPAGE(level); +} + int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn, bool can_unsync, bool prefetch); @@ -164,8 +169,17 @@ void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level); + void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, u64 start_gfn, u64 pages); + +/* Flush the given page (huge or not) of guest memory. */ +static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level) +{ + kvm_flush_remote_tlbs_with_address(kvm, gfn_round_for_level(gfn, level), + KVM_PAGES_PER_HPAGE(level)); +} + unsigned int pte_list_count(struct kvm_rmap_head *rmap_head); extern int nx_huge_pages; @@ -199,7 +213,7 @@ struct kvm_page_fault { /* * Maximum page size that can be created for this fault; input to - * FNAME(fetch), __direct_map and kvm_tdp_mmu_map. + * FNAME(fetch), direct_map() and kvm_tdp_mmu_map(). */ u8 max_level; @@ -222,6 +236,7 @@ struct kvm_page_fault { struct kvm_memory_slot *slot; /* Outputs of kvm_faultin_pfn. */ + unsigned long mmu_seq; kvm_pfn_t pfn; hva_t hva; bool map_writable; @@ -279,6 +294,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, }; int r; + if (vcpu->arch.mmu->root_role.direct) { + fault.gfn = fault.addr >> PAGE_SHIFT; + fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn); + } + /* * Async #PF "faults", a.k.a. prefetch faults, are not faults from the * guest perspective and have already been counted at the time of the diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 2e09d1b6249f..0a2ac438d647 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -10,6 +10,7 @@ * Author: * Xiao Guangrong <guangrong.xiao@linux.intel.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <linux/rculist.h> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 0f6455072055..57f0b75c80f9 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -642,12 +642,12 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa))) goto out_gpte_changed; - for (shadow_walk_init(&it, vcpu, fault->addr); - shadow_walk_okay(&it) && it.level > gw->level; - shadow_walk_next(&it)) { + for_each_shadow_entry(vcpu, fault->addr, it) { gfn_t table_gfn; clear_sp_write_flooding_count(it.sptep); + if (it.level == gw->level) + break; table_gfn = gw->table_gfn[it.level - 2]; access = gw->pt_access[it.level - 2]; @@ -692,8 +692,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, trace_kvm_mmu_spte_requested(fault); for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { - clear_sp_write_flooding_count(it.sptep); - /* * We cannot overwrite existing page tables with an NX * large page, as the leaf could be executable. @@ -701,7 +699,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, *it.sptep, it.level); - base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + base_gfn = gfn_round_for_level(fault->gfn, it.level); if (it.level == fault->goal_level) break; @@ -791,7 +789,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault { struct guest_walker walker; int r; - unsigned long mmu_seq; bool is_self_change_mapping; pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code); @@ -838,14 +835,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault else fault->max_level = walker.level; - mmu_seq = vcpu->kvm->mmu_invalidate_seq; - smp_rmb(); - - r = kvm_faultin_pfn(vcpu, fault); - if (r != RET_PF_CONTINUE) - return r; - - r = handle_abnormal_pfn(vcpu, fault, walker.pte_access); + r = kvm_faultin_pfn(vcpu, fault, walker.pte_access); if (r != RET_PF_CONTINUE) return r; @@ -871,7 +861,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); - if (is_page_fault_stale(vcpu, fault, mmu_seq)) + if (is_page_fault_stale(vcpu, fault)) goto out_unlock; r = make_mmu_pages_available(vcpu); @@ -937,8 +927,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) mmu_page_zap_pte(vcpu->kvm, sp, sptep, NULL); if (is_shadow_present_pte(old_spte)) - kvm_flush_remote_tlbs_with_address(vcpu->kvm, - sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level)); + kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep); if (!rmap_can_add(vcpu)) break; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index c0fd7e049b4e..c15bfca3ed15 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -7,7 +7,7 @@ * Copyright (C) 2006 Qumranet, Inc. * Copyright 2020 Red Hat, Inc. and/or its affiliates. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include "mmu.h" @@ -147,9 +147,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, WARN_ON_ONCE(!pte_access && !shadow_present_mask); if (sp->role.ad_disabled) - spte |= SPTE_TDP_AD_DISABLED_MASK; + spte |= SPTE_TDP_AD_DISABLED; else if (kvm_mmu_page_ad_need_write_protect(sp)) - spte |= SPTE_TDP_AD_WRPROT_ONLY_MASK; + spte |= SPTE_TDP_AD_WRPROT_ONLY; /* * For the EPT case, shadow_present_mask is 0 if hardware @@ -317,7 +317,7 @@ u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled) shadow_user_mask | shadow_x_mask | shadow_me_value; if (ad_disabled) - spte |= SPTE_TDP_AD_DISABLED_MASK; + spte |= SPTE_TDP_AD_DISABLED; else spte |= shadow_accessed_mask; @@ -352,7 +352,7 @@ u64 mark_spte_for_access_track(u64 spte) WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK << SHADOW_ACC_TRACK_SAVED_BITS_SHIFT), - "kvm: Access Tracking saved bit locations are not zero\n"); + "Access Tracking saved bit locations are not zero\n"); spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) << SHADOW_ACC_TRACK_SAVED_BITS_SHIFT; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 6f54dc9409c9..1279db2eab44 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -28,10 +28,10 @@ */ #define SPTE_TDP_AD_SHIFT 52 #define SPTE_TDP_AD_MASK (3ULL << SPTE_TDP_AD_SHIFT) -#define SPTE_TDP_AD_ENABLED_MASK (0ULL << SPTE_TDP_AD_SHIFT) -#define SPTE_TDP_AD_DISABLED_MASK (1ULL << SPTE_TDP_AD_SHIFT) -#define SPTE_TDP_AD_WRPROT_ONLY_MASK (2ULL << SPTE_TDP_AD_SHIFT) -static_assert(SPTE_TDP_AD_ENABLED_MASK == 0); +#define SPTE_TDP_AD_ENABLED (0ULL << SPTE_TDP_AD_SHIFT) +#define SPTE_TDP_AD_DISABLED (1ULL << SPTE_TDP_AD_SHIFT) +#define SPTE_TDP_AD_WRPROT_ONLY (2ULL << SPTE_TDP_AD_SHIFT) +static_assert(SPTE_TDP_AD_ENABLED == 0); #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK #define SPTE_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1)) @@ -164,7 +164,7 @@ extern u64 __read_mostly shadow_me_value; extern u64 __read_mostly shadow_me_mask; /* - * SPTEs in MMUs without A/D bits are marked with SPTE_TDP_AD_DISABLED_MASK; + * SPTEs in MMUs without A/D bits are marked with SPTE_TDP_AD_DISABLED; * shadow_acc_track_mask is the set of bits to be cleared in non-accessed * pages. */ @@ -266,18 +266,18 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) static inline bool spte_ad_enabled(u64 spte) { MMU_WARN_ON(!is_shadow_present_pte(spte)); - return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED_MASK; + return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED; } static inline bool spte_ad_need_write_protect(u64 spte) { MMU_WARN_ON(!is_shadow_present_pte(spte)); /* - * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED_MASK is '0', + * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0', * and non-TDP SPTEs will never set these bits. Optimize for 64-bit * TDP and do the A/D type check unconditionally. */ - return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED_MASK; + return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_ENABLED; } static inline u64 spte_shadow_accessed_mask(u64 spte) @@ -435,11 +435,11 @@ static inline void check_spte_writable_invariants(u64 spte) { if (spte & shadow_mmu_writable_mask) WARN_ONCE(!(spte & shadow_host_writable_mask), - "kvm: MMU-writable SPTE is not Host-writable: %llx", + KBUILD_MODNAME ": MMU-writable SPTE is not Host-writable: %llx", spte); else WARN_ONCE(is_writable_pte(spte), - "kvm: Writable SPTE is not MMU-writable: %llx", spte); + KBUILD_MODNAME ": Writable SPTE is not MMU-writable: %llx", spte); } static inline bool is_mmu_writable_spte(u64 spte) diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index 39b48e7d7d1a..d2eb0d4f8710 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "mmu_internal.h" #include "tdp_iter.h" @@ -15,11 +16,6 @@ static void tdp_iter_refresh_sptep(struct tdp_iter *iter) iter->old_spte = kvm_tdp_mmu_read_spte(iter->sptep); } -static gfn_t round_gfn_for_level(gfn_t gfn, int level) -{ - return gfn & -KVM_PAGES_PER_HPAGE(level); -} - /* * Return the TDP iterator to the root PT and allow it to continue its * traversal over the paging structure from there. @@ -30,7 +26,7 @@ void tdp_iter_restart(struct tdp_iter *iter) iter->yielded_gfn = iter->next_last_level_gfn; iter->level = iter->root_level; - iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); + iter->gfn = gfn_round_for_level(iter->next_last_level_gfn, iter->level); tdp_iter_refresh_sptep(iter); iter->valid = true; @@ -97,7 +93,7 @@ static bool try_step_down(struct tdp_iter *iter) iter->level--; iter->pt_path[iter->level - 1] = child_pt; - iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); + iter->gfn = gfn_round_for_level(iter->next_last_level_gfn, iter->level); tdp_iter_refresh_sptep(iter); return true; @@ -139,7 +135,7 @@ static bool try_step_up(struct tdp_iter *iter) return false; iter->level++; - iter->gfn = round_gfn_for_level(iter->gfn, iter->level); + iter->gfn = gfn_round_for_level(iter->gfn, iter->level); tdp_iter_refresh_sptep(iter); return true; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d6df38d371a0..7c25dbf32ecc 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "mmu.h" #include "mmu_internal.h" @@ -10,23 +11,15 @@ #include <asm/cmpxchg.h> #include <trace/events/kvm.h> -static bool __read_mostly tdp_mmu_enabled = true; -module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644); - /* Initializes the TDP MMU for the VM, if enabled. */ int kvm_mmu_init_tdp_mmu(struct kvm *kvm) { struct workqueue_struct *wq; - if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled)) - return 0; - wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0); if (!wq) return -ENOMEM; - /* This should not be changed for the lifetime of the VM. */ - kvm->arch.tdp_mmu_enabled = true; INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); kvm->arch.tdp_mmu_zap_wq = wq; @@ -47,9 +40,6 @@ static __always_inline bool kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm, void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) { - if (!kvm->arch.tdp_mmu_enabled) - return; - /* Also waits for any queued work items. */ destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); @@ -144,7 +134,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) return; - WARN_ON(!root->tdp_mmu_page); + WARN_ON(!is_tdp_mmu_page(root)); /* * The root now has refcount=0. It is valid, but readers already @@ -690,8 +680,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm, if (ret) return ret; - kvm_flush_remote_tlbs_with_address(kvm, iter->gfn, - KVM_PAGES_PER_HPAGE(iter->level)); + kvm_flush_remote_tlbs_gfn(kvm, iter->gfn, iter->level); /* * No other thread can overwrite the removed SPTE as they must either @@ -1090,8 +1079,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, return RET_PF_RETRY; else if (is_shadow_present_pte(iter->old_spte) && !is_last_spte(iter->old_spte, iter->level)) - kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, - KVM_PAGES_PER_HPAGE(iter->level + 1)); + kvm_flush_remote_tlbs_gfn(vcpu->kvm, iter->gfn, iter->level); /* * If the page fault was caused by a write but the page is write diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index d3714200b932..0a63b1afabd3 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -7,6 +7,9 @@ #include "spte.h" +int kvm_mmu_init_tdp_mmu(struct kvm *kvm); +void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); + hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu); __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) @@ -68,31 +71,9 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr, u64 *spte); #ifdef CONFIG_X86_64 -int kvm_mmu_init_tdp_mmu(struct kvm *kvm); -void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; } - -static inline bool is_tdp_mmu(struct kvm_mmu *mmu) -{ - struct kvm_mmu_page *sp; - hpa_t hpa = mmu->root.hpa; - - if (WARN_ON(!VALID_PAGE(hpa))) - return false; - - /* - * A NULL shadow page is legal when shadowing a non-paging guest with - * PAE paging, as the MMU will be direct with root_hpa pointing at the - * pae_root page, not a shadow page. - */ - sp = to_shadow_page(hpa); - return sp && is_tdp_mmu_page(sp) && sp->root_count; -} #else -static inline int kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return 0; } -static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {} static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; } -static inline bool is_tdp_mmu(struct kvm_mmu *mmu) { return false; } #endif #endif /* __KVM_X86_MMU_TDP_MMU_H */ |