summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Christopherson <seanjc@google.com>2022-04-29 04:04:15 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2022-06-20 13:21:35 +0300
commitb14b2690c50e02145bb867dfcde8845eb17aa8a4 (patch)
treec2bcbe2c4cef258a7523defc5a733b99de1a079d
parent284dc49307738d2a897fd375431f741213cd0f27 (diff)
downloadlinux-b14b2690c50e02145bb867dfcde8845eb17aa8a4.tar.xz
KVM: Rename/refactor kvm_is_reserved_pfn() to kvm_pfn_to_refcounted_page()
Rename and refactor kvm_is_reserved_pfn() to kvm_pfn_to_refcounted_page() to better reflect what KVM is actually checking, and to eliminate extra pfn_to_page() lookups. The kvm_release_pfn_*() an kvm_try_get_pfn() helpers in particular benefit from "refouncted" nomenclature, as it's not all that obvious why KVM needs to get/put refcounts for some PG_reserved pages (ZERO_PAGE and ZONE_DEVICE). Add a comment to call out that the list of exceptions to PG_reserved is all but guaranteed to be incomplete. The list has mostly been compiled by people throwing noodles at KVM and finding out they stick a little too well, e.g. the ZERO_PAGE's refcount overflowed and ZONE_DEVICE pages didn't get freed. No functional change intended. Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20220429010416.2788472-10-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--arch/x86/kvm/mmu/mmu.c15
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c2
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--virt/kvm/kvm_main.c66
4 files changed, 63 insertions, 22 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b209aaf096df..7ebb97c95da7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -534,6 +534,7 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
kvm_pfn_t pfn;
u64 old_spte = *sptep;
int level = sptep_to_sp(sptep)->role.level;
+ struct page *page;
if (!is_shadow_present_pte(old_spte) ||
!spte_has_volatile_bits(old_spte))
@@ -549,11 +550,13 @@ static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
pfn = spte_to_pfn(old_spte);
/*
- * KVM does not hold the refcount of the page used by
- * kvm mmu, before reclaiming the page, we should
- * unmap it from mmu first.
+ * KVM doesn't hold a reference to any pages mapped into the guest, and
+ * instead uses the mmu_notifier to ensure that KVM unmaps any pages
+ * before they are reclaimed. Sanity check that, if the pfn is backed
+ * by a refcounted page, the refcount is elevated.
*/
- WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+ page = kvm_pfn_to_refcounted_page(pfn);
+ WARN_ON(page && !page_count(page));
if (is_accessed_spte(old_spte))
kvm_set_pfn_accessed(pfn);
@@ -2881,7 +2884,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
if (unlikely(fault->max_level == PG_LEVEL_4K))
return;
- if (is_error_noslot_pfn(fault->pfn) || kvm_is_reserved_pfn(fault->pfn))
+ if (is_error_noslot_pfn(fault->pfn) || !kvm_pfn_to_refcounted_page(fault->pfn))
return;
if (kvm_slot_dirty_track_enabled(slot))
@@ -5993,7 +5996,7 @@ restart:
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
- if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
+ if (sp->role.direct && kvm_pfn_to_refcounted_page(pfn) &&
sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
pfn, PG_LEVEL_NUM)) {
pte_list_remove(kvm, rmap_head, sptep);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 8e8d412fb65c..1e777b739ac4 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1751,7 +1751,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
*/
pfn = spte_to_pfn(iter.old_spte);
- if (kvm_is_reserved_pfn(pfn))
+ if (!kvm_pfn_to_refcounted_page(pfn))
continue;
max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6fb433ac2ba1..a2bbdf3ab086 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1570,7 +1570,7 @@ void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
-bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
+struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
bool kvm_is_zone_device_page(struct page *page);
struct kvm_irq_ack_notifier {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index aa1bcd5f140d..04196096f9e4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -182,19 +182,36 @@ bool kvm_is_zone_device_page(struct page *page)
return is_zone_device_page(page);
}
-bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
+/*
+ * Returns a 'struct page' if the pfn is "valid" and backed by a refcounted
+ * page, NULL otherwise. Note, the list of refcounted PG_reserved page types
+ * is likely incomplete, it has been compiled purely through people wanting to
+ * back guest with a certain type of memory and encountering issues.
+ */
+struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn)
{
+ struct page *page;
+
+ if (!pfn_valid(pfn))
+ return NULL;
+
+ page = pfn_to_page(pfn);
+ if (!PageReserved(page))
+ return page;
+
+ /* The ZERO_PAGE(s) is marked PG_reserved, but is refcounted. */
+ if (is_zero_pfn(pfn))
+ return page;
+
/*
* ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
* perspective they are "normal" pages, albeit with slightly different
* usage rules.
*/
- if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn)) &&
- !is_zero_pfn(pfn) &&
- !kvm_is_zone_device_page(pfn_to_page(pfn));
+ if (kvm_is_zone_device_page(page))
+ return page;
- return true;
+ return NULL;
}
/*
@@ -2501,9 +2518,12 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
static int kvm_try_get_pfn(kvm_pfn_t pfn)
{
- if (kvm_is_reserved_pfn(pfn))
+ struct page *page = kvm_pfn_to_refcounted_page(pfn);
+
+ if (!page)
return 1;
- return get_page_unless_zero(pfn_to_page(pfn));
+
+ return get_page_unless_zero(page);
}
static int hva_to_pfn_remapped(struct vm_area_struct *vma,
@@ -2728,6 +2748,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
*/
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
+ struct page *page;
kvm_pfn_t pfn;
pfn = gfn_to_pfn(kvm, gfn);
@@ -2735,10 +2756,11 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
if (is_error_noslot_pfn(pfn))
return KVM_ERR_PTR_BAD_PAGE;
- if (kvm_is_reserved_pfn(pfn))
+ page = kvm_pfn_to_refcounted_page(pfn);
+ if (!page)
return KVM_ERR_PTR_BAD_PAGE;
- return pfn_to_page(pfn);
+ return page;
}
EXPORT_SYMBOL_GPL(gfn_to_page);
@@ -2841,8 +2863,16 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
void kvm_release_pfn_clean(kvm_pfn_t pfn)
{
- if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
- kvm_release_page_clean(pfn_to_page(pfn));
+ struct page *page;
+
+ if (is_error_noslot_pfn(pfn))
+ return;
+
+ page = kvm_pfn_to_refcounted_page(pfn);
+ if (!page)
+ return;
+
+ kvm_release_page_clean(page);
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
@@ -2857,8 +2887,16 @@ EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
void kvm_release_pfn_dirty(kvm_pfn_t pfn)
{
- if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
- kvm_release_page_dirty(pfn_to_page(pfn));
+ struct page *page;
+
+ if (is_error_noslot_pfn(pfn))
+ return;
+
+ page = kvm_pfn_to_refcounted_page(pfn);
+ if (!page)
+ return;
+
+ kvm_release_page_dirty(page);
}
EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);