summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/mmu.c
diff options
context:
space:
mode:
authorSean Christopherson <sean.j.christopherson@intel.com>2019-11-12 01:12:27 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2019-11-28 20:29:02 +0300
commite528acd31a13f4115ab5a0bf804fd2356a8d3c32 (patch)
tree1d1b14736c2bbe6c5a5e9ce3d1a5ec69940fc513 /arch/x86/kvm/mmu.c
parent368bc43ff0e9b166403029e5500ac1cc703c7c74 (diff)
downloadlinux-e528acd31a13f4115ab5a0bf804fd2356a8d3c32.tar.xz
KVM: MMU: Do not treat ZONE_DEVICE pages as being reserved
commit a78986aae9b2988f8493f9f65a587ee433e83bc3 upstream. Explicitly exempt ZONE_DEVICE pages from kvm_is_reserved_pfn() and instead manually handle ZONE_DEVICE on a case-by-case basis. For things like page refcounts, KVM needs to treat ZONE_DEVICE pages like normal pages, e.g. put pages grabbed via gup(). But for flows such as setting A/D bits or shifting refcounts for transparent huge pages, KVM needs to to avoid processing ZONE_DEVICE pages as the flows in question lack the underlying machinery for proper handling of ZONE_DEVICE pages. This fixes a hang reported by Adam Borowski[*] in dev_pagemap_cleanup() when running a KVM guest backed with /dev/dax memory, as KVM straight up doesn't put any references to ZONE_DEVICE pages acquired by gup(). Note, Dan Williams proposed an alternative solution of doing put_page() on ZONE_DEVICE pages immediately after gup() in order to simplify the auditing needed to ensure is_zone_device_page() is called if and only if the backing device is pinned (via gup()). But that approach would break kvm_vcpu_{un}map() as KVM requires the page to be pinned from map() 'til unmap() when accessing guest memory, unlike KVM's secondary MMU, which coordinates with mmu_notifier invalidations to avoid creating stale page references, i.e. doesn't rely on pages being pinned. [*] http://lkml.kernel.org/r/20190919115547.GA17963@angband.pl Reported-by: Adam Borowski <kilobyte@angband.pl> Analyzed-by: David Hildenbrand <david@redhat.com> Acked-by: Dan Williams <dan.j.williams@intel.com> Cc: stable@vger.kernel.org Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [sean: backport to 4.x; resolve conflict in mmu.c] Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--arch/x86/kvm/mmu.c8
1 files changed, 4 insertions, 4 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f0f180158c26..3a281a2decde 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2934,7 +2934,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
* here.
*/
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
- level == PT_PAGE_TABLE_LEVEL &&
+ !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
PageTransCompoundMap(pfn_to_page(pfn)) &&
!mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
unsigned long mask;
@@ -4890,9 +4890,9 @@ restart:
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
- if (sp->role.direct &&
- !kvm_is_reserved_pfn(pfn) &&
- PageTransCompoundMap(pfn_to_page(pfn))) {
+ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
+ !kvm_is_zone_device_pfn(pfn) &&
+ PageTransCompoundMap(pfn_to_page(pfn))) {
drop_spte(kvm, sptep);
need_tlb_flush = 1;
goto restart;