summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/mmu/tdp_iter.h
diff options
context:
space:
mode:
authorSean Christopherson <seanjc@google.com>2022-04-23 06:47:43 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2022-05-03 14:22:32 +0300
commitba3a6120a4e7efc13d19fe43eb6c5caf1da05b72 (patch)
treee39136b325d9f32662348205daf5e1384cf6b980 /arch/x86/kvm/mmu/tdp_iter.h
parent54eb3ef56f36827aad90915df33387d4c2b5df5a (diff)
downloadlinux-ba3a6120a4e7efc13d19fe43eb6c5caf1da05b72.tar.xz
KVM: x86/mmu: Use atomic XCHG to write TDP MMU SPTEs with volatile bits
Use an atomic XCHG to write TDP MMU SPTEs that have volatile bits, even if mmu_lock is held for write, as volatile SPTEs can be written by other tasks/vCPUs outside of mmu_lock. If a vCPU uses the to-be-modified SPTE to write a page, the CPU can cache the translation as WRITABLE in the TLB despite it being seen by KVM as !WRITABLE, and/or KVM can clobber the Accessed/Dirty bits and not properly tag the backing page. Exempt non-leaf SPTEs from atomic updates as KVM itself doesn't modify non-leaf SPTEs without holding mmu_lock, they do not have Dirty bits, and KVM doesn't consume the Accessed bit of non-leaf SPTEs. Dropping the Dirty and/or Writable bits is most problematic for dirty logging, as doing so can result in a missed TLB flush and eventually a missed dirty page. In the unlikely event that the only dirty page(s) is a clobbered SPTE, clear_dirty_gfn_range() will see the SPTE as not dirty (based on the Dirty or Writable bit depending on the method) and so not update the SPTE and ultimately not flush. If the SPTE is cached in the TLB as writable before it is clobbered, the guest can continue writing the associated page without ever taking a write-protect fault. For most (all?) file back memory, dropping the Dirty bit is a non-issue. The primary MMU write-protects its PTEs on writeback, i.e. KVM's dirty bit is effectively ignored because the primary MMU will mark that page dirty when the write-protection is lifted, e.g. when KVM faults the page back in for write. The Accessed bit is a complete non-issue. Aside from being unused for non-leaf SPTEs, KVM doesn't do a TLB flush when aging SPTEs, i.e. the Accessed bit may be dropped anyways. Lastly, the Writable bit is also problematic as an extension of the Dirty bit, as KVM (correctly) treats the Dirty bit as volatile iff the SPTE is !DIRTY && WRITABLE. If KVM fixes an MMU-writable, but !WRITABLE, SPTE out of mmu_lock, then it can allow the CPU to set the Dirty bit despite the SPTE being !WRITABLE when it is checked by KVM. But that all depends on the Dirty bit being problematic in the first place. Fixes: 2f2fad0897cb ("kvm: x86/mmu: Add functions to handle changed TDP SPTEs") Cc: stable@vger.kernel.org Cc: Ben Gardon <bgardon@google.com> Cc: David Matlack <dmatlack@google.com> Cc: Venkatesh Srinivas <venkateshs@google.com> Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20220423034752.1161007-4-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm/mmu/tdp_iter.h')
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.h34
1 files changed, 32 insertions, 2 deletions
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index b1eaf6ec0e0b..f0af385c56e0 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -6,6 +6,7 @@
#include <linux/kvm_host.h>
#include "mmu.h"
+#include "spte.h"
/*
* TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
@@ -17,9 +18,38 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
{
return READ_ONCE(*rcu_dereference(sptep));
}
-static inline void kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 val)
+
+static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
+{
+ return xchg(rcu_dereference(sptep), new_spte);
+}
+
+static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
+{
+ WRITE_ONCE(*rcu_dereference(sptep), new_spte);
+}
+
+static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
+ u64 new_spte, int level)
{
- WRITE_ONCE(*rcu_dereference(sptep), val);
+ /*
+ * Atomically write the SPTE if it is a shadow-present, leaf SPTE with
+ * volatile bits, i.e. has bits that can be set outside of mmu_lock.
+ * The Writable bit can be set by KVM's fast page fault handler, and
+ * Accessed and Dirty bits can be set by the CPU.
+ *
+ * Note, non-leaf SPTEs do have Accessed bits and those bits are
+ * technically volatile, but KVM doesn't consume the Accessed bit of
+ * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit. This
+ * logic needs to be reassessed if KVM were to use non-leaf Accessed
+ * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
+ */
+ if (is_shadow_present_pte(old_spte) && is_last_spte(old_spte, level) &&
+ spte_has_volatile_bits(old_spte))
+ return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
+
+ __kvm_tdp_mmu_write_spte(sptep, new_spte);
+ return old_spte;
}
/*