summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
authorLai Jiangshan <laijs@linux.alibaba.com>2021-10-19 14:01:52 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2021-10-22 12:19:30 +0300
commit509bfe3d979672cd69c318d520420cf95b474fd9 (patch)
tree07bdaaafcca9436ff1d86bd825ef4f5695c5df71 /arch/x86/kvm
parente45e9e3998f0001079b09555db5bb3b4257f6746 (diff)
downloadlinux-509bfe3d979672cd69c318d520420cf95b474fd9.tar.xz
KVM: X86: Cache CR3 in prev_roots when PCID is disabled
The commit 21823fbda5522 ("KVM: x86: Invalidate all PGDs for the current PCID on MOV CR3 w/ flush") invalidates all PGDs for the specific PCID and in the case of PCID is disabled, it includes all PGDs in the prev_roots and the commit made prev_roots totally unused in this case. Not using prev_roots fixes a problem when CR4.PCIDE is changed 0 -> 1 before the said commit: (CR4.PCIDE=0, CR4.PGE=1; CR3=cr3_a; the page for the guest RIP is global; cr3_b is cached in prev_roots) modify page tables under cr3_b the shadow root of cr3_b is unsync in kvm INVPCID single context the guest expects the TLB is clean for PCID=0 change CR4.PCIDE 0 -> 1 switch to cr3_b with PCID=0,NOFLUSH=1 No sync in kvm, cr3_b is still unsync in kvm jump to the page that was modified in step 1 shadow page tables point to the wrong page It is a very unlikely case, but it shows that stale prev_roots can be a problem after CR4.PCIDE changes from 0 to 1. However, to fix this case, the commit disabled caching CR3 in prev_roots altogether when PCID is disabled. Not all CPUs have PCID; especially the PCID support for AMD CPUs is kind of recent. To restore the prev_roots optimization for CR4.PCIDE=0, flush the whole MMU (including all prev_roots) when CR4.PCIDE changes. Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com> Message-Id: <20211019110154.4091-3-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/x86.c29
1 files changed, 27 insertions, 2 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f7806e3f3019..9d25ef7d4d53 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1022,10 +1022,27 @@ EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
{
+ /*
+ * If any role bit is changed, the MMU needs to be reset.
+ *
+ * If CR4.PCIDE is changed 1 -> 0, the guest TLB must be flushed.
+ * If CR4.PCIDE is changed 0 -> 1, there is no need to flush the TLB
+ * according to the SDM; however, stale prev_roots could be reused
+ * incorrectly in the future after a MOV to CR3 with NOFLUSH=1, so we
+ * free them all. KVM_REQ_MMU_RELOAD is fit for the both cases; it
+ * is slow, but changing CR4.PCIDE is a rare case.
+ *
+ * If CR4.PGE is changed, the guest TLB must be flushed.
+ *
+ * Note: resetting MMU is a superset of KVM_REQ_MMU_RELOAD and
+ * KVM_REQ_MMU_RELOAD is a superset of KVM_REQ_TLB_FLUSH_GUEST, hence
+ * the usage of "else if".
+ */
if ((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
- else if (((cr4 ^ old_cr4) & X86_CR4_PGE) ||
- (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
+ else if ((cr4 ^ old_cr4) & X86_CR4_PCIDE)
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ else if ((cr4 ^ old_cr4) & X86_CR4_PGE)
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
@@ -1095,6 +1112,14 @@ static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
+ /*
+ * If PCID is disabled, there is no need to free prev_roots even if the
+ * PCIDs for them are also 0, because MOV to CR3 always flushes the TLB
+ * with PCIDE=0.
+ */
+ if (!kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
+ return;
+
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);