summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm/nested.c
diff options
context:
space:
mode:
authorManali Shukla <manali.shukla@amd.com>2025-05-02 08:03:45 +0300
committerSean Christopherson <seanjc@google.com>2025-05-19 21:05:10 +0300
commit89f9edf4c69ddd345b07be414e6a99ace0757ff5 (patch)
treef045445f5c3fb55bb0935d19f9476c43165b0be1 /arch/x86/kvm/svm/nested.c
parent827547bc3a2a2af6391260de8874008c5a52f238 (diff)
downloadlinux-89f9edf4c69ddd345b07be414e6a99ace0757ff5.tar.xz
KVM: SVM: Add support for KVM_CAP_X86_BUS_LOCK_EXIT on SVM CPUs
Add support for KVM_CAP_X86_BUS_LOCK_EXIT on SVM CPUs with Bus Lock Threshold, which is close enough to VMX's Bus Lock Detection VM-Exit to allow reusing KVM_CAP_X86_BUS_LOCK_EXIT. The biggest difference between the two features is that Threshold is fault-like, whereas Detection is trap-like. To allow the guest to make forward progress, Threshold provides a per-VMCB counter which is decremented every time a bus lock occurs, and a VM-Exit is triggered if and only if the counter is '0'. To provide Detection-like semantics, initialize the counter to '0', i.e. exit on every bus lock, and when re-executing the guilty instruction, set the counter to '1' to effectively step past the instruction. Note, in the unlikely scenario that re-executing the instruction doesn't trigger a bus lock, e.g. because the guest has changed memory types or patched the guilty instruction, the bus lock counter will be left at '1', i.e. the guest will be able to do a bus lock on a different instruction. In a perfect world, KVM would ensure the counter is '0' if the guest has made forward progress, e.g. if RIP has changed. But trying to close that hole would incur non-trivial complexity, for marginal benefit; the intent of KVM_CAP_X86_BUS_LOCK_EXIT is to allow userspace rate-limit bus locks, not to allow for precise detection of problematic guest code. And, it's simply not feasible to fully close the hole, e.g. if an interrupt arrives before the original instruction can re-execute, the guest could step past a different bus lock. Suggested-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Manali Shukla <manali.shukla@amd.com> Link: https://lore.kernel.org/r/20250502050346.14274-5-manali.shukla@amd.com [sean: fix typo in comment] Signed-off-by: Sean Christopherson <seanjc@google.com>
Diffstat (limited to 'arch/x86/kvm/svm/nested.c')
-rw-r--r--arch/x86/kvm/svm/nested.c34
1 files changed, 34 insertions, 0 deletions
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 834b67672d50..6221a341f500 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -678,6 +678,33 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
+ /*
+ * Stash vmcb02's counter if the guest hasn't moved past the guilty
+ * instruction; otherwise, reset the counter to '0'.
+ *
+ * In order to detect if L2 has made forward progress or not, track the
+ * RIP at which a bus lock has occurred on a per-vmcb12 basis. If RIP
+ * is changed, guest has clearly made forward progress, bus_lock_counter
+ * still remained '1', so reset bus_lock_counter to '0'. Eg. In the
+ * scenario, where a buslock happened in L1 before VMRUN, the bus lock
+ * firmly happened on an instruction in the past. Even if vmcb01's
+ * counter is still '1', (because the guilty instruction got patched),
+ * the vCPU has clearly made forward progress and so KVM should reset
+ * vmcb02's counter to '0'.
+ *
+ * If the RIP hasn't changed, stash the bus lock counter at nested VMRUN
+ * to prevent the same guilty instruction from triggering a VM-Exit. Eg.
+ * if userspace rate-limits the vCPU, then it's entirely possible that
+ * L1's tick interrupt is pending by the time userspace re-runs the
+ * vCPU. If KVM unconditionally clears the counter on VMRUN, then when
+ * L1 re-enters L2, the same instruction will trigger a VM-Exit and the
+ * entire cycle start over.
+ */
+ if (vmcb02->save.rip && (svm->nested.ctl.bus_lock_rip == vmcb02->save.rip))
+ vmcb02->control.bus_lock_counter = 1;
+ else
+ vmcb02->control.bus_lock_counter = 0;
+
/* Done at vmrun: asid. */
/* Also overwritten later if necessary. */
@@ -1039,6 +1066,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
}
+ /*
+ * Invalidate bus_lock_rip unless KVM is still waiting for the guest
+ * to make forward progress before re-enabling bus lock detection.
+ */
+ if (!vmcb02->control.bus_lock_counter)
+ svm->nested.ctl.bus_lock_rip = INVALID_GPA;
+
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
svm_switch_vmcb(svm, &svm->vmcb01);