summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Christopherson <seanjc@google.com>2026-02-25 04:20:37 +0300
committerSean Christopherson <seanjc@google.com>2026-03-03 03:02:52 +0300
commit4046823e78b0c9abd25d23bffd7f1c773532dbfd (patch)
tree63d93bbb8c794c68b1e9d9bf7077017fb8054273
parent0b16e69d17d8c35c5c9d5918bf596c75a44655d3 (diff)
downloadlinux-4046823e78b0c9abd25d23bffd7f1c773532dbfd.tar.xz
KVM: x86: Open code handling of completed MMIO reads in emulator_read_write()
Open code the handling of completed MMIO reads instead of using an ops hook, as burying the logic behind a (likely RETPOLINE'd) indirect call, and with an unintuitive name, makes relatively straightforward code hard to comprehend. Opportunistically add comments to explain the dependencies between the emulator's mem_read cache and the MMIO read completion logic, as it's very easy to overlook the cache's role in getting the read data into the correct destination. No functional change intended. Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com> Link: https://patch.msgid.link/20260225012049.920665-3-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
-rw-r--r--arch/x86/kvm/emulate.c13
-rw-r--r--arch/x86/kvm/x86.c33
2 files changed, 29 insertions, 17 deletions
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c8e292e9a24d..70850e591350 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1297,12 +1297,25 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
int rc;
struct read_cache *mc = &ctxt->mem_read;
+ /*
+ * If the read gets a cache hit, simply copy the value from the cache.
+ * A "hit" here means that there is unused data in the cache, i.e. when
+ * re-emulating an instruction to complete a userspace exit, KVM relies
+ * on "no decode" to ensure the instruction is re-emulated in the same
+ * sequence, so that multiple reads are fulfilled in the correct order.
+ */
if (mc->pos < mc->end)
goto read_cached;
if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
return X86EMUL_UNHANDLEABLE;
+ /*
+ * Route all reads to the cache. This allows @dest to be an on-stack
+ * variable without triggering use-after-free if KVM needs to exit to
+ * userspace to handle an MMIO read (the MMIO fragment will point at
+ * the current location in the cache).
+ */
rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
&ctxt->exception);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9bb9d7f078fc..b0bb9cd9df8a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8108,8 +8108,6 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
}
struct read_write_emulator_ops {
- int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
- int bytes);
int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes);
int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -8119,18 +8117,6 @@ struct read_write_emulator_ops {
bool write;
};
-static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
-{
- if (vcpu->mmio_read_completed) {
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, val);
- vcpu->mmio_read_completed = 0;
- return 1;
- }
-
- return 0;
-}
-
static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
@@ -8166,7 +8152,6 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
}
static const struct read_write_emulator_ops read_emultor = {
- .read_write_prepare = read_prepare,
.read_write_emulate = read_emulate,
.read_write_mmio = vcpu_mmio_read,
.read_write_exit_mmio = read_exit_mmio,
@@ -8249,9 +8234,23 @@ static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
if (WARN_ON_ONCE((bytes > 8u || !ops->write) && object_is_on_stack(val)))
return X86EMUL_UNHANDLEABLE;
- if (ops->read_write_prepare &&
- ops->read_write_prepare(vcpu, val, bytes))
+ /*
+ * If the read was already completed via a userspace MMIO exit, there's
+ * nothing left to do except trace the MMIO read. When completing MMIO
+ * reads, KVM re-emulates the instruction to propagate the value into
+ * the correct destination, e.g. into the correct register, but the
+ * value itself has already been copied to the read cache.
+ *
+ * Note! This is *tightly* coupled to read_emulated() satisfying reads
+ * from the emulator's mem_read cache, so that the MMIO fragment data
+ * is copied to the correct chunk of the correct operand.
+ */
+ if (!ops->write && vcpu->mmio_read_completed) {
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
+ vcpu->mmio_fragments[0].gpa, val);
+ vcpu->mmio_read_completed = 0;
return X86EMUL_CONTINUE;
+ }
vcpu->mmio_nr_fragments = 0;