summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Paolo Bonzini <pbonzini@redhat.com> 2026-02-09 21:33:30 +0300
committer Paolo Bonzini <pbonzini@redhat.com> 2026-02-11 20:45:32 +0300
commit 1b13885edf0a55a451a26d5fa53e7877b31debb5 (patch)
tree 1252f341999af4d3f513a545769c0717da8344a3
parent 9123c5f956b1fbedd63821eb528ece55ddd0e49c (diff)
parent ac4f869c56301831a60706a84acbf13b4f0f9886 (diff)
download linux-1b13885edf0a55a451a26d5fa53e7877b31debb5.tar.xz
Merge tag 'kvm-x86-apic-6.20' of https://github.com/kvm-x86/linux into HEAD
KVM x86 APIC-ish changes for 6.20

 - Fix a benign bug where KVM could use the wrong memslots (ignored SMM) when creating a vCPU-specific mapping of guest memory.
 - Clean up KVM's handling of marking mapped vCPU pages dirty.
 - Drop a pile of *ancient* sanity checks hidden behind KVM's unused ASSERT() macro, most of which could be trivially triggered by the guest and/or user, and all of which were useless.
 - Fold "struct dest_map" into its sole user, "struct rtc_status", to make it more obvious what the weird parameter is used for, and to allow burying the RTC shenanigans behind CONFIG_KVM_IOAPIC=y.
 - Bury all of ioapic.h and KVM_IRQCHIP_KERNEL behind CONFIG_KVM_IOAPIC=y.
 - Add a regression test for recent APICv update fixes.
 - Rework KVM's handling of VMCS updates while L2 is active to temporarily switch to vmcs01 instead of deferring the update until the next nested VM-Exit. The deferred updates approach directly contributed to several bugs, was proving to be a maintenance burden due to the difficulty in auditing the correctness of deferred updates, and was polluting "struct nested_vmx" with a growing pile of booleans.
 - Handle "hardware APIC ISR", a.k.a. SVI, updates in kvm_apic_update_apicv() to consolidate the updates, and to co-locate SVI updates with the updates for KVM's own cache of ISR information.
 - Drop a dead function declaration.
-rw-r--r-- arch/x86/include/asm/kvm_host.h 2
-rw-r--r-- arch/x86/kvm/hyperv.c 2
-rw-r--r-- arch/x86/kvm/ioapic.c 43
-rw-r--r-- arch/x86/kvm/ioapic.h 38
-rw-r--r-- arch/x86/kvm/irq.c 4
-rw-r--r-- arch/x86/kvm/lapic.c 97
-rw-r--r-- arch/x86/kvm/lapic.h 21
-rw-r--r-- arch/x86/kvm/vmx/nested.c 54
-rw-r--r-- arch/x86/kvm/vmx/nested.h 1
-rw-r--r-- arch/x86/kvm/vmx/vmx.c 106
-rw-r--r-- arch/x86/kvm/vmx/vmx.h 9
-rw-r--r-- arch/x86/kvm/x86.c 11
-rw-r--r-- arch/x86/kvm/xen.c 2
-rw-r--r-- include/linux/kvm_host.h 9
-rw-r--r-- tools/testing/selftests/kvm/Makefile.kvm 1
-rw-r--r-- tools/testing/selftests/kvm/include/x86/apic.h 4
-rw-r--r-- tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c 155
-rw-r--r-- virt/kvm/kvm_main.c 2
18 files changed, 334 insertions, 227 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 91c26f159d89..94cd4dc0e2a1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1232,7 +1232,9 @@ struct kvm_xen {
enum kvm_irqchip_mode {
KVM_IRQCHIP_NONE,
+#ifdef CONFIG_KVM_IOAPIC
KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */
+#endif
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index de92292eb1f5..49bf744ca8e3 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -492,7 +492,7 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
irq.vector = vector;
irq.level = 1;
- ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
+ ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq);
trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
return ret;
}
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index a26fa4222f29..a38a8e2ac70b 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -37,11 +37,6 @@
static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
bool line_status);
-static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu,
- struct kvm_ioapic *ioapic,
- int trigger_mode,
- int pin);
-
static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
{
unsigned long result = 0;
@@ -82,7 +77,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
{
ioapic->rtc_status.pending_eoi = 0;
- bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_IDS);
+ bitmap_zero(ioapic->rtc_status.map, KVM_MAX_VCPU_IDS);
}
static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
@@ -97,7 +92,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
{
bool new_val, old_val;
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
- struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
+ struct rtc_status *status = &ioapic->rtc_status;
union kvm_ioapic_redirect_entry *e;
e = &ioapic->redirtbl[RTC_GSI];
@@ -107,17 +102,17 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
return;
new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
- old_val = test_bit(vcpu->vcpu_id, dest_map->map);
+ old_val = test_bit(vcpu->vcpu_id, status->map);
if (new_val == old_val)
return;
if (new_val) {
- __set_bit(vcpu->vcpu_id, dest_map->map);
- dest_map->vectors[vcpu->vcpu_id] = e->fields.vector;
+ __set_bit(vcpu->vcpu_id, status->map);
+ status->vectors[vcpu->vcpu_id] = e->fields.vector;
ioapic->rtc_status.pending_eoi++;
} else {
- __clear_bit(vcpu->vcpu_id, dest_map->map);
+ __clear_bit(vcpu->vcpu_id, status->map);
ioapic->rtc_status.pending_eoi--;
rtc_status_pending_eoi_check_valid(ioapic);
}
@@ -148,13 +143,12 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu,
int vector)
{
- struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
+ struct rtc_status *status = &ioapic->rtc_status;
/* RTC special handling */
- if (test_bit(vcpu->vcpu_id, dest_map->map) &&
- (vector == dest_map->vectors[vcpu->vcpu_id]) &&
- (test_and_clear_bit(vcpu->vcpu_id,
- ioapic->rtc_status.dest_map.map))) {
+ if (test_bit(vcpu->vcpu_id, status->map) &&
+ (vector == status->vectors[vcpu->vcpu_id]) &&
+ (test_and_clear_bit(vcpu->vcpu_id, status->map))) {
--ioapic->rtc_status.pending_eoi;
rtc_status_pending_eoi_check_valid(ioapic);
}
@@ -265,15 +259,15 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
{
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
- struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
+ struct rtc_status *status = &ioapic->rtc_status;
union kvm_ioapic_redirect_entry *e;
int index;
spin_lock(&ioapic->lock);
/* Make sure we see any missing RTC EOI */
- if (test_bit(vcpu->vcpu_id, dest_map->map))
- __set_bit(dest_map->vectors[vcpu->vcpu_id],
+ if (test_bit(vcpu->vcpu_id, status->map))
+ __set_bit(status->vectors[vcpu->vcpu_id],
ioapic_handled_vectors);
for (index = 0; index < IOAPIC_NUM_PINS; index++) {
@@ -490,11 +484,11 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
* if rtc_irq_check_coalesced returns false).
*/
BUG_ON(ioapic->rtc_status.pending_eoi != 0);
- ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
- &ioapic->rtc_status.dest_map);
+ ret = __kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
+ &ioapic->rtc_status);
ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
} else
- ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+ ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
entry->fields.remote_irr = 1;
@@ -564,7 +558,6 @@ static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu,
kvm_lapic_suppress_eoi_broadcast(apic))
return;
- ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
if (!ent->fields.mask && (ioapic->irr & (1 << pin))) {
++ioapic->irq_eoi[pin];
@@ -624,8 +617,6 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
- ASSERT(!(addr & 0xf)); /* check alignment */
-
addr &= 0xff;
spin_lock(&ioapic->lock);
switch (addr) {
@@ -666,8 +657,6 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
- ASSERT(!(addr & 0xf)); /* check alignment */
-
switch (len) {
case 8:
case 4:
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index bf28dbc11ff6..3dadae093690 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -6,11 +6,12 @@
#include <kvm/iodev.h>
#include "irq.h"
+#ifdef CONFIG_KVM_IOAPIC
+
struct kvm;
struct kvm_vcpu;
#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
-#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
#define IOAPIC_EDGE_TRIG 0
#define IOAPIC_LEVEL_TRIG 1
@@ -37,7 +38,9 @@ struct kvm_vcpu;
#define RTC_GSI 8
-struct dest_map {
+struct rtc_status {
+ int pending_eoi;
+
/* vcpu bitmap where IRQ has been sent */
DECLARE_BITMAP(map, KVM_MAX_VCPU_IDS);
@@ -48,12 +51,6 @@ struct dest_map {
u8 vectors[KVM_MAX_VCPU_IDS];
};
-
-struct rtc_status {
- int pending_eoi;
- struct dest_map dest_map;
-};
-
union kvm_ioapic_redirect_entry {
u64 bits;
struct {
@@ -104,24 +101,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask);
-#ifdef DEBUG
-#define ASSERT(x) \
-do { \
- if (!(x)) { \
- printk(KERN_EMERG "assertion failed %s: %d: %s\n", \
- __FILE__, __LINE__, #x); \
- BUG(); \
- } \
-} while (0)
-#else
-#define ASSERT(x) do { } while (0)
-#endif
-
-static inline int ioapic_in_kernel(struct kvm *kvm)
-{
- return irqchip_full(kvm);
-}
-
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
int trigger_mode);
@@ -134,6 +113,13 @@ void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
ulong *ioapic_handled_vectors);
+#endif /* CONFIG_KVM_IOAPIC */
+
+static inline int ioapic_in_kernel(struct kvm *kvm)
+{
+ return irqchip_full(kvm);
+}
+
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
ulong *ioapic_handled_vectors);
void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 4c7688670c2d..9519fec09ee6 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -235,7 +235,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
kvm_msi_to_lapic_irq(kvm, e, &irq);
- return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
+ return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
}
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
@@ -258,7 +258,7 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
kvm_msi_to_lapic_irq(kvm, e, &irq);
- if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
+ if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r))
return r;
break;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 738ec3c1b0b5..2332a258de91 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -717,8 +717,6 @@ static inline int apic_search_irr(struct kvm_lapic *apic)
static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
- int result;
-
/*
* Note that irr_pending is just a hint. It will be always
* true with virtual interrupt delivery enabled.
@@ -726,10 +724,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
if (!apic->irr_pending)
return -1;
- result = apic_search_irr(apic);
- ASSERT(result == -1 || result >= 16);
-
- return result;
+ return apic_search_irr(apic);
}
static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
@@ -782,8 +777,6 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
- int result;
-
/*
* Note that isr_count is always 1, and highest_isr_cache
* is always -1, with APIC virtualization enabled.
@@ -793,10 +786,7 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
if (likely(apic->highest_isr_cache != -1))
return apic->highest_isr_cache;
- result = apic_find_highest_vector(apic->regs + APIC_ISR);
- ASSERT(result == -1 || result >= 16);
-
- return result;
+ return apic_find_highest_vector(apic->regs + APIC_ISR);
}
static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
@@ -821,17 +811,6 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
}
}
-void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active)
- return;
-
- kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
-}
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_hwapic_isr);
-
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
/* This may race with setting of irr in __apic_accept_irq() and
@@ -845,15 +824,15 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_find_highest_irr);
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode,
- struct dest_map *dest_map);
+ struct rtc_status *rtc_status);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
- struct dest_map *dest_map)
+ struct rtc_status *rtc_status)
{
struct kvm_lapic *apic = vcpu->arch.apic;
return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
- irq->level, irq->trig_mode, dest_map);
+ irq->level, irq->trig_mode, rtc_status);
}
static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
@@ -1099,7 +1078,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
struct kvm_lapic *target = vcpu->arch.apic;
u32 mda = kvm_apic_mda(vcpu, dest, source, target);
- ASSERT(target);
switch (shorthand) {
case APIC_DEST_NOSHORT:
if (dest_mode == APIC_DEST_PHYSICAL)
@@ -1237,8 +1215,9 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
return true;
}
-bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
+static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq, int *r,
+ struct rtc_status *rtc_status)
{
struct kvm_apic_map *map;
unsigned long bitmap;
@@ -1253,7 +1232,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
*r = 0;
return true;
}
- *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
+ *r = kvm_apic_set_irq(src->vcpu, irq, rtc_status);
return true;
}
@@ -1266,7 +1245,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
for_each_set_bit(i, &bitmap, 16) {
if (!dst[i])
continue;
- *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
+ *r += kvm_apic_set_irq(dst[i]->vcpu, irq, rtc_status);
}
}
@@ -1274,6 +1253,13 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
return ret;
}
+
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq, int *r)
+{
+ return __kvm_irq_delivery_to_apic_fast(kvm, src, irq, r, NULL);
+}
+
/*
* This routine tries to handle interrupts in posted mode, here is how
* it deals with different cases:
@@ -1345,15 +1331,16 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_intr_is_single_vcpu);
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, struct dest_map *dest_map)
+int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq,
+ struct rtc_status *rtc_status)
{
int r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
unsigned int dest_vcpus = 0;
- if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
+ if (__kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, rtc_status))
return r;
if (irq->dest_mode == APIC_DEST_PHYSICAL &&
@@ -1375,7 +1362,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
if (!kvm_lowest_prio_delivery(irq)) {
if (r < 0)
r = 0;
- r += kvm_apic_set_irq(vcpu, irq, dest_map);
+ r += kvm_apic_set_irq(vcpu, irq, rtc_status);
} else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
if (!vector_hashing_enabled) {
if (!lowest)
@@ -1397,7 +1384,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
}
if (lowest)
- r = kvm_apic_set_irq(lowest, irq, dest_map);
+ r = kvm_apic_set_irq(lowest, irq, rtc_status);
return r;
}
@@ -1408,7 +1395,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
*/
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int vector, int level, int trig_mode,
- struct dest_map *dest_map)
+ struct rtc_status *rtc_status)
{
int result = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1429,10 +1416,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
result = 1;
- if (dest_map) {
- __set_bit(vcpu->vcpu_id, dest_map->map);
- dest_map->vectors[vcpu->vcpu_id] = vector;
+#ifdef CONFIG_KVM_IOAPIC
+ if (rtc_status) {
+ __set_bit(vcpu->vcpu_id, rtc_status->map);
+ rtc_status->vectors[vcpu->vcpu_id] = vector;
}
+#endif
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
if (trig_mode)
@@ -1658,7 +1647,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
trace_kvm_apic_ipi(icr_low, irq.dest_id);
- kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
+ kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_send_ipi);
@@ -1667,8 +1656,6 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
ktime_t remaining, now;
s64 ns;
- ASSERT(apic != NULL);
-
/* if initial count is 0, current count should also be 0 */
if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
apic->lapic_timer.period == 0)
@@ -2629,7 +2616,7 @@ static int __kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data, bool fast)
kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq);
if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq,
- &ignored, NULL))
+ &ignored))
return -EWOULDBLOCK;
trace_kvm_apic_ipi((u32)data, irq.dest_id);
@@ -2845,10 +2832,18 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
*/
apic->irr_pending = true;
- if (apic->apicv_active)
+ /*
+ * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
+ * highest bit in vISR and the next accelerated EOI in the guest won't
+ * be virtualized correctly (the CPU uses SVI to determine which vISR
+ * vector to clear).
+ */
+ if (apic->apicv_active) {
apic->isr_count = 1;
- else
+ kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
+ } else {
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+ }
apic->highest_isr_cache = -1;
}
@@ -2976,10 +2971,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.pv_eoi.msr_val = 0;
apic_update_ppr(apic);
- if (apic->apicv_active) {
+ if (apic->apicv_active)
kvm_x86_call(apicv_post_state_restore)(vcpu);
- kvm_x86_call(hwapic_isr_update)(vcpu, -1);
- }
vcpu->arch.apic_arb_prio = 0;
vcpu->arch.apic_attention = 0;
@@ -3060,8 +3053,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic;
- ASSERT(vcpu != NULL);
-
if (!irqchip_in_kernel(vcpu->kvm)) {
static_branch_inc(&kvm_has_noapic_vcpu);
return 0;
@@ -3292,10 +3283,8 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
__start_apic_timer(apic, APIC_TMCCT);
kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
kvm_apic_update_apicv(vcpu);
- if (apic->apicv_active) {
+ if (apic->apicv_active)
kvm_x86_call(apicv_post_state_restore)(vcpu);
- kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
- }
kvm_make_request(KVM_REQ_EVENT, vcpu);
#ifdef CONFIG_KVM_IOAPIC
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e5f5a222eced..274885af4ebc 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -88,7 +88,7 @@ struct kvm_lapic {
int nr_lvt_entries;
};
-struct dest_map;
+struct rtc_status;
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
@@ -110,23 +110,30 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr);
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr);
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
- struct dest_map *dest_map);
+ struct rtc_status *rtc_status);
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu);
int kvm_alloc_apic_access_page(struct kvm *kvm);
void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu);
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
- struct kvm_lapic_irq *irq,
- struct dest_map *dest_map);
+ struct kvm_lapic_irq *irq, int *r);
+int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq,
+ struct rtc_status *rtc_status);
+
+static inline int kvm_irq_delivery_to_apic(struct kvm *kvm,
+ struct kvm_lapic *src,
+ struct kvm_lapic_irq *irq)
+{
+ return __kvm_irq_delivery_to_apic(kvm, src, irq, NULL);
+}
+
void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high);
int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated);
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
-void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a5529a4bc87d..881bb914c164 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2405,7 +2405,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
exec_control &= ~CPU_BASED_TPR_SHADOW;
exec_control |= vmcs12->cpu_based_vm_exec_control;
- vmx->nested.l1_tpr_threshold = -1;
if (exec_control & CPU_BASED_TPR_SHADOW)
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
#ifdef CONFIG_X86_64
@@ -3984,28 +3983,6 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
}
}
-
-void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
-{
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- gfn_t gfn;
-
- /*
- * Don't need to mark the APIC access page dirty; it is never
- * written to by the CPU during APIC virtualization.
- */
-
- if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
- gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
- kvm_vcpu_mark_page_dirty(vcpu, gfn);
- }
-
- if (nested_cpu_has_posted_intr(vmcs12)) {
- gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
- kvm_vcpu_mark_page_dirty(vcpu, gfn);
- }
-}
-
static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4040,7 +4017,8 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
}
}
- nested_mark_vmcs12_pages_dirty(vcpu);
+ kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.virtual_apic_map);
+ kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.pi_desc_map);
return 0;
mmio_needed:
@@ -5147,36 +5125,8 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
if (kvm_caps.has_tsc_control)
vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
- if (vmx->nested.l1_tpr_threshold != -1)
- vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
-
- if (vmx->nested.change_vmcs01_virtual_apic_mode) {
- vmx->nested.change_vmcs01_virtual_apic_mode = false;
- vmx_set_virtual_apic_mode(vcpu);
- }
-
- if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
- vmx->nested.update_vmcs01_cpu_dirty_logging = false;
- vmx_update_cpu_dirty_logging(vcpu);
- }
-
nested_put_vmcs12_pages(vcpu);
- if (vmx->nested.reload_vmcs01_apic_access_page) {
- vmx->nested.reload_vmcs01_apic_access_page = false;
- kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
- }
-
- if (vmx->nested.update_vmcs01_apicv_status) {
- vmx->nested.update_vmcs01_apicv_status = false;
- vmx_refresh_apicv_exec_ctrl(vcpu);
- }
-
- if (vmx->nested.update_vmcs01_hwapic_isr) {
- vmx->nested.update_vmcs01_hwapic_isr = false;
- kvm_apic_update_hwapic_isr(vcpu);
- }
-
if ((vm_exit_reason != -1) &&
(enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx)))
vmx->nested.need_vmcs12_to_shadow_sync = true;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 983484d42ebf..b844c5d59025 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -51,7 +51,6 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
-void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu);
bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
int size);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index edf12bf58578..49f5caa45e13 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1594,6 +1594,41 @@ void vmx_vcpu_put(struct kvm_vcpu *vcpu)
vmx_prepare_switch_to_host(to_vmx(vcpu));
}
+static void vmx_switch_loaded_vmcs(struct kvm_vcpu *vcpu,
+ struct loaded_vmcs *vmcs)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int cpu;
+
+ cpu = get_cpu();
+ vmx->loaded_vmcs = vmcs;
+ vmx_vcpu_load_vmcs(vcpu, cpu);
+ put_cpu();
+}
+
+static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!is_guest_mode(vcpu)) {
+ WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
+ return;
+ }
+
+ WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->nested.vmcs02);
+ vmx_switch_loaded_vmcs(vcpu, &vmx->vmcs01);
+}
+
+static void vmx_put_vmcs01(struct kvm_vcpu *vcpu)
+{
+ if (!is_guest_mode(vcpu))
+ return;
+
+ vmx_switch_loaded_vmcs(vcpu, &to_vmx(vcpu)->nested.vmcs02);
+}
+DEFINE_GUARD(vmx_vmcs01, struct kvm_vcpu *,
+ vmx_load_vmcs01(_T), vmx_put_vmcs01(_T))
+
bool vmx_emulation_required(struct kvm_vcpu *vcpu)
{
return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
@@ -4558,10 +4593,7 @@ void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (is_guest_mode(vcpu)) {
- vmx->nested.update_vmcs01_apicv_status = true;
- return;
- }
+ guard(vmx_vmcs01)(vcpu);
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
@@ -6423,6 +6455,15 @@ static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
vmcs_write16(GUEST_PML_INDEX, PML_HEAD_INDEX);
}
+static void nested_vmx_mark_all_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.apic_access_page_map);
+ kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.virtual_apic_map);
+ kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.pi_desc_map);
+}
+
static void vmx_dump_sel(char *name, uint32_t sel)
{
pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
@@ -6700,7 +6741,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
* Mark them dirty on every exit from L2 to prevent them from
* getting out of sync with dirty tracking.
*/
- nested_mark_vmcs12_pages_dirty(vcpu);
+ nested_vmx_mark_all_vmcs12_pages_dirty(vcpu);
/*
* Synthesize a triple fault if L2 state is invalid. In normal
@@ -6837,11 +6878,10 @@ void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
return;
+ guard(vmx_vmcs01)(vcpu);
+
tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
- if (is_guest_mode(vcpu))
- to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
- else
- vmcs_write32(TPR_THRESHOLD, tpr_threshold);
+ vmcs_write32(TPR_THRESHOLD, tpr_threshold);
}
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
@@ -6856,11 +6896,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
!cpu_has_vmx_virtualize_x2apic_mode())
return;
- /* Postpone execution until vmcs01 is the current VMCS. */
- if (is_guest_mode(vcpu)) {
- vmx->nested.change_vmcs01_virtual_apic_mode = true;
- return;
- }
+ guard(vmx_vmcs01)(vcpu);
sec_exec_control = secondary_exec_controls_get(vmx);
sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
@@ -6883,8 +6919,17 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
* only do so if its physical address has changed, but
* the guest may have inserted a non-APIC mapping into
* the TLB while the APIC access page was disabled.
+ *
+ * If L2 is active, immediately flush L1's TLB instead
+ * of requesting a flush of the current TLB, because
+ * the current TLB context is L2's.
*/
- kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ if (!is_guest_mode(vcpu))
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ else if (!enable_ept)
+ vpid_sync_context(vmx->vpid);
+ else if (VALID_PAGE(vcpu->arch.root_mmu.root.hpa))
+ vmx_flush_tlb_ept_root(vcpu->arch.root_mmu.root.hpa);
}
break;
case LAPIC_MODE_X2APIC:
@@ -6909,11 +6954,8 @@ void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
kvm_pfn_t pfn;
bool writable;
- /* Defer reload until vmcs01 is the current VMCS. */
- if (is_guest_mode(vcpu)) {
- to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true;
- return;
- }
+ /* Note, the VIRTUALIZE_APIC_ACCESSES check needs to query vmcs01. */
+ guard(vmx_vmcs01)(vcpu);
if (!(secondary_exec_controls_get(to_vmx(vcpu)) &
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
@@ -6974,21 +7016,16 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
u16 status;
u8 old;
- /*
- * If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI
- * is only relevant for if and only if Virtual Interrupt Delivery is
- * enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's
- * vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested
- * VM-Exit, otherwise L1 with run with a stale SVI.
- */
- if (is_guest_mode(vcpu)) {
- to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
- return;
- }
-
if (max_isr == -1)
max_isr = 0;
+ /*
+ * Always update SVI in vmcs01, as SVI is only relevant for L2 if and
+ * only if Virtual Interrupt Delivery is enabled in vmcs12, and if VID
+ * is enabled then L2 EOIs affect L2's vAPIC, not L1's vAPIC.
+ */
+ guard(vmx_vmcs01)(vcpu);
+
status = vmcs_read16(GUEST_INTR_STATUS);
old = status >> 8;
if (max_isr != old) {
@@ -8315,10 +8352,7 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
if (WARN_ON_ONCE(!enable_pml))
return;
- if (is_guest_mode(vcpu)) {
- vmx->nested.update_vmcs01_cpu_dirty_logging = true;
- return;
- }
+ guard(vmx_vmcs01)(vcpu);
/*
* Note, nr_memslots_dirty_logging can be changed concurrent with this
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index bc3ed3145d7e..a926ce43ad40 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -131,12 +131,6 @@ struct nested_vmx {
*/
bool vmcs02_initialized;
- bool change_vmcs01_virtual_apic_mode;
- bool reload_vmcs01_apic_access_page;
- bool update_vmcs01_cpu_dirty_logging;
- bool update_vmcs01_apicv_status;
- bool update_vmcs01_hwapic_isr;
-
/*
* Enlightened VMCS has been enabled. It does not mean that L1 has to
* use it. However, VMX features available to L1 will be limited based
@@ -185,9 +179,6 @@ struct nested_vmx {
u64 pre_vmenter_ssp;
u64 pre_vmenter_ssp_tbl;
- /* to migrate it to L1 if L2 writes to L1's CR8 directly */
- int l1_tpr_threshold;
-
u16 vpid02;
u16 last_vpid;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 08a6d6e20e9b..06f55aa55172 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6731,7 +6731,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
case KVM_CAP_SPLIT_IRQCHIP: {
mutex_lock(&kvm->lock);
r = -EINVAL;
- if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
+ if (cap->args[0] > KVM_MAX_IRQ_ROUTES)
goto split_irqchip_unlock;
r = -EEXIST;
if (irqchip_in_kernel(kvm))
@@ -10276,7 +10276,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid)
.dest_id = apicid,
};
- kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
+ kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq);
}
bool kvm_apicv_activated(struct kvm *kvm)
@@ -10917,16 +10917,9 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
* pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
* still active when the interrupt got accepted. Make sure
* kvm_check_and_inject_events() is called to check for that.
- *
- * Update SVI when APICv gets enabled, otherwise SVI won't reflect the
- * highest bit in vISR and the next accelerated EOI in the guest won't
- * be virtualized correctly (the CPU uses SVI to determine which vISR
- * vector to clear).
*/
if (!apic->apicv_active)
kvm_make_request(KVM_REQ_EVENT, vcpu);
- else
- kvm_apic_update_hwapic_isr(vcpu);
out:
preempt_enable();
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index d6b2a665b499..28eeb1b2a16c 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -626,7 +626,7 @@ void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
irq.delivery_mode = APIC_DM_FIXED;
irq.level = 1;
- kvm_irq_delivery_to_apic(v->kvm, NULL, &irq, NULL);
+ kvm_irq_delivery_to_apic(v->kvm, NULL, &irq);
}
/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 49c0cfe24fd8..021d1fa09e92 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1381,6 +1381,7 @@ bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
+void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
bool writable);
@@ -1398,6 +1399,13 @@ static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa,
return __kvm_vcpu_map(vcpu, gpa, map, false);
}
+static inline void kvm_vcpu_map_mark_dirty(struct kvm_vcpu *vcpu,
+ struct kvm_host_map *map)
+{
+ if (kvm_vcpu_mapped(map))
+ kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
+}
+
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
@@ -1410,7 +1418,6 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data
int offset, int len);
int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
unsigned long len);
-void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
/**
* kvm_gpc_init - initialize gfn_to_pfn_cache.
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index c3c464513b4e..0166944fa998 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -118,6 +118,7 @@ TEST_GEN_PROGS_x86 += x86/ucna_injection_test
TEST_GEN_PROGS_x86 += x86/userspace_io_test
TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
+TEST_GEN_PROGS_x86 += x86/vmx_apicv_updates_test
TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
index e9b9aebaac97..5ca6bacbd70e 100644
--- a/tools/testing/selftests/kvm/include/x86/apic.h
+++ b/tools/testing/selftests/kvm/include/x86/apic.h
@@ -34,6 +34,7 @@
#define APIC_SPIV 0xF0
#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_ISR 0x100
#define APIC_IRR 0x200
#define APIC_ICR 0x300
#define APIC_LVTCMCI 0x2f0
@@ -71,6 +72,9 @@
#define APIC_TDCR 0x3E0
#define APIC_SELF_IPI 0x3F0
+#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32)
+#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10)
+
void apic_disable(void);
void xapic_enable(void);
void x2apic_enable(void);
diff --git a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
new file mode 100644
index 000000000000..337c53fddeff
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define GOOD_IPI_VECTOR 0xe0
+#define BAD_IPI_VECTOR 0xf0
+
+static volatile int good_ipis_received;
+
+static void good_ipi_handler(struct ex_regs *regs)
+{
+ good_ipis_received++;
+}
+
+static void bad_ipi_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Received \"bad\" IPI; ICR MMIO write should have been ignored");
+}
+
+static void l2_guest_code(void)
+{
+ x2apic_enable();
+ vmcall();
+
+ xapic_enable();
+ xapic_write_reg(APIC_ID, 1 << 24);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ /* Modify APIC ID to coerce KVM into inhibiting APICv. */
+ xapic_enable();
+ xapic_write_reg(APIC_ID, 1 << 24);
+
+ /*
+ * Generate+receive an IRQ without doing EOI to get an IRQ set in vISR
+ * but not SVI. APICv should be inhibited due to running with a
+ * modified APIC ID.
+ */
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ID), 1 << 24);
+
+ /* Enable IRQs and verify the IRQ was received. */
+ sti_nop();
+ GUEST_ASSERT_EQ(good_ipis_received, 1);
+
+ /*
+ * Run L2 to switch to x2APIC mode, which in turn will uninhibit APICv,
+ * as KVM should force the APIC ID back to its default.
+ */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+ GUEST_ASSERT(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD);
+
+ /*
+ * Scribble the APIC access page to verify KVM disabled xAPIC
+ * virtualization in vmcs01, and to verify that KVM flushes L1's TLB
+ * when L2 switches back to accelerated xAPIC mode.
+ */
+ xapic_write_reg(APIC_ICR2, 0xdeadbeefu);
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | BAD_IPI_VECTOR);
+
+ /*
+ * Verify the IRQ is still in-service and emit an EOI to verify KVM
+ * propagates the highest vISR vector to SVI when APICv is activated
+ * (and does so even if APICv was uninhibited while L2 was active).
+ */
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+ BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+ x2apic_write_reg(APIC_EOI, 0);
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+
+ /*
+ * Run L2 one more time to switch back to xAPIC mode to verify that KVM
+ * handles the x2APIC => xAPIC transition and inhibits APICv while L2
+ * is active.
+ */
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_ASSERT(!(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD));
+
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+ /* Re-enable IRQs, as VM-Exit clears RFLAGS.IF. */
+ sti_nop();
+ GUEST_ASSERT_EQ(good_ipis_received, 2);
+
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+ BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+ xapic_write_reg(APIC_EOI, 0);
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct vmx_pages *vmx;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ prepare_virtualize_apic_accesses(vmx, vm);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ vm_install_exception_handler(vm, BAD_IPI_VECTOR, bad_ipi_handler);
+ vm_install_exception_handler(vm, GOOD_IPI_VECTOR, good_ipi_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+ }
+
+ /*
+ * Verify at least two IRQs were injected. Unfortunately, KVM counts
+ * re-injected IRQs (e.g. if delivering the IRQ hits an EPT violation),
+ * so being more precise isn't possible given the current stats.
+ */
+ TEST_ASSERT(vcpu_get_stat(vcpu, irq_injections) >= 2,
+ "Wanted at least 2 IRQ injections, got %lu\n",
+ vcpu_get_stat(vcpu, irq_injections));
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5b5b69c97665..6b1097e76288 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3134,7 +3134,7 @@ int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
bool writable)
{
struct kvm_follow_pfn kfp = {
- .slot = gfn_to_memslot(vcpu->kvm, gfn),
+ .slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn),
.gfn = gfn,
.flags = writable ? FOLL_WRITE : 0,
.refcounted_page = &map->pinned_page,