diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2026-02-09 21:33:30 +0300 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2026-02-11 20:45:32 +0300 |
| commit | 1b13885edf0a55a451a26d5fa53e7877b31debb5 (patch) | |
| tree | 1252f341999af4d3f513a545769c0717da8344a3 | |
| parent | 9123c5f956b1fbedd63821eb528ece55ddd0e49c (diff) | |
| parent | ac4f869c56301831a60706a84acbf13b4f0f9886 (diff) | |
| download | linux-1b13885edf0a55a451a26d5fa53e7877b31debb5.tar.xz | |
Merge tag 'kvm-x86-apic-6.20' of https://github.com/kvm-x86/linux into HEAD
KVM x86 APIC-ish changes for 6.20
- Fix a benign bug where KVM could use the wrong memslots (ignored SMM) when
creating a vCPU-specific mapping of guest memory.
- Clean up KVM's handling of marking mapped vCPU pages dirty.
- Drop a pile of *ancient* sanity checks hidden behind KVM's unused
ASSERT() macro, most of which could be trivially triggered by the guest
and/or user, and all of which were useless.
- Fold "struct dest_map" into its sole user, "struct rtc_status", to make it
more obvious what the weird parameter is used for, and to allow burying the
RTC shenanigans behind CONFIG_KVM_IOAPIC=y.
- Bury all of ioapic.h and KVM_IRQCHIP_KERNEL behind CONFIG_KVM_IOAPIC=y.
- Add a regression test for recent APICv update fixes.
- Rework KVM's handling of VMCS updates while L2 is active to temporarily
switch to vmcs01 instead of deferring the update until the next nested
VM-Exit. The deferred updates approach directly contributed to several
bugs, was proving to be a maintenance burden due to the difficulty in
auditing the correctness of deferred updates, and was polluting
"struct nested_vmx" with a growing pile of booleans.
- Handle "hardware APIC ISR", a.k.a. SVI, updates in kvm_apic_update_apicv()
to consolidate the updates, and to co-locate SVI updates with the updates
for KVM's own cache of ISR information.
- Drop a dead function declaration.
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/hyperv.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/ioapic.c | 43 | ||||
| -rw-r--r-- | arch/x86/kvm/ioapic.h | 38 | ||||
| -rw-r--r-- | arch/x86/kvm/irq.c | 4 | ||||
| -rw-r--r-- | arch/x86/kvm/lapic.c | 97 | ||||
| -rw-r--r-- | arch/x86/kvm/lapic.h | 21 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/nested.c | 54 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/nested.h | 1 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 106 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 9 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 11 | ||||
| -rw-r--r-- | arch/x86/kvm/xen.c | 2 | ||||
| -rw-r--r-- | include/linux/kvm_host.h | 9 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/Makefile.kvm | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/apic.h | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c | 155 | ||||
| -rw-r--r-- | virt/kvm/kvm_main.c | 2 |
18 files changed, 334 insertions, 227 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 91c26f159d89..94cd4dc0e2a1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1232,7 +1232,9 @@ struct kvm_xen { enum kvm_irqchip_mode { KVM_IRQCHIP_NONE, +#ifdef CONFIG_KVM_IOAPIC KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ +#endif KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index de92292eb1f5..49bf744ca8e3 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -492,7 +492,7 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) irq.vector = vector; irq.level = 1; - ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL); + ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq); trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); return ret; } diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index a26fa4222f29..a38a8e2ac70b 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -37,11 +37,6 @@ static int ioapic_service(struct kvm_ioapic *vioapic, int irq, bool line_status); -static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, - struct kvm_ioapic *ioapic, - int trigger_mode, - int pin); - static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic) { unsigned long result = 0; @@ -82,7 +77,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic) static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_IDS); + bitmap_zero(ioapic->rtc_status.map, KVM_MAX_VCPU_IDS); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); @@ -97,7 +92,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) { bool new_val, old_val; struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; - struct dest_map *dest_map = 
&ioapic->rtc_status.dest_map; + struct rtc_status *status = &ioapic->rtc_status; union kvm_ioapic_redirect_entry *e; e = &ioapic->redirtbl[RTC_GSI]; @@ -107,17 +102,17 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) return; new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); - old_val = test_bit(vcpu->vcpu_id, dest_map->map); + old_val = test_bit(vcpu->vcpu_id, status->map); if (new_val == old_val) return; if (new_val) { - __set_bit(vcpu->vcpu_id, dest_map->map); - dest_map->vectors[vcpu->vcpu_id] = e->fields.vector; + __set_bit(vcpu->vcpu_id, status->map); + status->vectors[vcpu->vcpu_id] = e->fields.vector; ioapic->rtc_status.pending_eoi++; } else { - __clear_bit(vcpu->vcpu_id, dest_map->map); + __clear_bit(vcpu->vcpu_id, status->map); ioapic->rtc_status.pending_eoi--; rtc_status_pending_eoi_check_valid(ioapic); } @@ -148,13 +143,12 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu, int vector) { - struct dest_map *dest_map = &ioapic->rtc_status.dest_map; + struct rtc_status *status = &ioapic->rtc_status; /* RTC special handling */ - if (test_bit(vcpu->vcpu_id, dest_map->map) && - (vector == dest_map->vectors[vcpu->vcpu_id]) && - (test_and_clear_bit(vcpu->vcpu_id, - ioapic->rtc_status.dest_map.map))) { + if (test_bit(vcpu->vcpu_id, status->map) && + (vector == status->vectors[vcpu->vcpu_id]) && + (test_and_clear_bit(vcpu->vcpu_id, status->map))) { --ioapic->rtc_status.pending_eoi; rtc_status_pending_eoi_check_valid(ioapic); } @@ -265,15 +259,15 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; - struct dest_map *dest_map = &ioapic->rtc_status.dest_map; + struct rtc_status *status = &ioapic->rtc_status; union kvm_ioapic_redirect_entry *e; int index; 
spin_lock(&ioapic->lock); /* Make sure we see any missing RTC EOI */ - if (test_bit(vcpu->vcpu_id, dest_map->map)) - __set_bit(dest_map->vectors[vcpu->vcpu_id], + if (test_bit(vcpu->vcpu_id, status->map)) + __set_bit(status->vectors[vcpu->vcpu_id], ioapic_handled_vectors); for (index = 0; index < IOAPIC_NUM_PINS; index++) { @@ -490,11 +484,11 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) * if rtc_irq_check_coalesced returns false). */ BUG_ON(ioapic->rtc_status.pending_eoi != 0); - ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, - &ioapic->rtc_status.dest_map); + ret = __kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, + &ioapic->rtc_status); ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); } else - ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) entry->fields.remote_irr = 1; @@ -564,7 +558,6 @@ static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, kvm_lapic_suppress_eoi_broadcast(apic)) return; - ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; if (!ent->fields.mask && (ioapic->irr & (1 << pin))) { ++ioapic->irq_eoi[pin]; @@ -624,8 +617,6 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!ioapic_in_range(ioapic, addr)) return -EOPNOTSUPP; - ASSERT(!(addr & 0xf)); /* check alignment */ - addr &= 0xff; spin_lock(&ioapic->lock); switch (addr) { @@ -666,8 +657,6 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!ioapic_in_range(ioapic, addr)) return -EOPNOTSUPP; - ASSERT(!(addr & 0xf)); /* check alignment */ - switch (len) { case 8: case 4: diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index bf28dbc11ff6..3dadae093690 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -6,11 +6,12 @@ #include <kvm/iodev.h> #include "irq.h" +#ifdef CONFIG_KVM_IOAPIC + struct 
kvm; struct kvm_vcpu; #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS -#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ #define IOAPIC_EDGE_TRIG 0 #define IOAPIC_LEVEL_TRIG 1 @@ -37,7 +38,9 @@ struct kvm_vcpu; #define RTC_GSI 8 -struct dest_map { +struct rtc_status { + int pending_eoi; + /* vcpu bitmap where IRQ has been sent */ DECLARE_BITMAP(map, KVM_MAX_VCPU_IDS); @@ -48,12 +51,6 @@ struct dest_map { u8 vectors[KVM_MAX_VCPU_IDS]; }; - -struct rtc_status { - int pending_eoi; - struct dest_map dest_map; -}; - union kvm_ioapic_redirect_entry { u64 bits; struct { @@ -104,24 +101,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -#ifdef DEBUG -#define ASSERT(x) \ -do { \ - if (!(x)) { \ - printk(KERN_EMERG "assertion failed %s: %d: %s\n", \ - __FILE__, __LINE__, #x); \ - BUG(); \ - } \ -} while (0) -#else -#define ASSERT(x) do { } while (0) -#endif - -static inline int ioapic_in_kernel(struct kvm *kvm) -{ - return irqchip_full(kvm); -} - void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); @@ -134,6 +113,13 @@ void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors); +#endif /* CONFIG_KVM_IOAPIC */ + +static inline int ioapic_in_kernel(struct kvm *kvm) +{ + return irqchip_full(kvm); +} + void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors); void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode, diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 4c7688670c2d..9519fec09ee6 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -235,7 +235,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, 
kvm_msi_to_lapic_irq(kvm, e, &irq); - return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); + return kvm_irq_delivery_to_apic(kvm, NULL, &irq); } int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, @@ -258,7 +258,7 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, kvm_msi_to_lapic_irq(kvm, e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r)) return r; break; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 738ec3c1b0b5..2332a258de91 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -717,8 +717,6 @@ static inline int apic_search_irr(struct kvm_lapic *apic) static inline int apic_find_highest_irr(struct kvm_lapic *apic) { - int result; - /* * Note that irr_pending is just a hint. It will be always * true with virtual interrupt delivery enabled. @@ -726,10 +724,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; - result = apic_search_irr(apic); - ASSERT(result == -1 || result >= 16); - - return result; + return apic_search_irr(apic); } static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) @@ -782,8 +777,6 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) static inline int apic_find_highest_isr(struct kvm_lapic *apic) { - int result; - /* * Note that isr_count is always 1, and highest_isr_cache * is always -1, with APIC virtualization enabled. 
@@ -793,10 +786,7 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) if (likely(apic->highest_isr_cache != -1)) return apic->highest_isr_cache; - result = apic_find_highest_vector(apic->regs + APIC_ISR); - ASSERT(result == -1 || result >= 16); - - return result; + return apic_find_highest_vector(apic->regs + APIC_ISR); } static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) @@ -821,17 +811,6 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) } } -void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active) - return; - - kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); -} -EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_update_hwapic_isr); - int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) { /* This may race with setting of irr in __apic_accept_irq() and @@ -845,15 +824,15 @@ EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_lapic_find_highest_irr); static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode, - struct dest_map *dest_map); + struct rtc_status *rtc_status); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, - struct dest_map *dest_map) + struct rtc_status *rtc_status) { struct kvm_lapic *apic = vcpu->arch.apic; return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, - irq->level, irq->trig_mode, dest_map); + irq->level, irq->trig_mode, rtc_status); } static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, @@ -1099,7 +1078,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, struct kvm_lapic *target = vcpu->arch.apic; u32 mda = kvm_apic_mda(vcpu, dest, source, target); - ASSERT(target); switch (shorthand) { case APIC_DEST_NOSHORT: if (dest_mode == APIC_DEST_PHYSICAL) @@ -1237,8 +1215,9 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, return true; } -bool 
kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) +static bool __kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, int *r, + struct rtc_status *rtc_status) { struct kvm_apic_map *map; unsigned long bitmap; @@ -1253,7 +1232,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = 0; return true; } - *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); + *r = kvm_apic_set_irq(src->vcpu, irq, rtc_status); return true; } @@ -1266,7 +1245,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, for_each_set_bit(i, &bitmap, 16) { if (!dst[i]) continue; - *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); + *r += kvm_apic_set_irq(dst[i]->vcpu, irq, rtc_status); } } @@ -1274,6 +1253,13 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, return ret; } + +bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, int *r) +{ + return __kvm_irq_delivery_to_apic_fast(kvm, src, irq, r, NULL); +} + /* * This routine tries to handle interrupts in posted mode, here is how * it deals with different cases: @@ -1345,15 +1331,16 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_intr_is_single_vcpu); -int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, struct dest_map *dest_map) +int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, + struct rtc_status *rtc_status) { int r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; unsigned int dest_vcpus = 0; - if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) + if (__kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, rtc_status)) return r; if (irq->dest_mode == 
APIC_DEST_PHYSICAL && @@ -1375,7 +1362,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (!kvm_lowest_prio_delivery(irq)) { if (r < 0) r = 0; - r += kvm_apic_set_irq(vcpu, irq, dest_map); + r += kvm_apic_set_irq(vcpu, irq, rtc_status); } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) { if (!vector_hashing_enabled) { if (!lowest) @@ -1397,7 +1384,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } if (lowest) - r = kvm_apic_set_irq(lowest, irq, dest_map); + r = kvm_apic_set_irq(lowest, irq, rtc_status); return r; } @@ -1408,7 +1395,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, */ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode, - struct dest_map *dest_map) + struct rtc_status *rtc_status) { int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; @@ -1429,10 +1416,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, result = 1; - if (dest_map) { - __set_bit(vcpu->vcpu_id, dest_map->map); - dest_map->vectors[vcpu->vcpu_id] = vector; +#ifdef CONFIG_KVM_IOAPIC + if (rtc_status) { + __set_bit(vcpu->vcpu_id, rtc_status->map); + rtc_status->vectors[vcpu->vcpu_id] = vector; } +#endif if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { if (trig_mode) @@ -1658,7 +1647,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) trace_kvm_apic_ipi(icr_low, irq.dest_id); - kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); + kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); } EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_apic_send_ipi); @@ -1667,8 +1656,6 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) ktime_t remaining, now; s64 ns; - ASSERT(apic != NULL); - /* if initial count is 0, current count should also be 0 */ if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 || apic->lapic_timer.period == 0) @@ -2629,7 +2616,7 @@ static int __kvm_x2apic_icr_write(struct kvm_lapic *apic, 
u64 data, bool fast) kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq); if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq, - &ignored, NULL)) + &ignored)) return -EWOULDBLOCK; trace_kvm_apic_ipi((u32)data, irq.dest_id); @@ -2845,10 +2832,18 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) */ apic->irr_pending = true; - if (apic->apicv_active) + /* + * Update SVI when APICv gets enabled, otherwise SVI won't reflect the + * highest bit in vISR and the next accelerated EOI in the guest won't + * be virtualized correctly (the CPU uses SVI to determine which vISR + * vector to clear). + */ + if (apic->apicv_active) { apic->isr_count = 1; - else + kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); + } else { apic->isr_count = count_vectors(apic->regs + APIC_ISR); + } apic->highest_isr_cache = -1; } @@ -2976,10 +2971,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.pv_eoi.msr_val = 0; apic_update_ppr(apic); - if (apic->apicv_active) { + if (apic->apicv_active) kvm_x86_call(apicv_post_state_restore)(vcpu); - kvm_x86_call(hwapic_isr_update)(vcpu, -1); - } vcpu->arch.apic_arb_prio = 0; vcpu->arch.apic_attention = 0; @@ -3060,8 +3053,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic; - ASSERT(vcpu != NULL); - if (!irqchip_in_kernel(vcpu->kvm)) { static_branch_inc(&kvm_has_noapic_vcpu); return 0; @@ -3292,10 +3283,8 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) __start_apic_timer(apic, APIC_TMCCT); kvm_lapic_set_reg(apic, APIC_TMCCT, 0); kvm_apic_update_apicv(vcpu); - if (apic->apicv_active) { + if (apic->apicv_active) kvm_x86_call(apicv_post_state_restore)(vcpu); - kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); - } kvm_make_request(KVM_REQ_EVENT, vcpu); #ifdef CONFIG_KVM_IOAPIC diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index e5f5a222eced..274885af4ebc 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ 
-88,7 +88,7 @@ struct kvm_lapic { int nr_lvt_entries; }; -struct dest_map; +struct rtc_status; int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); @@ -110,23 +110,30 @@ bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr); bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr); void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, - struct dest_map *dest_map); + struct rtc_status *rtc_status); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); void kvm_apic_update_apicv(struct kvm_vcpu *vcpu); int kvm_alloc_apic_access_page(struct kvm *kvm); void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); -int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, - struct dest_map *dest_map); + struct kvm_lapic_irq *irq, int *r); +int __kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, + struct rtc_status *rtc_status); + +static inline int kvm_irq_delivery_to_apic(struct kvm *kvm, + struct kvm_lapic *src, + struct kvm_lapic_irq *irq) +{ + return __kvm_irq_delivery_to_apic(kvm, src, irq, NULL); +} + void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated); int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); -void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a5529a4bc87d..881bb914c164 100644 --- 
a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2405,7 +2405,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 exec_control &= ~CPU_BASED_TPR_SHADOW; exec_control |= vmcs12->cpu_based_vm_exec_control; - vmx->nested.l1_tpr_threshold = -1; if (exec_control & CPU_BASED_TPR_SHADOW) vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); #ifdef CONFIG_X86_64 @@ -3984,28 +3983,6 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, } } - -void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - gfn_t gfn; - - /* - * Don't need to mark the APIC access page dirty; it is never - * written to by the CPU during APIC virtualization. - */ - - if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { - gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; - kvm_vcpu_mark_page_dirty(vcpu, gfn); - } - - if (nested_cpu_has_posted_intr(vmcs12)) { - gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; - kvm_vcpu_mark_page_dirty(vcpu, gfn); - } -} - static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4040,7 +4017,8 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) } } - nested_mark_vmcs12_pages_dirty(vcpu); + kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.virtual_apic_map); + kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.pi_desc_map); return 0; mmio_needed: @@ -5147,36 +5125,8 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, if (kvm_caps.has_tsc_control) vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio); - if (vmx->nested.l1_tpr_threshold != -1) - vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold); - - if (vmx->nested.change_vmcs01_virtual_apic_mode) { - vmx->nested.change_vmcs01_virtual_apic_mode = false; - vmx_set_virtual_apic_mode(vcpu); - } - - if (vmx->nested.update_vmcs01_cpu_dirty_logging) { - vmx->nested.update_vmcs01_cpu_dirty_logging = false; - 
vmx_update_cpu_dirty_logging(vcpu); - } - nested_put_vmcs12_pages(vcpu); - if (vmx->nested.reload_vmcs01_apic_access_page) { - vmx->nested.reload_vmcs01_apic_access_page = false; - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - } - - if (vmx->nested.update_vmcs01_apicv_status) { - vmx->nested.update_vmcs01_apicv_status = false; - vmx_refresh_apicv_exec_ctrl(vcpu); - } - - if (vmx->nested.update_vmcs01_hwapic_isr) { - vmx->nested.update_vmcs01_hwapic_isr = false; - kvm_apic_update_hwapic_isr(vcpu); - } - if ((vm_exit_reason != -1) && (enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx))) vmx->nested.need_vmcs12_to_shadow_sync = true; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 983484d42ebf..b844c5d59025 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -51,7 +51,6 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, u32 vmx_instruction_info, bool wr, int len, gva_t *ret); -void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu); bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, int size); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index edf12bf58578..49f5caa45e13 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1594,6 +1594,41 @@ void vmx_vcpu_put(struct kvm_vcpu *vcpu) vmx_prepare_switch_to_host(to_vmx(vcpu)); } +static void vmx_switch_loaded_vmcs(struct kvm_vcpu *vcpu, + struct loaded_vmcs *vmcs) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int cpu; + + cpu = get_cpu(); + vmx->loaded_vmcs = vmcs; + vmx_vcpu_load_vmcs(vcpu, cpu); + put_cpu(); +} + +static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!is_guest_mode(vcpu)) { + WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01); + return; + } + + 
WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->nested.vmcs02); + vmx_switch_loaded_vmcs(vcpu, &vmx->vmcs01); +} + +static void vmx_put_vmcs01(struct kvm_vcpu *vcpu) +{ + if (!is_guest_mode(vcpu)) + return; + + vmx_switch_loaded_vmcs(vcpu, &to_vmx(vcpu)->nested.vmcs02); +} +DEFINE_GUARD(vmx_vmcs01, struct kvm_vcpu *, + vmx_load_vmcs01(_T), vmx_put_vmcs01(_T)) + bool vmx_emulation_required(struct kvm_vcpu *vcpu) { return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu); @@ -4558,10 +4593,7 @@ void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (is_guest_mode(vcpu)) { - vmx->nested.update_vmcs01_apicv_status = true; - return; - } + guard(vmx_vmcs01)(vcpu); pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); @@ -6423,6 +6455,15 @@ static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_PML_INDEX, PML_HEAD_INDEX); } +static void nested_vmx_mark_all_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.apic_access_page_map); + kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.virtual_apic_map); + kvm_vcpu_map_mark_dirty(vcpu, &vmx->nested.pi_desc_map); +} + static void vmx_dump_sel(char *name, uint32_t sel) { pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", @@ -6700,7 +6741,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) * Mark them dirty on every exit from L2 to prevent them from * getting out of sync with dirty tracking. */ - nested_mark_vmcs12_pages_dirty(vcpu); + nested_vmx_mark_all_vmcs12_pages_dirty(vcpu); /* * Synthesize a triple fault if L2 state is invalid. In normal @@ -6837,11 +6878,10 @@ void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) return; + guard(vmx_vmcs01)(vcpu); + tpr_threshold = (irr == -1 || tpr < irr) ? 
0 : irr; - if (is_guest_mode(vcpu)) - to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold; - else - vmcs_write32(TPR_THRESHOLD, tpr_threshold); + vmcs_write32(TPR_THRESHOLD, tpr_threshold); } void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) @@ -6856,11 +6896,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) !cpu_has_vmx_virtualize_x2apic_mode()) return; - /* Postpone execution until vmcs01 is the current VMCS. */ - if (is_guest_mode(vcpu)) { - vmx->nested.change_vmcs01_virtual_apic_mode = true; - return; - } + guard(vmx_vmcs01)(vcpu); sec_exec_control = secondary_exec_controls_get(vmx); sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | @@ -6883,8 +6919,17 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) * only do so if its physical address has changed, but * the guest may have inserted a non-APIC mapping into * the TLB while the APIC access page was disabled. + * + * If L2 is active, immediately flush L1's TLB instead + * of requesting a flush of the current TLB, because + * the current TLB context is L2's. */ - kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + if (!is_guest_mode(vcpu)) + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + else if (!enable_ept) + vpid_sync_context(vmx->vpid); + else if (VALID_PAGE(vcpu->arch.root_mmu.root.hpa)) + vmx_flush_tlb_ept_root(vcpu->arch.root_mmu.root.hpa); } break; case LAPIC_MODE_X2APIC: @@ -6909,11 +6954,8 @@ void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) kvm_pfn_t pfn; bool writable; - /* Defer reload until vmcs01 is the current VMCS. */ - if (is_guest_mode(vcpu)) { - to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true; - return; - } + /* Note, the VIRTUALIZE_APIC_ACCESSES check needs to query vmcs01. 
*/ + guard(vmx_vmcs01)(vcpu); if (!(secondary_exec_controls_get(to_vmx(vcpu)) & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) @@ -6974,21 +7016,16 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) u16 status; u8 old; - /* - * If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI - * is only relevant for if and only if Virtual Interrupt Delivery is - * enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's - * vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested - * VM-Exit, otherwise L1 with run with a stale SVI. - */ - if (is_guest_mode(vcpu)) { - to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true; - return; - } - if (max_isr == -1) max_isr = 0; + /* + * Always update SVI in vmcs01, as SVI is only relevant for L2 if and + * only if Virtual Interrupt Delivery is enabled in vmcs12, and if VID + * is enabled then L2 EOIs affect L2's vAPIC, not L1's vAPIC. + */ + guard(vmx_vmcs01)(vcpu); + status = vmcs_read16(GUEST_INTR_STATUS); old = status >> 8; if (max_isr != old) { @@ -8315,10 +8352,7 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu) if (WARN_ON_ONCE(!enable_pml)) return; - if (is_guest_mode(vcpu)) { - vmx->nested.update_vmcs01_cpu_dirty_logging = true; - return; - } + guard(vmx_vmcs01)(vcpu); /* * Note, nr_memslots_dirty_logging can be changed concurrent with this diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index bc3ed3145d7e..a926ce43ad40 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -131,12 +131,6 @@ struct nested_vmx { */ bool vmcs02_initialized; - bool change_vmcs01_virtual_apic_mode; - bool reload_vmcs01_apic_access_page; - bool update_vmcs01_cpu_dirty_logging; - bool update_vmcs01_apicv_status; - bool update_vmcs01_hwapic_isr; - /* * Enlightened VMCS has been enabled. It does not mean that L1 has to * use it. 
However, VMX features available to L1 will be limited based @@ -185,9 +179,6 @@ struct nested_vmx { u64 pre_vmenter_ssp; u64 pre_vmenter_ssp_tbl; - /* to migrate it to L1 if L2 writes to L1's CR8 directly */ - int l1_tpr_threshold; - u16 vpid02; u16 last_vpid; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 08a6d6e20e9b..06f55aa55172 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6731,7 +6731,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, case KVM_CAP_SPLIT_IRQCHIP: { mutex_lock(&kvm->lock); r = -EINVAL; - if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS) + if (cap->args[0] > KVM_MAX_IRQ_ROUTES) goto split_irqchip_unlock; r = -EEXIST; if (irqchip_in_kernel(kvm)) @@ -10276,7 +10276,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid) .dest_id = apicid, }; - kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); + kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq); } bool kvm_apicv_activated(struct kvm *kvm) @@ -10917,16 +10917,9 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was * still active when the interrupt got accepted. Make sure * kvm_check_and_inject_events() is called to check for that. - * - * Update SVI when APICv gets enabled, otherwise SVI won't reflect the - * highest bit in vISR and the next accelerated EOI in the guest won't - * be virtualized correctly (the CPU uses SVI to determine which vISR - * vector to clear). 
*/ if (!apic->apicv_active) kvm_make_request(KVM_REQ_EVENT, vcpu); - else - kvm_apic_update_hwapic_isr(vcpu); out: preempt_enable(); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index d6b2a665b499..28eeb1b2a16c 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -626,7 +626,7 @@ void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) irq.delivery_mode = APIC_DM_FIXED; irq.level = 1; - kvm_irq_delivery_to_apic(v->kvm, NULL, &irq, NULL); + kvm_irq_delivery_to_apic(v->kvm, NULL, &irq); } /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 49c0cfe24fd8..021d1fa09e92 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1381,6 +1381,7 @@ bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); +void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map, bool writable); @@ -1398,6 +1399,13 @@ static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa, return __kvm_vcpu_map(vcpu, gpa, map, false); } +static inline void kvm_vcpu_map_mark_dirty(struct kvm_vcpu *vcpu, + struct kvm_host_map *map) +{ + if (kvm_vcpu_mapped(map)) + kvm_vcpu_mark_page_dirty(vcpu, map->gfn); +} + unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, @@ -1410,7 +1418,6 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data int offset, int len); int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); -void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); /** * 
kvm_gpc_init - initialize gfn_to_pfn_cache. diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index c3c464513b4e..0166944fa998 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -118,6 +118,7 @@ TEST_GEN_PROGS_x86 += x86/ucna_injection_test TEST_GEN_PROGS_x86 += x86/userspace_io_test TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test +TEST_GEN_PROGS_x86 += x86/vmx_apicv_updates_test TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index e9b9aebaac97..5ca6bacbd70e 100644 --- a/tools/testing/selftests/kvm/include/x86/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -34,6 +34,7 @@ #define APIC_SPIV 0xF0 #define APIC_SPIV_FOCUS_DISABLED (1 << 9) #define APIC_SPIV_APIC_ENABLED (1 << 8) +#define APIC_ISR 0x100 #define APIC_IRR 0x200 #define APIC_ICR 0x300 #define APIC_LVTCMCI 0x2f0 @@ -71,6 +72,9 @@ #define APIC_TDCR 0x3E0 #define APIC_SELF_IPI 0x3F0 +#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32) +#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10) + void apic_disable(void); void xapic_enable(void); void x2apic_enable(void); diff --git a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c new file mode 100644 index 000000000000..337c53fddeff --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define GOOD_IPI_VECTOR 0xe0 +#define BAD_IPI_VECTOR 0xf0 + +static volatile int good_ipis_received; + +static void 
good_ipi_handler(struct ex_regs *regs) +{ + good_ipis_received++; +} + +static void bad_ipi_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Received \"bad\" IPI; ICR MMIO write should have been ignored"); +} + +static void l2_guest_code(void) +{ + x2apic_enable(); + vmcall(); + + xapic_enable(); + xapic_write_reg(APIC_ID, 1 << 24); + vmcall(); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + + /* Modify APIC ID to coerce KVM into inhibiting APICv. */ + xapic_enable(); + xapic_write_reg(APIC_ID, 1 << 24); + + /* + * Generate+receive an IRQ without doing EOI to get an IRQ set in vISR + * but not SVI. APICv should be inhibited due to running with a + * modified APIC ID. + */ + xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR); + GUEST_ASSERT_EQ(xapic_read_reg(APIC_ID), 1 << 24); + + /* Enable IRQs and verify the IRQ was received. */ + sti_nop(); + GUEST_ASSERT_EQ(good_ipis_received, 1); + + /* + * Run L2 to switch to x2APIC mode, which in turn will uninhibit APICv, + * as KVM should force the APIC ID back to its default. + */ + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN)); + GUEST_ASSERT(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD); + + /* + * Scribble the APIC access page to verify KVM disabled xAPIC + * virtualization in vmcs01, and to verify that KVM flushes L1's TLB + * when L2 switches back to accelerated xAPIC mode. 
+ */ + xapic_write_reg(APIC_ICR2, 0xdeadbeefu); + xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | BAD_IPI_VECTOR); + + /* + * Verify the IRQ is still in-service and emit an EOI to verify KVM + * propagates the highest vISR vector to SVI when APICv is activated + * (and does so even if APICv was uninhibited while L2 was active). + */ + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), + BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR))); + x2apic_write_reg(APIC_EOI, 0); + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0); + + /* + * Run L2 one more time to switch back to xAPIC mode to verify that KVM + * handles the x2APIC => xAPIC transition and inhibits APICv while L2 + * is active. + */ + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_ASSERT(!(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD)); + + xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR); + /* Re-enable IRQs, as VM-Exit clears RFLAGS.IF. 
*/ + sti_nop(); + GUEST_ASSERT_EQ(good_ipis_received, 2); + + GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), + BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR))); + xapic_write_reg(APIC_EOI, 0); + GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva; + struct vmx_pages *vmx; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + prepare_virtualize_apic_accesses(vmx, vm); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + vm_install_exception_handler(vm, BAD_IPI_VECTOR, bad_ipi_handler); + vm_install_exception_handler(vm, GOOD_IPI_VECTOR, good_ipi_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall %lu", uc.cmd); + } + + /* + * Verify at least two IRQs were injected. Unfortunately, KVM counts + * re-injected IRQs (e.g. if delivering the IRQ hits an EPT violation), + * so being more precise isn't possible given the current stats. 
+ */ + TEST_ASSERT(vcpu_get_stat(vcpu, irq_injections) >= 2, + "Wanted at least 2 IRQ injections, got %lu\n", + vcpu_get_stat(vcpu, irq_injections)); + + kvm_vm_free(vm); + return 0; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5b5b69c97665..6b1097e76288 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3134,7 +3134,7 @@ int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, bool writable) { struct kvm_follow_pfn kfp = { - .slot = gfn_to_memslot(vcpu->kvm, gfn), + .slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn), .gfn = gfn, .flags = writable ? FOLL_WRITE : 0, .refcounted_page = &map->pinned_page, |
