Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r-- | arch/x86/kvm/lapic.c | 183
1 file changed, 120 insertions, 63 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0e68b4c937fc..9dda989a1cf0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -27,6 +27,7 @@
 #include <linux/math64.h>
 #include <linux/slab.h>
 #include <asm/processor.h>
+#include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/page.h>
 #include <asm/current.h>
@@ -54,7 +55,7 @@
 #define PRIo64 "o"
 
 /* 14 is the version for Xeon and Pentium 8.4.8*/
-#define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
+#define APIC_VERSION			0x14UL
 #define LAPIC_MMIO_LENGTH		(1 << 12)
 /* followed define is not in apicdef.h */
 #define MAX_APIC_VECTOR			256
@@ -67,6 +68,8 @@ static bool lapic_timer_advance_dynamic __read_mostly;
 #define LAPIC_TIMER_ADVANCE_NS_MAX     5000
 /* step-by-step approximation to mitigate fluctuation */
 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
+static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data);
 
 static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
 {
@@ -398,14 +401,26 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
 	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 }
 
+static inline bool kvm_lapic_lvt_supported(struct kvm_lapic *apic, int lvt_index)
+{
+	return apic->nr_lvt_entries > lvt_index;
+}
+
+static inline int kvm_apic_calc_nr_lvt_entries(struct kvm_vcpu *vcpu)
+{
+	return KVM_APIC_MAX_NR_LVT_ENTRIES - !(vcpu->arch.mcg_cap & MCG_CMCI_P);
+}
+
 void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	u32 v = APIC_VERSION;
+	u32 v = 0;
 
 	if (!lapic_in_kernel(vcpu))
 		return;
 
+	v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16);
+
 	/*
 	 * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
 	 * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
@@ -419,12 +434,33 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	kvm_lapic_set_reg(apic, APIC_LVR, v);
 }
 
-static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
-	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
-	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
-	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
-	LINT_MASK, LINT_MASK,	/* LVT0-1 */
-	LVT_MASK		/* LVTERR */
+void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu)
+{
+	int nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	int i;
+
+	if (!lapic_in_kernel(vcpu) || nr_lvt_entries == apic->nr_lvt_entries)
+		return;
+
+	/* Initialize/mask any "new" LVT entries. */
+	for (i = apic->nr_lvt_entries; i < nr_lvt_entries; i++)
+		kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
+
+	apic->nr_lvt_entries = nr_lvt_entries;
+
+	/* The number of LVT entries is reflected in the version register. */
+	kvm_apic_set_version(vcpu);
+}
+
+static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = {
+	[LVT_TIMER] = LVT_MASK,      /* timer mode mask added at runtime */
+	[LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK,
+	[LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK,
+	[LVT_LINT0] = LINT_MASK,
+	[LVT_LINT1] = LINT_MASK,
+	[LVT_ERROR] = LVT_MASK,
+	[LVT_CMCI] = LVT_MASK | APIC_MODE_MASK
 };
 
 static int find_highest_vector(void *bitmap)
@@ -518,14 +554,11 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 
 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 {
-	struct kvm_vcpu *vcpu;
-
-	vcpu = apic->vcpu;
-
-	if (unlikely(vcpu->arch.apicv_active)) {
+	if (unlikely(apic->apicv_active)) {
 		/* need to update RVI */
 		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
-		static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
+		static_call_cond(kvm_x86_hwapic_irr_update)(apic->vcpu,
+							    apic_find_highest_irr(apic));
 	} else {
 		apic->irr_pending = false;
 		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
@@ -542,20 +575,16 @@ EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
 
 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 {
-	struct kvm_vcpu *vcpu;
-
 	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 		return;
 
-	vcpu = apic->vcpu;
-
 	/*
 	 * With APIC virtualization enabled, all caching is disabled
 	 * because the processor can modify ISR under the hood.  Instead
 	 * just set SVI.
 	 */
-	if (unlikely(vcpu->arch.apicv_active))
-		static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, vec);
+	if (unlikely(apic->apicv_active))
+		static_call_cond(kvm_x86_hwapic_isr_update)(vec);
 	else {
 		++apic->isr_count;
 		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -589,12 +618,9 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 
 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 {
-	struct kvm_vcpu *vcpu;
 	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
 		return;
 
-	vcpu = apic->vcpu;
-
 	/*
 	 * We do get here for APIC virtualization enabled if the guest
 	 * uses the Hyper-V APIC enlightenment.  In this case we may need
@@ -602,8 +628,8 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 	 * on the other hand isr_count and highest_isr_cache are unused
 	 * and must be left alone.
 	 */
-	if (unlikely(vcpu->arch.apicv_active))
-		static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
+	if (unlikely(apic->apicv_active))
+		static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
 	else {
 		--apic->isr_count;
 		BUG_ON(apic->isr_count < 0);
@@ -801,17 +827,17 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 	if (kvm_apic_broadcast(apic, mda))
 		return true;
 
-	if (apic_x2apic_mode(apic))
-		return mda == kvm_x2apic_id(apic);
-
 	/*
-	 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
-	 * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
-	 * this allows unique addressing of VCPUs with APIC ID over 0xff.
-	 * The 0xff condition is needed because writeable xAPIC ID.
+	 * Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they
+	 * were in x2APIC mode if the target APIC ID can't be encoded as an
+	 * xAPIC ID.  This allows unique addressing of hotplugged vCPUs (which
+	 * start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC
+	 * mode.  Match the x2APIC ID if and only if the target APIC ID can't
+	 * be encoded in xAPIC to avoid spurious matches against a vCPU that
+	 * changed its (addressable) xAPIC ID (which is writable).
 	 */
-	if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
-		return true;
+	if (apic_x2apic_mode(apic) || mda > 0xff)
+		return mda == kvm_x2apic_id(apic);
 
 	return mda == kvm_xapic_id(apic);
 }
@@ -1325,7 +1351,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
 	if (apic_x2apic_mode(apic))
 		irq.dest_id = icr_high;
 	else
-		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
+		irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high);
 
 	trace_kvm_apic_ipi(icr_low, irq.dest_id);
 
@@ -1444,6 +1470,9 @@ static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 		APIC_REG_MASK(APIC_TMCCT) |
 		APIC_REG_MASK(APIC_TDCR);
 
+	if (kvm_lapic_lvt_supported(apic, LVT_CMCI))
+		valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI);
+
 	/*
 	 * ARBPRI and ICR2 are not valid in x2APIC mode.  WARN if KVM reads ICR
 	 * in x2APIC mode as it's an 8-byte register in x2APIC and needs to be
@@ -1583,7 +1612,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
 		int vec = reg & APIC_VECTOR_MASK;
 		void *bitmap = apic->regs + APIC_ISR;
 
-		if (vcpu->arch.apicv_active)
+		if (apic->apicv_active)
 			bitmap = apic->regs + APIC_IRR;
 
 		if (apic_test_vector(vec, bitmap))
@@ -1602,7 +1631,7 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
 	 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
 	 * always for VMX enabled hardware.
 	 */
-	if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
+	if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) {
 		__delay(min(guest_cycles,
 			nsec_to_cycles(vcpu, timer_advance_ns)));
 	} else {
@@ -1700,7 +1729,7 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
 	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
 		ktimer->expired_tscdeadline = ktimer->tscdeadline;
 
-	if (!from_timer_fn && vcpu->arch.apicv_active) {
+	if (!from_timer_fn && apic->apicv_active) {
 		WARN_ON(kvm_get_running_vcpu() != vcpu);
 		kvm_apic_inject_pending_timer_irqs(apic);
 		return;
@@ -2052,6 +2081,16 @@ static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
 	kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
 }
 
+static int get_lvt_index(u32 reg)
+{
+	if (reg == APIC_LVTCMCI)
+		return LVT_CMCI;
+	if (reg < APIC_LVTT || reg > APIC_LVTERR)
+		return -1;
+	return array_index_nospec(
+			(reg - APIC_LVTT) >> 4, KVM_APIC_MAX_NR_LVT_ENTRIES);
+}
+
 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 {
 	int ret = 0;
@@ -2098,13 +2137,10 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		apic_set_spiv(apic, val & mask);
 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
 			int i;
-			u32 lvt_val;
 
-			for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
-				lvt_val = kvm_lapic_get_reg(apic,
-						       APIC_LVTT + 0x10 * i);
-				kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
-					     lvt_val | APIC_LVT_MASKED);
+			for (i = 0; i < apic->nr_lvt_entries; i++) {
+				kvm_lapic_set_reg(apic, APIC_LVTx(i),
+					kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED);
 			}
 			apic_update_lvtt(apic);
 			atomic_set(&apic->lapic_timer.pending, 0);
@@ -2133,16 +2169,15 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	case APIC_LVTTHMR:
 	case APIC_LVTPC:
 	case APIC_LVT1:
-	case APIC_LVTERR: {
-		/* TODO: Check vector */
-		size_t size;
-		u32 index;
-
+	case APIC_LVTERR:
+	case APIC_LVTCMCI: {
+		u32 index = get_lvt_index(reg);
+		if (!kvm_lapic_lvt_supported(apic, index)) {
+			ret = 1;
+			break;
+		}
 		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
-		size = ARRAY_SIZE(apic_lvt_mask);
-		index = array_index_nospec(
-				(reg - APIC_LVTT) >> 4, size);
 		val &= apic_lvt_mask[index];
 		kvm_lapic_set_reg(apic, reg, val);
 		break;
@@ -2246,10 +2281,28 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
 /* emulate APIC access in a trap manner */
 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
 {
-	u32 val = kvm_lapic_get_reg(vcpu->arch.apic, offset);
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u64 val;
+
+	if (apic_x2apic_mode(apic)) {
+		if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm))
+			return;
+	} else {
+		val = kvm_lapic_get_reg(apic, offset);
+	}
 
-	/* TODO: optimize to just emulate side effect w/o one more write */
-	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
+	/*
+	 * ICR is a single 64-bit register when x2APIC is enabled.  For legacy
+	 * xAPIC, ICR writes need to go down the common (slightly slower) path
+	 * to get the upper half from ICR2.
+	 */
+	if (apic_x2apic_mode(apic) && offset == APIC_ICR) {
+		kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32));
+		trace_kvm_apic_write(APIC_ICR, val);
+	} else {
+		/* TODO: optimize to just emulate side effect w/o one more write */
+		kvm_lapic_reg_write(apic, offset, (u32)val);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
 
@@ -2344,8 +2397,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
 		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
 
-	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
+	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
+		kvm_vcpu_update_apicv(vcpu);
 		static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
+	}
 
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
@@ -2361,7 +2416,7 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (vcpu->arch.apicv_active) {
+	if (apic->apicv_active) {
 		/* irr_pending is always true when apicv is activated. */
 		apic->irr_pending = true;
 		apic->isr_count = 1;
@@ -2401,8 +2456,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
 	kvm_apic_set_version(apic->vcpu);
 
-	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
-		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+	for (i = 0; i < apic->nr_lvt_entries; i++)
+		kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
 	apic_update_lvtt(apic);
 	if (kvm_vcpu_is_reset_bsp(vcpu) &&
 	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
@@ -2436,10 +2491,10 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	vcpu->arch.pv_eoi.msr_val = 0;
 	apic_update_ppr(apic);
-	if (vcpu->arch.apicv_active) {
+	if (apic->apicv_active) {
 		static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
 		static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1);
-		static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, -1);
+		static_call_cond(kvm_x86_hwapic_isr_update)(-1);
 	}
 
 	vcpu->arch.apic_arb_prio = 0;
@@ -2532,6 +2587,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	}
 	apic->vcpu = vcpu;
 
+	apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
+
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_ABS_HARD);
 	apic->lapic_timer.timer.function = apic_timer_fn;
@@ -2716,10 +2773,10 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 	kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
 	kvm_apic_update_apicv(vcpu);
 	apic->highest_isr_cache = -1;
-	if (vcpu->arch.apicv_active) {
+	if (apic->apicv_active) {
 		static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
 		static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
-		static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
+		static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
 	}
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	if (ioapic_in_kernel(vcpu->kvm))
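The diff derives the number of LVT entries from the vCPU's MCE capabilities and folds that count into the local APIC version register. The standalone sketch below (not part of the patch; the constants are mirrored from asm/mce.h and KVM's lapic.h) reproduces that arithmetic:

/*
 * Sketch of kvm_apic_calc_nr_lvt_entries() and the version-register
 * encoding: the CMCI LVT exists only when MCG_CMCI_P is set in the MCE
 * capability MSR, and LVR carries "max LVT entry" (count - 1) in bits
 * 23:16.
 */
#include <stdint.h>
#include <stdio.h>

#define KVM_APIC_MAX_NR_LVT_ENTRIES	7	/* LVT_CMCI is the 7th entry */
#define MCG_CMCI_P			(1ULL << 10)
#define APIC_VERSION			0x14UL

static int calc_nr_lvt_entries(uint64_t mcg_cap)
{
	/* Drop the CMCI entry if the vCPU doesn't advertise CMCI. */
	return KVM_APIC_MAX_NR_LVT_ENTRIES - !(mcg_cap & MCG_CMCI_P);
}

int main(void)
{
	uint64_t caps[] = { 0, MCG_CMCI_P };

	for (int i = 0; i < 2; i++) {
		int nr = calc_nr_lvt_entries(caps[i]);
		uint32_t lvr = APIC_VERSION | ((uint32_t)(nr - 1) << 16);

		/* 6 entries -> LVR 0x50014, 7 entries -> LVR 0x60014 */
		printf("mcg_cap=%#llx nr_lvt=%d LVR=%#x\n",
		       (unsigned long long)caps[i], nr, lvr);
	}
	return 0;
}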
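For the reworked physical-destination match in kvm_apic_match_physical_addr(), a hypothetical standalone helper (names invented for illustration; the broadcast check is omitted) shows how one combined condition folds the plain x2APIC check and the hotplug hack together:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * An MDA above 0xff can't be encoded as an xAPIC ID, so it can only be
 * targeting the x2APIC ID; any other MDA sent to an xAPIC-mode vCPU
 * must match the (writable) xAPIC ID, never the x2APIC ID.
 */
static bool match_physical(bool x2apic_mode, uint32_t x2apic_id,
			   uint8_t xapic_id, uint32_t mda)
{
	if (x2apic_mode || mda > 0xff)
		return mda == x2apic_id;

	return mda == xapic_id;
}

int main(void)
{
	/* Hotplugged vCPU with x2APIC ID 0x123 is reachable via 0x123
	 * even while still in xAPIC mode: prints 1. */
	printf("%d\n", match_physical(false, 0x123, 0x23, 0x123));

	/* A vCPU that rewrote its xAPIC ID (x2APIC ID 1, xAPIC ID 5):
	 * MDA 1 must not spuriously match the x2APIC ID: prints 0. */
	printf("%d\n", match_physical(false, 1, 5, 1));
	return 0;
}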
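And because the trap-style write path now treats the x2APIC ICR as a single 64-bit register, the split kvm_apic_write_nodecode() performs before calling kvm_apic_send_ipi() amounts to the following (a standalone sketch; the printf stands in for the real IPI send):

#include <stdint.h>
#include <stdio.h>

/* Decompose a trapped 64-bit x2APIC ICR write into the two halves that
 * kvm_apic_send_ipi() expects: the command word and the destination. */
static void split_x2apic_icr(uint64_t val, uint32_t *icr_low, uint32_t *icr_high)
{
	*icr_low  = (uint32_t)val;	   /* vector, delivery/dest mode, ... */
	*icr_high = (uint32_t)(val >> 32); /* destination: a full x2APIC ID */
}

int main(void)
{
	/* Fixed-delivery vector 0xfe to x2APIC ID 0x123. */
	uint64_t val = ((uint64_t)0x123 << 32) | 0xfe;
	uint32_t lo, hi;

	split_x2apic_icr(val, &lo, &hi);
	printf("icr_low=%#x icr_high=%#x\n", lo, hi);	/* 0xfe, 0x123 */
	return 0;
}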