diff options
Diffstat (limited to 'arch/x86/kvm/lapic.c')
-rw-r--r-- | arch/x86/kvm/lapic.c | 174 |
1 files changed, 110 insertions, 64 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2098dc689088..73418dc0ebb2 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -221,13 +221,6 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, } } -static void kvm_apic_map_free(struct rcu_head *rcu) -{ - struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu); - - kvfree(map); -} - static int kvm_recalculate_phys_map(struct kvm_apic_map *new, struct kvm_vcpu *vcpu, bool *xapic_id_mismatch) @@ -382,7 +375,7 @@ enum { DIRTY }; -void kvm_recalculate_apic_map(struct kvm *kvm) +static void kvm_recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; struct kvm_vcpu *vcpu; @@ -489,7 +482,7 @@ out: mutex_unlock(&kvm->arch.apic_map_lock); if (old) - call_rcu(&old->rcu, kvm_apic_map_free); + kvfree_rcu(old, rcu); kvm_make_scan_ioapic_request(kvm); } @@ -598,7 +591,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) * version first and level-triggered interrupts never get EOIed in * IOAPIC. */ - if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) && + if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) && !ioapic_in_kernel(vcpu->kvm)) v |= APIC_LVR_DIRECTED_EOI; kvm_lapic_set_reg(apic, APIC_LVR, v); @@ -662,27 +655,29 @@ static u8 count_vectors(void *bitmap) return count; } -bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr) +bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr) { + unsigned long pir_vals[NR_PIR_WORDS]; + u32 *__pir = (void *)pir_vals; u32 i, vec; - u32 pir_val, irr_val, prev_irr_val; + u32 irr_val, prev_irr_val; int max_updated_irr; max_updated_irr = -1; *max_irr = -1; + if (!pi_harvest_pir(pir, pir_vals)) + return false; + for (i = vec = 0; i <= 7; i++, vec += 32) { u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10); - irr_val = *p_irr; - pir_val = READ_ONCE(pir[i]); - - if (pir_val) { - pir_val = xchg(&pir[i], 0); + irr_val = READ_ONCE(*p_irr); + if (__pir[i]) { prev_irr_val = irr_val; do { - irr_val = prev_irr_val | pir_val; + irr_val = prev_irr_val | __pir[i]; } while (prev_irr_val != irr_val && !try_cmpxchg(p_irr, &prev_irr_val, irr_val)); @@ -698,7 +693,7 @@ bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr) } EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); -bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr) +bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr) { struct kvm_lapic *apic = vcpu->arch.apic; bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr); @@ -734,10 +729,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) { if (unlikely(apic->apicv_active)) { - /* need to update RVI */ kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); - kvm_x86_call(hwapic_irr_update)(apic->vcpu, - apic_find_highest_irr(apic)); } else { apic->irr_pending = false; kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); @@ -763,7 +755,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) * just set SVI. */ if (unlikely(apic->apicv_active)) - kvm_x86_call(hwapic_isr_update)(vec); + kvm_x86_call(hwapic_isr_update)(apic->vcpu, vec); else { ++apic->isr_count; BUG_ON(apic->isr_count > MAX_APIC_VECTOR); @@ -808,7 +800,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) * and must be left alone. */ if (unlikely(apic->apicv_active)) - kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic)); + kvm_x86_call(hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic)); else { --apic->isr_count; BUG_ON(apic->isr_count < 0); @@ -816,6 +808,17 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) } } +void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active) + return; + + kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); +} +EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr); + int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) { /* This may race with setting of irr in __apic_accept_irq() and @@ -1458,6 +1461,14 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) if (!kvm_ioapic_handles_vector(apic, vector)) return; + /* + * If the intercepted EOI is for an IRQ that was pending from previous + * routing, then re-scan the I/O APIC routes as EOIs for the IRQ likely + * no longer need to be intercepted. + */ + if (apic->vcpu->arch.highest_stale_pending_ioapic_eoi == vector) + kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu); + /* Request a KVM exit to inform the userspace IOAPIC. */ if (irqchip_split(apic->vcpu->kvm)) { apic->vcpu->arch.pending_ioapic_eoi = vector; @@ -1789,8 +1800,17 @@ static void apic_update_lvtt(struct kvm_lapic *apic) static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT); + u32 reg; + + /* + * Assume a timer IRQ was "injected" if the APIC is protected. KVM's + * copy of the vIRR is bogus, it's the responsibility of the caller to + * precisely check whether or not a timer IRQ is pending. + */ + if (apic->guest_apic_protected) + return true; + reg = kvm_lapic_get_reg(apic, APIC_LVTT); if (kvm_apic_hw_enabled(apic)) { int vec = reg & APIC_VECTOR_MASK; void *bitmap = apic->regs + APIC_ISR; @@ -2357,7 +2377,7 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVTT: if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; - val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); + val &= (apic_lvt_mask[LVT_TIMER] | apic->lapic_timer.timer_mode_mask); kvm_lapic_set_reg(apic, APIC_LVTT, val); apic_update_lvtt(apic); break; @@ -2577,7 +2597,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) return (tpr & 0xf0) >> 4; } -void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) +static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value) { u64 old_value = vcpu->arch.apic_base; struct kvm_lapic *apic = vcpu->arch.apic; @@ -2585,7 +2605,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) vcpu->arch.apic_base = value; if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) - kvm_update_cpuid_runtime(vcpu); + vcpu->arch.cpuid_dynamic_bits_dirty = true; if (!apic) return; @@ -2625,29 +2645,61 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) } } +int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated) +{ + enum lapic_mode old_mode = kvm_get_apic_mode(vcpu); + enum lapic_mode new_mode = kvm_apic_mode(value); + + if (vcpu->arch.apic_base == value) + return 0; + + u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff | + (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); + + if ((value & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID) + return 1; + if (!host_initiated) { + if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC) + return 1; + if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC) + return 1; + } + + __kvm_apic_set_base(vcpu, value); + kvm_recalculate_apic_map(vcpu->kvm); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_apic_set_base); + void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic->apicv_active) { - /* irr_pending is always true when apicv is activated. */ - apic->irr_pending = true; + /* + * When APICv is enabled, KVM must always search the IRR for a pending + * IRQ, as other vCPUs and devices can set IRR bits even if the vCPU + * isn't running. If APICv is disabled, KVM _should_ search the IRR + * for a pending IRQ. But KVM currently doesn't ensure *all* hardware, + * e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching + * the IRR at this time could race with IRQ delivery from hardware that + * still sees APICv as being enabled. + * + * FIXME: Ensure other vCPUs and devices observe the change in APICv + * state prior to updating KVM's metadata caches, so that KVM + * can safely search the IRR and set irr_pending accordingly. + */ + apic->irr_pending = true; + + if (apic->apicv_active) apic->isr_count = 1; - } else { - /* - * Don't clear irr_pending, searching the IRR can race with - * updates from the CPU as APICv is still active from hardware's - * perspective. The flag will be cleared as appropriate when - * KVM injects the interrupt. - */ + else apic->isr_count = count_vectors(apic->regs + APIC_ISR); - } + apic->highest_isr_cache = -1; } int kvm_alloc_apic_access_page(struct kvm *kvm) { - struct page *page; void __user *hva; int ret = 0; @@ -2663,17 +2715,6 @@ int kvm_alloc_apic_access_page(struct kvm *kvm) goto out; } - page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - if (is_error_page(page)) { - ret = -EFAULT; - goto out; - } - - /* - * Do not pin the page in memory, so that memory hot-unplug - * is able to migrate it. - */ - put_page(page); kvm->arch.apic_access_memslot_enabled = true; out: mutex_unlock(&kvm->slots_lock); @@ -2728,7 +2769,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_reset_bsp(vcpu)) msr_val |= MSR_IA32_APICBASE_BSP; - kvm_lapic_set_base(vcpu, msr_val); + + /* + * Use the inner helper to avoid an extra recalcuation of the + * optimized APIC map if some other task has dirtied the map. + * The recalculation needed for this vCPU will be done after + * all APIC state has been initialized (see below). + */ + __kvm_apic_set_base(vcpu, msr_val); } if (!apic) @@ -2778,8 +2826,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic_update_ppr(apic); if (apic->apicv_active) { kvm_x86_call(apicv_post_state_restore)(vcpu); - kvm_x86_call(hwapic_irr_update)(vcpu, -1); - kvm_x86_call(hwapic_isr_update)(-1); + kvm_x86_call(hwapic_isr_update)(vcpu, -1); } vcpu->arch.apic_arb_prio = 0; @@ -2887,9 +2934,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu); - hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_HARD); - apic->lapic_timer.timer.function = apic_timer_fn; + hrtimer_setup(&apic->lapic_timer.timer, apic_timer_fn, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_HARD); if (lapic_timer_advance) apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; @@ -2932,6 +2978,9 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) if (!kvm_apic_present(vcpu)) return -1; + if (apic->guest_apic_protected) + return -1; + __apic_update_ppr(apic, &ppr); return apic_has_interrupt_for_ppr(apic, ppr); } @@ -3069,7 +3118,6 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_x86_call(apicv_pre_state_restore)(vcpu); - kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); /* set SPIV separately to get count of SW disabled APICs right */ apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); @@ -3095,9 +3143,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_apic_update_apicv(vcpu); if (apic->apicv_active) { kvm_x86_call(apicv_post_state_restore)(vcpu); - kvm_x86_call(hwapic_irr_update)(vcpu, - apic_find_highest_irr(apic)); - kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic)); + kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); } kvm_make_request(KVM_REQ_EVENT, vcpu); if (ioapic_in_kernel(vcpu->kvm)) @@ -3366,9 +3412,9 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu) if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); else - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED); } if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) { if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { @@ -3377,7 +3423,7 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu) sipi_vector = apic->sipi_vector; kvm_x86_call(vcpu_deliver_sipi_vector)(vcpu, sipi_vector); - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); } } return 0; |