diff options
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 860 |
1 files changed, 451 insertions, 409 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7b74fc0a986b..f64e45d6c0f4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -80,6 +80,7 @@ #include <asm/plpar_wrappers.h> #include "book3s.h" +#include "book3s_hv.h" #define CREATE_TRACE_POINTS #include "trace_hv.h" @@ -127,11 +128,6 @@ static bool nested = true; module_param(nested, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)"); -static inline bool nesting_enabled(struct kvm *kvm) -{ - return kvm->arch.nested_enable && kvm_is_radix(kvm); -} - static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); /* @@ -276,22 +272,26 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) * they should never fail.) */ -static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc) +static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb) { unsigned long flags; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + spin_lock_irqsave(&vc->stoltb_lock, flags); - vc->preempt_tb = mftb(); + vc->preempt_tb = tb; spin_unlock_irqrestore(&vc->stoltb_lock, flags); } -static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc) +static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb) { unsigned long flags; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + spin_lock_irqsave(&vc->stoltb_lock, flags); if (vc->preempt_tb != TB_NIL) { - vc->stolen_tb += mftb() - vc->preempt_tb; + vc->stolen_tb += tb - vc->preempt_tb; vc->preempt_tb = TB_NIL; } spin_unlock_irqrestore(&vc->stoltb_lock, flags); @@ -301,6 +301,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; + u64 now; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + now = mftb(); /* * We can test vc->runner without taking the vcore lock, @@ -309,12 +315,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) * ever sets it to NULL. */ if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) - kvmppc_core_end_stolen(vc); + kvmppc_core_end_stolen(vc, now); spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && vcpu->arch.busy_preempt != TB_NIL) { - vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; + vcpu->arch.busy_stolen += now - vcpu->arch.busy_preempt; vcpu->arch.busy_preempt = TB_NIL; } spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); @@ -324,13 +330,19 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; + u64 now; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + now = mftb(); if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) - kvmppc_core_start_stolen(vc); + kvmppc_core_start_stolen(vc, now); spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) - vcpu->arch.busy_preempt = mftb(); + vcpu->arch.busy_preempt = now; spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } @@ -675,6 +687,8 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) u64 p; unsigned long flags; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + spin_lock_irqsave(&vc->stoltb_lock, flags); p = vc->stolen_tb; if (vc->vcore_state != VCORE_INACTIVE && @@ -684,35 +698,30 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) return p; } -static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, - struct kvmppc_vcore *vc) +static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, + unsigned int pcpu, u64 now, + unsigned long stolen) { struct dtl_entry *dt; struct lppaca *vpa; - unsigned long stolen; - unsigned long core_stolen; - u64 now; - unsigned long flags; dt = vcpu->arch.dtl_ptr; vpa = vcpu->arch.vpa.pinned_addr; - now = mftb(); - core_stolen = vcore_stolen_time(vc, now); - stolen = core_stolen - vcpu->arch.stolen_logged; - vcpu->arch.stolen_logged = core_stolen; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - stolen += vcpu->arch.busy_stolen; - vcpu->arch.busy_stolen = 0; - spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); + if (!dt || !vpa) return; - memset(dt, 0, sizeof(struct dtl_entry)); + dt->dispatch_reason = 7; - dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); - dt->timebase = cpu_to_be64(now + vc->tb_offset); + dt->preempt_reason = 0; + dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid); dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); + dt->ready_to_enqueue_time = 0; + dt->waiting_to_ready_time = 0; + dt->timebase = cpu_to_be64(now); + dt->fault_addr = 0; dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); + ++dt; if (dt == vcpu->arch.dtl.pinned_end) dt = vcpu->arch.dtl.pinned_addr; @@ -723,6 +732,27 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, vcpu->arch.dtl.dirty = true; } +static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, + struct kvmppc_vcore *vc) +{ + unsigned long stolen; + unsigned long core_stolen; + u64 now; + unsigned long flags; + + now = mftb(); + + core_stolen = vcore_stolen_time(vc, now); + stolen = core_stolen - vcpu->arch.stolen_logged; + vcpu->arch.stolen_logged = core_stolen; + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); + stolen += vcpu->arch.busy_stolen; + vcpu->arch.busy_stolen = 0; + spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); + + __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen); +} + /* See if there is a doorbell interrupt pending for a vcpu */ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) { @@ -731,6 +761,8 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) if (vcpu->arch.doorbell_request) return true; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return false; /* * Ensure that the read of vcore->dpdes comes after the read * of vcpu->doorbell_request. This barrier matches the @@ -900,13 +932,14 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) * mode handler is not called but no other threads are in the * source vcore. */ - - spin_lock(&vcore->lock); - if (target->arch.state == KVMPPC_VCPU_RUNNABLE && - vcore->vcore_state != VCORE_INACTIVE && - vcore->runner) - target = vcore->runner; - spin_unlock(&vcore->lock); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + spin_lock(&vcore->lock); + if (target->arch.state == KVMPPC_VCPU_RUNNABLE && + vcore->vcore_state != VCORE_INACTIVE && + vcore->runner) + target = vcore->runner; + spin_unlock(&vcore->lock); + } return kvm_vcpu_yield_to(target); } @@ -1421,6 +1454,43 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +/* + * If the lppaca had pmcregs_in_use clear when we exited the guest, then + * HFSCR_PM is cleared for next entry. If the guest then tries to access + * the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM + * back in the guest HFSCR will cause the next entry to load the PMU SPRs and + * allow the guest access to continue. + */ +static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hfscr_permitted & HFSCR_PM)) + return EMULATE_FAIL; + + vcpu->arch.hfscr |= HFSCR_PM; + + return RESUME_GUEST; +} + +static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB)) + return EMULATE_FAIL; + + vcpu->arch.hfscr |= HFSCR_EBB; + + return RESUME_GUEST; +} + +static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hfscr_permitted & HFSCR_TM)) + return EMULATE_FAIL; + + vcpu->arch.hfscr |= HFSCR_TM; + + return RESUME_GUEST; +} + static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -1451,6 +1521,10 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, run->ready_for_interrupt_injection = 1; switch (vcpu->arch.trap) { /* We're good on these - the host merely wanted to get our attention */ + case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER: + WARN_ON_ONCE(1); /* Should never happen */ + vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; + fallthrough; case BOOK3S_INTERRUPT_HV_DECREMENTER: vcpu->stat.dec_exits++; r = RESUME_GUEST; @@ -1575,7 +1649,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, unsigned long vsid; long err; - if (vcpu->arch.fault_dsisr == HDSISR_CANARY) { + if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) && + unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) { r = RESUME_GUEST; /* Just retry if it's the canary */ break; } @@ -1702,16 +1777,26 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * to emulate. * Otherwise, we just generate a program interrupt to the guest. */ - case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: { + u64 cause = vcpu->arch.hfscr >> 56; + r = EMULATE_FAIL; - if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) && - cpu_has_feature(CPU_FTR_ARCH_300)) - r = kvmppc_emulate_doorbell_instr(vcpu); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cause == FSCR_MSGP_LG) + r = kvmppc_emulate_doorbell_instr(vcpu); + if (cause == FSCR_PM_LG) + r = kvmppc_pmu_unavailable(vcpu); + if (cause == FSCR_EBB_LG) + r = kvmppc_ebb_unavailable(vcpu); + if (cause == FSCR_TM_LG) + r = kvmppc_tm_unavailable(vcpu); + } if (r == EMULATE_FAIL) { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; } break; + } case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; @@ -1768,6 +1853,12 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + /* These need to go to the nested HV */ + case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER: + vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; + vcpu->stat.dec_exits++; + r = RESUME_HOST; + break; /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/ case BOOK3S_INTERRUPT_HMI: case BOOK3S_INTERRUPT_PERFMON: @@ -2096,8 +2187,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, * either vcore->dpdes or doorbell_request. * On POWER8, doorbell_request is 0. */ - *val = get_reg_val(id, vcpu->arch.vcore->dpdes | - vcpu->arch.doorbell_request); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + *val = get_reg_val(id, vcpu->arch.doorbell_request); + else + *val = get_reg_val(id, vcpu->arch.vcore->dpdes); break; case KVM_REG_PPC_VTB: *val = get_reg_val(id, vcpu->arch.vcore->vtb); @@ -2238,8 +2331,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); break; case KVM_REG_PPC_DEC_EXPIRY: - *val = get_reg_val(id, vcpu->arch.dec_expires + - vcpu->arch.vcore->tb_offset); + *val = get_reg_val(id, vcpu->arch.dec_expires); break; case KVM_REG_PPC_ONLINE: *val = get_reg_val(id, vcpu->arch.online); @@ -2335,7 +2427,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.pspb = set_reg_val(id, *val); break; case KVM_REG_PPC_DPDES: - vcpu->arch.vcore->dpdes = set_reg_val(id, *val); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1; + else + vcpu->arch.vcore->dpdes = set_reg_val(id, *val); break; case KVM_REG_PPC_VTB: vcpu->arch.vcore->vtb = set_reg_val(id, *val); @@ -2491,8 +2586,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DEC_EXPIRY: - vcpu->arch.dec_expires = set_reg_val(id, *val) - - vcpu->arch.vcore->tb_offset; + vcpu->arch.dec_expires = set_reg_val(id, *val); break; case KVM_REG_PPC_ONLINE: i = set_reg_val(id, *val); @@ -2715,6 +2809,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) #endif #endif vcpu->arch.mmcr[0] = MMCR0_FC; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT; + vcpu->arch.mmcra = MMCRA_BHRB_DISABLE; + } + vcpu->arch.ctrl = CTRL_RUNLATCH; /* default to host PVR, since we can't spoof it */ kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR)); @@ -2745,6 +2844,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) vcpu->arch.hfscr_permitted = vcpu->arch.hfscr; + /* + * PM, EBB, TM are demand-faulted so start with it clear. + */ + vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM); + kvmppc_mmu_book3s_hv_init(vcpu); vcpu->arch.state = KVMPPC_VCPU_NOTREADY; @@ -2869,13 +2973,13 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) unsigned long dec_nsec, now; now = get_tb(); - if (now > vcpu->arch.dec_expires) { + if (now > kvmppc_dec_expires_host_tb(vcpu)) { /* decrementer has already gone negative */ kvmppc_core_queue_dec(vcpu); kvmppc_core_prepare_to_enter(vcpu); return; } - dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now); + dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now); hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL); vcpu->arch.timer_running = 1; } @@ -2883,14 +2987,14 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) extern int __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, - struct kvm_vcpu *vcpu) + struct kvm_vcpu *vcpu, u64 tb) { u64 now; if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) return; spin_lock_irq(&vcpu->arch.tbacct_lock); - now = mftb(); + now = tb; vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) - vcpu->arch.stolen_logged; vcpu->arch.busy_preempt = now; @@ -2945,30 +3049,59 @@ static void kvmppc_release_hwthread(int cpu) tpaca->kvm_hstate.kvm_split_mode = NULL; } +static DEFINE_PER_CPU(struct kvm *, cpu_in_guest); + static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) { struct kvm_nested_guest *nested = vcpu->arch.nested; - cpumask_t *cpu_in_guest; + cpumask_t *need_tlb_flush; int i; + if (nested) + need_tlb_flush = &nested->need_tlb_flush; + else + need_tlb_flush = &kvm->arch.need_tlb_flush; + cpu = cpu_first_tlb_thread_sibling(cpu); - if (nested) { - cpumask_set_cpu(cpu, &nested->need_tlb_flush); - cpu_in_guest = &nested->cpu_in_guest; - } else { - cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush); - cpu_in_guest = &kvm->arch.cpu_in_guest; - } + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); + i += cpu_tlb_thread_sibling_step()) + cpumask_set_cpu(i, need_tlb_flush); + /* - * Make sure setting of bit in need_tlb_flush precedes - * testing of cpu_in_guest bits. The matching barrier on - * the other side is the first smp_mb() in kvmppc_run_core(). + * Make sure setting of bit in need_tlb_flush precedes testing of + * cpu_in_guest. The matching barrier on the other side is hwsync + * when switching to guest MMU mode, which happens between + * cpu_in_guest being set to the guest kvm, and need_tlb_flush bit + * being tested. */ smp_mb(); + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); - i += cpu_tlb_thread_sibling_step()) - if (cpumask_test_cpu(i, cpu_in_guest)) + i += cpu_tlb_thread_sibling_step()) { + struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i); + + if (running == kvm) smp_call_function_single(i, do_nothing, NULL, 1); + } +} + +static void do_migrate_away_vcpu(void *arg) +{ + struct kvm_vcpu *vcpu = arg; + struct kvm *kvm = vcpu->kvm; + + /* + * If the guest has GTSE, it may execute tlbie, so do a eieio; tlbsync; + * ptesync sequence on the old CPU before migrating to a new one, in + * case we interrupted the guest between a tlbie ; eieio ; + * tlbsync; ptesync sequence. + * + * Otherwise, ptesync is sufficient for ordering tlbiel sequences. + */ + if (kvm->arch.lpcr & LPCR_GTSE) + asm volatile("eieio; tlbsync; ptesync"); + else + asm volatile("ptesync"); } static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) @@ -2994,14 +3127,17 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) * can move around between pcpus. To cope with this, when * a vcpu moves from one pcpu to another, we need to tell * any vcpus running on the same core as this vcpu previously - * ran to flush the TLB. The TLB is shared between threads, - * so we use a single bit in .need_tlb_flush for all 4 threads. + * ran to flush the TLB. */ if (prev_cpu != pcpu) { - if (prev_cpu >= 0 && - cpu_first_tlb_thread_sibling(prev_cpu) != - cpu_first_tlb_thread_sibling(pcpu)) - radix_flush_cpu(kvm, prev_cpu, vcpu); + if (prev_cpu >= 0) { + if (cpu_first_tlb_thread_sibling(prev_cpu) != + cpu_first_tlb_thread_sibling(pcpu)) + radix_flush_cpu(kvm, prev_cpu, vcpu); + + smp_call_function_single(prev_cpu, + do_migrate_away_vcpu, vcpu, 1); + } if (nested) nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; else @@ -3013,7 +3149,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) { int cpu; struct paca_struct *tpaca; - struct kvm *kvm = vc->kvm; cpu = vc->pcpu; if (vcpu) { @@ -3024,7 +3159,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) cpu += vcpu->arch.ptid; vcpu->cpu = vc->pcpu; vcpu->arch.thread_cpu = cpu; - cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); } tpaca = paca_ptrs[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; @@ -3125,6 +3259,8 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) { struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores); + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + vc->vcore_state = VCORE_PREEMPT; vc->pcpu = smp_processor_id(); if (vc->num_threads < threads_per_vcore(vc->kvm)) { @@ -3134,14 +3270,16 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) } /* Start accumulating stolen time */ - kvmppc_core_start_stolen(vc); + kvmppc_core_start_stolen(vc, mftb()); } static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc) { struct preempted_vcore_list *lp; - kvmppc_core_end_stolen(vc); + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + + kvmppc_core_end_stolen(vc, mftb()); if (!list_empty(&vc->preempt_list)) { lp = &per_cpu(preempted_vcores, vc->pcpu); spin_lock(&lp->lock); @@ -3268,7 +3406,7 @@ static void prepare_threads(struct kvmppc_vcore *vc) vcpu->arch.ret = RESUME_GUEST; else continue; - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); wake_up(&vcpu->arch.cpu_run); } } @@ -3287,7 +3425,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) list_del_init(&pvc->preempt_list); if (pvc->runner == NULL) { pvc->vcore_state = VCORE_INACTIVE; - kvmppc_core_end_stolen(pvc); + kvmppc_core_end_stolen(pvc, mftb()); } spin_unlock(&pvc->lock); continue; @@ -3296,7 +3434,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) spin_unlock(&pvc->lock); continue; } - kvmppc_core_end_stolen(pvc); + kvmppc_core_end_stolen(pvc, mftb()); pvc->vcore_state = VCORE_PIGGYBACK; if (cip->total_threads >= target_threads) break; @@ -3340,7 +3478,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) */ spin_unlock(&vc->lock); /* cancel pending dec exception if dec is positive */ - if (now < vcpu->arch.dec_expires && + if (now < kvmppc_dec_expires_host_tb(vcpu) && kvmppc_core_pending_dec(vcpu)) kvmppc_core_dequeue_dec(vcpu); @@ -3363,7 +3501,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) else ++still_running; } else { - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); wake_up(&vcpu->arch.cpu_run); } } @@ -3372,7 +3510,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) kvmppc_vcore_preempt(vc); } else if (vc->runner) { vc->vcore_state = VCORE_PREEMPT; - kvmppc_core_start_stolen(vc); + kvmppc_core_start_stolen(vc, mftb()); } else { vc->vcore_state = VCORE_INACTIVE; } @@ -3503,7 +3641,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); wake_up(&vcpu->arch.cpu_run); } goto out; @@ -3748,7 +3886,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_release_hwthread(pcpu + i); if (sip && sip->napped[i]) kvmppc_ipi_thread(pcpu + i); - cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); } spin_unlock(&vc->lock); @@ -3770,211 +3907,137 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) trace_kvmppc_run_core(vc, 1); } -static void load_spr_state(struct kvm_vcpu *vcpu) -{ - mtspr(SPRN_DSCR, vcpu->arch.dscr); - mtspr(SPRN_IAMR, vcpu->arch.iamr); - mtspr(SPRN_PSPB, vcpu->arch.pspb); - mtspr(SPRN_FSCR, vcpu->arch.fscr); - mtspr(SPRN_TAR, vcpu->arch.tar); - mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); - mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); - mtspr(SPRN_BESCR, vcpu->arch.bescr); - mtspr(SPRN_TIDR, vcpu->arch.tid); - mtspr(SPRN_AMR, vcpu->arch.amr); - mtspr(SPRN_UAMOR, vcpu->arch.uamor); - - /* - * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI] - * clear (or hstate set appropriately to catch those registers - * being clobbered if we take a MCE or SRESET), so those are done - * later. - */ - - if (!(vcpu->arch.ctrl & 1)) - mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1); -} - -static void store_spr_state(struct kvm_vcpu *vcpu) -{ - vcpu->arch.ctrl = mfspr(SPRN_CTRLF); - - vcpu->arch.iamr = mfspr(SPRN_IAMR); - vcpu->arch.pspb = mfspr(SPRN_PSPB); - vcpu->arch.fscr = mfspr(SPRN_FSCR); - vcpu->arch.tar = mfspr(SPRN_TAR); - vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); - vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); - vcpu->arch.bescr = mfspr(SPRN_BESCR); - vcpu->arch.tid = mfspr(SPRN_TIDR); - vcpu->arch.amr = mfspr(SPRN_AMR); - vcpu->arch.uamor = mfspr(SPRN_UAMOR); - vcpu->arch.dscr = mfspr(SPRN_DSCR); -} - -/* - * Privileged (non-hypervisor) host registers to save. - */ -struct p9_host_os_sprs { - unsigned long dscr; - unsigned long tidr; - unsigned long iamr; - unsigned long amr; - unsigned long fscr; -}; - -static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs) -{ - host_os_sprs->dscr = mfspr(SPRN_DSCR); - host_os_sprs->tidr = mfspr(SPRN_TIDR); - host_os_sprs->iamr = mfspr(SPRN_IAMR); - host_os_sprs->amr = mfspr(SPRN_AMR); - host_os_sprs->fscr = mfspr(SPRN_FSCR); -} - -/* vcpu guest regs must already be saved */ -static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, - struct p9_host_os_sprs *host_os_sprs) -{ - mtspr(SPRN_PSPB, 0); - mtspr(SPRN_UAMOR, 0); - - mtspr(SPRN_DSCR, host_os_sprs->dscr); - mtspr(SPRN_TIDR, host_os_sprs->tidr); - mtspr(SPRN_IAMR, host_os_sprs->iamr); - - if (host_os_sprs->amr != vcpu->arch.amr) - mtspr(SPRN_AMR, host_os_sprs->amr); - - if (host_os_sprs->fscr != vcpu->arch.fscr) - mtspr(SPRN_FSCR, host_os_sprs->fscr); - - /* Save guest CTRL register, set runlatch to 1 */ - if (!(vcpu->arch.ctrl & 1)) - mtspr(SPRN_CTRLT, 1); -} - static inline bool hcall_is_xics(unsigned long req) { return req == H_EOI || req == H_CPPR || req == H_IPI || req == H_IPOLL || req == H_XIRR || req == H_XIRR_X; } -/* - * Guest entry for POWER9 and later CPUs. - */ -static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, - unsigned long lpcr) +static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu) +{ + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; + if (lp) { + u32 yield_count = be32_to_cpu(lp->yield_count) + 1; + lp->yield_count = cpu_to_be32(yield_count); + vcpu->arch.vpa.dirty = 1; + } +} + +/* call our hypervisor to load up HV regs and go */ +static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) { struct kvmppc_vcore *vc = vcpu->arch.vcore; + unsigned long host_psscr; + unsigned long msr; + struct hv_guest_state hvregs; struct p9_host_os_sprs host_os_sprs; s64 dec; - u64 tb; - int trap, save_pmu; - - WARN_ON_ONCE(vcpu->arch.ceded); + int trap; - dec = mfspr(SPRN_DEC); - tb = mftb(); - if (dec < 0) - return BOOK3S_INTERRUPT_HV_DECREMENTER; - local_paca->kvm_hstate.dec_expires = dec + tb; - if (local_paca->kvm_hstate.dec_expires < time_limit) - time_limit = local_paca->kvm_hstate.dec_expires; + msr = mfmsr(); save_p9_host_os_sprs(&host_os_sprs); - kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */ + /* + * We need to save and restore the guest visible part of the + * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor + * doesn't do this for us. Note only required if pseries since + * this is done in kvmhv_vcpu_entry_p9() below otherwise. + */ + host_psscr = mfspr(SPRN_PSSCR_PR); - kvmppc_subcore_enter_guest(); + kvmppc_msr_hard_disable_set_facilities(vcpu, msr); + if (lazy_irq_pending()) + return 0; - vc->entry_exit_map = 1; - vc->in_guest = 1; + if (unlikely(load_vcpu_state(vcpu, &host_os_sprs))) + msr = mfmsr(); /* TM restore can update msr */ - if (vcpu->arch.vpa.pinned_addr) { - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; - u32 yield_count = be32_to_cpu(lp->yield_count) + 1; - lp->yield_count = cpu_to_be32(yield_count); - vcpu->arch.vpa.dirty = 1; - } - - if (cpu_has_feature(CPU_FTR_TM) || - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) - kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); + if (vcpu->arch.psscr != host_psscr) + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); -#ifdef CONFIG_PPC_PSERIES - if (kvmhv_on_pseries()) { - barrier(); - if (vcpu->arch.vpa.pinned_addr) { - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; - get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; - } else { - get_lppaca()->pmcregs_in_use = 1; - } - barrier(); + kvmhv_save_hv_regs(vcpu, &hvregs); + hvregs.lpcr = lpcr; + vcpu->arch.regs.msr = vcpu->arch.shregs.msr; + hvregs.version = HV_GUEST_STATE_VERSION; + if (vcpu->arch.nested) { + hvregs.lpid = vcpu->arch.nested->shadow_lpid; + hvregs.vcpu_token = vcpu->arch.nested_vcpu_id; + } else { + hvregs.lpid = vcpu->kvm->arch.lpid; + hvregs.vcpu_token = vcpu->vcpu_id; } -#endif - kvmhv_load_guest_pmu(vcpu); - - msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); - load_fp_state(&vcpu->arch.fp); -#ifdef CONFIG_ALTIVEC - load_vr_state(&vcpu->arch.vr); -#endif - mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); - - load_spr_state(vcpu); + hvregs.hdec_expiry = time_limit; /* - * When setting DEC, we must always deal with irq_work_raise via NMI vs - * setting DEC. The problem occurs right as we switch into guest mode - * if a NMI hits and sets pending work and sets DEC, then that will - * apply to the guest and not bring us back to the host. + * When setting DEC, we must always deal with irq_work_raise + * via NMI vs setting DEC. The problem occurs right as we + * switch into guest mode if a NMI hits and sets pending work + * and sets DEC, then that will apply to the guest and not + * bring us back to the host. * - * irq_work_raise could check a flag (or possibly LPCR[HDICE] for - * example) and set HDEC to 1? That wouldn't solve the nested hv - * case which needs to abort the hcall or zero the time limit. + * irq_work_raise could check a flag (or possibly LPCR[HDICE] + * for example) and set HDEC to 1? That wouldn't solve the + * nested hv case which needs to abort the hcall or zero the + * time limit. * * XXX: Another day's problem. */ - mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb()); + mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb); + + mtspr(SPRN_DAR, vcpu->arch.shregs.dar); + mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); + switch_pmu_to_guest(vcpu, &host_os_sprs); + trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), + __pa(&vcpu->arch.regs)); + kvmhv_restore_hv_return_state(vcpu, &hvregs); + switch_pmu_to_host(vcpu, &host_os_sprs); + vcpu->arch.shregs.msr = vcpu->arch.regs.msr; + vcpu->arch.shregs.dar = mfspr(SPRN_DAR); + vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); + + store_vcpu_state(vcpu); - if (kvmhv_on_pseries()) { - /* - * We need to save and restore the guest visible part of the - * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor - * doesn't do this for us. Note only required if pseries since - * this is done in kvmhv_vcpu_entry_p9() below otherwise. - */ - unsigned long host_psscr; - /* call our hypervisor to load up HV regs and go */ - struct hv_guest_state hvregs; + dec = mfspr(SPRN_DEC); + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ + dec = (s32) dec; + *tb = mftb(); + vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset); - host_psscr = mfspr(SPRN_PSSCR_PR); - mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); - kvmhv_save_hv_regs(vcpu, &hvregs); - hvregs.lpcr = lpcr; - vcpu->arch.regs.msr = vcpu->arch.shregs.msr; - hvregs.version = HV_GUEST_STATE_VERSION; - if (vcpu->arch.nested) { - hvregs.lpid = vcpu->arch.nested->shadow_lpid; - hvregs.vcpu_token = vcpu->arch.nested_vcpu_id; - } else { - hvregs.lpid = vcpu->kvm->arch.lpid; - hvregs.vcpu_token = vcpu->vcpu_id; - } - hvregs.hdec_expiry = time_limit; - mtspr(SPRN_DAR, vcpu->arch.shregs.dar); - mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); - trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), - __pa(&vcpu->arch.regs)); - kvmhv_restore_hv_return_state(vcpu, &hvregs); - vcpu->arch.shregs.msr = vcpu->arch.regs.msr; - vcpu->arch.shregs.dar = mfspr(SPRN_DAR); - vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); - vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); + timer_rearm_host_dec(*tb); + + restore_p9_host_os_sprs(vcpu, &host_os_sprs); + if (vcpu->arch.psscr != host_psscr) mtspr(SPRN_PSSCR_PR, host_psscr); + return trap; +} + +/* + * Guest entry for POWER9 and later CPUs. + */ +static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, + unsigned long lpcr, u64 *tb) +{ + u64 next_timer; + int trap; + + next_timer = timer_get_next_tb(); + if (*tb >= next_timer) + return BOOK3S_INTERRUPT_HV_DECREMENTER; + if (next_timer < time_limit) + time_limit = next_timer; + else if (*tb >= time_limit) /* nested time limit */ + return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER; + + vcpu->arch.ceded = 0; + + vcpu_vpa_increment_dispatch(vcpu); + + if (kvmhv_on_pseries()) { + trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb); + /* H_CEDE has to be handled now, not later */ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && kvmppc_get_gpr(vcpu, 3) == H_CEDE) { @@ -3982,9 +4045,16 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_set_gpr(vcpu, 3, 0); trap = 0; } + } else { + struct kvm *kvm = vcpu->kvm; + kvmppc_xive_push_vcpu(vcpu); - trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr); + + __this_cpu_write(cpu_in_guest, kvm); + trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb); + __this_cpu_write(cpu_in_guest, NULL); + if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && !(vcpu->arch.shregs.msr & MSR_PR)) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -4009,65 +4079,11 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, } kvmppc_xive_pull_vcpu(vcpu); - if (kvm_is_radix(vcpu->kvm)) + if (kvm_is_radix(kvm)) vcpu->arch.slb_max = 0; } - dec = mfspr(SPRN_DEC); - if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ - dec = (s32) dec; - tb = mftb(); - vcpu->arch.dec_expires = dec + tb; - vcpu->cpu = -1; - vcpu->arch.thread_cpu = -1; - - store_spr_state(vcpu); - - restore_p9_host_os_sprs(vcpu, &host_os_sprs); - - msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); - store_fp_state(&vcpu->arch.fp); -#ifdef CONFIG_ALTIVEC - store_vr_state(&vcpu->arch.vr); -#endif - vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); - - if (cpu_has_feature(CPU_FTR_TM) || - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) - kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true); - - save_pmu = 1; - if (vcpu->arch.vpa.pinned_addr) { - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; - u32 yield_count = be32_to_cpu(lp->yield_count) + 1; - lp->yield_count = cpu_to_be32(yield_count); - vcpu->arch.vpa.dirty = 1; - save_pmu = lp->pmcregs_in_use; - } - /* Must save pmu if this guest is capable of running nested guests */ - save_pmu |= nesting_enabled(vcpu->kvm); - - kvmhv_save_guest_pmu(vcpu, save_pmu); -#ifdef CONFIG_PPC_PSERIES - if (kvmhv_on_pseries()) { - barrier(); - get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); - barrier(); - } -#endif - - vc->entry_exit_map = 0x101; - vc->in_guest = 0; - - mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb()); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); - mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); - - kvmhv_load_host_pmu(); - - kvmppc_subcore_exit_guest(); + vcpu_vpa_increment_dispatch(vcpu); return trap; } @@ -4132,6 +4148,13 @@ static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu) return false; } +static bool kvmppc_vcpu_check_block(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu)) + return true; + return false; +} + /* * Check to see if any of the runnable vcpus on the vcore have pending * exceptions or are no longer ceded @@ -4142,7 +4165,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) int i; for_each_runnable_thread(i, vcpu, vc) { - if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu)) + if (kvmppc_vcpu_check_block(vcpu)) return 1; } @@ -4159,6 +4182,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) int do_sleep = 1; u64 block_ns; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + /* Poll for pending exceptions and ceded state */ cur = start_poll = ktime_get(); if (vc->halt_poll_ns) { @@ -4355,7 +4380,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) for_each_runnable_thread(i, v, vc) { kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { - kvmppc_remove_runnable(vc, v); + kvmppc_remove_runnable(vc, v, mftb()); v->stat.signal_exits++; v->run->exit_reason = KVM_EXIT_INTR; v->arch.ret = -EINTR; @@ -4396,7 +4421,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) kvmppc_vcore_end_preempt(vc); if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); vcpu->stat.signal_exits++; run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; @@ -4417,12 +4442,15 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) { + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); struct kvm_run *run = vcpu->run; int trap, r, pcpu; int srcu_idx; struct kvmppc_vcore *vc; struct kvm *kvm = vcpu->kvm; struct kvm_nested_guest *nested = vcpu->arch.nested; + unsigned long flags; + u64 tb; trace_kvmppc_run_vcpu_enter(vcpu); @@ -4433,16 +4461,11 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, vc = vcpu->arch.vcore; vcpu->arch.ceded = 0; vcpu->arch.run_task = current; - vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; - vcpu->arch.busy_preempt = TB_NIL; vcpu->arch.last_inst = KVM_INST_FETCH_FAILED; - vc->runnable_threads[0] = vcpu; - vc->n_runnable = 1; - vc->runner = vcpu; /* See if the MMU is ready to go */ - if (!kvm->arch.mmu_ready) { + if (unlikely(!kvm->arch.mmu_ready)) { r = kvmhv_setup_mmu(vcpu); if (r) { run->exit_reason = KVM_EXIT_FAIL_ENTRY; @@ -4457,29 +4480,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_update_vpas(vcpu); - init_vcore_to_run(vc); - vc->preempt_tb = TB_NIL; - preempt_disable(); pcpu = smp_processor_id(); - vc->pcpu = pcpu; if (kvm_is_radix(kvm)) kvmppc_prepare_radix_vcpu(vcpu, pcpu); - local_irq_disable(); - hard_irq_disable(); + /* flags save not required, but irq_pmu has no disable/enable API */ + powerpc_local_irq_pmu_save(flags); + if (signal_pending(current)) goto sigpend; - if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready) + if (need_resched() || !kvm->arch.mmu_ready) goto out; if (!nested) { kvmppc_core_prepare_to_enter(vcpu); - if (vcpu->arch.doorbell_request) { - vc->dpdes = 1; - smp_wmb(); - vcpu->arch.doorbell_request = 0; - } if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions)) lpcr |= LPCR_MER; @@ -4490,16 +4505,23 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, goto out; } - kvmppc_clear_host_core(pcpu); + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } - local_paca->kvm_hstate.napping = 0; - local_paca->kvm_hstate.kvm_split_mode = NULL; - kvmppc_start_thread(vcpu, vc); - kvmppc_create_dtl_entry(vcpu, vc); - trace_kvm_guest_enter(vcpu); + tb = mftb(); - vc->vcore_state = VCORE_RUNNING; - trace_kvmppc_run_core(vc, 0); + vcpu->cpu = pcpu; + vcpu->arch.thread_cpu = pcpu; + vc->pcpu = pcpu; + local_paca->kvm_hstate.kvm_vcpu = vcpu; + local_paca->kvm_hstate.ptid = 0; + local_paca->kvm_hstate.fake_suspend = 0; + + __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); + + trace_kvm_guest_enter(vcpu); guest_enter_irqoff(); @@ -4510,7 +4532,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, /* Tell lockdep that we're about to enable interrupts */ trace_hardirqs_on(); - trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr); + trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb); vcpu->arch.trap = trap; trace_hardirqs_off(); @@ -4521,8 +4543,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, set_irq_happened(trap); - kvmppc_set_host_core(pcpu); - context_tracking_guest_exit(); if (!vtime_accounting_enabled_this_cpu()) { local_irq_enable(); @@ -4538,9 +4558,10 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, } vtime_account_guest_exit(); - local_irq_enable(); + vcpu->cpu = -1; + vcpu->arch.thread_cpu = -1; - cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest); + powerpc_local_irq_pmu_restore(flags); preempt_enable(); @@ -4550,7 +4571,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, * by L2 and the L1 decrementer is provided in hdec_expires */ if (kvmppc_core_pending_dec(vcpu) && - ((get_tb() < vcpu->arch.dec_expires) || + ((tb < kvmppc_dec_expires_host_tb(vcpu)) || (trap == BOOK3S_INTERRUPT_SYSCALL && kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) kvmppc_core_dequeue_dec(vcpu); @@ -4565,28 +4586,31 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, } vcpu->arch.ret = r; - if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded && - !kvmppc_vcpu_woken(vcpu)) { + if (is_kvmppc_resume_guest(r) && !kvmppc_vcpu_check_block(vcpu)) { kvmppc_set_timer(vcpu); - while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) { + + prepare_to_rcuwait(wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) { vcpu->stat.signal_exits++; run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; break; } - spin_lock(&vc->lock); - kvmppc_vcore_blocked(vc); - spin_unlock(&vc->lock); + + if (kvmppc_vcpu_check_block(vcpu)) + break; + + trace_kvmppc_vcore_blocked(vc, 0); + schedule(); + trace_kvmppc_vcore_blocked(vc, 1); } + finish_rcuwait(wait); } vcpu->arch.ceded = 0; - vc->vcore_state = VCORE_INACTIVE; - trace_kvmppc_run_core(vc, 1); - done: - kvmppc_remove_runnable(vc, vcpu); trace_kvmppc_run_vcpu_exit(vcpu); return vcpu->arch.ret; @@ -4596,7 +4620,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; out: - local_irq_enable(); + powerpc_local_irq_pmu_restore(flags); preempt_enable(); goto done; } @@ -4606,23 +4630,25 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; int r; int srcu_idx; - unsigned long ebb_regs[3] = {}; /* shut up GCC */ - unsigned long user_tar = 0; - unsigned int user_vrsave; struct kvm *kvm; + unsigned long msr; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } + /* No need to go into the guest when all we'll do is come back out */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + return -EINTR; + } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Don't allow entry with a suspended transaction, because * the guest entry/exit code will lose it. - * If the guest has TM enabled, save away their TM-related SPRs - * (they will get restored by the TM unavailable interrupt). */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && (current->thread.regs->msr & MSR_TM)) { if (MSR_TM_ACTIVE(current->thread.regs->msr)) { @@ -4630,12 +4656,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) run->fail_entry.hardware_entry_failure_reason = 0; return -EINVAL; } - /* Enable TM so we can read the TM SPRs */ - mtmsr(mfmsr() | MSR_TM); - current->thread.tm_tfhar = mfspr(SPRN_TFHAR); - current->thread.tm_tfiar = mfspr(SPRN_TFIAR); - current->thread.tm_texasr = mfspr(SPRN_TEXASR); - current->thread.regs->msr &= ~MSR_TM; } #endif @@ -4650,29 +4670,30 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) kvmppc_core_prepare_to_enter(vcpu); - /* No need to go into the guest when all we'll do is come back out */ - if (signal_pending(current)) { - run->exit_reason = KVM_EXIT_INTR; - return -EINTR; - } - kvm = vcpu->kvm; atomic_inc(&kvm->arch.vcpus_running); /* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */ smp_mb(); - flush_all_to_thread(current); + msr = 0; + if (IS_ENABLED(CONFIG_PPC_FPU)) + msr |= MSR_FP; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + msr |= MSR_VEC; + if (cpu_has_feature(CPU_FTR_VSX)) + msr |= MSR_VSX; + if ((cpu_has_feature(CPU_FTR_TM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) && + (vcpu->arch.hfscr & HFSCR_TM)) + msr |= MSR_TM; + msr = msr_check_and_set(msr); - /* Save userspace EBB and other register values */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) { - ebb_regs[0] = mfspr(SPRN_EBBHR); - ebb_regs[1] = mfspr(SPRN_EBBRR); - ebb_regs[2] = mfspr(SPRN_BESCR); - user_tar = mfspr(SPRN_TAR); - } - user_vrsave = mfspr(SPRN_VRSAVE); + kvmppc_save_user_regs(); - vcpu->arch.waitp = &vcpu->arch.vcore->wait; + kvmppc_save_current_sprs(); + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + vcpu->arch.waitp = &vcpu->arch.vcore->wait; vcpu->arch.pgdir = kvm->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -4711,15 +4732,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); - /* Restore userspace EBB and other register values */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) { - mtspr(SPRN_EBBHR, ebb_regs[0]); - mtspr(SPRN_EBBRR, ebb_regs[1]); - mtspr(SPRN_BESCR, ebb_regs[2]); - mtspr(SPRN_TAR, user_tar); - } - mtspr(SPRN_VRSAVE, user_vrsave); - vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&kvm->arch.vcpus_running); @@ -4861,8 +4873,12 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, unsigned long npages = mem->memory_size >> PAGE_SHIFT; if (change == KVM_MR_CREATE) { - slot->arch.rmap = vzalloc(array_size(npages, - sizeof(*slot->arch.rmap))); + unsigned long size = array_size(npages, sizeof(*slot->arch.rmap)); + + if ((size >> PAGE_SHIFT) > totalram_pages()) + return -ENOMEM; + + slot->arch.rmap = vzalloc(size); if (!slot->arch.rmap) return -ENOMEM; } @@ -5072,6 +5088,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) */ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) { + unsigned long lpcr, lpcr_mask; + if (nesting_enabled(kvm)) kvmhv_release_all_nested(kvm); kvmppc_rmap_reset(kvm); @@ -5081,8 +5099,13 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) kvm->arch.radix = 0; spin_unlock(&kvm->mmu_lock); kvmppc_free_radix(kvm); - kvmppc_update_lpcr(kvm, LPCR_VPM1, - LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); + + lpcr = LPCR_VPM1; + lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + lpcr_mask |= LPCR_HAIL; + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + return 0; } @@ -5092,6 +5115,7 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) */ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) { + unsigned long lpcr, lpcr_mask; int err; err = kvmppc_init_vm_radix(kvm); @@ -5103,8 +5127,17 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) kvm->arch.radix = 1; spin_unlock(&kvm->mmu_lock); kvmppc_free_hpt(&kvm->arch.hpt); - kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR, - LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); + + lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR; + lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + lpcr_mask |= LPCR_HAIL; + if (cpu_has_feature(CPU_FTR_HVMODE) && + (kvm->arch.host_lpcr & LPCR_HAIL)) + lpcr |= LPCR_HAIL; + } + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + return 0; } @@ -5126,6 +5159,9 @@ void kvmppc_alloc_host_rm_ops(void) int cpu, core; int size; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; + /* Not the first time here ? */ if (kvmppc_host_rm_ops_hv != NULL) return; @@ -5268,6 +5304,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.mmu_ready = 1; lpcr &= ~LPCR_VPM1; lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_31) && + (kvm->arch.host_lpcr & LPCR_HAIL)) + lpcr |= LPCR_HAIL; ret = kvmppc_init_vm_radix(kvm); if (ret) { kvmppc_free_lpid(kvm->arch.lpid); @@ -6063,9 +6103,11 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; - r = kvm_init_subcore_bitmap(); - if (r) - return r; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + r = kvm_init_subcore_bitmap(); + if (r) + return r; + } /* * We need a way of accessing the XICS interrupt controller, |