Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 146
1 file changed, 105 insertions, 41 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 57d0835e56fd..6ba68dd6190b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -249,6 +249,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 
 /*
  * We use the vcpu_load/put functions to measure stolen time.
+ *
  * Stolen time is counted as time when either the vcpu is able to
  * run as part of a virtual core, but the task running the vcore
  * is preempted or sleeping, or when the vcpu needs something done
@@ -278,6 +279,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * lock. The stolen times are measured in units of timebase ticks.
  * (Note that the != TB_NIL checks below are purely defensive;
  * they should never fail.)
+ *
+ * The POWER9 path is simpler, one vcpu per virtual core so the
+ * former case does not exist. If a vcpu is preempted when it is
+ * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate
+ * the stolen cycles in busy_stolen. RUNNING is not a preemptible
+ * state in the P9 path.
  */
 
 static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
@@ -311,8 +318,14 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	unsigned long flags;
 	u64 now;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (vcpu->arch.busy_preempt != TB_NIL) {
+			WARN_ON_ONCE(vcpu->arch.state != KVMPPC_VCPU_BUSY_IN_HOST);
+			vc->stolen_tb += mftb() - vcpu->arch.busy_preempt;
+			vcpu->arch.busy_preempt = TB_NIL;
+		}
 		return;
+	}
 
 	now = mftb();
 
@@ -340,8 +353,21 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	unsigned long flags;
 	u64 now;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/*
+		 * In the P9 path, RUNNABLE is not preemptible
+		 * (nor takes host interrupts)
+		 */
+		WARN_ON_ONCE(vcpu->arch.state == KVMPPC_VCPU_RUNNABLE);
+		/*
+		 * Account stolen time when preempted while the vcpu task is
+		 * running in the kernel (but not in qemu, which is INACTIVE).
+		 */
+		if (task_is_running(current) &&
+		    vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+			vcpu->arch.busy_preempt = mftb();
 		return;
+	}
 
 	now = mftb();
 
@@ -707,16 +733,15 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
 }
 
 static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+					struct lppaca *vpa,
 					unsigned int pcpu, u64 now,
 					unsigned long stolen)
 {
 	struct dtl_entry *dt;
-	struct lppaca *vpa;
 
 	dt = vcpu->arch.dtl_ptr;
-	vpa = vcpu->arch.vpa.pinned_addr;
 
-	if (!dt || !vpa)
+	if (!dt)
 		return;
 
 	dt->dispatch_reason = 7;
@@ -737,17 +762,23 @@ static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	/* order writing *dt vs. writing vpa->dtl_idx */
 	smp_wmb();
 	vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
-	vcpu->arch.dtl.dirty = true;
+
+	/* vcpu->arch.dtl.dirty is set by the caller */
 }
 
-static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
-				    struct kvmppc_vcore *vc)
+static void kvmppc_update_vpa_dispatch(struct kvm_vcpu *vcpu,
+					struct kvmppc_vcore *vc)
 {
+	struct lppaca *vpa;
 	unsigned long stolen;
 	unsigned long core_stolen;
 	u64 now;
 	unsigned long flags;
 
+	vpa = vcpu->arch.vpa.pinned_addr;
+	if (!vpa)
+		return;
+
 	now = mftb();
 
 	core_stolen = vcore_stolen_time(vc, now);
@@ -758,7 +789,34 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	vcpu->arch.busy_stolen = 0;
 	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
 
-	__kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen);
+	vpa->enqueue_dispatch_tb = cpu_to_be64(be64_to_cpu(vpa->enqueue_dispatch_tb) + stolen);
+
+	__kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + vc->tb_offset, stolen);
+
+	vcpu->arch.vpa.dirty = true;
+}
+
+static void kvmppc_update_vpa_dispatch_p9(struct kvm_vcpu *vcpu,
+					struct kvmppc_vcore *vc,
+					u64 now)
+{
+	struct lppaca *vpa;
+	unsigned long stolen;
+	unsigned long stolen_delta;
+
+	vpa = vcpu->arch.vpa.pinned_addr;
+	if (!vpa)
+		return;
+
+	stolen = vc->stolen_tb;
+	stolen_delta = stolen - vcpu->arch.stolen_logged;
+	vcpu->arch.stolen_logged = stolen;
+
+	vpa->enqueue_dispatch_tb = cpu_to_be64(stolen);
+
+	__kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now, stolen_delta);
+
+	vcpu->arch.vpa.dirty = true;
 }
 
 /* See if there is a doorbell interrupt pending for a vcpu */
@@ -2517,10 +2575,24 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
 		break;
 	case KVM_REG_PPC_TB_OFFSET:
+	{
 		/* round up to multiple of 2^24 */
-		vcpu->arch.vcore->tb_offset =
-			ALIGN(set_reg_val(id, *val), 1UL << 24);
+		u64 tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24);
+
+		/*
+		 * Now that we know the timebase offset, update the
+		 * decrementer expiry with a guest timebase value. If
+		 * the userspace does not set DEC_EXPIRY, this ensures
+		 * a migrated vcpu at least starts with an expired
+		 * decrementer, which is better than a large one that
+		 * causes a hang.
+		 */
+		if (!vcpu->arch.dec_expires && tb_offset)
+			vcpu->arch.dec_expires = get_tb() + tb_offset;
+
+		vcpu->arch.vcore->tb_offset = tb_offset;
 		break;
+	}
 	case KVM_REG_PPC_LPCR:
 		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
 		break;
@@ -3800,7 +3872,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 			 * kvmppc_core_prepare_to_enter.
 			 */
 			kvmppc_start_thread(vcpu, pvc);
-			kvmppc_create_dtl_entry(vcpu, pvc);
+			kvmppc_update_vpa_dispatch(vcpu, pvc);
 			trace_kvm_guest_enter(vcpu);
 			if (!vcpu->arch.ptid)
 				thr0_done = true;
@@ -3840,23 +3912,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
 		spin_unlock(&core_info.vc[sub]->lock);
 
-	guest_enter_irqoff();
+	guest_timing_enter_irqoff();
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
+	guest_state_enter_irqoff();
 	this_cpu_disable_ftrace();
 
-	/*
-	 * Interrupts will be enabled once we get into the guest,
-	 * so tell lockdep that we're about to enable interrupts.
-	 */
-	trace_hardirqs_on();
-
 	trap = __kvmppc_vcore_entry();
 
-	trace_hardirqs_off();
-
 	this_cpu_enable_ftrace();
+	guest_state_exit_irqoff();
 
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
@@ -3891,11 +3957,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	kvmppc_set_host_core(pcpu);
 
-	context_tracking_guest_exit();
 	if (!vtime_accounting_enabled_this_cpu()) {
 		local_irq_enable();
 		/*
-		 * Service IRQs here before vtime_account_guest_exit() so any
+		 * Service IRQs here before guest_timing_exit_irqoff() so any
 		 * ticks that occurred while running the guest are accounted to
 		 * the guest. If vtime accounting is enabled, accounting uses
 		 * TB rather than ticks, so it can be done without enabling
@@ -3904,7 +3969,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		 */
 		local_irq_disable();
 	}
-	vtime_account_guest_exit();
+	guest_timing_exit_irqoff();
 
 	local_irq_enable();
 
@@ -4404,7 +4469,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
 		if ((vc->vcore_state == VCORE_PIGGYBACK ||
 		     vc->vcore_state == VCORE_RUNNING) &&
 		    !VCORE_IS_EXITING(vc)) {
-			kvmppc_create_dtl_entry(vcpu, vc);
+			kvmppc_update_vpa_dispatch(vcpu, vc);
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
@@ -4520,7 +4585,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc = vcpu->arch.vcore;
 	vcpu->arch.ceded = 0;
 	vcpu->arch.run_task = current;
-	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
 	vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
 
 	/* See if the MMU is ready to go */
@@ -4547,6 +4611,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 	/* flags save not required, but irq_pmu has no disable/enable API */
 	powerpc_local_irq_pmu_save(flags);
 
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+
 	if (signal_pending(current))
 		goto sigpend;
 	if (need_resched() || !kvm->arch.mmu_ready)
@@ -4591,47 +4657,44 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	tb = mftb();
 
-	__kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
+	kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + vc->tb_offset);
 
 	trace_kvm_guest_enter(vcpu);
 
-	guest_enter_irqoff();
+	guest_timing_enter_irqoff();
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 
+	guest_state_enter_irqoff();
 	this_cpu_disable_ftrace();
 
-	/* Tell lockdep that we're about to enable interrupts */
-	trace_hardirqs_on();
-
 	trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb);
 	vcpu->arch.trap = trap;
 
-	trace_hardirqs_off();
-
 	this_cpu_enable_ftrace();
+	guest_state_exit_irqoff();
 
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
 	set_irq_happened(trap);
 
-	context_tracking_guest_exit();
+	vcpu->cpu = -1;
+	vcpu->arch.thread_cpu = -1;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+
 	if (!vtime_accounting_enabled_this_cpu()) {
-		local_irq_enable();
+		powerpc_local_irq_pmu_restore(flags);
 		/*
-		 * Service IRQs here before vtime_account_guest_exit() so any
+		 * Service IRQs here before guest_timing_exit_irqoff() so any
 		 * ticks that occurred while running the guest are accounted to
 		 * the guest. If vtime accounting is enabled, accounting uses
 		 * TB rather than ticks, so it can be done without enabling
 		 * interrupts here, which has the problem that it accounts
 		 * interrupt processing overhead to the host.
 		 */
-		local_irq_disable();
+		powerpc_local_irq_pmu_save(flags);
 	}
-	vtime_account_guest_exit();
-
-	vcpu->cpu = -1;
-	vcpu->arch.thread_cpu = -1;
+	guest_timing_exit_irqoff();
 
 	powerpc_local_irq_pmu_restore(flags);
 
@@ -4694,6 +4757,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 out:
 	vcpu->cpu = -1;
 	vcpu->arch.thread_cpu = -1;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 	powerpc_local_irq_pmu_restore(flags);
 	preempt_enable();
 	goto done;
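For reference, the guest_timing_*/guest_state_* helpers that the diff switches to come from include/linux/kvm_host.h and replace the open-coded guest_enter_irqoff() / trace_hardirqs_on() / context_tracking_guest_exit() / vtime_account_guest_exit() sequence. Below is a minimal sketch of the expected nesting only; run_guest_once() and enter_guest() are hypothetical stand-ins for the real entry paths (__kvmppc_vcore_entry() or kvmhv_p9_guest_entry()) and are not part of this patch.

#include <linux/kvm_host.h>

int enter_guest(struct kvm_vcpu *vcpu);	/* hypothetical stand-in, not from this patch */

/* Sketch of the accounting nesting used above; called with IRQs disabled. */
static int run_guest_once(struct kvm_vcpu *vcpu)
{
	int trap;

	guest_timing_enter_irqoff();	/* start guest time accounting */

	guest_state_enter_irqoff();	/* context tracking: entering guest mode */
	trap = enter_guest(vcpu);	/* real code uses __kvmppc_vcore_entry()
					 * or kvmhv_p9_guest_entry() here */
	guest_state_exit_irqoff();	/* context tracking: back in the host */

	/*
	 * Without vtime accounting, pending host IRQs may be serviced here
	 * (as the diff does) so their ticks are still charged to the guest.
	 */
	guest_timing_exit_irqoff();	/* stop guest time accounting */

	return trap;
}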