diff options
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 54 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_ras.c | 8 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 237 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.c | 109 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.h | 15 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 14 | ||||
-rw-r--r-- | arch/powerpc/kvm/timing.c | 3 |
8 files changed, 298 insertions, 144 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 58618f644c56..0c854816e653 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, j = i + 1; if (npages) { set_dirty_bits(map, i, npages); - i = j + npages; + j = i + npages; } } return 0; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2d46037ce936..e5f81fc108e0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -118,6 +118,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif +/* If set, the threads on each CPU core have to be in the same MMU mode */ +static bool no_mixing_hpt_and_radix; + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -1497,6 +1500,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_ARCH_COMPAT: *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); break; + case KVM_REG_PPC_DEC_EXPIRY: + *val = get_reg_val(id, vcpu->arch.dec_expires + + vcpu->arch.vcore->tb_offset); + break; default: r = -EINVAL; break; @@ -1724,6 +1731,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_ARCH_COMPAT: r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); break; + case KVM_REG_PPC_DEC_EXPIRY: + vcpu->arch.dec_expires = set_reg_val(id, *val) - + vcpu->arch.vcore->tb_offset; + break; default: r = -EINVAL; break; @@ -2378,8 +2389,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc) static bool subcore_config_ok(int n_subcores, int n_threads) { /* - * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core - * mode, with one thread per subcore. + * POWER9 "SMT4" cores are permanently in what is effectively a 4-way + * split-core mode, with one thread per subcore. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) return n_subcores <= 4 && n_threads == 1; @@ -2415,8 +2426,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) if (!cpu_has_feature(CPU_FTR_ARCH_207S)) return false; - /* POWER9 currently requires all threads to be in the same MMU mode */ - if (cpu_has_feature(CPU_FTR_ARCH_300) && + /* Some POWER9 chips require all threads to be in the same MMU mode */ + if (no_mixing_hpt_and_radix && kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm)) return false; @@ -2679,9 +2690,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * threads are offline. Also check if the number of threads in this * guest are greater than the current system threads per guest. * On POWER9, we need to be not in independent-threads mode if - * this is a HPT guest on a radix host. + * this is a HPT guest on a radix host machine where the + * CPU threads may not be in different MMU modes. */ - hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm); + hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() && + !kvm_is_radix(vc->kvm); if (((controlled_threads > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) || (hpt_on_radix && vc->kvm->arch.threads_indep)) { @@ -2831,7 +2844,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if (!thr0_done) kvmppc_start_thread(NULL, pvc); - thr += pvc->num_threads; } /* @@ -2987,7 +2999,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) { if (!xive_enabled()) return false; - return vcpu->arch.xive_saved_state.pipr < + return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < vcpu->arch.xive_saved_state.cppr; } #else @@ -3176,17 +3188,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * this thread straight away and have it join in. */ if (!signal_pending(current)) { - if (vc->vcore_state == VCORE_PIGGYBACK) { - if (spin_trylock(&vc->lock)) { - if (vc->vcore_state == VCORE_RUNNING && - !VCORE_IS_EXITING(vc)) { - kvmppc_create_dtl_entry(vcpu, vc); - kvmppc_start_thread(vcpu, vc); - trace_kvm_guest_enter(vcpu); - } - spin_unlock(&vc->lock); - } - } else if (vc->vcore_state == VCORE_RUNNING && + if ((vc->vcore_state == VCORE_PIGGYBACK || + vc->vcore_state == VCORE_RUNNING) && !VCORE_IS_EXITING(vc)) { kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu, vc); @@ -4448,6 +4451,19 @@ static int kvmppc_book3s_init_hv(void) if (kvmppc_radix_possible()) r = kvmppc_radix_init(); + + /* + * POWER9 chips before version 2.02 can't have some threads in + * HPT mode and some in radix mode on the same core. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + unsigned int pvr = mfspr(SPRN_PVR); + if ((pvr >> 16) == PVR_POWER9 && + (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) || + ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101))) + no_mixing_hpt_and_radix = true; + } + return r; } diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index c356f9a40b24..c296343d0dcc 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void) * secondary threads to proceed. * - All secondary threads will eventually call opal hmi handler on * their exit path. + * + * Returns 1 if the timebase offset should be applied, 0 if not. */ long kvmppc_realmode_hmi_handler(void) { - int ptid = local_paca->kvm_hstate.ptid; bool resync_req; - /* This is only called on primary thread. */ - BUG_ON(ptid != 0); __this_cpu_inc(irq_stat.hmi_exceptions); + if (hmi_handle_debugtrig(NULL) >= 0) + return 1; + /* * By now primary thread has already completed guest->host * partition switch but haven't signaled secondaries yet. diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9c61f736c75b..b64f10a5f5e7 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -617,13 +617,6 @@ kvmppc_hv_entry: lbz r0, KVM_RADIX(r9) cmpwi cr7, r0, 0 - /* Clear out SLB if hash */ - bne cr7, 2f - li r6,0 - slbmte r6,r6 - slbia - ptesync -2: /* * POWER7/POWER8 host -> guest partition switch code. * We don't have to lock against concurrent tlbies, @@ -738,19 +731,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 10: cmpdi r4, 0 beq kvmppc_primary_no_guest kvmppc_got_guest: - - /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */ - lwz r5,VCPU_SLB_MAX(r4) - cmpwi r5,0 - beq 9f - mtctr r5 - addi r6,r4,VCPU_SLB -1: ld r8,VCPU_SLB_E(r6) - ld r9,VCPU_SLB_V(r6) - slbmte r9,r8 - addi r6,r6,VCPU_SLB_SIZE - bdnz 1b -9: /* Increment yield count if they have a VPA */ ld r3, VCPU_VPA(r4) cmpdi r3, 0 @@ -957,7 +937,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) mftb r7 subf r3,r7,r8 mtspr SPRN_DEC,r3 - std r3,VCPU_DEC(r4) ld r5, VCPU_SPRG0(r4) ld r6, VCPU_SPRG1(r4) @@ -1018,6 +997,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) cmpdi r3, 512 /* 1 microsecond */ blt hdec_soon + /* For hash guest, clear out and reload the SLB */ + ld r6, VCPU_KVM(r4) + lbz r0, KVM_RADIX(r6) + cmpwi r0, 0 + bne 9f + li r6, 0 + slbmte r6, r6 + slbia + ptesync + + /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */ + lwz r5,VCPU_SLB_MAX(r4) + cmpwi r5,0 + beq 9f + mtctr r5 + addi r6,r4,VCPU_SLB +1: ld r8,VCPU_SLB_E(r6) + ld r9,VCPU_SLB_V(r6) + slbmte r9,r8 + addi r6,r6,VCPU_SLB_SIZE + bdnz 1b +9: + #ifdef CONFIG_KVM_XICS /* We are entering the guest on that thread, push VCPU to XIVE */ ld r10, HSTATE_XIVE_TIMA_PHYS(r13) @@ -1031,8 +1033,53 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) li r9, TM_QW1_OS + TM_WORD2 stwcix r11,r9,r10 li r9, 1 - stw r9, VCPU_XIVE_PUSHED(r4) + stb r9, VCPU_XIVE_PUSHED(r4) eieio + + /* + * We clear the irq_pending flag. There is a small chance of a + * race vs. the escalation interrupt happening on another + * processor setting it again, but the only consequence is to + * cause a spurrious wakeup on the next H_CEDE which is not an + * issue. + */ + li r0,0 + stb r0, VCPU_IRQ_PENDING(r4) + + /* + * In single escalation mode, if the escalation interrupt is + * on, we mask it. + */ + lbz r0, VCPU_XIVE_ESC_ON(r4) + cmpwi r0,0 + beq 1f + ld r10, VCPU_XIVE_ESC_RADDR(r4) + li r9, XIVE_ESB_SET_PQ_01 + ldcix r0, r10, r9 + sync + + /* We have a possible subtle race here: The escalation interrupt might + * have fired and be on its way to the host queue while we mask it, + * and if we unmask it early enough (re-cede right away), there is + * a theorical possibility that it fires again, thus landing in the + * target queue more than once which is a big no-no. + * + * Fortunately, solving this is rather easy. If the above load setting + * PQ to 01 returns a previous value where P is set, then we know the + * escalation interrupt is somewhere on its way to the host. In that + * case we simply don't clear the xive_esc_on flag below. It will be + * eventually cleared by the handler for the escalation interrupt. + * + * Then, when doing a cede, we check that flag again before re-enabling + * the escalation interrupt, and if set, we abort the cede. + */ + andi. r0, r0, XIVE_ESB_VAL_P + bne- 1f + + /* Now P is 0, we can clear the flag */ + li r0, 0 + stb r0, VCPU_XIVE_ESC_ON(r4) +1: no_xive: #endif /* CONFIG_KVM_XICS */ @@ -1193,7 +1240,7 @@ hdec_soon: addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif - b guest_exit_cont + b guest_bypass /****************************************************************************** * * @@ -1423,15 +1470,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) blt deliver_guest_interrupt guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ + /* Save more register state */ + mfdar r6 + mfdsisr r7 + std r6, VCPU_DAR(r9) + stw r7, VCPU_DSISR(r9) + /* don't overwrite fault_dar/fault_dsisr if HDSI */ + cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE + beq mc_cont + std r6, VCPU_FAULT_DAR(r9) + stw r7, VCPU_FAULT_DSISR(r9) + + /* See if it is a machine check */ + cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK + beq machine_check_realmode +mc_cont: +#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING + addi r3, r9, VCPU_TB_RMEXIT + mr r4, r9 + bl kvmhv_accumulate_time +#endif #ifdef CONFIG_KVM_XICS /* We are exiting, pull the VP from the XIVE */ - lwz r0, VCPU_XIVE_PUSHED(r9) + lbz r0, VCPU_XIVE_PUSHED(r9) cmpwi cr0, r0, 0 beq 1f li r7, TM_SPC_PULL_OS_CTX li r6, TM_QW1_OS mfmsr r0 - andi. r0, r0, MSR_IR /* in real mode? */ + andi. r0, r0, MSR_DR /* in real mode? */ beq 2f ld r10, HSTATE_XIVE_TIMA_VIRT(r13) cmpldi cr0, r10, 0 @@ -1454,33 +1521,42 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ /* Fixup some of the state for the next load */ li r10, 0 li r0, 0xff - stw r10, VCPU_XIVE_PUSHED(r9) + stb r10, VCPU_XIVE_PUSHED(r9) stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9) stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9) eieio 1: #endif /* CONFIG_KVM_XICS */ - /* Save more register state */ - mfdar r6 - mfdsisr r7 - std r6, VCPU_DAR(r9) - stw r7, VCPU_DSISR(r9) - /* don't overwrite fault_dar/fault_dsisr if HDSI */ - cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE - beq mc_cont - std r6, VCPU_FAULT_DAR(r9) - stw r7, VCPU_FAULT_DSISR(r9) - /* See if it is a machine check */ - cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK - beq machine_check_realmode -mc_cont: -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING - addi r3, r9, VCPU_TB_RMEXIT - mr r4, r9 - bl kvmhv_accumulate_time -#endif + /* For hash guest, read the guest SLB and save it away */ + ld r5, VCPU_KVM(r9) + lbz r0, KVM_RADIX(r5) + li r5, 0 + cmpwi r0, 0 + bne 3f /* for radix, save 0 entries */ + lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ + mtctr r0 + li r6,0 + addi r7,r9,VCPU_SLB +1: slbmfee r8,r6 + andis. r0,r8,SLB_ESID_V@h + beq 2f + add r8,r8,r6 /* put index in */ + slbmfev r3,r6 + std r8,VCPU_SLB_E(r7) + std r3,VCPU_SLB_V(r7) + addi r7,r7,VCPU_SLB_SIZE + addi r5,r5,1 +2: addi r6,r6,1 + bdnz 1b + /* Finally clear out the SLB */ + li r0,0 + slbmte r0,r0 + slbia + ptesync +3: stw r5,VCPU_SLB_MAX(r9) +guest_bypass: mr r3, r12 /* Increment exit count, poke other threads to exit */ bl kvmhv_commence_exit @@ -1501,31 +1577,6 @@ mc_cont: ori r6,r6,1 mtspr SPRN_CTRLT,r6 4: - /* Check if we are running hash or radix and store it in cr2 */ - ld r5, VCPU_KVM(r9) - lbz r0, KVM_RADIX(r5) - cmpwi cr2,r0,0 - - /* Read the guest SLB and save it away */ - li r5, 0 - bne cr2, 3f /* for radix, save 0 entries */ - lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ - mtctr r0 - li r6,0 - addi r7,r9,VCPU_SLB -1: slbmfee r8,r6 - andis. r0,r8,SLB_ESID_V@h - beq 2f - add r8,r8,r6 /* put index in */ - slbmfev r3,r6 - std r8,VCPU_SLB_E(r7) - std r3,VCPU_SLB_V(r7) - addi r7,r7,VCPU_SLB_SIZE - addi r5,r5,1 -2: addi r6,r6,1 - bdnz 1b -3: stw r5,VCPU_SLB_MAX(r9) - /* * Save the guest PURR/SPURR */ @@ -1803,7 +1854,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r5, VCPU_KVM(r9) lbz r0, KVM_RADIX(r5) cmpwi cr2, r0, 0 - beq cr2, 3f + beq cr2, 4f /* Radix: Handle the case where the guest used an illegal PID */ LOAD_REG_ADDR(r4, mmu_base_pid) @@ -1839,15 +1890,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION PPC_INVALIDATE_ERAT END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) - b 4f +4: #endif /* CONFIG_PPC_RADIX_MMU */ - /* Hash: clear out SLB */ -3: li r5,0 - slbmte r5,r5 - slbia - ptesync -4: /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do @@ -1908,16 +1953,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) bne 27f bl kvmppc_realmode_hmi_handler nop + cmpdi r3, 0 li r12, BOOK3S_INTERRUPT_HMI /* - * At this point kvmppc_realmode_hmi_handler would have resync-ed - * the TB. Hence it is not required to subtract guest timebase - * offset from timebase. So, skip it. + * At this point kvmppc_realmode_hmi_handler may have resync-ed + * the TB, and if it has, we must not subtract the guest timebase + * offset from the timebase. So, skip it. * * Also, do not call kvmppc_subcore_exit_guest() because it has * been invoked as part of kvmppc_realmode_hmi_handler(). */ - b 30f + beq 30f 27: /* Subtract timebase offset from timebase */ @@ -2744,7 +2790,32 @@ kvm_cede_prodded: /* we've ceded but we want to give control to the host */ kvm_cede_exit: ld r9, HSTATE_KVM_VCPU(r13) - b guest_exit_cont +#ifdef CONFIG_KVM_XICS + /* Abort if we still have a pending escalation */ + lbz r5, VCPU_XIVE_ESC_ON(r9) + cmpwi r5, 0 + beq 1f + li r0, 0 + stb r0, VCPU_CEDED(r9) +1: /* Enable XIVE escalation */ + li r5, XIVE_ESB_SET_PQ_00 + mfmsr r0 + andi. r0, r0, MSR_DR /* in real mode? */ + beq 1f + ld r10, VCPU_XIVE_ESC_VADDR(r9) + cmpdi r10, 0 + beq 3f + ldx r0, r10, r5 + b 2f +1: ld r10, VCPU_XIVE_ESC_RADDR(r9) + cmpdi r10, 0 + beq 3f + ldcix r0, r10, r5 +2: sync + li r0, 1 + stb r0, VCPU_XIVE_ESC_ON(r9) +#endif /* CONFIG_KVM_XICS */ +3: b guest_exit_cont /* Try to handle a machine check in real mode */ machine_check_realmode: diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 0d750d274c4e..badfdbb857a2 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data) { struct kvm_vcpu *vcpu = data; - /* We use the existing H_PROD mechanism to wake up the target */ - vcpu->arch.prodded = 1; + vcpu->arch.irq_pending = 1; smp_mb(); if (vcpu->arch.ceded) kvmppc_fast_vcpu_kick(vcpu); + /* Since we have the no-EOI flag, the interrupt is effectively + * disabled now. Clearing xive_esc_on means we won't bother + * doing so on the next entry. + * + * This also allows the entry code to know that if a PQ combination + * of 10 is observed while xive_esc_on is true, it means the queue + * contains an unprocessed escalation interrupt. We don't make use of + * that knowledge today but might (see comment in book3s_hv_rmhandler.S) + */ + vcpu->arch.xive_esc_on = false; + return IRQ_HANDLED; } @@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) return -EIO; } - /* - * Future improvement: start with them disabled - * and handle DD2 and later scheme of merged escalation - * interrupts - */ - name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", - vcpu->kvm->arch.lpid, xc->server_num, prio); + if (xc->xive->single_escalation) + name = kasprintf(GFP_KERNEL, "kvm-%d-%d", + vcpu->kvm->arch.lpid, xc->server_num); + else + name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d", + vcpu->kvm->arch.lpid, xc->server_num, prio); if (!name) { pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n", prio, xc->server_num); rc = -ENOMEM; goto error; } + + pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio); + rc = request_irq(xc->esc_virq[prio], xive_esc_irq, IRQF_NO_THREAD, name, vcpu); if (rc) { @@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) goto error; } xc->esc_virq_names[prio] = name; + + /* In single escalation mode, we grab the ESB MMIO of the + * interrupt and mask it. Also populate the VCPU v/raddr + * of the ESB page for use by asm entry/exit code. Finally + * set the XIVE_IRQ_NO_EOI flag which will prevent the + * core code from performing an EOI on the escalation + * interrupt, thus leaving it effectively masked after + * it fires once. + */ + if (xc->xive->single_escalation) { + struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + + xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); + vcpu->arch.xive_esc_raddr = xd->eoi_page; + vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio; + xd->flags |= XIVE_IRQ_NO_EOI; + } + return 0; error: irq_dispose_mapping(xc->esc_virq[prio]); @@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) pr_devel("Provisioning prio... %d\n", prio); - /* Provision each VCPU and enable escalations */ + /* Provision each VCPU and enable escalations if needed */ kvm_for_each_vcpu(i, vcpu, kvm) { if (!vcpu->arch.xive_vcpu) continue; rc = xive_provision_queue(vcpu, prio); - if (rc == 0) + if (rc == 0 && !xive->single_escalation) xive_attach_escalation(vcpu, prio); if (rc) return rc; @@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, /* Allocate IPI */ xc->vp_ipi = xive_native_alloc_irq(); if (!xc->vp_ipi) { + pr_err("Failed to allocate xive irq for VCPU IPI\n"); r = -EIO; goto bail; } @@ -1092,18 +1124,33 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, goto bail; /* + * Enable the VP first as the single escalation mode will + * affect escalation interrupts numbering + */ + r = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + if (r) { + pr_err("Failed to enable VP in OPAL, err %d\n", r); + goto bail; + } + + /* * Initialize queues. Initially we set them all for no queueing * and we enable escalation for queue 0 only which we'll use for * our mfrr change notifications. If the VCPU is hot-plugged, we - * do handle provisioning however. + * do handle provisioning however based on the existing "map" + * of enabled queues. */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { struct xive_q *q = &xc->queues[i]; + /* Single escalation, no queue 7 */ + if (i == 7 && xive->single_escalation) + break; + /* Is queue already enabled ? Provision it */ if (xive->qmap & (1 << i)) { r = xive_provision_queue(vcpu, i); - if (r == 0) + if (r == 0 && !xive->single_escalation) xive_attach_escalation(vcpu, i); if (r) goto bail; @@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, if (r) goto bail; - /* Enable the VP */ - r = xive_native_enable_vp(xc->vp_id); - if (r) - goto bail; - /* Route the IPI */ r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI); if (!r) @@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n", val, server, guest_prio); + /* * If the source doesn't already have an IPI, allocate * one and get the corresponding data @@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) if (xive->vp_base == XIVE_INVALID_VP) ret = -ENOMEM; + xive->single_escalation = xive_native_has_single_escalation(); + if (ret) { kfree(xive); return ret; @@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private) kvm_for_each_vcpu(i, vcpu, kvm) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + unsigned int i; if (!xc) continue; @@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private) xc->server_num, xc->cppr, xc->hw_cppr, xc->mfrr, xc->pending, xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + struct xive_q *q = &xc->queues[i]; + u32 i0, i1, idx; + + if (!q->qpage && !xc->esc_virq[i]) + continue; + + seq_printf(m, " [q%d]: ", i); + + if (q->qpage) { + idx = q->idx; + i0 = be32_to_cpup(q->qpage + idx); + idx = (idx + 1) & q->msk; + i1 = be32_to_cpup(q->qpage + idx); + seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1); + } + if (xc->esc_virq[i]) { + struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); + seq_printf(m, "E:%c%c I(%d:%llx:%llx)", + (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', + (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', + xc->esc_virq[i], pq, xd->eoi_page); + seq_printf(m, "\n"); + } + } t_rm_h_xirr += xc->stat_rm_h_xirr; t_rm_h_ipoll += xc->stat_rm_h_ipoll; diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 6ba63f8e8a61..a08ae6fd4c51 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -120,6 +120,8 @@ struct kvmppc_xive { u32 q_order; u32 q_page_order; + /* Flags */ + u8 single_escalation; }; #define KVMPPC_XIVE_Q_COUNT 8 @@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp * is as follow. * * Guest request for 0...6 are honored. Guest request for anything - * higher results in a priority of 7 being applied. - * - * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb - * in order to match AIX expectations + * higher results in a priority of 6 being applied. * * Similar mapping is done for CPPR values */ static inline u8 xive_prio_from_guest(u8 prio) { - if (prio == 0xff || prio < 8) + if (prio == 0xff || prio < 6) return prio; - return 7; + return 6; } static inline u8 xive_prio_to_guest(u8 prio) { - if (prio == 0xff || prio < 7) - return prio; - return 0xb; + return prio; } static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 545a230f675f..748562ec9a04 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -763,7 +763,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; - vcpu->arch.dec_expires = ~(u64)0; + vcpu->arch.dec_expires = get_tb(); #ifdef CONFIG_KVM_EXIT_TIMING mutex_init(&vcpu->arch.exit_timing_lock); @@ -1106,11 +1106,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ - if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || - (vcpu->arch.mmio_vsx_copy_nums < 0) ) { + /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */ + if (vcpu->arch.mmio_vsx_copy_nums > 4) return EMULATE_FAIL; - } while (vcpu->arch.mmio_vsx_copy_nums) { emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, @@ -1252,11 +1250,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.io_gpr = rs; - /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ - if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || - (vcpu->arch.mmio_vsx_copy_nums < 0) ) { + /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */ + if (vcpu->arch.mmio_vsx_copy_nums > 4) return EMULATE_FAIL; - } while (vcpu->arch.mmio_vsx_copy_nums) { if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1) diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index e44d2b2ea97e..1c03c978eb18 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private) int i; u64 min, max, sum, sum_quad; - seq_printf(m, "%s", "type count min max sum sum_squared\n"); - + seq_puts(m, "type count min max sum sum_squared\n"); for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { |