diff options
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/Kconfig | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s.c | 5 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s.h | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 12 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio_hv.c | 11 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 126 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_builtin.c | 10 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_nested.c | 101 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_xics.c | 8 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 42 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_tm.c | 61 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xics.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.c | 74 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.h | 11 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive_native.c | 24 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke.c | 5 |
18 files changed, 299 insertions, 206 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index e45644657d49..ff581d70f20c 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -38,7 +38,6 @@ config KVM_BOOK3S_32_HANDLER config KVM_BOOK3S_64_HANDLER bool select KVM_BOOK3S_HANDLER - select PPC_DAWR_FORCE_ENABLE config KVM_BOOK3S_PR_POSSIBLE bool diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 79833f78d1da..b785f6772391 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -43,8 +43,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -71,7 +69,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, emulated_inst_exits), STATS_DESC_COUNTER(VCPU, dec_exits), STATS_DESC_COUNTER(VCPU, ext_intr_exits), - STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns), STATS_DESC_COUNTER(VCPU, halt_successful_wait), STATS_DESC_COUNTER(VCPU, dbell_exits), STATS_DESC_COUNTER(VCPU, gdbell_exits), @@ -88,8 +85,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, pthru_host), STATS_DESC_COUNTER(VCPU, pthru_bad_aff) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 740e51def5a5..58391b4b32ed 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -23,7 +23,8 @@ extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); extern int kvmppc_book3s_init_pr(void); -extern void kvmppc_book3s_exit_pr(void); +void kvmppc_book3s_exit_pr(void); +extern int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val); diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 26b8b27a3755..feee40cb2ba1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -196,7 +196,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, hva_t ptegp; u64 pteg[16]; u64 avpn = 0; - u64 v, r; + u64 r; u64 v_val, v_mask; u64 eaddr_mask; int i; @@ -285,7 +285,6 @@ do_second: goto do_second; } - v = be64_to_cpu(pteg[i]); r = be64_to_cpu(pteg[i+1]); pp = (r & HPTE_R_PP) | key; if (r & HPTE_R_PP0) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index b5905ae4377c..16359525a40f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -44,6 +44,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, (to != NULL) ? __pa(to): 0, (from != NULL) ? __pa(from): 0, n); + if (eaddr & (0xFFFUL << 52)) + return ret; + quadrant = 1; if (!pid) quadrant = 2; @@ -65,10 +68,12 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, } isync(); + pagefault_disable(); if (is_load) - ret = copy_from_user_nofault(to, (const void __user *)from, n); + ret = __copy_from_user_inatomic(to, (const void __user *)from, n); else - ret = copy_to_user_nofault((void __user *)to, from, n); + ret = __copy_to_user_inatomic((void __user *)to, from, n); + pagefault_enable(); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) @@ -81,7 +86,6 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, return ret; } -EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix); static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, void *from, unsigned long n) @@ -117,14 +121,12 @@ long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, return ret; } -EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix); long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from, unsigned long n) { return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n); } -EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix); int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, u64 root, diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 8da93fdfa59e..6365087f3160 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -346,7 +346,7 @@ static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - memslot = search_memslots(kvm_memslots(kvm), gfn); + memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); if (!memslot) return -EINVAL; diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index dc6591548f0c..870b7f0c7ea5 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -80,7 +80,7 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - memslot = search_memslots(kvm_memslots_raw(kvm), gfn); + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); if (!memslot) return -EINVAL; @@ -173,10 +173,13 @@ static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, idx -= stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; /* - * page must not be NULL in real mode, - * kvmppc_rm_ioba_validate() must have taken care of this. + * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is + * being cleared, otherwise it returns H_TOO_HARD and we skip this. */ - WARN_ON_ONCE_RM(!page); + if (!page) { + WARN_ON_ONCE_RM(tce != 0); + return; + } tbl = kvmppc_page_address(page); tbl[idx % TCES_PER_PAGE] = tce; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 085fb8ecbf68..2acb1c96cfaf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -59,6 +59,7 @@ #include <asm/kvm_book3s.h> #include <asm/mmu_context.h> #include <asm/lppaca.h> +#include <asm/pmc.h> #include <asm/processor.h> #include <asm/cputhreads.h> #include <asm/page.h> @@ -1165,7 +1166,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) break; #endif case H_RANDOM: - if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) + if (!arch_get_random_seed_long(&vcpu->arch.regs.gpr[4])) ret = H_HARDWARE; break; case H_RPT_INVALIDATE: @@ -1679,6 +1680,21 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, r = RESUME_GUEST; } break; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case BOOK3S_INTERRUPT_HV_SOFTPATCH: + /* + * This occurs for various TM-related instructions that + * we need to emulate on POWER9 DD2.2. We have already + * handled the cases where the guest was in real-suspend + * mode and was transitioning to transactional state. + */ + r = kvmhv_p9_tm_emulation(vcpu); + if (r != -1) + break; + fallthrough; /* go to facility unavailable handler */ +#endif + /* * This occurs if the guest (kernel or userspace), does something that * is prohibited by HFSCR. @@ -1697,18 +1713,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, } break; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - case BOOK3S_INTERRUPT_HV_SOFTPATCH: - /* - * This occurs for various TM-related instructions that - * we need to emulate on POWER9 DD2.2. We have already - * handled the cases where the guest was in real-suspend - * mode and was transitioning to transactional state. - */ - r = kvmhv_p9_tm_emulation(vcpu); - break; -#endif - case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; break; @@ -1727,6 +1731,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) { + struct kvm_nested_guest *nested = vcpu->arch.nested; int r; int srcu_idx; @@ -1811,9 +1816,41 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) * mode and was transitioning to transactional state. */ r = kvmhv_p9_tm_emulation(vcpu); - break; + if (r != -1) + break; + fallthrough; /* go to facility unavailable handler */ #endif + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: { + u64 cause = vcpu->arch.hfscr >> 56; + + /* + * Only pass HFU interrupts to the L1 if the facility is + * permitted but disabled by the L1's HFSCR, otherwise + * the interrupt does not make sense to the L1 so turn + * it into a HEAI. + */ + if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) || + (nested->hfscr & (1UL << cause))) { + vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST; + + /* + * If the fetch failed, return to guest and + * try executing it again. + */ + r = kvmppc_get_last_inst(vcpu, INST_GENERIC, + &vcpu->arch.emul_inst); + if (r != EMULATE_DONE) + r = RESUME_GUEST; + else + r = RESUME_HOST; + } else { + r = RESUME_HOST; + } + + break; + } + case BOOK3S_INTERRUPT_HV_RM_HARD: vcpu->arch.trap = 0; r = RESUME_GUEST; @@ -2684,6 +2721,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) spin_lock_init(&vcpu->arch.vpa_update_lock); spin_lock_init(&vcpu->arch.tbacct_lock); vcpu->arch.busy_preempt = TB_NIL; + vcpu->arch.shregs.msr = MSR_ME; vcpu->arch.intr_msr = MSR_SF | MSR_ME; /* @@ -2705,6 +2743,8 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; + vcpu->arch.hfscr_permitted = vcpu->arch.hfscr; + kvmppc_mmu_book3s_hv_init(vcpu); vcpu->arch.state = KVMPPC_VCPU_NOTREADY; @@ -3727,7 +3767,6 @@ static void load_spr_state(struct kvm_vcpu *vcpu) mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); mtspr(SPRN_BESCR, vcpu->arch.bescr); - mtspr(SPRN_WORT, vcpu->arch.wort); mtspr(SPRN_TIDR, vcpu->arch.tid); mtspr(SPRN_AMR, vcpu->arch.amr); mtspr(SPRN_UAMOR, vcpu->arch.uamor); @@ -3754,7 +3793,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu) vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); vcpu->arch.bescr = mfspr(SPRN_BESCR); - vcpu->arch.wort = mfspr(SPRN_WORT); vcpu->arch.tid = mfspr(SPRN_TIDR); vcpu->arch.amr = mfspr(SPRN_AMR); vcpu->arch.uamor = mfspr(SPRN_UAMOR); @@ -3786,7 +3824,6 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs) { mtspr(SPRN_PSPB, 0); - mtspr(SPRN_WORT, 0); mtspr(SPRN_UAMOR, 0); mtspr(SPRN_DSCR, host_os_sprs->dscr); @@ -3852,6 +3889,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + if (vcpu->arch.vpa.pinned_addr) { + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; + get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; + } else { + get_lppaca()->pmcregs_in_use = 1; + } + barrier(); + } +#endif kvmhv_load_guest_pmu(vcpu); msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); @@ -3986,6 +4035,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, save_pmu |= nesting_enabled(vcpu->kvm); kvmhv_save_guest_pmu(vcpu, save_pmu); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif vc->entry_exit_map = 0x101; vc->in_guest = 0; @@ -4146,19 +4202,31 @@ out: /* Attribute wait time */ if (do_sleep) { - vc->runner->stat.halt_wait_ns += + vc->runner->stat.generic.halt_wait_ns += ktime_to_ns(cur) - ktime_to_ns(start_wait); + KVM_STATS_LOG_HIST_UPDATE( + vc->runner->stat.generic.halt_wait_hist, + ktime_to_ns(cur) - ktime_to_ns(start_wait)); /* Attribute failed poll time */ - if (vc->halt_poll_ns) + if (vc->halt_poll_ns) { vc->runner->stat.generic.halt_poll_fail_ns += ktime_to_ns(start_wait) - ktime_to_ns(start_poll); + KVM_STATS_LOG_HIST_UPDATE( + vc->runner->stat.generic.halt_poll_fail_hist, + ktime_to_ns(start_wait) - + ktime_to_ns(start_poll)); + } } else { /* Attribute successful poll time */ - if (vc->halt_poll_ns) + if (vc->halt_poll_ns) { vc->runner->stat.generic.halt_poll_success_ns += ktime_to_ns(cur) - ktime_to_ns(start_poll); + KVM_STATS_LOG_HIST_UPDATE( + vc->runner->stat.generic.halt_poll_success_hist, + ktime_to_ns(cur) - ktime_to_ns(start_poll)); + } } /* Adjust poll time */ @@ -5328,6 +5396,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) struct kvmppc_passthru_irqmap *pimap; struct irq_chip *chip; int i, rc = 0; + struct irq_data *host_data; if (!kvm_irq_bypass) return 1; @@ -5355,7 +5424,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) * what our real-mode EOI code does, or a XIVE interrupt */ chip = irq_data_get_irq_chip(&desc->irq_data); - if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) { + if (!chip || !is_pnv_opal_msi(chip)) { pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n", host_irq, guest_gsi); mutex_unlock(&kvm->lock); @@ -5392,15 +5461,22 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) * the KVM real mode handler. */ smp_wmb(); - irq_map->r_hwirq = desc->irq_data.hwirq; + + /* + * The 'host_irq' number is mapped in the PCI-MSI domain but + * the underlying calls, which will EOI the interrupt in real + * mode, need an HW IRQ number mapped in the XICS IRQ domain. + */ + host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq); + irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data); if (i == pimap->n_mapped) pimap->n_mapped++; if (xics_on_xive()) - rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); + rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq); else - kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); + kvmppc_xics_set_mapped(kvm, guest_gsi, irq_map->r_hwirq); if (rc) irq_map->r_hwirq = 0; @@ -5439,7 +5515,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) } if (xics_on_xive()) - rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); + rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, host_irq); else kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index be8ef1c5b1bf..fcf4760a3a0e 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -137,23 +137,23 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, * exist in the system. We use a counter of VMs to track this. * * One of the operations we need to block is onlining of secondaries, so we - * protect hv_vm_count with get/put_online_cpus(). + * protect hv_vm_count with cpus_read_lock/unlock(). */ static atomic_t hv_vm_count; void kvm_hv_vm_activated(void) { - get_online_cpus(); + cpus_read_lock(); atomic_inc(&hv_vm_count); - put_online_cpus(); + cpus_read_unlock(); } EXPORT_SYMBOL_GPL(kvm_hv_vm_activated); void kvm_hv_vm_deactivated(void) { - get_online_cpus(); + cpus_read_lock(); atomic_dec(&hv_vm_count); - put_online_cpus(); + cpus_read_unlock(); } EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated); diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 898f942eb198..ed8a2c9f5629 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -99,13 +99,12 @@ static void byteswap_hv_regs(struct hv_guest_state *hr) hr->dawrx1 = swab64(hr->dawrx1); } -static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, +static void save_hv_return_state(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) { struct kvmppc_vcore *vc = vcpu->arch.vcore; hr->dpdes = vc->dpdes; - hr->hfscr = vcpu->arch.hfscr; hr->purr = vcpu->arch.purr; hr->spurr = vcpu->arch.spurr; hr->ic = vcpu->arch.ic; @@ -119,7 +118,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, hr->pidr = vcpu->arch.pid; hr->cfar = vcpu->arch.cfar; hr->ppr = vcpu->arch.ppr; - switch (trap) { + switch (vcpu->arch.trap) { case BOOK3S_INTERRUPT_H_DATA_STORAGE: hr->hdar = vcpu->arch.fault_dar; hr->hdsisr = vcpu->arch.fault_dsisr; @@ -128,55 +127,17 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, case BOOK3S_INTERRUPT_H_INST_STORAGE: hr->asdr = vcpu->arch.fault_gpa; break; + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) | + (HFSCR_INTR_CAUSE & vcpu->arch.hfscr)); + break; case BOOK3S_INTERRUPT_H_EMUL_ASSIST: hr->heir = vcpu->arch.emul_inst; break; } } -/* - * This can result in some L0 HV register state being leaked to an L1 - * hypervisor when the hv_guest_state is copied back to the guest after - * being modified here. - * - * There is no known problem with such a leak, and in many cases these - * register settings could be derived by the guest by observing behaviour - * and timing, interrupts, etc., but it is an issue to consider. - */ -static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) -{ - struct kvmppc_vcore *vc = vcpu->arch.vcore; - u64 mask; - - /* - * Don't let L1 change LPCR bits for the L2 except these: - */ - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | - LPCR_LPES | LPCR_MER; - - /* - * Additional filtering is required depending on hardware - * and configuration. - */ - hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, - (vc->lpcr & ~mask) | (hr->lpcr & mask)); - - /* - * Don't let L1 enable features for L2 which we've disabled for L1, - * but preserve the interrupt cause field. - */ - hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr); - - /* Don't let data address watchpoint match in hypervisor state */ - hr->dawrx0 &= ~DAWRX_HYP; - hr->dawrx1 &= ~DAWRX_HYP; - - /* Don't let completed instruction address breakpt match in HV state */ - if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) - hr->ciabr &= ~CIABR_PRIV; -} - -static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) +static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -288,6 +249,43 @@ static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu, sizeof(struct pt_regs)); } +static void load_l2_hv_regs(struct kvm_vcpu *vcpu, + const struct hv_guest_state *l2_hv, + const struct hv_guest_state *l1_hv, u64 *lpcr) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + u64 mask; + + restore_hv_regs(vcpu, l2_hv); + + /* + * Don't let L1 change LPCR bits for the L2 except these: + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | + LPCR_LPES | LPCR_MER; + + /* + * Additional filtering is required depending on hardware + * and configuration. + */ + *lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, + (vc->lpcr & ~mask) | (*lpcr & mask)); + + /* + * Don't let L1 enable features for L2 which we don't allow for L1, + * but preserve the interrupt cause field. + */ + vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted); + + /* Don't let data address watchpoint match in hypervisor state */ + vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP; + vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP; + + /* Don't let completed instruction address breakpt match in HV state */ + if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) + vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV; +} + long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) { long int err, r; @@ -296,7 +294,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) struct hv_guest_state l2_hv = {0}, saved_l1_hv; struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 hv_ptr, regs_ptr; - u64 hdec_exp; + u64 hdec_exp, lpcr; s64 delta_purr, delta_spurr, delta_ic, delta_vtb; if (vcpu->kvm->arch.l1_ptcr == 0) @@ -364,13 +362,14 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* set L1 state to L2 state */ vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; + l2->hfscr = l2_hv.hfscr; vcpu->arch.regs = l2_regs; /* Guest must always run with ME enabled, HV disabled. */ vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV; - sanitise_hv_regs(vcpu, &l2_hv); - restore_hv_regs(vcpu, &l2_hv); + lpcr = l2_hv.lpcr; + load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr); vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; @@ -380,7 +379,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) r = RESUME_HOST; break; } - r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr); + r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); } while (is_kvmppc_resume_guest(r)); /* save L2 state for return */ @@ -390,7 +389,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) delta_spurr = vcpu->arch.spurr - l2_hv.spurr; delta_ic = vcpu->arch.ic - l2_hv.ic; delta_vtb = vc->vtb - l2_hv.vtb; - save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv); + save_hv_return_state(vcpu, &l2_hv); /* restore L1 state */ vcpu->arch.nested = NULL; diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 0a11ec88a0ae..587c33fc4564 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -706,6 +706,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq) icp->rm_eoied_irq = irq; } + /* Handle passthrough interrupts */ if (state->host_irq) { ++vcpu->stat.pthru_all; if (state->intr_cpu != -1) { @@ -759,12 +760,12 @@ int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) static unsigned long eoi_rc; -static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) +static void icp_eoi(struct irq_data *d, u32 hwirq, __be32 xirr, bool *again) { void __iomem *xics_phys; int64_t rc; - rc = pnv_opal_pci_msi_eoi(c, hwirq); + rc = pnv_opal_pci_msi_eoi(d); if (rc) eoi_rc = rc; @@ -872,8 +873,7 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, icp_rm_deliver_irq(xics, icp, irq, false); /* EOI the interrupt */ - icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, - again); + icp_eoi(irq_desc_get_irq_data(irq_map->desc), irq_map->r_hwirq, xirr, again); if (check_too_hard(xics, icp) == H_TOO_HARD) return 2; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 8dd437d7a2c6..75079397c2a5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1088,12 +1088,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE beq kvmppc_hisi -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* For softpatch interrupt, go off and do TM instruction emulation */ - cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH - beq kvmppc_tm_emul -#endif - /* See if this is a leftover HDEC interrupt */ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER bne 2f @@ -1599,42 +1593,6 @@ maybe_reenter_guest: blt deliver_guest_interrupt b guest_exit_cont -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Softpatch interrupt for transactional memory emulation cases - * on POWER9 DD2.2. This is early in the guest exit path - we - * haven't saved registers or done a treclaim yet. - */ -kvmppc_tm_emul: - /* Save instruction image in HEIR */ - mfspr r3, SPRN_HEIR - stw r3, VCPU_HEIR(r9) - - /* - * The cases we want to handle here are those where the guest - * is in real suspend mode and is trying to transition to - * transactional mode. - */ - lbz r0, HSTATE_FAKE_SUSPEND(r13) - cmpwi r0, 0 /* keep exiting guest if in fake suspend */ - bne guest_exit_cont - rldicl r3, r11, 64 - MSR_TS_S_LG, 62 - cmpwi r3, 1 /* or if not in suspend state */ - bne guest_exit_cont - - /* Call C code to do the emulation */ - mr r3, r9 - bl kvmhv_p9_tm_emulation_early - nop - ld r9, HSTATE_KVM_VCPU(r13) - li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH - cmpwi r3, 0 - beq guest_exit_cont /* continue exiting if not handled */ - ld r10, VCPU_PC(r9) - ld r11, VCPU_MSR(r9) - b fast_interrupt_c_return /* go back to guest if handled */ -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - /* * Check whether an HDSI is an HPTE not found fault or something else. * If it is an HPTE not found fault that is due to the guest accessing diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index cc90b8b82329..866cadd70094 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -47,6 +47,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) int ra, rs; /* + * The TM softpatch interrupt sets NIP to the instruction following + * the faulting instruction, which is not executed. Rewind nip to the + * faulting instruction so it looks like a normal synchronous + * interrupt, then update nip in the places where the instruction is + * emulated. + */ + vcpu->arch.regs.nip -= 4; + + /* * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit * in these instructions, so masking bit 31 out doesn't change these * instructions. For treclaim., tsr., and trechkpt. instructions if bit @@ -67,7 +76,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) (newmsr & MSR_TM))); newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; - vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.cfar = vcpu->arch.regs.nip; vcpu->arch.regs.nip = vcpu->arch.shregs.srr0; return RESUME_GUEST; @@ -79,14 +88,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* check EBB facility is available */ if (!(vcpu->arch.hfscr & HFSCR_EBB)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_EBB_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_EBB_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_EBB_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; } @@ -100,7 +110,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.bescr = bescr; msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; vcpu->arch.shregs.msr = msr; - vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.cfar = vcpu->arch.regs.nip; vcpu->arch.regs.nip = vcpu->arch.ebbrr; return RESUME_GUEST; @@ -116,6 +126,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; /* ignore bit 31, see comment above */ @@ -128,14 +139,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if (!(msr & MSR_TM)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_TM_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; @@ -152,20 +164,22 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) msr = (msr & ~MSR_TS_MASK) | MSR_TS_S; } vcpu->arch.shregs.msr = msr; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; /* ignore bit 31, see comment above */ case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK): /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if (!(msr & MSR_TM)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_TM_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; @@ -189,6 +203,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr &= ~MSR_TS_MASK; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; /* ignore bit 31, see comment above */ @@ -196,14 +211,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) /* XXX do we need to check for PR=0 here? */ /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if (!(msr & MSR_TM)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_TM_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; @@ -220,6 +236,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr = msr | MSR_TS_S; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; } diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 303e3cb096db..ebd5d920de8c 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -10,13 +10,13 @@ #include <linux/gfp.h> #include <linux/anon_inodes.h> #include <linux/spinlock.h> - +#include <linux/debugfs.h> #include <linux/uaccess.h> + #include <asm/kvm_book3s.h> #include <asm/kvm_ppc.h> #include <asm/hvcall.h> #include <asm/xics.h> -#include <asm/debugfs.h> #include <asm/time.h> #include <linux/seq_file.h> @@ -1024,7 +1024,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics) return; } - xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, + xics->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, xics, &xics_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 8cfab3547494..a18db9e16ea4 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -22,7 +22,6 @@ #include <asm/xive.h> #include <asm/xive-regs.h> #include <asm/debug.h> -#include <asm/debugfs.h> #include <asm/time.h> #include <asm/opal.h> @@ -59,6 +58,25 @@ */ #define XIVE_Q_GAP 2 +static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + + /* Check enablement at VP level */ + return xc->vp_cam & TM_QW1W2_HO; +} + +bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = xc->xive; + + if (xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE) + return kvmppc_xive_vcpu_has_save_restore(vcpu); + + return true; +} + /* * Push a vcpu's context to the XIVE on guest entry. * This assumes we are in virtual mode (MMU on) @@ -77,7 +95,8 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) return; eieio(); - __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); + if (!kvmppc_xive_vcpu_has_save_restore(vcpu)) + __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2); vcpu->arch.xive_pushed = 1; eieio(); @@ -149,7 +168,8 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) /* First load to pull the context, we ignore the value */ __raw_readl(tima + TM_SPC_PULL_OS_CTX); /* Second load to recover the context state (Words 0 and 1) */ - vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS); + if (!kvmppc_xive_vcpu_has_save_restore(vcpu)) + vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS); /* Fixup some of the state for the next load */ vcpu->arch.xive_saved_state.lsmfb = 0; @@ -363,9 +383,9 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) if (!vcpu->arch.xive_vcpu) continue; rc = xive_provision_queue(vcpu, prio); - if (rc == 0 && !xive->single_escalation) + if (rc == 0 && !kvmppc_xive_has_single_escalation(xive)) kvmppc_xive_attach_escalation(vcpu, prio, - xive->single_escalation); + kvmppc_xive_has_single_escalation(xive)); if (rc) return rc; } @@ -922,13 +942,13 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) } int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, - struct irq_desc *host_desc) + unsigned long host_irq) { struct kvmppc_xive *xive = kvm->arch.xive; struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; - struct irq_data *host_data = irq_desc_get_irq_data(host_desc); - unsigned int host_irq = irq_desc_get_irq(host_desc); + struct irq_data *host_data = + irq_domain_get_irq_data(irq_get_default_host(), host_irq); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data); u16 idx; u8 prio; @@ -937,7 +957,8 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, if (!xive) return -ENODEV; - pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n",guest_irq, hw_irq); + pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n", + __func__, guest_irq, host_irq, hw_irq); sb = kvmppc_xive_find_source(xive, guest_irq, &idx); if (!sb) @@ -959,7 +980,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, */ rc = irq_set_vcpu_affinity(host_irq, state); if (rc) { - pr_err("Failed to set VCPU affinity for irq %d\n", host_irq); + pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq); return rc; } @@ -1019,12 +1040,11 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped); int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, - struct irq_desc *host_desc) + unsigned long host_irq) { struct kvmppc_xive *xive = kvm->arch.xive; struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; - unsigned int host_irq = irq_desc_get_irq(host_desc); u16 idx; u8 prio; int rc; @@ -1032,7 +1052,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, if (!xive) return -ENODEV; - pr_devel("clr_mapped girq 0x%lx...\n", guest_irq); + pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq); sb = kvmppc_xive_find_source(xive, guest_irq, &idx); if (!sb) @@ -1059,7 +1079,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, /* Release the passed-through interrupt to the host */ rc = irq_set_vcpu_affinity(host_irq, NULL); if (rc) { - pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq); + pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq); return rc; } @@ -1199,7 +1219,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) /* Free escalations */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { if (xc->esc_virq[i]) { - if (xc->xive->single_escalation) + if (kvmppc_xive_has_single_escalation(xc->xive)) xive_cleanup_single_escalation(vcpu, xc, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); @@ -1319,6 +1339,12 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, if (r) goto bail; + if (!kvmppc_xive_check_save_restore(vcpu)) { + pr_err("inconsistent save-restore setup for VCPU %d\n", cpu); + r = -EIO; + goto bail; + } + /* Configure VCPU fields for use by assembly push/pull */ vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); @@ -1340,7 +1366,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, * Enable the VP first as the single escalation mode will * affect escalation interrupts numbering */ - r = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive)); if (r) { pr_err("Failed to enable VP in OPAL, err %d\n", r); goto bail; @@ -1357,15 +1383,15 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct xive_q *q = &xc->queues[i]; /* Single escalation, no queue 7 */ - if (i == 7 && xive->single_escalation) + if (i == 7 && kvmppc_xive_has_single_escalation(xive)) break; /* Is queue already enabled ? Provision it */ if (xive->qmap & (1 << i)) { r = xive_provision_queue(vcpu, i); - if (r == 0 && !xive->single_escalation) + if (r == 0 && !kvmppc_xive_has_single_escalation(xive)) kvmppc_xive_attach_escalation( - vcpu, i, xive->single_escalation); + vcpu, i, kvmppc_xive_has_single_escalation(xive)); if (r) goto bail; } else { @@ -1380,7 +1406,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, } /* If not done above, attach priority 0 escalation */ - r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation); + r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive)); if (r) goto bail; @@ -2135,7 +2161,11 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) */ xive->nr_servers = KVM_MAX_VCPUS; - xive->single_escalation = xive_native_has_single_escalation(); + if (xive_native_has_single_escalation()) + xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; + + if (xive_native_has_save_restore()) + xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE; kvm->arch.xive = xive; return 0; @@ -2329,7 +2359,7 @@ static void xive_debugfs_init(struct kvmppc_xive *xive) return; } - xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xive->dentry = debugfs_create_file(name, S_IRUGO, arch_debugfs_dir, xive, &xive_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index afe9eeac6d56..e6a9651c6f1e 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -97,6 +97,9 @@ struct kvmppc_xive_ops { int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq); }; +#define KVMPPC_XIVE_FLAG_SINGLE_ESCALATION 0x1 +#define KVMPPC_XIVE_FLAG_SAVE_RESTORE 0x2 + struct kvmppc_xive { struct kvm *kvm; struct kvm_device *dev; @@ -133,7 +136,7 @@ struct kvmppc_xive { u32 q_page_order; /* Flags */ - u8 single_escalation; + u8 flags; /* Number of entries in the VP block */ u32 nr_servers; @@ -307,6 +310,12 @@ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, struct kvmppc_xive_vcpu *xc, int irq); int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); +bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu); + +static inline bool kvmppc_xive_has_single_escalation(struct kvmppc_xive *xive) +{ + return xive->flags & KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; +} #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 573ecaab3597..99db9ac49901 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -20,7 +20,6 @@ #include <asm/xive.h> #include <asm/xive-regs.h> #include <asm/debug.h> -#include <asm/debugfs.h> #include <asm/opal.h> #include <linux/debugfs.h> @@ -93,7 +92,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { /* Free the escalation irq */ if (xc->esc_virq[i]) { - if (xc->xive->single_escalation) + if (kvmppc_xive_has_single_escalation(xc->xive)) xive_cleanup_single_escalation(vcpu, xc, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); @@ -168,11 +167,17 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, goto bail; } + if (!kvmppc_xive_check_save_restore(vcpu)) { + pr_err("inconsistent save-restore setup for VCPU %d\n", server_num); + rc = -EIO; + goto bail; + } + /* * Enable the VP first as the single escalation mode will * affect escalation interrupts numbering */ - rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive)); if (rc) { pr_err("Failed to enable VP in OPAL: %d\n", rc); goto bail; @@ -693,7 +698,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, } rc = kvmppc_xive_attach_escalation(vcpu, priority, - xive->single_escalation); + kvmppc_xive_has_single_escalation(xive)); error: if (rc) kvmppc_xive_native_cleanup_queue(vcpu, priority); @@ -820,7 +825,7 @@ static int kvmppc_xive_reset(struct kvmppc_xive *xive) for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { /* Single escalation, no queue 7 */ - if (prio == 7 && xive->single_escalation) + if (prio == 7 && kvmppc_xive_has_single_escalation(xive)) break; if (xc->esc_virq[prio]) { @@ -1111,7 +1116,12 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) */ xive->nr_servers = KVM_MAX_VCPUS; - xive->single_escalation = xive_native_has_single_escalation(); + if (xive_native_has_single_escalation()) + xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; + + if (xive_native_has_save_restore()) + xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE; + xive->ops = &kvmppc_xive_native_ops; kvm->arch.xive = xive; @@ -1257,7 +1267,7 @@ static void xive_native_debugfs_init(struct kvmppc_xive *xive) return; } - xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, + xive->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, xive, &xive_native_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 551b30d84aee..977801c83aff 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -41,8 +41,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -69,7 +67,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, emulated_inst_exits), STATS_DESC_COUNTER(VCPU, dec_exits), STATS_DESC_COUNTER(VCPU, ext_intr_exits), - STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns), STATS_DESC_COUNTER(VCPU, halt_successful_wait), STATS_DESC_COUNTER(VCPU, dbell_exits), STATS_DESC_COUNTER(VCPU, gdbell_exits), @@ -79,8 +76,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, pthru_host), STATS_DESC_COUNTER(VCPU, pthru_bad_aff) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, |