diff options
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/Kconfig | 13 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_32_mmu_host.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 24 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_nestedv2.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_uvmem.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke.c | 14 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500_mmu_host.c | 199 | ||||
-rw-r--r-- | arch/powerpc/kvm/guest-state-buffer.c | 39 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 9 | ||||
-rw-r--r-- | arch/powerpc/kvm/test-guest-state-buffer.c | 214 | ||||
-rw-r--r-- | arch/powerpc/kvm/timing.h | 4 | ||||
-rw-r--r-- | arch/powerpc/kvm/trace_book3s.h | 1 |
15 files changed, 388 insertions, 145 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index dbfdc126bf14..2f2702c867f7 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -83,6 +83,7 @@ config KVM_BOOK3S_64_HV depends on KVM_BOOK3S_64 && PPC_POWERNV select KVM_BOOK3S_HV_POSSIBLE select KVM_GENERIC_MMU_NOTIFIER + select KVM_BOOK3S_HV_PMU select CMA help Support running unmodified book3s_64 guest kernels in @@ -171,6 +172,18 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND those buggy L1s which saves the L2 state, at the cost of performance in all nested-capable guest entry/exit. +config KVM_BOOK3S_HV_PMU + tristate "Hypervisor Perf events for KVM Book3s-HV" + depends on KVM_BOOK3S_64_HV + help + Enable Book3s-HV Hypervisor Perf events PMU named 'kvm-hv'. These + Perf events give an overview of hypervisor performance overall + instead of a specific guests. Currently the PMU reports + L0-Hypervisor stats on a kvm-hv enabled PSeries LPAR like: + * Total/Used Guest-Heap + * Total/Used Guest Page-table Memory + * Total amount of Guest Page-table Memory reclaimed + config KVM_BOOKE_HV bool diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 5b7212edbb13..c7e4b62642ea 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -125,8 +125,6 @@ static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr, return (u32*)pteg; } -extern char etext[]; - int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, bool iswrite) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 25429905ae90..7667563fb9ff 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4957,7 +4957,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, * states are synchronized from L0 to L1. L1 needs to inform L0 about * MER=1 only when there are pending external interrupts. * In the above if check, MER bit is set if there are pending - * external interrupts. Hence, explicity mask off MER bit + * external interrupts. Hence, explicitly mask off MER bit * here as otherwise it may generate spurious interrupts in L2 KVM * causing an endless loop, which results in L2 guest getting hung. */ @@ -6041,7 +6041,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) * the underlying calls, which will EOI the interrupt in real * mode, need an HW IRQ number mapped in the XICS IRQ domain. */ - host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq); + host_data = irq_domain_get_irq_data(irq_get_default_domain(), host_irq); irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data); if (i == pimap->n_mapped) @@ -6541,10 +6541,6 @@ static struct kvmppc_ops kvm_ops_hv = { .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, .hcall_implemented = kvmppc_hcall_impl_hv, -#ifdef CONFIG_KVM_XICS - .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, - .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, -#endif .configure_mmu = kvmhv_configure_mmu, .get_rmmu_info = kvmhv_get_rmmu_info, .set_smt_mode = kvmhv_set_smt_mode, @@ -6662,6 +6658,22 @@ static int kvmppc_book3s_init_hv(void) return r; } +#if defined(CONFIG_KVM_XICS) + /* + * IRQ bypass is supported only for interrupts whose EOI operations are + * handled via OPAL calls. Therefore, register IRQ bypass handlers + * exclusively for PowerNV KVM when booted with 'xive=off', indicating + * the use of the emulated XICS interrupt controller. + */ + if (!kvmhv_on_pseries()) { + pr_info("KVM-HV: Enabling IRQ bypass\n"); + kvm_ops_hv.irq_bypass_add_producer = + kvmppc_irq_bypass_add_producer_hv; + kvm_ops_hv.irq_bypass_del_producer = + kvmppc_irq_bypass_del_producer_hv; + } +#endif + kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index e5c7ce1fb761..87691cf86cae 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -123,6 +123,12 @@ static size_t gs_msg_ops_vcpu_get_size(struct kvmppc_gs_msg *gsm) case KVMPPC_GSID_PROCESS_TABLE: case KVMPPC_GSID_RUN_INPUT: case KVMPPC_GSID_RUN_OUTPUT: + /* Host wide counters */ + case KVMPPC_GSID_L0_GUEST_HEAP: + case KVMPPC_GSID_L0_GUEST_HEAP_MAX: + case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE: + case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX: + case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM: break; default: size += kvmppc_gse_total_size(kvmppc_gsid_size(iden)); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index ea7ad200b330..83f7504349d2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1524,14 +1524,12 @@ kvm_flush_link_stack: /* Flush the link stack. On Power8 it's up to 32 entries in size. */ .rept 32 - ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr /* And on Power9 it's up to 64. */ BEGIN_FTR_SECTION .rept 32 - ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 3a6592a31a10..03f8c34fa0a2 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -393,7 +393,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm, { unsigned long gfn = memslot->base_gfn; unsigned long end, start = gfn_to_hva(kvm, gfn); - unsigned long vm_flags; + vm_flags_t vm_flags; int ret = 0; struct vm_area_struct *vma; int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE; diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 1362c672387e..1302b5ac5672 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1555,7 +1555,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; struct irq_data *host_data = - irq_domain_get_irq_data(irq_get_default_host(), host_irq); + irq_domain_get_irq_data(irq_get_default_domain(), host_irq); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data); u16 idx; u8 prio; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 6a5be025a8af..3401b96be475 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -572,7 +572,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, /* * Return the number of jiffies until the next timeout. If the timeout is - * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA + * longer than the TIMER_NEXT_MAX_DELTA, then return TIMER_NEXT_MAX_DELTA * because the larger value can break the timer APIs. */ static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu) @@ -598,7 +598,7 @@ static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu) if (do_div(nr_jiffies, tb_ticks_per_jiffy)) nr_jiffies++; - return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA); + return min_t(unsigned long long, nr_jiffies, TIMER_NEXT_MAX_DELTA); } static void arm_next_watchdog(struct kvm_vcpu *vcpu) @@ -616,19 +616,19 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu) spin_lock_irqsave(&vcpu->arch.wdt_lock, flags); nr_jiffies = watchdog_next_timeout(vcpu); /* - * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA + * If the number of jiffies of watchdog timer >= TIMER_NEXT_MAX_DELTA * then do not run the watchdog timer as this can break timer APIs. */ - if (nr_jiffies < NEXT_TIMER_MAX_DELTA) + if (nr_jiffies < TIMER_NEXT_MAX_DELTA) mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies); else - del_timer(&vcpu->arch.wdt_timer); + timer_delete(&vcpu->arch.wdt_timer); spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags); } static void kvmppc_watchdog_func(struct timer_list *t) { - struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.wdt_timer); + struct kvm_vcpu *vcpu = timer_container_of(vcpu, t, arch.wdt_timer); u32 tsr, new_tsr; int final; @@ -1441,7 +1441,7 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) { - del_timer_sync(&vcpu->arch.wdt_timer); + timer_delete_sync(&vcpu->arch.wdt_timer); } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 6d0d329cbb35..f9acf866c709 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -34,6 +34,8 @@ enum vcpu_ftr { #define E500_TLB_BITMAP (1 << 30) /* TLB1 entry is mapped by host TLB0 */ #define E500_TLB_TLB0 (1 << 29) +/* entry is writable on the host */ +#define E500_TLB_WRITABLE (1 << 28) /* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */ #define E500_TLB_MAS2_ATTR (0x7f) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index e5a145b578a4..06caf8bbbe2b 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -45,11 +45,14 @@ static inline unsigned int tlb1_max_shadow_size(void) return host_tlb_params[1].entries - tlbcam_index - 1; } -static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) +static inline u32 e500_shadow_mas3_attrib(u32 mas3, bool writable, int usermode) { /* Mask off reserved bits. */ mas3 &= MAS3_ATTRIB_MASK; + if (!writable) + mas3 &= ~(MAS3_UW|MAS3_SW); + #ifndef CONFIG_KVM_BOOKE_HV if (!usermode) { /* Guest is in supervisor mode, @@ -242,17 +245,18 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); } -static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref, +static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, struct kvm_book3e_206_tlb_entry *gtlbe, - kvm_pfn_t pfn, unsigned int wimg) + kvm_pfn_t pfn, unsigned int wimg, + bool writable) { ref->pfn = pfn; ref->flags = E500_TLB_VALID; + if (writable) + ref->flags |= E500_TLB_WRITABLE; /* Use guest supplied MAS2_G and MAS2_E */ ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg; - - return tlbe_is_writable(gtlbe); } static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) @@ -305,6 +309,7 @@ static void kvmppc_e500_setup_stlbe( { kvm_pfn_t pfn = ref->pfn; u32 pr = vcpu->arch.shared->msr & MSR_PR; + bool writable = !!(ref->flags & E500_TLB_WRITABLE); BUG_ON(!(ref->flags & E500_TLB_VALID)); @@ -312,7 +317,7 @@ static void kvmppc_e500_setup_stlbe( stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | - e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); + e500_shadow_mas3_attrib(gtlbe->mas7_3, writable, pr); } static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -321,15 +326,14 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, struct tlbe_ref *ref) { struct kvm_memory_slot *slot; - unsigned long pfn = 0; /* silence GCC warning */ + unsigned int psize; + unsigned long pfn; struct page *page = NULL; unsigned long hva; - int pfnmap = 0; int tsize = BOOK3E_PAGESZ_4K; int ret = 0; unsigned long mmu_seq; struct kvm *kvm = vcpu_e500->vcpu.kvm; - unsigned long tsize_pages = 0; pte_t *ptep; unsigned int wimg = 0; pgd_t *pgdir; @@ -351,110 +355,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); hva = gfn_to_hva_memslot(slot, gfn); - if (tlbsel == 1) { - struct vm_area_struct *vma; - mmap_read_lock(kvm->mm); - - vma = find_vma(kvm->mm, hva); - if (vma && hva >= vma->vm_start && - (vma->vm_flags & VM_PFNMAP)) { - /* - * This VMA is a physically contiguous region (e.g. - * /dev/mem) that bypasses normal Linux page - * management. Find the overlap between the - * vma and the memslot. - */ - - unsigned long start, end; - unsigned long slot_start, slot_end; - - pfnmap = 1; - - start = vma->vm_pgoff; - end = start + - vma_pages(vma); - - pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); - - slot_start = pfn - (gfn - slot->base_gfn); - slot_end = slot_start + slot->npages; - - if (start < slot_start) - start = slot_start; - if (end > slot_end) - end = slot_end; - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - - /* - * Now find the largest tsize (up to what the guest - * requested) that will cover gfn, stay within the - * range, and for which gfn and pfn are mutually - * aligned. - */ - - for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { - unsigned long gfn_start, gfn_end; - tsize_pages = 1UL << (tsize - 2); - - gfn_start = gfn & ~(tsize_pages - 1); - gfn_end = gfn_start + tsize_pages; - - if (gfn_start + pfn - gfn < start) - continue; - if (gfn_end + pfn - gfn > end) - continue; - if ((gfn & (tsize_pages - 1)) != - (pfn & (tsize_pages - 1))) - continue; - - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); - pfn &= ~(tsize_pages - 1); - break; - } - } else if (vma && hva >= vma->vm_start && - is_vm_hugetlb_page(vma)) { - unsigned long psize = vma_kernel_pagesize(vma); - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * Take the largest page size that satisfies both host - * and guest mapping - */ - tsize = min(__ilog2(psize) - 10, tsize); - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - } - - mmap_read_unlock(kvm->mm); - } - - if (likely(!pfnmap)) { - tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT); - pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, NULL, &page); - if (is_error_noslot_pfn(pfn)) { - if (printk_ratelimit()) - pr_err("%s: real page not found for gfn %lx\n", - __func__, (long)gfn); - return -EINVAL; - } - - /* Align guest and physical address to page map boundaries */ - pfn &= ~(tsize_pages - 1); - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page); + if (is_error_noslot_pfn(pfn)) { + if (printk_ratelimit()) + pr_err("%s: real page not found for gfn %lx\n", + __func__, (long)gfn); + return -EINVAL; } spin_lock(&kvm->mmu_lock); @@ -472,14 +378,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, * can't run hence pfn won't change. */ local_irq_save(flags); - ptep = find_linux_pte(pgdir, hva, NULL, NULL); + ptep = find_linux_pte(pgdir, hva, NULL, &psize); if (ptep) { pte_t pte = READ_ONCE(*ptep); if (pte_present(pte)) { wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; - local_irq_restore(flags); } else { local_irq_restore(flags); pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n", @@ -488,10 +393,72 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, goto out; } } - writable = kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); + local_irq_restore(flags); + + if (psize && tlbsel == 1) { + unsigned long psize_pages, tsize_pages; + unsigned long start, end; + unsigned long slot_start, slot_end; + + psize_pages = 1UL << (psize - PAGE_SHIFT); + start = pfn & ~(psize_pages - 1); + end = start + psize_pages; + + slot_start = pfn - (gfn - slot->base_gfn); + slot_end = slot_start + slot->npages; + + if (start < slot_start) + start = slot_start; + if (end > slot_end) + end = slot_end; + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * Any page size that doesn't satisfy the host mapping + * will fail the start and end tests. + */ + tsize = min(psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K, tsize); + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + + /* + * Now find the largest tsize (up to what the guest + * requested) that will cover gfn, stay within the + * range, and for which gfn and pfn are mutually + * aligned. + */ + + for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { + unsigned long gfn_start, gfn_end; + tsize_pages = 1UL << (tsize - 2); + + gfn_start = gfn & ~(tsize_pages - 1); + gfn_end = gfn_start + tsize_pages; + + if (gfn_start + pfn - gfn < start) + continue; + if (gfn_end + pfn - gfn > end) + continue; + if ((gfn & (tsize_pages - 1)) != + (pfn & (tsize_pages - 1))) + continue; + + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn &= ~(tsize_pages - 1); + break; + } + } + kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, ref, gvaddr, stlbe); + writable = tlbe_is_writable(stlbe); /* Clear i-cache for new pages */ kvmppc_mmu_flush_icache(pfn); diff --git a/arch/powerpc/kvm/guest-state-buffer.c b/arch/powerpc/kvm/guest-state-buffer.c index b80dbc58621f..871cf60ddeb6 100644 --- a/arch/powerpc/kvm/guest-state-buffer.c +++ b/arch/powerpc/kvm/guest-state-buffer.c @@ -92,6 +92,10 @@ static int kvmppc_gsid_class(u16 iden) (iden <= KVMPPC_GSE_GUESTWIDE_END)) return KVMPPC_GS_CLASS_GUESTWIDE; + if ((iden >= KVMPPC_GSE_HOSTWIDE_START) && + (iden <= KVMPPC_GSE_HOSTWIDE_END)) + return KVMPPC_GS_CLASS_HOSTWIDE; + if ((iden >= KVMPPC_GSE_META_START) && (iden <= KVMPPC_GSE_META_END)) return KVMPPC_GS_CLASS_META; @@ -118,6 +122,21 @@ static int kvmppc_gsid_type(u16 iden) int type = -1; switch (kvmppc_gsid_class(iden)) { + case KVMPPC_GS_CLASS_HOSTWIDE: + switch (iden) { + case KVMPPC_GSID_L0_GUEST_HEAP: + fallthrough; + case KVMPPC_GSID_L0_GUEST_HEAP_MAX: + fallthrough; + case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE: + fallthrough; + case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX: + fallthrough; + case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM: + type = KVMPPC_GSE_BE64; + break; + } + break; case KVMPPC_GS_CLASS_GUESTWIDE: switch (iden) { case KVMPPC_GSID_HOST_STATE_SIZE: @@ -187,6 +206,9 @@ unsigned long kvmppc_gsid_flags(u16 iden) case KVMPPC_GS_CLASS_GUESTWIDE: flags = KVMPPC_GS_FLAGS_WIDE; break; + case KVMPPC_GS_CLASS_HOSTWIDE: + flags = KVMPPC_GS_FLAGS_HOST_WIDE; + break; case KVMPPC_GS_CLASS_META: case KVMPPC_GS_CLASS_DWORD_REG: case KVMPPC_GS_CLASS_WORD_REG: @@ -310,6 +332,13 @@ static inline int kvmppc_gse_flatten_iden(u16 iden) bit += KVMPPC_GSE_GUESTWIDE_COUNT; + if (class == KVMPPC_GS_CLASS_HOSTWIDE) { + bit += iden - KVMPPC_GSE_HOSTWIDE_START; + return bit; + } + + bit += KVMPPC_GSE_HOSTWIDE_COUNT; + if (class == KVMPPC_GS_CLASS_META) { bit += iden - KVMPPC_GSE_META_START; return bit; @@ -356,6 +385,12 @@ static inline u16 kvmppc_gse_unflatten_iden(int bit) } bit -= KVMPPC_GSE_GUESTWIDE_COUNT; + if (bit < KVMPPC_GSE_HOSTWIDE_COUNT) { + iden = KVMPPC_GSE_HOSTWIDE_START + bit; + return iden; + } + bit -= KVMPPC_GSE_HOSTWIDE_COUNT; + if (bit < KVMPPC_GSE_META_COUNT) { iden = KVMPPC_GSE_META_START + bit; return iden; @@ -588,6 +623,8 @@ int kvmppc_gsb_send(struct kvmppc_gs_buff *gsb, unsigned long flags) if (flags & KVMPPC_GS_FLAGS_WIDE) hflags |= H_GUEST_FLAGS_WIDE; + if (flags & KVMPPC_GS_FLAGS_HOST_WIDE) + hflags |= H_GUEST_FLAGS_HOST_WIDE; rc = plpar_guest_set_state(hflags, gsb->guest_id, gsb->vcpu_id, __pa(gsb->hdr), gsb->capacity, &i); @@ -613,6 +650,8 @@ int kvmppc_gsb_recv(struct kvmppc_gs_buff *gsb, unsigned long flags) if (flags & KVMPPC_GS_FLAGS_WIDE) hflags |= H_GUEST_FLAGS_WIDE; + if (flags & KVMPPC_GS_FLAGS_HOST_WIDE) + hflags |= H_GUEST_FLAGS_HOST_WIDE; rc = plpar_guest_get_state(hflags, gsb->guest_id, gsb->vcpu_id, __pa(gsb->hdr), gsb->capacity, &i); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ce1d91eed231..153587741864 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -550,12 +550,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: + fallthrough; case KVM_CAP_SPAPR_TCE_64: - r = 1; - break; case KVM_CAP_SPAPR_TCE_VFIO: - r = !!cpu_has_feature(CPU_FTR_HVMODE); - break; case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: @@ -766,8 +763,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int err; - hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; + hrtimer_setup(&vcpu->arch.dec_timer, kvmppc_decrementer_wakeup, CLOCK_REALTIME, + HRTIMER_MODE_ABS); #ifdef CONFIG_KVM_EXIT_TIMING mutex_init(&vcpu->arch.exit_timing_lock); diff --git a/arch/powerpc/kvm/test-guest-state-buffer.c b/arch/powerpc/kvm/test-guest-state-buffer.c index bfd225329a18..5ccca306997a 100644 --- a/arch/powerpc/kvm/test-guest-state-buffer.c +++ b/arch/powerpc/kvm/test-guest-state-buffer.c @@ -5,6 +5,7 @@ #include <kunit/test.h> #include <asm/guest-state-buffer.h> +#include <asm/kvm_ppc.h> static void test_creating_buffer(struct kunit *test) { @@ -141,6 +142,16 @@ static void test_gs_bitmap(struct kunit *test) i++; } + for (u16 iden = KVMPPC_GSID_L0_GUEST_HEAP; + iden <= KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM; iden++) { + kvmppc_gsbm_set(&gsbm, iden); + kvmppc_gsbm_set(&gsbm1, iden); + KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden)); + kvmppc_gsbm_clear(&gsbm, iden); + KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden)); + i++; + } + for (u16 iden = KVMPPC_GSID_RUN_INPUT; iden <= KVMPPC_GSID_VPA; iden++) { kvmppc_gsbm_set(&gsbm, iden); @@ -309,12 +320,215 @@ static void test_gs_msg(struct kunit *test) kvmppc_gsm_free(gsm); } +/* Test data struct for hostwide/L0 counters */ +struct kvmppc_gs_msg_test_hostwide_data { + u64 guest_heap; + u64 guest_heap_max; + u64 guest_pgtable_size; + u64 guest_pgtable_size_max; + u64 guest_pgtable_reclaim; +}; + +static size_t test_hostwide_get_size(struct kvmppc_gs_msg *gsm) + +{ + size_t size = 0; + u16 ids[] = { + KVMPPC_GSID_L0_GUEST_HEAP, + KVMPPC_GSID_L0_GUEST_HEAP_MAX, + KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, + KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX, + KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM + }; + + for (int i = 0; i < ARRAY_SIZE(ids); i++) + size += kvmppc_gse_total_size(kvmppc_gsid_size(ids[i])); + return size; +} + +static int test_hostwide_fill_info(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm) +{ + struct kvmppc_gs_msg_test_hostwide_data *data = gsm->data; + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_HEAP, + data->guest_heap); + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_HEAP_MAX, + data->guest_heap_max); + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, + data->guest_pgtable_size); + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX, + data->guest_pgtable_size_max); + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM, + data->guest_pgtable_reclaim); + + return 0; +} + +static int test_hostwide_refresh_info(struct kvmppc_gs_msg *gsm, + struct kvmppc_gs_buff *gsb) +{ + struct kvmppc_gs_parser gsp = { 0 }; + struct kvmppc_gs_msg_test_hostwide_data *data = gsm->data; + struct kvmppc_gs_elem *gse; + int rc; + + rc = kvmppc_gse_parse(&gsp, gsb); + if (rc < 0) + return rc; + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP); + if (gse) + data->guest_heap = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + if (gse) + data->guest_heap_max = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + if (gse) + data->guest_pgtable_size = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + if (gse) + data->guest_pgtable_size_max = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + if (gse) + data->guest_pgtable_reclaim = kvmppc_gse_get_u64(gse); + + return 0; +} + +static struct kvmppc_gs_msg_ops gs_msg_test_hostwide_ops = { + .get_size = test_hostwide_get_size, + .fill_info = test_hostwide_fill_info, + .refresh_info = test_hostwide_refresh_info, +}; + +static void test_gs_hostwide_msg(struct kunit *test) +{ + struct kvmppc_gs_msg_test_hostwide_data test_data = { + .guest_heap = 0xdeadbeef, + .guest_heap_max = ~0ULL, + .guest_pgtable_size = 0xff, + .guest_pgtable_size_max = 0xffffff, + .guest_pgtable_reclaim = 0xdeadbeef, + }; + struct kvmppc_gs_msg *gsm; + struct kvmppc_gs_buff *gsb; + + gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND, + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm); + + gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP); + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + + kvmppc_gsm_fill_info(gsm, gsb); + + memset(&test_data, 0, sizeof(test_data)); + + kvmppc_gsm_refresh_info(gsm, gsb); + KUNIT_EXPECT_EQ(test, test_data.guest_heap, 0xdeadbeef); + KUNIT_EXPECT_EQ(test, test_data.guest_heap_max, ~0ULL); + KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size, 0xff); + KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size_max, 0xffffff); + KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_reclaim, 0xdeadbeef); + + kvmppc_gsm_free(gsm); +} + +/* Test if the H_GUEST_GET_STATE for hostwide counters works */ +static void test_gs_hostwide_counters(struct kunit *test) +{ + struct kvmppc_gs_msg_test_hostwide_data test_data; + struct kvmppc_gs_parser gsp = { 0 }; + + struct kvmppc_gs_msg *gsm; + struct kvmppc_gs_buff *gsb; + struct kvmppc_gs_elem *gse; + int rc; + + if (!kvmhv_on_pseries()) + kunit_skip(test, "This test need a kmv-hv guest"); + + gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND, + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm); + + gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + + kvmppc_gsm_fill_info(gsm, gsb); + + /* With HOST_WIDE flags guestid and vcpuid will be ignored */ + rc = kvmppc_gsb_recv(gsb, KVMPPC_GS_FLAGS_HOST_WIDE); + KUNIT_ASSERT_EQ(test, rc, 0); + + /* Parse the guest state buffer is successful */ + rc = kvmppc_gse_parse(&gsp, gsb); + KUNIT_ASSERT_EQ(test, rc, 0); + + /* Parse the GSB and get the counters */ + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP); + KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter missing"); + kunit_info(test, "Guest Heap Size=%llu bytes", + kvmppc_gse_get_u64(gse)); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter max missing"); + kunit_info(test, "Guest Heap Size Max=%llu bytes", + kvmppc_gse_get_u64(gse)); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size missing"); + kunit_info(test, "Guest Page-table Size=%llu bytes", + kvmppc_gse_get_u64(gse)); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size-max missing"); + kunit_info(test, "Guest Page-table Size Max=%llu bytes", + kvmppc_gse_get_u64(gse)); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table reclaim size missing"); + kunit_info(test, "Guest Page-table Reclaim Size=%llu bytes", + kvmppc_gse_get_u64(gse)); + + kvmppc_gsm_free(gsm); + kvmppc_gsb_free(gsb); +} + static struct kunit_case guest_state_buffer_testcases[] = { KUNIT_CASE(test_creating_buffer), KUNIT_CASE(test_adding_element), KUNIT_CASE(test_gs_bitmap), KUNIT_CASE(test_gs_parsing), KUNIT_CASE(test_gs_msg), + KUNIT_CASE(test_gs_hostwide_msg), + KUNIT_CASE(test_gs_hostwide_counters), {} }; diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index 45817ab82bb4..14b0e23f601f 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -38,11 +38,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) { /* type has to be known at build time for optimization */ - - /* The BUILD_BUG_ON below breaks in funny ways, commented out - * for now ... -BenH BUILD_BUG_ON(!__builtin_constant_p(type)); - */ switch (type) { case EXT_INTR_EXITS: vcpu->stat.ext_intr_exits++; diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h index 372a82fa2de3..9260ddbd557f 100644 --- a/arch/powerpc/kvm/trace_book3s.h +++ b/arch/powerpc/kvm/trace_book3s.h @@ -25,6 +25,7 @@ {0xe00, "H_DATA_STORAGE"}, \ {0xe20, "H_INST_STORAGE"}, \ {0xe40, "H_EMUL_ASSIST"}, \ + {0xea0, "H_VIRT"}, \ {0xf00, "PERFMON"}, \ {0xf20, "ALTIVEC"}, \ {0xf40, "VSX"} |