diff options
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv_rm_mmu.c')
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 111 |
1 files changed, 56 insertions, 55 deletions
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 625407e4d3b0..b027a89737b6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -26,11 +26,14 @@ static void *real_vmalloc_addr(void *x) { unsigned long addr = (unsigned long) x; pte_t *p; - - p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL); + /* + * assume we don't have huge pages in vmalloc space... + * So don't worry about THP collapse/split. Called + * Only in realmode, hence won't need irq_save/restore. + */ + p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL); if (!p || !pte_present(*p)) return NULL; - /* assume we don't have huge pages in vmalloc space... */ addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); return __va(addr); } @@ -131,31 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, unlock_rmap(rmap); } -static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva, - int writing, unsigned long *pte_sizep) -{ - pte_t *ptep; - unsigned long ps = *pte_sizep; - unsigned int hugepage_shift; - - ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift); - if (!ptep) - return __pte(0); - if (hugepage_shift) - *pte_sizep = 1ul << hugepage_shift; - else - *pte_sizep = PAGE_SIZE; - if (ps > *pte_sizep) - return __pte(0); - return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift); -} - -static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v) -{ - asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hpte[0] = cpu_to_be64(hpte_v); -} - long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret) @@ -166,13 +144,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; - unsigned long pte_size; + unsigned hpage_shift; unsigned long is_io; unsigned long *rmap; - pte_t pte; + pte_t *ptep; unsigned int writing; unsigned long mmu_seq; - unsigned long rcbits; + unsigned long rcbits, irq_flags = 0; psize = hpte_page_size(pteh, ptel); if (!psize) @@ -208,22 +186,46 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* Translate to host virtual address */ hva = __gfn_to_hva_memslot(memslot, gfn); - - /* Look up the Linux PTE for the backing page */ - pte_size = psize; - pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); - if (pte_present(pte) && !pte_protnone(pte)) { - if (writing && !pte_write(pte)) - /* make the actual HPTE be read-only */ - ptel = hpte_make_readonly(ptel); - is_io = hpte_cache_bits(pte_val(pte)); - pa = pte_pfn(pte) << PAGE_SHIFT; - pa |= hva & (pte_size - 1); - pa |= gpa & ~PAGE_MASK; + /* + * If we had a page table table change after lookup, we would + * retry via mmu_notifier_retry. + */ + if (realmode) + ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift); + else { + local_irq_save(irq_flags); + ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift); } + if (ptep) { + pte_t pte; + unsigned int host_pte_size; - if (pte_size < psize) - return H_PARAMETER; + if (hpage_shift) + host_pte_size = 1ul << hpage_shift; + else + host_pte_size = PAGE_SIZE; + /* + * We should always find the guest page size + * to <= host page size, if host is using hugepage + */ + if (host_pte_size < psize) { + if (!realmode) + local_irq_restore(flags); + return H_PARAMETER; + } + pte = kvmppc_read_update_linux_pte(ptep, writing); + if (pte_present(pte) && !pte_protnone(pte)) { + if (writing && !pte_write(pte)) + /* make the actual HPTE be read-only */ + ptel = hpte_make_readonly(ptel); + is_io = hpte_cache_bits(pte_val(pte)); + pa = pte_pfn(pte) << PAGE_SHIFT; + pa |= hva & (host_pte_size - 1); + pa |= gpa & ~PAGE_MASK; + } + } + if (!realmode) + local_irq_restore(irq_flags); ptel &= ~(HPTE_R_PP0 - psize); ptel |= pa; @@ -271,10 +273,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, u64 pte; while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - pte = be64_to_cpu(*hpte); + pte = be64_to_cpu(hpte[0]); if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) break; - *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); + __unlock_hpte(hpte, pte); hpte += 2; } if (i == 8) @@ -290,9 +292,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - pte = be64_to_cpu(*hpte); + pte = be64_to_cpu(hpte[0]); if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { - *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); + __unlock_hpte(hpte, pte); return H_PTEG_FULL; } } @@ -331,7 +333,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* Write the first HPTE dword, unlocking the HPTE and making it valid */ eieio(); - hpte[0] = cpu_to_be64(pteh); + __unlock_hpte(hpte, pteh); asm volatile("ptesync" : : : "memory"); *pte_idx_ret = pte_index; @@ -412,7 +414,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || ((flags & H_ANDCOND) && (pte & avpn) != 0)) { - hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); + __unlock_hpte(hpte, pte); return H_NOT_FOUND; } @@ -548,7 +550,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); - hp[0] = 0; + __unlock_hpte(hp, 0); } } @@ -574,7 +576,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, pte = be64_to_cpu(hpte[0]); if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { - hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); + __unlock_hpte(hpte, pte); return H_NOT_FOUND; } @@ -755,8 +757,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); - /* Unlock and move on */ - hpte[i] = cpu_to_be64(v); + __unlock_hpte(&hpte[i], v); } if (val & HPTE_V_SECONDARY) |