diff options
Diffstat (limited to 'arch/arm64/kvm/mmu.c')
-rw-r--r-- | arch/arm64/kvm/mmu.c | 311 |
1 files changed, 178 insertions, 133 deletions
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7a7ddc4558a7..0121ef2c7c8d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -55,12 +55,13 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) */ void kvm_flush_remote_tlbs(struct kvm *kvm) { - kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); + kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); } -static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, + int level) { - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, level); } /* @@ -90,74 +91,80 @@ static bool kvm_is_device_pfn(unsigned long pfn) /** * stage2_dissolve_pmd() - clear and flush huge PMD entry - * @kvm: pointer to kvm structure. + * @mmu: pointer to mmu structure to operate on * @addr: IPA * @pmd: pmd pointer for IPA * * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. */ -static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) +static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t *pmd) { if (!pmd_thp_or_huge(*pmd)) return; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); put_page(virt_to_page(pmd)); } /** * stage2_dissolve_pud() - clear and flush huge PUD entry - * @kvm: pointer to kvm structure. + * @mmu: pointer to mmu structure to operate on * @addr: IPA * @pud: pud pointer for IPA * * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. */ -static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) +static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t *pudp) { + struct kvm *kvm = mmu->kvm; + if (!stage2_pud_huge(kvm, *pudp)) return; stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); put_page(virt_to_page(pudp)); } -static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL); stage2_pgd_clear(kvm, pgd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_p4d_free(kvm, p4d_table); put_page(virt_to_page(pgd)); } -static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr) +static void clear_stage2_p4d_entry(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0); stage2_p4d_clear(kvm, p4d); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_pud_free(kvm, pud_table); put_page(virt_to_page(p4d)); } -static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); + VM_BUG_ON(stage2_pud_huge(kvm, *pud)); stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_pmd_free(kvm, pmd_table); put_page(virt_to_page(pud)); } -static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr) { pte_t *pte_table = pte_offset_kernel(pmd, 0); VM_BUG_ON(pmd_thp_or_huge(*pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); free_page((unsigned long)pte_table); put_page(virt_to_page(pmd)); } @@ -223,7 +230,7 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp) * we then fully enforce cacheability of RAM, no matter what the guest * does. */ -static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, +static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { phys_addr_t start_addr = addr; @@ -235,7 +242,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, pte_t old_pte = *pte; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); /* No need to invalidate the cache for device mappings */ if (!kvm_is_device_pfn(pte_pfn(old_pte))) @@ -245,13 +252,14 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (stage2_pte_table_empty(kvm, start_pte)) - clear_stage2_pmd_entry(kvm, pmd, start_addr); + if (stage2_pte_table_empty(mmu->kvm, start_pte)) + clear_stage2_pmd_entry(mmu, pmd, start_addr); } -static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, +static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; pmd_t *pmd, *start_pmd; @@ -263,24 +271,25 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, pmd_t old_pmd = *pmd; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); kvm_flush_dcache_pmd(old_pmd); put_page(virt_to_page(pmd)); } else { - unmap_stage2_ptes(kvm, pmd, addr, next); + unmap_stage2_ptes(mmu, pmd, addr, next); } } } while (pmd++, addr = next, addr != end); if (stage2_pmd_table_empty(kvm, start_pmd)) - clear_stage2_pud_entry(kvm, pud, start_addr); + clear_stage2_pud_entry(mmu, pud, start_addr); } -static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d, +static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; pud_t *pud, *start_pud; @@ -292,22 +301,23 @@ static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d, pud_t old_pud = *pud; stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); kvm_flush_dcache_pud(old_pud); put_page(virt_to_page(pud)); } else { - unmap_stage2_pmds(kvm, pud, addr, next); + unmap_stage2_pmds(mmu, pud, addr, next); } } } while (pud++, addr = next, addr != end); if (stage2_pud_table_empty(kvm, start_pud)) - clear_stage2_p4d_entry(kvm, p4d, start_addr); + clear_stage2_p4d_entry(mmu, p4d, start_addr); } -static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, +static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; p4d_t *p4d, *start_p4d; @@ -315,11 +325,11 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - unmap_stage2_puds(kvm, p4d, addr, next); + unmap_stage2_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); if (stage2_p4d_table_empty(kvm, start_p4d)) - clear_stage2_pgd_entry(kvm, pgd, start_addr); + clear_stage2_pgd_entry(mmu, pgd, start_addr); } /** @@ -333,8 +343,9 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, * destroying the VM), otherwise another faulting VCPU may come in and mess * with things behind our backs. */ -static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) +static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; phys_addr_t addr = start, end = start + size; phys_addr_t next; @@ -342,18 +353,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) assert_spin_locked(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { /* * Make sure the page table is still active, as another thread * could have possibly freed the page table, while we released * the lock. */ - if (!READ_ONCE(kvm->arch.pgd)) + if (!READ_ONCE(mmu->pgd)) break; next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) - unmap_stage2_p4ds(kvm, pgd, addr, next); + unmap_stage2_p4ds(mmu, pgd, addr, next); /* * If the range is too large, release the kvm->mmu_lock * to prevent starvation and lockup detector warnings. @@ -363,7 +374,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) } while (pgd++, addr = next, addr != end); } -static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, +static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { pte_t *pte; @@ -375,9 +386,10 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, +static void stage2_flush_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd; phys_addr_t next; @@ -388,14 +400,15 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); else - stage2_flush_ptes(kvm, pmd, addr, next); + stage2_flush_ptes(mmu, pmd, addr, next); } } while (pmd++, addr = next, addr != end); } -static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d, +static void stage2_flush_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pud_t *pud; phys_addr_t next; @@ -406,14 +419,15 @@ static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d, if (stage2_pud_huge(kvm, *pud)) kvm_flush_dcache_pud(*pud); else - stage2_flush_pmds(kvm, pud, addr, next); + stage2_flush_pmds(mmu, pud, addr, next); } } while (pud++, addr = next, addr != end); } -static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd, +static void stage2_flush_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; phys_addr_t next; @@ -421,23 +435,24 @@ static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - stage2_flush_puds(kvm, p4d, addr, next); + stage2_flush_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); } static void stage2_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = addr + PAGE_SIZE * memslot->npages; phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) - stage2_flush_p4ds(kvm, pgd, addr, next); + stage2_flush_p4ds(mmu, pgd, addr, next); if (next != end) cond_resched_lock(&kvm->mmu_lock); @@ -964,21 +979,23 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, } /** - * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. - * @kvm: The KVM struct pointer for the VM. + * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure + * @kvm: The pointer to the KVM structure + * @mmu: The pointer to the s2 MMU structure * * Allocates only the stage-2 HW PGD level table(s) of size defined by - * stage2_pgd_size(kvm). + * stage2_pgd_size(mmu->kvm). * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ -int kvm_alloc_stage2_pgd(struct kvm *kvm) +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) { phys_addr_t pgd_phys; pgd_t *pgd; + int cpu; - if (kvm->arch.pgd != NULL) { + if (mmu->pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } @@ -992,8 +1009,20 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) return -EINVAL; - kvm->arch.pgd = pgd; - kvm->arch.pgd_phys = pgd_phys; + mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); + if (!mmu->last_vcpu_ran) { + free_pages_exact(pgd, stage2_pgd_size(kvm)); + return -ENOMEM; + } + + for_each_possible_cpu(cpu) + *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1; + + mmu->kvm = kvm; + mmu->pgd = pgd; + mmu->pgd_phys = pgd_phys; + mmu->vmid.vmid_gen = 0; + return 0; } @@ -1032,7 +1061,7 @@ static void stage2_unmap_memslot(struct kvm *kvm, if (!(vma->vm_flags & VM_PFNMAP)) { gpa_t gpa = addr + (vm_start - memslot->userspace_addr); - unmap_stage2_range(kvm, gpa, vm_end - vm_start); + unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start); } hva = vm_end; } while (hva < reg_end); @@ -1064,39 +1093,34 @@ void stage2_unmap_vm(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } -/** - * kvm_free_stage2_pgd - free all stage-2 tables - * @kvm: The KVM struct pointer for the VM. - * - * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all - * underlying level-2 and level-3 tables before freeing the actual level-1 table - * and setting the struct pointer to NULL. - */ -void kvm_free_stage2_pgd(struct kvm *kvm) +void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) { + struct kvm *kvm = mmu->kvm; void *pgd = NULL; spin_lock(&kvm->mmu_lock); - if (kvm->arch.pgd) { - unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); - pgd = READ_ONCE(kvm->arch.pgd); - kvm->arch.pgd = NULL; - kvm->arch.pgd_phys = 0; + if (mmu->pgd) { + unmap_stage2_range(mmu, 0, kvm_phys_size(kvm)); + pgd = READ_ONCE(mmu->pgd); + mmu->pgd = NULL; } spin_unlock(&kvm->mmu_lock); /* Free the HW pgd, one page at a time */ - if (pgd) + if (pgd) { free_pages_exact(pgd, stage2_pgd_size(kvm)); + free_percpu(mmu->last_vcpu_ran); + } } -static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static p4d_t *stage2_get_p4d(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; p4d_t *p4d; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); if (stage2_pgd_none(kvm, *pgd)) { if (!cache) return NULL; @@ -1108,13 +1132,14 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_p4d_offset(kvm, pgd, addr); } -static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; pud_t *pud; - p4d = stage2_get_p4d(kvm, cache, addr); + p4d = stage2_get_p4d(mmu, cache, addr); if (stage2_p4d_none(kvm, *p4d)) { if (!cache) return NULL; @@ -1126,13 +1151,14 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_pud_offset(kvm, p4d, addr); } -static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pud_t *pud; pmd_t *pmd; - pud = stage2_get_pud(kvm, cache, addr); + pud = stage2_get_pud(mmu, cache, addr); if (!pud || stage2_pud_huge(kvm, *pud)) return NULL; @@ -1147,13 +1173,14 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_pmd_offset(kvm, pud, addr); } -static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache - *cache, phys_addr_t addr, const pmd_t *new_pmd) +static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pmd_t *new_pmd) { pmd_t *pmd, old_pmd; retry: - pmd = stage2_get_pmd(kvm, cache, addr); + pmd = stage2_get_pmd(mmu, cache, addr); VM_BUG_ON(!pmd); old_pmd = *pmd; @@ -1186,7 +1213,7 @@ retry: * get handled accordingly. */ if (!pmd_thp_or_huge(old_pmd)) { - unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); + unmap_stage2_range(mmu, addr & S2_PMD_MASK, S2_PMD_SIZE); goto retry; } /* @@ -1202,7 +1229,7 @@ retry: */ WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); } else { get_page(virt_to_page(pmd)); } @@ -1211,13 +1238,15 @@ retry: return 0; } -static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static int stage2_set_pud_huge(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pud_t *new_pudp) { + struct kvm *kvm = mmu->kvm; pud_t *pudp, old_pud; retry: - pudp = stage2_get_pud(kvm, cache, addr); + pudp = stage2_get_pud(mmu, cache, addr); VM_BUG_ON(!pudp); old_pud = *pudp; @@ -1236,13 +1265,13 @@ retry: * the range for this block and retry. */ if (!stage2_pud_huge(kvm, old_pud)) { - unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); + unmap_stage2_range(mmu, addr & S2_PUD_MASK, S2_PUD_SIZE); goto retry; } WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); } else { get_page(virt_to_page(pudp)); } @@ -1257,9 +1286,10 @@ retry: * leaf-entry is returned in the appropriate level variable - pudpp, * pmdpp, ptepp. */ -static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, +static bool stage2_get_leaf_entry(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp) { + struct kvm *kvm = mmu->kvm; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -1268,7 +1298,7 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, *pmdpp = NULL; *ptepp = NULL; - pudp = stage2_get_pud(kvm, NULL, addr); + pudp = stage2_get_pud(mmu, NULL, addr); if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp)) return false; @@ -1294,14 +1324,14 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, return true; } -static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz) +static bool stage2_is_exec(struct kvm_s2_mmu *mmu, phys_addr_t addr, unsigned long sz) { pud_t *pudp; pmd_t *pmdp; pte_t *ptep; bool found; - found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep); + found = stage2_get_leaf_entry(mmu, addr, &pudp, &pmdp, &ptep); if (!found) return false; @@ -1313,10 +1343,12 @@ static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz) return sz == PAGE_SIZE && kvm_s2pte_exec(ptep); } -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static int stage2_set_pte(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pte_t *new_pte, unsigned long flags) { + struct kvm *kvm = mmu->kvm; pud_t *pud; pmd_t *pmd; pte_t *pte, old_pte; @@ -1326,7 +1358,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, VM_BUG_ON(logging_active && !cache); /* Create stage-2 page table mapping - Levels 0 and 1 */ - pud = stage2_get_pud(kvm, cache, addr); + pud = stage2_get_pud(mmu, cache, addr); if (!pud) { /* * Ignore calls from kvm_set_spte_hva for unallocated @@ -1340,7 +1372,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, * on to allocate page. */ if (logging_active) - stage2_dissolve_pud(kvm, addr, pud); + stage2_dissolve_pud(mmu, addr, pud); if (stage2_pud_none(kvm, *pud)) { if (!cache) @@ -1364,7 +1396,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, * allocate page. */ if (logging_active) - stage2_dissolve_pmd(kvm, addr, pmd); + stage2_dissolve_pmd(mmu, addr, pmd); /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { @@ -1388,7 +1420,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, return 0; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); } else { get_page(virt_to_page(pte)); } @@ -1453,8 +1485,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (ret) goto out; spin_lock(&kvm->mmu_lock); - ret = stage2_set_pte(kvm, &cache, addr, &pte, - KVM_S2PTE_FLAG_IS_IOMAP); + ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte, + KVM_S2PTE_FLAG_IS_IOMAP); spin_unlock(&kvm->mmu_lock); if (ret) goto out; @@ -1493,9 +1525,10 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) * @addr: range start address * @end: range end address */ -static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, +static void stage2_wp_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd; phys_addr_t next; @@ -1516,13 +1549,14 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, /** * stage2_wp_puds - write protect P4D range - * @pgd: pointer to pgd entry + * @p4d: pointer to p4d entry * @addr: range start address * @end: range end address */ -static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, +static void stage2_wp_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pud_t *pud; phys_addr_t next; @@ -1534,7 +1568,7 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, if (!kvm_s2pud_readonly(pud)) kvm_set_s2pud_readonly(pud); } else { - stage2_wp_pmds(kvm, pud, addr, next); + stage2_wp_pmds(mmu, pud, addr, next); } } } while (pud++, addr = next, addr != end); @@ -1546,9 +1580,10 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, * @addr: range start address * @end: range end address */ -static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, +static void stage2_wp_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; phys_addr_t next; @@ -1556,7 +1591,7 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - stage2_wp_puds(kvm, p4d, addr, next); + stage2_wp_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); } @@ -1566,12 +1601,13 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, * @addr: Start address of range * @end: End address of range */ -static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1583,11 +1619,11 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) * the lock. */ cond_resched_lock(&kvm->mmu_lock); - if (!READ_ONCE(kvm->arch.pgd)) + if (!READ_ONCE(mmu->pgd)) break; next = stage2_pgd_addr_end(kvm, addr, end); if (stage2_pgd_present(kvm, *pgd)) - stage2_wp_p4ds(kvm, pgd, addr, next); + stage2_wp_p4ds(mmu, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -1617,7 +1653,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); - stage2_wp_range(kvm, start, end); + stage2_wp_range(&kvm->arch.mmu, start, end); spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); } @@ -1641,7 +1677,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; - stage2_wp_range(kvm, start, end); + stage2_wp_range(&kvm->arch.mmu, start, end); } /* @@ -1804,6 +1840,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pgprot_t mem_type = PAGE_S2; bool logging_active = memslot_is_logging(memslot); unsigned long vma_pagesize, flags = 0; + struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu; write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_iabt(vcpu); @@ -1925,7 +1962,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ needs_exec = exec_fault || (fault_status == FSC_PERM && - stage2_is_exec(kvm, fault_ipa, vma_pagesize)); + stage2_is_exec(mmu, fault_ipa, vma_pagesize)); if (vma_pagesize == PUD_SIZE) { pud_t new_pud = kvm_pfn_pud(pfn, mem_type); @@ -1937,7 +1974,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pud = kvm_s2pud_mkexec(new_pud); - ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud); + ret = stage2_set_pud_huge(mmu, memcache, fault_ipa, &new_pud); } else if (vma_pagesize == PMD_SIZE) { pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type); @@ -1949,7 +1986,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pmd = kvm_s2pmd_mkexec(new_pmd); - ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = kvm_pfn_pte(pfn, mem_type); @@ -1961,7 +1998,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pte = kvm_s2pte_mkexec(new_pte); - ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); + ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags); } out_unlock: @@ -1990,7 +2027,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) spin_lock(&vcpu->kvm->mmu_lock); - if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(vcpu->arch.hw_mmu, fault_ipa, &pud, &pmd, &pte)) goto out; if (pud) { /* HugeTLB */ @@ -2040,21 +2077,18 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) is_iabt = kvm_vcpu_trap_is_iabt(vcpu); /* Synchronous External Abort? */ - if (kvm_vcpu_dabt_isextabt(vcpu)) { + if (kvm_vcpu_abt_issea(vcpu)) { /* * For RAS the host kernel may handle this abort. * There is no need to pass the error into the guest. */ - if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) - return 1; - - if (unlikely(!is_iabt)) { + if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) kvm_inject_vabt(vcpu); - return 1; - } + + return 1; } - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ @@ -2063,7 +2097,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), - (unsigned long)kvm_vcpu_get_hsr(vcpu)); + (unsigned long)kvm_vcpu_get_esr(vcpu)); return -EFAULT; } @@ -2074,12 +2108,23 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); write_fault = kvm_is_write_fault(vcpu); if (kvm_is_error_hva(hva) || (write_fault && !writable)) { + /* + * The guest has put either its instructions or its page-tables + * somewhere it shouldn't have. Userspace won't be able to do + * anything about this (there's no syndrome for a start), so + * re-inject the abort back into the guest. + */ if (is_iabt) { - /* Prefetch Abort on I/O address */ ret = -ENOEXEC; goto out; } + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + ret = 1; + goto out_unlock; + } + /* * Check for a cache maintenance operation. Since we * ended-up here, we know it is outside of any memory @@ -2090,7 +2135,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * So let's assume that the guest is just being * cautious, and skip the instruction. */ - if (kvm_vcpu_dabt_is_cm(vcpu)) { + if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); ret = 1; goto out_unlock; @@ -2163,14 +2208,14 @@ static int handle_hva_to_gpa(struct kvm *kvm, static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { - unmap_stage2_range(kvm, gpa, size); + unmap_stage2_range(&kvm->arch.mmu, gpa, size); return 0; } int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_unmap_hva_range(start, end); @@ -2190,7 +2235,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data * therefore stage2_set_pte() never needs to clear out a huge PMD * through this calling path. */ - stage2_set_pte(kvm, NULL, gpa, pte, 0); + stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0); return 0; } @@ -2201,7 +2246,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) kvm_pfn_t pfn = pte_pfn(pte); pte_t stage2_pte; - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_set_spte_hva(hva); @@ -2224,7 +2269,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) pte_t *pte; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) return 0; if (pud) @@ -2242,7 +2287,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void * pte_t *pte; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) return 0; if (pud) @@ -2255,7 +2300,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void * int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_age_hva(start, end); return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); @@ -2263,7 +2308,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_test_age_hva(hva); return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, @@ -2476,7 +2521,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, spin_lock(&kvm->mmu_lock); if (ret) - unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); + unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); else stage2_flush_memslot(kvm, memslot); spin_unlock(&kvm->mmu_lock); @@ -2495,7 +2540,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) void kvm_arch_flush_shadow_all(struct kvm *kvm) { - kvm_free_stage2_pgd(kvm); + kvm_free_stage2_pgd(&kvm->arch.mmu); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, @@ -2505,7 +2550,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); - unmap_stage2_range(kvm, gpa, size); + unmap_stage2_range(&kvm->arch.mmu, gpa, size); spin_unlock(&kvm->mmu_lock); } |