 arch/arm64/kvm/mmu.c | 117 ++++++++++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 52 deletions(-)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1aedc066ba65..9f92892b27a4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1646,6 +1646,15 @@ out_unlock:
 	return ret != -EAGAIN ? ret : 0;
 }
 
+struct kvm_s2_fault_vma_info {
+	unsigned long mmu_seq;
+	long vma_pagesize;
+	vm_flags_t vm_flags;
+	gfn_t gfn;
+	bool mte_allowed;
+	bool is_vma_cacheable;
+};
+
 static short kvm_s2_resolve_vma_size(const struct kvm_s2_fault_desc *s2fd,
 				     struct vm_area_struct *vma, bool *force_pte)
 {
@@ -1710,18 +1719,12 @@ static short kvm_s2_resolve_vma_size(const struct kvm_s2_fault_desc *s2fd,
 
 struct kvm_s2_fault {
 	bool writable;
-	bool mte_allowed;
-	bool is_vma_cacheable;
 	bool s2_force_noncacheable;
-	unsigned long mmu_seq;
-	gfn_t gfn;
 	kvm_pfn_t pfn;
 	bool logging_active;
 	bool force_pte;
-	long vma_pagesize;
 	enum kvm_pgtable_prot prot;
 	struct page *page;
-	vm_flags_t vm_flags;
 };
 
 static bool kvm_s2_fault_is_perm(const struct kvm_s2_fault_desc *s2fd)
@@ -1730,7 +1733,8 @@ static bool kvm_s2_fault_is_perm(const struct kvm_s2_fault_desc *s2fd)
 }
 
 static int kvm_s2_fault_get_vma_info(const struct kvm_s2_fault_desc *s2fd,
-				     struct kvm_s2_fault *fault)
+				     struct kvm_s2_fault *fault,
+				     struct kvm_s2_fault_vma_info *s2vi)
 {
 	struct vm_area_struct *vma;
 	struct kvm *kvm = s2fd->vcpu->kvm;
@@ -1743,20 +1747,20 @@ static int kvm_s2_fault_get_vma_info(const struct kvm_s2_fault_desc *s2fd,
 		return -EFAULT;
 	}
 
-	fault->vma_pagesize = BIT(kvm_s2_resolve_vma_size(s2fd, vma, &fault->force_pte));
+	s2vi->vma_pagesize = BIT(kvm_s2_resolve_vma_size(s2fd, vma, &fault->force_pte));
 
 	/*
 	 * Both the canonical IPA and fault IPA must be aligned to the
 	 * mapping size to ensure we find the right PFN and lay down the
 	 * mapping in the right place.
 	 */
-	fault->gfn = ALIGN_DOWN(s2fd->fault_ipa, fault->vma_pagesize) >> PAGE_SHIFT;
+	s2vi->gfn = ALIGN_DOWN(s2fd->fault_ipa, s2vi->vma_pagesize) >> PAGE_SHIFT;
 
-	fault->mte_allowed = kvm_vma_mte_allowed(vma);
+	s2vi->mte_allowed = kvm_vma_mte_allowed(vma);
 
-	fault->vm_flags = vma->vm_flags;
+	s2vi->vm_flags = vma->vm_flags;
 
-	fault->is_vma_cacheable = kvm_vma_is_cacheable(vma);
+	s2vi->is_vma_cacheable = kvm_vma_is_cacheable(vma);
 
 	/*
 	 * Read mmu_invalidate_seq so that KVM can detect if the results of
@@ -1766,39 +1770,40 @@ static int kvm_s2_fault_get_vma_info(const struct kvm_s2_fault_desc *s2fd,
 	 * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
 	 * with the smp_wmb() in kvm_mmu_invalidate_end().
 	 */
-	fault->mmu_seq = kvm->mmu_invalidate_seq;
+	s2vi->mmu_seq = kvm->mmu_invalidate_seq;
 	mmap_read_unlock(current->mm);
 	return 0;
 }
 
 static gfn_t get_canonical_gfn(const struct kvm_s2_fault_desc *s2fd,
-			       const struct kvm_s2_fault *fault)
+			       const struct kvm_s2_fault_vma_info *s2vi)
 {
 	phys_addr_t ipa;
 
 	if (!s2fd->nested)
-		return fault->gfn;
+		return s2vi->gfn;
 
 	ipa = kvm_s2_trans_output(s2fd->nested);
-	return ALIGN_DOWN(ipa, fault->vma_pagesize) >> PAGE_SHIFT;
+	return ALIGN_DOWN(ipa, s2vi->vma_pagesize) >> PAGE_SHIFT;
 }
 
 static int kvm_s2_fault_pin_pfn(const struct kvm_s2_fault_desc *s2fd,
-				struct kvm_s2_fault *fault)
+				struct kvm_s2_fault *fault,
+				struct kvm_s2_fault_vma_info *s2vi)
 {
 	int ret;
 
-	ret = kvm_s2_fault_get_vma_info(s2fd, fault);
+	ret = kvm_s2_fault_get_vma_info(s2fd, fault, s2vi);
 	if (ret)
 		return ret;
 
-	fault->pfn = __kvm_faultin_pfn(s2fd->memslot, get_canonical_gfn(s2fd, fault),
+	fault->pfn = __kvm_faultin_pfn(s2fd->memslot, get_canonical_gfn(s2fd, s2vi),
				       kvm_is_write_fault(s2fd->vcpu) ? FOLL_WRITE : 0,
				       &fault->writable, &fault->page);
 	if (unlikely(is_error_noslot_pfn(fault->pfn))) {
 		if (fault->pfn == KVM_PFN_ERR_HWPOISON) {
-			kvm_send_hwpoison_signal(s2fd->hva, __ffs(fault->vma_pagesize));
+			kvm_send_hwpoison_signal(s2fd->hva, __ffs(s2vi->vma_pagesize));
 			return 0;
 		}
 		return -EFAULT;
 	}
@@ -1808,7 +1813,8 @@ static int kvm_s2_fault_pin_pfn(const struct kvm_s2_fault_desc *s2fd,
 }
 
 static int kvm_s2_fault_compute_prot(const struct kvm_s2_fault_desc *s2fd,
-				     struct kvm_s2_fault *fault)
+				     struct kvm_s2_fault *fault,
+				     const struct kvm_s2_fault_vma_info *s2vi)
 {
 	struct kvm *kvm = s2fd->vcpu->kvm;
 
@@ -1816,8 +1822,8 @@ static int kvm_s2_fault_compute_prot(const struct kvm_s2_fault_desc *s2fd,
 	 * Check if this is non-struct page memory PFN, and cannot support
 	 * CMOs. It could potentially be unsafe to access as cacheable.
 	 */
-	if (fault->vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(fault->pfn)) {
-		if (fault->is_vma_cacheable) {
+	if (s2vi->vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(fault->pfn)) {
+		if (s2vi->is_vma_cacheable) {
 			/*
 			 * Whilst the VMA owner expects cacheable mapping to this
 			 * PFN, hardware also has to support the FWB and CACHE DIC
@@ -1877,7 +1883,7 @@ static int kvm_s2_fault_compute_prot(const struct kvm_s2_fault_desc *s2fd,
 		fault->prot |= KVM_PGTABLE_PROT_X;
 
 	if (fault->s2_force_noncacheable)
-		fault->prot |= (fault->vm_flags & VM_ALLOW_ANY_UNCACHED) ?
+		fault->prot |= (s2vi->vm_flags & VM_ALLOW_ANY_UNCACHED) ?
				KVM_PGTABLE_PROT_NORMAL_NC : KVM_PGTABLE_PROT_DEVICE;
 	else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
 		fault->prot |= KVM_PGTABLE_PROT_X;
@@ -1887,74 +1893,73 @@ static int kvm_s2_fault_compute_prot(const struct kvm_s2_fault_desc *s2fd,
 
 	if (!kvm_s2_fault_is_perm(s2fd) && !fault->s2_force_noncacheable && kvm_has_mte(kvm)) {
 		/* Check the VMM hasn't introduced a new disallowed VMA */
-		if (!fault->mte_allowed)
+		if (!s2vi->mte_allowed)
 			return -EFAULT;
 	}
 
 	return 0;
 }
 
-static phys_addr_t get_ipa(const struct kvm_s2_fault *fault)
-{
-	return gfn_to_gpa(fault->gfn);
-}
-
 static int kvm_s2_fault_map(const struct kvm_s2_fault_desc *s2fd,
-			    struct kvm_s2_fault *fault, void *memcache)
+			    struct kvm_s2_fault *fault,
+			    const struct kvm_s2_fault_vma_info *s2vi, void *memcache)
 {
+	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
 	struct kvm *kvm = s2fd->vcpu->kvm;
 	struct kvm_pgtable *pgt;
 	long perm_fault_granule;
+	long mapping_size;
+	gfn_t gfn;
 	int ret;
-	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
 
 	kvm_fault_lock(kvm);
 	pgt = s2fd->vcpu->arch.hw_mmu->pgt;
 	ret = -EAGAIN;
-	if (mmu_invalidate_retry(kvm, fault->mmu_seq))
+	if (mmu_invalidate_retry(kvm, s2vi->mmu_seq))
 		goto out_unlock;
 
 	perm_fault_granule = (kvm_s2_fault_is_perm(s2fd) ?
			      kvm_vcpu_trap_get_perm_fault_granule(s2fd->vcpu) : 0);
+	mapping_size = s2vi->vma_pagesize;
+	gfn = s2vi->gfn;
 
 	/*
 	 * If we are not forced to use page mapping, check if we are
 	 * backed by a THP and thus use block mapping if possible.
 	 */
-	if (fault->vma_pagesize == PAGE_SIZE &&
+	if (mapping_size == PAGE_SIZE &&
	    !(fault->force_pte || fault->s2_force_noncacheable)) {
 		if (perm_fault_granule > PAGE_SIZE) {
-			fault->vma_pagesize = perm_fault_granule;
+			mapping_size = perm_fault_granule;
 		} else {
-			fault->vma_pagesize = transparent_hugepage_adjust(kvm, s2fd->memslot,
-									  s2fd->hva, &fault->pfn,
-									  &fault->gfn);
-
-			if (fault->vma_pagesize < 0) {
-				ret = fault->vma_pagesize;
+			mapping_size = transparent_hugepage_adjust(kvm, s2fd->memslot,
								   s2fd->hva, &fault->pfn,
								   &gfn);
+			if (mapping_size < 0) {
+				ret = mapping_size;
 				goto out_unlock;
 			}
 		}
 	}
 
 	if (!perm_fault_granule && !fault->s2_force_noncacheable && kvm_has_mte(kvm))
-		sanitise_mte_tags(kvm, fault->pfn, fault->vma_pagesize);
+		sanitise_mte_tags(kvm, fault->pfn, mapping_size);
 
 	/*
 	 * Under the premise of getting a FSC_PERM fault, we just need to relax
-	 * permissions only if vma_pagesize equals perm_fault_granule. Otherwise,
+	 * permissions only if mapping_size equals perm_fault_granule. Otherwise,
 	 * kvm_pgtable_stage2_map() should be called to change block size.
 	 */
-	if (fault->vma_pagesize == perm_fault_granule) {
+	if (mapping_size == perm_fault_granule) {
 		/*
 		 * Drop the SW bits in favour of those stored in the
 		 * PTE, which will be preserved.
 		 */
 		fault->prot &= ~KVM_NV_GUEST_MAP_SZ;
-		ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, get_ipa(fault),
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, gfn_to_gpa(gfn),
								 fault->prot, flags);
 	} else {
-		ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, get_ipa(fault), fault->vma_pagesize,
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, gfn_to_gpa(gfn), mapping_size,
							 __pfn_to_phys(fault->pfn),
							 fault->prot, memcache, flags);
 	}
@@ -1963,9 +1968,16 @@ out_unlock:
 	kvm_release_faultin_page(kvm, fault->page, !!ret, fault->writable);
 	kvm_fault_unlock(kvm);
 
-	/* Mark the page dirty only if the fault is handled successfully */
-	if (fault->writable && !ret)
-		mark_page_dirty_in_slot(kvm, s2fd->memslot, get_canonical_gfn(s2fd, fault));
+	/*
+	 * Mark the page dirty only if the fault is handled successfully,
+	 * making sure we adjust the canonical IPA if the mapping size has
+	 * been updated (via a THP upgrade, for example).
+	 */
+	if (fault->writable && !ret) {
+		phys_addr_t ipa = gfn_to_gpa(get_canonical_gfn(s2fd, s2vi));
+		ipa &= ~(mapping_size - 1);
+		mark_page_dirty_in_slot(kvm, s2fd->memslot, gpa_to_gfn(ipa));
+	}
 
 	if (ret != -EAGAIN)
 		return ret;
@@ -1976,6 +1988,7 @@ static int user_mem_abort(const struct kvm_s2_fault_desc *s2fd)
 {
 	bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
 	bool logging_active = memslot_is_logging(s2fd->memslot);
+	struct kvm_s2_fault_vma_info s2vi = {};
 	struct kvm_s2_fault fault = {
 		.logging_active = logging_active,
 		.force_pte = logging_active,
@@ -2001,17 +2014,17 @@ static int user_mem_abort(const struct kvm_s2_fault_desc *s2fd)
 	 * Let's check if we will get back a huge page backed by hugetlbfs, or
 	 * get block mapping for device MMIO region.
 	 */
-	ret = kvm_s2_fault_pin_pfn(s2fd, &fault);
+	ret = kvm_s2_fault_pin_pfn(s2fd, &fault, &s2vi);
 	if (ret != 1)
 		return ret;
 
-	ret = kvm_s2_fault_compute_prot(s2fd, &fault);
+	ret = kvm_s2_fault_compute_prot(s2fd, &fault, &s2vi);
 	if (ret) {
 		kvm_release_page_unused(fault.page);
 		return ret;
 	}
 
-	return kvm_s2_fault_map(s2fd, &fault, memcache);
+	return kvm_s2_fault_map(s2fd, &fault, &s2vi, memcache);
 }
 
 /* Resolve the access fault by making the page young again. */
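
For readers following the refactoring outside the kernel tree, here is a minimal, self-contained user-space C sketch of the pattern the patch applies: VMA-derived fault state is captured once into a read-only snapshot (the role of the new struct kvm_s2_fault_vma_info) together with an invalidation sequence number, the snapshot is revalidated before the mapping is installed, and the address used for dirty tracking is re-aligned to the final mapping size, which a THP-style upgrade may have grown. All names in the sketch (vma_snapshot, mmu_seq_read, mmu_seq_retry, snapshot_fault, map_fault) are hypothetical stand-ins, not kernel APIs.

/*
 * Minimal sketch of the snapshot-and-revalidate pattern; assumes nothing
 * beyond standard C. Not kernel code.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Analogue of struct kvm_s2_fault_vma_info: read-only once captured. */
struct vma_snapshot {
	unsigned long mmu_seq;	/* invalidation sequence at capture time */
	long mapping_size;	/* page size backing the fault */
	uint64_t gfn;		/* fault address aligned to mapping_size */
};

static unsigned long mmu_invalidate_seq;	/* bumped by "invalidations" */

static unsigned long mmu_seq_read(void)
{
	return mmu_invalidate_seq;
}

static bool mmu_seq_retry(unsigned long seq)
{
	/* The snapshot is stale if an invalidation ran since capture. */
	return seq != mmu_invalidate_seq;
}

/* Capture everything needed from the "VMA", then never touch it again. */
static void snapshot_fault(struct vma_snapshot *snap, uint64_t fault_addr,
			   long vma_pagesize)
{
	snap->mapping_size = vma_pagesize;
	snap->gfn = (fault_addr & ~(uint64_t)(vma_pagesize - 1)) >> PAGE_SHIFT;
	snap->mmu_seq = mmu_seq_read();
}

static int map_fault(const struct vma_snapshot *snap)
{
	long mapping_size = snap->mapping_size;	/* may be upgraded below */
	uint64_t addr;

	if (mmu_seq_retry(snap->mmu_seq))
		return -1;	/* the kernel returns -EAGAIN: redo the fault */

	/* Stand-in for a THP upgrade growing the mapping size. */
	if (mapping_size == PAGE_SIZE)
		mapping_size = 2L * 1024 * 1024;	/* pretend a 2M block */

	/*
	 * Mirror of the dirty-marking fix above: re-align the canonical
	 * address to the *final* mapping size before converting back to a
	 * gfn, so the right granule is marked when the size was upgraded.
	 */
	addr = (snap->gfn << PAGE_SHIFT) & ~(uint64_t)(mapping_size - 1);
	printf("dirty gfn = %#llx\n", (unsigned long long)(addr >> PAGE_SHIFT));
	return 0;
}

int main(void)
{
	struct vma_snapshot snap;

	snapshot_fault(&snap, 0x40201000, PAGE_SIZE);
	return map_fault(&snap);
}

Keeping the snapshot in a separate struct that is passed around as const makes it clear which fields were sampled under mmap_read_lock() and must not be mutated after the lock is dropped; that is exactly why the patch also replaces the mutable fault->vma_pagesize with a local mapping_size inside kvm_s2_fault_map().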
