From 9a7ad19ac804df56d3a150bcbe8f467a63821625 Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Fri, 2 Dec 2022 18:51:51 +0000
Subject: KVM: arm64: Use KVM's pte type/helpers in handle_access_fault()

Consistently use KVM's own pte types and helpers in
handle_access_fault().

No functional change intended.

Link: https://lore.kernel.org/r/20221202185156.696189-2-oliver.upton@linux.dev
Signed-off-by: Oliver Upton
---
 arch/arm64/include/asm/kvm_pgtable.h |  5 +++++
 arch/arm64/kvm/mmu.c                 | 10 ++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 63f81b27a4e3..192f33b88dc1 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -71,6 +71,11 @@ static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
 	return pte;
 }
 
+static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
+{
+	return __phys_to_pfn(kvm_pte_to_phys(pte));
+}
+
 static inline u64 kvm_granule_shift(u32 level)
 {
 	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 31d7fa4c7c14..3ddee4137912 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1399,20 +1399,18 @@ out_unlock:
 /* Resolve the access fault by making the page young again. */
 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 {
-	pte_t pte;
-	kvm_pte_t kpte;
+	kvm_pte_t pte;
 	struct kvm_s2_mmu *mmu;
 
 	trace_kvm_access_fault(fault_ipa);
 
 	write_lock(&vcpu->kvm->mmu_lock);
 	mmu = vcpu->arch.hw_mmu;
-	kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
+	pte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
 	write_unlock(&vcpu->kvm->mmu_lock);
 
-	pte = __pte(kpte);
-	if (pte_valid(pte))
-		kvm_set_pfn_accessed(pte_pfn(pte));
+	if (kvm_pte_valid(pte))
+		kvm_set_pfn_accessed(kvm_pte_to_pfn(pte));
 }
 
 /**
--
cgit v1.2.3


From ddcadb297ce5cc1cec0b0882d8b750e5e06fc9f4 Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Fri, 2 Dec 2022 18:51:52 +0000
Subject: KVM: arm64: Ignore EAGAIN for walks outside of a fault

The page table walkers are invoked outside fault handling paths, such
as write protecting a range of memory. EAGAIN is generally used by the
walkers to retry execution due to races on a particular PTE, like
taking an access fault on a PTE being invalidated from another thread.
This early return behavior is undesirable for walkers that operate
outside a fault handler.

Suppress EAGAIN and continue the walk if operating outside a fault
handler.

Link: https://lore.kernel.org/r/20221202185156.696189-3-oliver.upton@linux.dev
Signed-off-by: Oliver Upton
---
 arch/arm64/include/asm/kvm_pgtable.h |  3 +++
 arch/arm64/kvm/hyp/pgtable.c         | 30 +++++++++++++++++++++++++++---
 arch/arm64/kvm/mmu.c                 |  4 +++-
 3 files changed, 33 insertions(+), 4 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 192f33b88dc1..4cd6762bda80 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -193,12 +193,15 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
  *					children.
  * @KVM_PGTABLE_WALK_SHARED:		Indicates the page-tables may be shared
  *					with other software walkers.
+ * @KVM_PGTABLE_WALK_HANDLE_FAULT:	Indicates the page-table walk was
+ *					invoked from a fault handler.
  */
 enum kvm_pgtable_walk_flags {
 	KVM_PGTABLE_WALK_LEAF			= BIT(0),
 	KVM_PGTABLE_WALK_TABLE_PRE		= BIT(1),
 	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
 	KVM_PGTABLE_WALK_SHARED			= BIT(3),
+	KVM_PGTABLE_WALK_HANDLE_FAULT		= BIT(4),
 };
 
 struct kvm_pgtable_visit_ctx {
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index b11cf2c618a6..98818214a479 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -168,6 +168,25 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
 	return walker->cb(ctx, visit);
 }
 
+static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
+				      int r)
+{
+	/*
+	 * Visitor callbacks return EAGAIN when the conditions that led to a
+	 * fault are no longer reflected in the page tables due to a race to
+	 * update a PTE. In the context of a fault handler this is interpreted
+	 * as a signal to retry guest execution.
+	 *
+	 * Ignore the return code altogether for walkers outside a fault handler
+	 * (e.g. write protecting a range of memory) and chug along with the
+	 * page table walk.
+	 */
+	if (r == -EAGAIN)
+		return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT);
+
+	return !r;
+}
+
 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
 			      struct kvm_pgtable_mm_ops *mm_ops,
 			      kvm_pteref_t pgtable, u32 level);
@@ -200,7 +219,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 		table = kvm_pte_table(ctx.old, level);
 	}
 
-	if (ret)
+	if (!kvm_pgtable_walk_continue(data->walker, ret))
 		goto out;
 
 	if (!table) {
@@ -211,13 +230,16 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 	childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops);
 	ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1);
-	if (ret)
+	if (!kvm_pgtable_walk_continue(data->walker, ret))
 		goto out;
 
 	if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST)
 		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST);
 
 out:
+	if (kvm_pgtable_walk_continue(data->walker, ret))
+		return 0;
+
 	return ret;
 }
 
@@ -1095,7 +1117,8 @@ kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
 {
 	kvm_pte_t pte = 0;
 	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
-				 &pte, NULL, 0);
+				 &pte, NULL,
+				 KVM_PGTABLE_WALK_HANDLE_FAULT);
 	dsb(ishst);
 	return pte;
 }
@@ -1141,6 +1164,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
 		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
 
 	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
+				       KVM_PGTABLE_WALK_HANDLE_FAULT |
 				       KVM_PGTABLE_WALK_SHARED);
 	if (!ret)
 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3ddee4137912..edfbe85f8f8a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1381,7 +1381,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	else
 		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
 					     __pfn_to_phys(pfn), prot,
-					     memcache, KVM_PGTABLE_WALK_SHARED);
+					     memcache,
+					     KVM_PGTABLE_WALK_HANDLE_FAULT |
+					     KVM_PGTABLE_WALK_SHARED);
 
 	/* Mark the page dirty only if the fault is handled successfully */
 	if (writable && !ret) {
--
cgit v1.2.3


From 76259cca479501579b0c524826c25fcc8bb90e97 Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Fri, 2 Dec 2022 18:51:53 +0000
Subject: KVM: arm64: Return EAGAIN for invalid PTE in attr walker

Return EAGAIN for invalid PTEs in the attr walker, signaling to the
caller that any serialization and/or invalidation can be elided.
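
[ Editor's illustration, not part of the series: eliding the
  serialization means a caller of stage2_update_leaf_attrs() can key
  the barrier off the return code instead of issuing it
  unconditionally; the "Don't serialize if the access flag isn't set"
  patch later in this series does exactly that. A minimal sketch:

	kvm_pte_t pte = 0;
	int ret;

	/* -EAGAIN here means the PTE was invalid and nothing was updated */
	ret = stage2_update_leaf_attrs(pgt, addr, 1,
				       KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				       &pte, NULL,
				       KVM_PGTABLE_WALK_HANDLE_FAULT);
	if (!ret)
		dsb(ishst);	/* only serialize a successful update */
]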
Link: https://lore.kernel.org/r/20221202185156.696189-4-oliver.upton@linux.dev
Signed-off-by: Oliver Upton
---
 arch/arm64/kvm/hyp/pgtable.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 98818214a479..204e59e05674 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1048,7 +1048,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
 	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
 	if (!kvm_pte_valid(ctx->old))
-		return 0;
+		return -EAGAIN;
 
 	data->level = ctx->level;
 	data->pte = pte;
--
cgit v1.2.3


From 7d29a2407df612b0903cee94fc3469d7335b442c Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Fri, 2 Dec 2022 18:51:54 +0000
Subject: KVM: arm64: Don't serialize if the access flag isn't set

If the PTE wasn't changed, then there are no serialization
requirements. Skip the DSB for an unsuccessful update to the access
flag.

Link: https://lore.kernel.org/r/20221202185156.696189-5-oliver.upton@linux.dev
Signed-off-by: Oliver Upton
---
 arch/arm64/kvm/hyp/pgtable.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 204e59e05674..aa36d896bd8c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1116,10 +1116,14 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
 
 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
 {
 	kvm_pte_t pte = 0;
-	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
-				 &pte, NULL,
-				 KVM_PGTABLE_WALK_HANDLE_FAULT);
-	dsb(ishst);
+	int ret;
+
+	ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
+				       &pte, NULL,
+				       KVM_PGTABLE_WALK_HANDLE_FAULT);
+	if (!ret)
+		dsb(ishst);
+
 	return pte;
 }
--
cgit v1.2.3


From fc61f554e6947edd21cd84fb814f8418349a3569 Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Fri, 2 Dec 2022 18:51:55 +0000
Subject: KVM: arm64: Handle access faults behind the read lock

As the underlying software walkers are able to traverse and update
stage-2 in parallel, there is no need to serialize access faults. Only
take the read lock when handling an access fault.
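
[ Editor's note, sketching why the read lock suffices: walkers invoked
  with KVM_PGTABLE_WALK_SHARED publish PTE updates with an atomic
  compare-and-exchange rather than a plain store, so they race safely
  with concurrent software walkers; the mmu_lock read side only needs
  to exclude wholesale table teardown. Simplified from
  stage2_try_set_pte() as used by the attr walker:

	kvm_pte_t old = ctx->old;
	kvm_pte_t new = old | KVM_PTE_LEAF_ATTR_LO_S2_AF;

	/* Lost the race with another walker; let the caller retry */
	if (cmpxchg(ctx->ptep, old, new) != old)
		return -EAGAIN;
]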
Link: https://lore.kernel.org/r/20221202185156.696189-6-oliver.upton@linux.dev
Signed-off-by: Oliver Upton
---
 arch/arm64/kvm/hyp/pgtable.c | 3 ++-
 arch/arm64/kvm/mmu.c         | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index aa36d896bd8c..30575b5f5dcd 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1120,7 +1120,8 @@ kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
 
 	ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
 				       &pte, NULL,
-				       KVM_PGTABLE_WALK_HANDLE_FAULT);
+				       KVM_PGTABLE_WALK_HANDLE_FAULT |
+				       KVM_PGTABLE_WALK_SHARED);
 	if (!ret)
 		dsb(ishst);
 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index edfbe85f8f8a..d24ce2ddb38c 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1406,10 +1406,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 
 	trace_kvm_access_fault(fault_ipa);
 
-	write_lock(&vcpu->kvm->mmu_lock);
+	read_lock(&vcpu->kvm->mmu_lock);
 	mmu = vcpu->arch.hw_mmu;
 	pte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
-	write_unlock(&vcpu->kvm->mmu_lock);
+	read_unlock(&vcpu->kvm->mmu_lock);
 
 	if (kvm_pte_valid(pte))
 		kvm_set_pfn_accessed(kvm_pte_to_pfn(pte));
--
cgit v1.2.3


From 1dfc3e905089a0bcada268fb5691a605655e0319 Mon Sep 17 00:00:00 2001
From: Oliver Upton
Date: Fri, 2 Dec 2022 18:51:56 +0000
Subject: KVM: arm64: Condition HW AF updates on config option

As it currently stands, KVM makes use of FEAT_HAFDBS unconditionally.
Use of the feature in the rest of the kernel is guarded by an
associated Kconfig option. Align KVM with the rest of the kernel and
only enable VTCR_HA when ARM64_HW_AFDBM is enabled.

This can be helpful for testing changes to the stage-2 access fault
path on Armv8.1+ implementations.

Link: https://lore.kernel.org/r/20221202185156.696189-7-oliver.upton@linux.dev
Signed-off-by: Oliver Upton
---
 arch/arm64/kvm/hyp/pgtable.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm64')

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 30575b5f5dcd..3d61bd3e591d 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -606,12 +606,14 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
 		lvls = 2;
 	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
 
+#ifdef CONFIG_ARM64_HW_AFDBM
 	/*
 	 * Enable the Hardware Access Flag management, unconditionally
 	 * on all CPUs. The feature is RES0 on CPUs without the support
 	 * and must be ignored by the CPUs.
 	 */
 	vtcr |= VTCR_EL2_HA;
+#endif /* CONFIG_ARM64_HW_AFDBM */
 
 	/* Set the vmid bits */
 	vtcr |= (get_vmid_bits(mmfr1) == 16) ?
--
cgit v1.2.3
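
[ Editor's note on the #ifdef in the last patch: the guarded statement
  compiles whether or not the option is set, so the same effect could
  be had with IS_ENABLED(), which kernel style generally prefers over
  preprocessor conditionals inside a function body. A sketch, not part
  of the series:

	if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
		vtcr |= VTCR_EL2_HA;
]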