summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill Deacon <will@kernel.org>2026-03-30 17:48:15 +0300
committerMarc Zyngier <maz@kernel.org>2026-03-30 18:58:08 +0300
commitea03466e806fea942841a41cfaab8db8c851aa71 (patch)
tree530120b429bbdce96879245dfbba4df066b39cb1
parent5fef16ef49126b0f71fb3e401aae4dca1865e6f9 (diff)
downloadlinux-ea03466e806fea942841a41cfaab8db8c851aa71.tar.xz
KVM: arm64: Handle aborts from protected VMs
Introduce a new abort handler for resolving stage-2 page faults from protected VMs by pinning and donating anonymous memory. This is considerably simpler than the infamous user_mem_abort() as we only have to deal with translation faults at the pte level. Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> Tested-by: Mostafa Saleh <smostafa@google.com> Signed-off-by: Will Deacon <will@kernel.org> Link: https://patch.msgid.link/20260330144841.26181-15-will@kernel.org Signed-off-by: Marc Zyngier <maz@kernel.org>
-rw-r--r--arch/arm64/kvm/mmu.c89
1 file changed, 81 insertions, 8 deletions
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index b3cc5dfe5723..6a4151e3e4a3 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1642,6 +1642,74 @@ out_unlock:
return ret != -EAGAIN ? ret : 0;
}
+/*
+ * pkvm_mem_abort() - Resolve a stage-2 translation fault for a protected VM
+ * by long-term pinning the backing page at @hva and mapping it at @fault_ipa.
+ *
+ * Returns 0 when the fault is handled (including the hwpoison and map-race
+ * cases, where the guest is expected to retry), or a negative errno.
+ */
+static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ struct kvm_memory_slot *memslot, unsigned long hva)
+{
+ unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
+ struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
+ struct mm_struct *mm = current->mm;
+ struct kvm *kvm = vcpu->kvm;
+ void *hyp_memcache;
+ struct page *page;
+ int ret;
+
+ /* Any failure topping up the hyp memcache is reported as -ENOMEM. */
+ ret = prepare_mmu_memcache(vcpu, true, &hyp_memcache);
+ if (ret)
+ return -ENOMEM;
+
+ /* Charge one page against the mm's locked-vm limit before pinning. */
+ ret = account_locked_vm(mm, 1, true);
+ if (ret)
+ return ret;
+
+ mmap_read_lock(mm);
+ ret = pin_user_pages(hva, 1, flags, &page);
+ mmap_read_unlock(mm);
+
+ if (ret == -EHWPOISON) {
+ /*
+ * Poisoned page: signal userspace and report the fault as
+ * handled. Nothing was pinned, so only the locked-vm charge
+ * needs undoing.
+ */
+ kvm_send_hwpoison_signal(hva, PAGE_SHIFT);
+ ret = 0;
+ goto dec_account;
+ } else if (ret != 1) {
+ /* Fewer than the one requested page was pinned. */
+ ret = -EFAULT;
+ goto dec_account;
+ } else if (!folio_test_swapbacked(page_folio(page))) {
+ /*
+ * We really can't deal with page-cache pages returned by GUP
+ * because (a) we may trigger writeback of a page for which we
+ * no longer have access and (b) page_mkclean() won't find the
+ * stage-2 mapping in the rmap so we can get out-of-whack with
+ * the filesystem when marking the page dirty during unpinning
+ * (see cc5095747edf ("ext4: don't BUG if someone dirty pages
+ * without asking ext4 first")).
+ *
+ * Ideally we'd just restrict ourselves to anonymous pages, but
+ * we also want to allow memfd (i.e. shmem) pages, so check for
+ * pages backed by swap in the knowledge that the GUP pin will
+ * prevent try_to_unmap() from succeeding.
+ */
+ ret = -EIO;
+ goto unpin;
+ }
+
+ write_lock(&kvm->mmu_lock);
+ ret = pkvm_pgtable_stage2_map(pgt, fault_ipa, PAGE_SIZE,
+ page_to_phys(page), KVM_PGTABLE_PROT_RWX,
+ hyp_memcache, 0);
+ write_unlock(&kvm->mmu_lock);
+ if (ret) {
+ /*
+ * NOTE(review): -EAGAIN is treated as success but the pin is
+ * dropped — presumably another vCPU raced to install the
+ * mapping and the guest will simply refault if necessary;
+ * confirm against pkvm_pgtable_stage2_map()'s contract.
+ */
+ if (ret == -EAGAIN)
+ ret = 0;
+ goto unpin;
+ }
+
+ /* Success: the pin and the locked-vm charge are retained. */
+ return 0;
+unpin:
+ unpin_user_pages(&page, 1);
+dec_account:
+ account_locked_vm(mm, 1, false);
+ return ret;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_s2_trans *nested,
struct kvm_memory_slot *memslot, unsigned long hva,
@@ -2205,15 +2273,20 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
goto out_unlock;
}
- VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
- !write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu));
+ if (kvm_vm_is_protected(vcpu->kvm)) {
+ ret = pkvm_mem_abort(vcpu, fault_ipa, memslot, hva);
+ } else {
+ VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
+ !write_fault &&
+ !kvm_vcpu_trap_is_exec_fault(vcpu));
- if (kvm_slot_has_gmem(memslot))
- ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
- esr_fsc_is_permission_fault(esr));
- else
- ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
- esr_fsc_is_permission_fault(esr));
+ if (kvm_slot_has_gmem(memslot))
+ ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
+ esr_fsc_is_permission_fault(esr));
+ else
+ ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
+ esr_fsc_is_permission_fault(esr));
+ }
if (ret == 0)
ret = 1;
out: