diff options
| author | Marc Zyngier <maz@kernel.org> | 2026-01-23 13:04:47 +0300 |
|---|---|---|
| committer | Marc Zyngier <maz@kernel.org> | 2026-01-23 13:04:47 +0300 |
| commit | c983b3e276859edfeb836ba9f981ed71eee249df (patch) | |
| tree | 9912995e4ce5699ac4ab9e65e780ad4d0cb618de | |
| parent | cb6cd8a86d5b024f181b5520bcb948a27a257914 (diff) | |
| parent | f7d05ee84a6a8d3775e0f0c3070d9380bed844a9 (diff) | |
| download | linux-c983b3e276859edfeb836ba9f981ed71eee249df.tar.xz | |
Merge branch kvm-arm64/pkvm-features-6.20 into kvmarm-master/next
* kvm-arm64/pkvm-features-6.20:
: .
: pKVM guest feature trapping fixes, courtesy of Fuad Tabba.
: .
KVM: arm64: Prevent host from managing timer offsets for protected VMs
KVM: arm64: Check whether a VM IOCTL is allowed in pKVM
KVM: arm64: Track KVM IOCTLs and their associated KVM caps
KVM: arm64: Do not allow KVM_CAP_ARM_MTE for any guest in pKVM
KVM: arm64: Include VM type when checking VM capabilities in pKVM
KVM: arm64: Introduce helper to calculate fault IPA offset
KVM: arm64: Fix MTE flag initialization for protected VMs
KVM: arm64: Fix Trace Buffer trap polarity for protected VMs
KVM: arm64: Fix Trace Buffer trapping for protected VMs
Signed-off-by: Marc Zyngier <maz@kernel.org>
| -rw-r--r-- | arch/arm64/include/asm/kvm_arm.h | 2 | ||||
| -rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 2 | ||||
| -rw-r--r-- | arch/arm64/include/asm/kvm_pkvm.h | 32 | ||||
| -rw-r--r-- | arch/arm64/kvm/arch_timer.c | 18 | ||||
| -rw-r--r-- | arch/arm64/kvm/arm.c | 52 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/nvhe/pkvm.c | 18 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 2 | ||||
| -rw-r--r-- | arch/arm64/kvm/inject_fault.c | 2 | ||||
| -rw-r--r-- | arch/arm64/kvm/mmu.c | 4 |
9 files changed, 108 insertions, 24 deletions
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index dfdbd2b65db9..e3abef594ba6 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -316,6 +316,8 @@ #define PAR_TO_HPFAR(par) \ (((par) & GENMASK_ULL(52 - 1, 12)) >> 8) +#define FAR_TO_FIPA_OFFSET(far) ((far) & GENMASK_ULL(11, 0)) + #define ECN(x) { ESR_ELx_EC_##x, #x } #define kvm_arm_exception_class \ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b552a1e03848..ec3d39416ec5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1656,4 +1656,6 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg p; \ }) +long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 0aecd4ac5f45..757076ad4ec9 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -9,6 +9,7 @@ #include <linux/arm_ffa.h> #include <linux/memblock.h> #include <linux/scatterlist.h> +#include <asm/kvm_host.h> #include <asm/kvm_pgtable.h> /* Maximum number of VMs that can co-exist under pKVM. */ @@ -23,10 +24,12 @@ void pkvm_destroy_hyp_vm(struct kvm *kvm); int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu); /* - * This functions as an allow-list of protected VM capabilities. - * Features not explicitly allowed by this function are denied. + * Check whether the specific capability is allowed in pKVM. + * + * Certain features are allowed only for non-protected VMs in pKVM, which is why + * this takes the VM (kvm) as a parameter. */ -static inline bool kvm_pvm_ext_allowed(long ext) +static inline bool kvm_pkvm_ext_allowed(struct kvm *kvm, long ext) { switch (ext) { case KVM_CAP_IRQCHIP: @@ -42,11 +45,32 @@ static inline bool kvm_pvm_ext_allowed(long ext) case KVM_CAP_ARM_PTRAUTH_ADDRESS: case KVM_CAP_ARM_PTRAUTH_GENERIC: return true; - default: + case KVM_CAP_ARM_MTE: return false; + default: + return !kvm || !kvm_vm_is_protected(kvm); } } +/* + * Check whether the KVM VM IOCTL is allowed in pKVM. + * + * Certain features are allowed only for non-protected VMs in pKVM, which is why + * this takes the VM (kvm) as a parameter. + */ +static inline bool kvm_pkvm_ioctl_allowed(struct kvm *kvm, unsigned int ioctl) +{ + long ext; + int r; + + r = kvm_get_cap_for_kvm_ioctl(ioctl, &ext); + + if (WARN_ON_ONCE(r < 0)) + return false; + + return kvm_pkvm_ext_allowed(kvm, ext); +} + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 99a07972068d..600f250753b4 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1056,10 +1056,14 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) ctxt->timer_id = timerid; - if (timerid == TIMER_VTIMER) - ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; - else - ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; + if (!kvm_vm_is_protected(vcpu->kvm)) { + if (timerid == TIMER_VTIMER) + ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; + else + ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; + } else { + ctxt->offset.vm_offset = NULL; + } hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); @@ -1083,7 +1087,8 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) timer_context_init(vcpu, i); /* Synchronize offsets across timers of a VM if not already provided */ - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { + if (!vcpu_is_protected(vcpu) && + !test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read()); timer_set_offset(vcpu_ptimer(vcpu), 0); } @@ -1687,6 +1692,9 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, if (offset->reserved) return -EINVAL; + if (kvm_vm_is_protected(kvm)) + return -EINVAL; + mutex_lock(&kvm->lock); if (!kvm_trylock_all_vcpus(kvm)) { diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 620a465248d1..296a9e24a3fc 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -58,6 +58,51 @@ enum kvm_wfx_trap_policy { static enum kvm_wfx_trap_policy kvm_wfi_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK; static enum kvm_wfx_trap_policy kvm_wfe_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK; +/* + * Tracks KVM IOCTLs and their associated KVM capabilities. + */ +struct kvm_ioctl_cap_map { + unsigned int ioctl; + long ext; +}; + +/* Make KVM_CAP_NR_VCPUS the reference for features we always supported */ +#define KVM_CAP_ARM_BASIC KVM_CAP_NR_VCPUS + +/* + * Sorted by ioctl to allow for potential binary search, + * though linear scan is sufficient for this size. + */ +static const struct kvm_ioctl_cap_map vm_ioctl_caps[] = { + { KVM_CREATE_IRQCHIP, KVM_CAP_IRQCHIP }, + { KVM_ARM_SET_DEVICE_ADDR, KVM_CAP_ARM_SET_DEVICE_ADDR }, + { KVM_ARM_MTE_COPY_TAGS, KVM_CAP_ARM_MTE }, + { KVM_SET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL }, + { KVM_GET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL }, + { KVM_HAS_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL }, + { KVM_ARM_SET_COUNTER_OFFSET, KVM_CAP_COUNTER_OFFSET }, + { KVM_ARM_GET_REG_WRITABLE_MASKS, KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES }, + { KVM_ARM_PREFERRED_TARGET, KVM_CAP_ARM_BASIC }, +}; + +/* + * Set *ext to the capability. + * Return 0 if found, or -EINVAL if no IOCTL matches. + */ +long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vm_ioctl_caps); i++) { + if (vm_ioctl_caps[i].ioctl == ioctl) { + *ext = vm_ioctl_caps[i].ext; + return 0; + } + } + + return -EINVAL; +} + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base); @@ -87,7 +132,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (cap->flags) return -EINVAL; - if (kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(cap->cap)) + if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, cap->cap)) return -EINVAL; switch (cap->cap) { @@ -303,7 +348,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; - if (kvm && kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(ext)) + if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, ext)) return 0; switch (ext) { @@ -1894,6 +1939,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) void __user *argp = (void __user *)arg; struct kvm_device_attr attr; + if (is_protected_kvm_enabled() && !kvm_pkvm_ioctl_allowed(kvm, ioctl)) + return -EINVAL; + switch (ioctl) { case KVM_CREATE_IRQCHIP: { int ret; diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 12b2acfbcfd1..987f42f15d17 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -117,8 +117,8 @@ static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu) if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP)) val |= MDCR_EL2_TTRF; - if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP)) - val |= MDCR_EL2_E2TB_MASK; + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP)) + val &= ~MDCR_EL2_E2TB_MASK; /* Trap Debug Communications Channel registers */ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP)) @@ -339,9 +339,6 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc /* Preserve the vgic model so that GICv3 emulation works */ hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model; - if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags)) - set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); - /* No restrictions for non-protected VMs. */ if (!kvm_vm_is_protected(kvm)) { hyp_vm->kvm.arch.flags = host_arch_flags; @@ -356,20 +353,23 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc return; } + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_MTE)) + kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_MTE_ENABLED); + bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES); set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PMU_V3)) set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_ADDRESS)) set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC)) + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_GENERIC)) set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features); - if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) { + if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_SVE)) { set_bit(KVM_ARM_VCPU_SVE, allowed_features); kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE); } diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 5fd99763b54d..f0605836821e 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -44,7 +44,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) /* Build the full address */ fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); /* If not for GICV, move on */ if (fault_ipa < vgic->vgic_cpu_base || diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 99b696144b1e..89982bd3345f 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -296,7 +296,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu) unsigned long addr, esr; addr = kvm_vcpu_get_fault_ipa(vcpu); - addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + addr |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); __kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2ada3624d631..e613ec5adc7a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2079,7 +2079,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) /* Falls between the IPA range and the PARange? */ if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); return kvm_inject_sea(vcpu, is_iabt, fault_ipa); } @@ -2185,7 +2185,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * faulting VA. This is always 12 bits, irrespective * of the page size. */ - ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu)); ret = io_mem_abort(vcpu, ipa); goto out_unlock; } |
