summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Zyngier <maz@kernel.org>2026-01-23 13:04:47 +0300
committerMarc Zyngier <maz@kernel.org>2026-01-23 13:04:47 +0300
commitc983b3e276859edfeb836ba9f981ed71eee249df (patch)
tree9912995e4ce5699ac4ab9e65e780ad4d0cb618de
parentcb6cd8a86d5b024f181b5520bcb948a27a257914 (diff)
parentf7d05ee84a6a8d3775e0f0c3070d9380bed844a9 (diff)
downloadlinux-c983b3e276859edfeb836ba9f981ed71eee249df.tar.xz
Merge branch kvm-arm64/pkvm-features-6.20 into kvmarm-master/next
* kvm-arm64/pkvm-features-6.20: : . : pKVM guest feature trapping fixes, courtesy of Fuad Tabba. : . KVM: arm64: Prevent host from managing timer offsets for protected VMs KVM: arm64: Check whether a VM IOCTL is allowed in pKVM KVM: arm64: Track KVM IOCTLs and their associated KVM caps KVM: arm64: Do not allow KVM_CAP_ARM_MTE for any guest in pKVM KVM: arm64: Include VM type when checking VM capabilities in pKVM KVM: arm64: Introduce helper to calculate fault IPA offset KVM: arm64: Fix MTE flag initialization for protected VMs KVM: arm64: Fix Trace Buffer trap polarity for protected VMs KVM: arm64: Fix Trace Buffer trapping for protected VMs Signed-off-by: Marc Zyngier <maz@kernel.org>
-rw-r--r--arch/arm64/include/asm/kvm_arm.h2
-rw-r--r--arch/arm64/include/asm/kvm_host.h2
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h32
-rw-r--r--arch/arm64/kvm/arch_timer.c18
-rw-r--r--arch/arm64/kvm/arm.c52
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c18
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c2
-rw-r--r--arch/arm64/kvm/inject_fault.c2
-rw-r--r--arch/arm64/kvm/mmu.c4
9 files changed, 108 insertions, 24 deletions
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index dfdbd2b65db9..e3abef594ba6 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -316,6 +316,8 @@
#define PAR_TO_HPFAR(par) \
(((par) & GENMASK_ULL(52 - 1, 12)) >> 8)
+#define FAR_TO_FIPA_OFFSET(far) ((far) & GENMASK_ULL(11, 0))
+
#define ECN(x) { ESR_ELx_EC_##x, #x }
#define kvm_arm_exception_class \
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b552a1e03848..ec3d39416ec5 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1656,4 +1656,6 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg
p; \
})
+long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext);
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 0aecd4ac5f45..757076ad4ec9 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -9,6 +9,7 @@
#include <linux/arm_ffa.h>
#include <linux/memblock.h>
#include <linux/scatterlist.h>
+#include <asm/kvm_host.h>
#include <asm/kvm_pgtable.h>
/* Maximum number of VMs that can co-exist under pKVM. */
@@ -23,10 +24,12 @@ void pkvm_destroy_hyp_vm(struct kvm *kvm);
int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu);
/*
- * This functions as an allow-list of protected VM capabilities.
- * Features not explicitly allowed by this function are denied.
+ * Check whether the specific capability is allowed in pKVM.
+ *
+ * Certain features are allowed only for non-protected VMs in pKVM, which is why
+ * this takes the VM (kvm) as a parameter.
*/
-static inline bool kvm_pvm_ext_allowed(long ext)
+static inline bool kvm_pkvm_ext_allowed(struct kvm *kvm, long ext)
{
switch (ext) {
case KVM_CAP_IRQCHIP:
@@ -42,11 +45,32 @@ static inline bool kvm_pvm_ext_allowed(long ext)
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
case KVM_CAP_ARM_PTRAUTH_GENERIC:
return true;
- default:
+ case KVM_CAP_ARM_MTE:
return false;
+ default:
+ return !kvm || !kvm_vm_is_protected(kvm);
}
}
+/*
+ * Check whether the KVM VM IOCTL is allowed in pKVM.
+ *
+ * Certain features are allowed only for non-protected VMs in pKVM, which is why
+ * this takes the VM (kvm) as a parameter.
+ */
+static inline bool kvm_pkvm_ioctl_allowed(struct kvm *kvm, unsigned int ioctl)
+{
+ long ext;
+ int r;
+
+ r = kvm_get_cap_for_kvm_ioctl(ioctl, &ext);
+
+ if (WARN_ON_ONCE(r < 0))
+ return false;
+
+ return kvm_pkvm_ext_allowed(kvm, ext);
+}
+
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 99a07972068d..600f250753b4 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -1056,10 +1056,14 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
ctxt->timer_id = timerid;
- if (timerid == TIMER_VTIMER)
- ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
- else
- ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
+ if (!kvm_vm_is_protected(vcpu->kvm)) {
+ if (timerid == TIMER_VTIMER)
+ ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
+ else
+ ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
+ } else {
+ ctxt->offset.vm_offset = NULL;
+ }
hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
@@ -1083,7 +1087,8 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
timer_context_init(vcpu, i);
/* Synchronize offsets across timers of a VM if not already provided */
- if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
+ if (!vcpu_is_protected(vcpu) &&
+ !test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
timer_set_offset(vcpu_ptimer(vcpu), 0);
}
@@ -1687,6 +1692,9 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
if (offset->reserved)
return -EINVAL;
+ if (kvm_vm_is_protected(kvm))
+ return -EINVAL;
+
mutex_lock(&kvm->lock);
if (!kvm_trylock_all_vcpus(kvm)) {
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 620a465248d1..296a9e24a3fc 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -58,6 +58,51 @@ enum kvm_wfx_trap_policy {
static enum kvm_wfx_trap_policy kvm_wfi_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK;
static enum kvm_wfx_trap_policy kvm_wfe_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK;
+/*
+ * Tracks KVM IOCTLs and their associated KVM capabilities.
+ */
+struct kvm_ioctl_cap_map {
+ unsigned int ioctl;
+ long ext;
+};
+
+/* Make KVM_CAP_NR_VCPUS the reference for features we always supported */
+#define KVM_CAP_ARM_BASIC KVM_CAP_NR_VCPUS
+
+/*
+ * Sorted by ioctl to allow for potential binary search,
+ * though linear scan is sufficient for this size.
+ */
+static const struct kvm_ioctl_cap_map vm_ioctl_caps[] = {
+ { KVM_CREATE_IRQCHIP, KVM_CAP_IRQCHIP },
+ { KVM_ARM_SET_DEVICE_ADDR, KVM_CAP_ARM_SET_DEVICE_ADDR },
+ { KVM_ARM_MTE_COPY_TAGS, KVM_CAP_ARM_MTE },
+ { KVM_SET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL },
+ { KVM_GET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL },
+ { KVM_HAS_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL },
+ { KVM_ARM_SET_COUNTER_OFFSET, KVM_CAP_COUNTER_OFFSET },
+ { KVM_ARM_GET_REG_WRITABLE_MASKS, KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES },
+ { KVM_ARM_PREFERRED_TARGET, KVM_CAP_ARM_BASIC },
+};
+
+/*
+ * Set *ext to the capability.
+ * Return 0 if found, or -EINVAL if no IOCTL matches.
+ */
+long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vm_ioctl_caps); i++) {
+ if (vm_ioctl_caps[i].ioctl == ioctl) {
+ *ext = vm_ioctl_caps[i].ext;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base);
@@ -87,7 +132,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
if (cap->flags)
return -EINVAL;
- if (kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(cap->cap))
+ if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, cap->cap))
return -EINVAL;
switch (cap->cap) {
@@ -303,7 +348,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
- if (kvm && kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(ext))
+ if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, ext))
return 0;
switch (ext) {
@@ -1894,6 +1939,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;
+ if (is_protected_kvm_enabled() && !kvm_pkvm_ioctl_allowed(kvm, ioctl))
+ return -EINVAL;
+
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
int ret;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 12b2acfbcfd1..987f42f15d17 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -117,8 +117,8 @@ static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu)
if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
val |= MDCR_EL2_TTRF;
- if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP))
- val |= MDCR_EL2_E2TB_MASK;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP))
+ val &= ~MDCR_EL2_E2TB_MASK;
/* Trap Debug Communications Channel registers */
if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
@@ -339,9 +339,6 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
/* Preserve the vgic model so that GICv3 emulation works */
hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model;
- if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
- set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
-
/* No restrictions for non-protected VMs. */
if (!kvm_vm_is_protected(kvm)) {
hyp_vm->kvm.arch.flags = host_arch_flags;
@@ -356,20 +353,23 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
return;
}
+ if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_MTE))
+ kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_MTE_ENABLED);
+
bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);
set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);
- if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3))
+ if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PMU_V3))
set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);
- if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS))
+ if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_ADDRESS))
set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);
- if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC))
+ if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_PTRAUTH_GENERIC))
set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);
- if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) {
+ if (kvm_pkvm_ext_allowed(kvm, KVM_CAP_ARM_SVE)) {
set_bit(KVM_ARM_VCPU_SVE, allowed_features);
kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE);
}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 5fd99763b54d..f0605836821e 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -44,7 +44,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
/* Build the full address */
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
- fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+ fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu));
/* If not for GICV, move on */
if (fault_ipa < vgic->vgic_cpu_base ||
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 99b696144b1e..89982bd3345f 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -296,7 +296,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
unsigned long addr, esr;
addr = kvm_vcpu_get_fault_ipa(vcpu);
- addr |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+ addr |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu));
__kvm_inject_sea(vcpu, kvm_vcpu_trap_is_iabt(vcpu), addr);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 2ada3624d631..e613ec5adc7a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -2079,7 +2079,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
/* Falls between the IPA range and the PARange? */
if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) {
- fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+ fault_ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu));
return kvm_inject_sea(vcpu, is_iabt, fault_ipa);
}
@@ -2185,7 +2185,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
* faulting VA. This is always 12 bits, irrespective
* of the page size.
*/
- ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+ ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu));
ret = io_mem_abort(vcpu, ipa);
goto out_unlock;
}