-rw-r--r--  Documentation/virt/kvm/devices/vcpu.rst        |  14
-rw-r--r--  arch/arm64/include/asm/cpucaps.h               |   2
-rw-r--r--  arch/arm64/include/asm/cpufeature.h            |   3
-rw-r--r--  arch/arm64/include/asm/cputype.h               |   2
-rw-r--r--  arch/arm64/include/asm/el2_setup.h             |   6
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h               |   4
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h               |   5
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h           |  69
-rw-r--r--  arch/arm64/include/asm/kvm_host.h              | 121
-rw-r--r--  arch/arm64/include/asm/kvm_nested.h            |   9
-rw-r--r--  arch/arm64/include/asm/kvm_pkvm.h              |  25
-rw-r--r--  arch/arm64/include/asm/sysreg.h                |   4
-rw-r--r--  arch/arm64/kernel/cpu_errata.c                 |   8
-rw-r--r--  arch/arm64/kernel/cpufeature.c                 |  17
-rw-r--r--  arch/arm64/kernel/image-vars.h                 |   3
-rw-r--r--  arch/arm64/kvm/arch_timer.c                    | 179
-rw-r--r--  arch/arm64/kvm/arm.c                           |  47
-rw-r--r--  arch/arm64/kvm/at.c                            |   6
-rw-r--r--  arch/arm64/kvm/debug.c                         | 380
-rw-r--r--  arch/arm64/kvm/emulate-nested.c                |  83
-rw-r--r--  arch/arm64/kvm/fpsimd.c                        |  14
-rw-r--r--  arch/arm64/kvm/guest.c                         |  31
-rw-r--r--  arch/arm64/kvm/handle_exit.c                   |   5
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/debug-sr.h      |  42
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h        |  43
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h     |  43
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 223
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/pkvm.h         |   7
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/debug-sr.c             |  13
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c             |  12
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c                 | 345
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/setup.c                |   1
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c               |  54
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/sys_regs.c             | 404
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/timer-sr.c             |  16
-rw-r--r--  arch/arm64/kvm/hyp/vhe/debug-sr.c              |   5
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c                | 123
-rw-r--r--  arch/arm64/kvm/nested.c                        |  23
-rw-r--r--  arch/arm64/kvm/reset.c                         |   6
-rw-r--r--  arch/arm64/kvm/sys_regs.c                      | 395
-rw-r--r--  arch/arm64/kvm/trace_handle_exit.h             |  75
-rw-r--r--  arch/arm64/mm/proc.S                           |   5
-rw-r--r--  arch/arm64/tools/cpucaps                       |   1
-rwxr-xr-x  arch/arm64/tools/gen-sysreg.awk                |   2
-rw-r--r--  arch/arm64/tools/sysreg                        |  44
-rw-r--r--  include/clocksource/arm_arch_timer.h           |   6
-rw-r--r--  include/kvm/arm_arch_timer.h                   |  23
47 files changed, 1462 insertions(+), 1486 deletions(-)
diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index 31f14ec4a65b..31a9576c07af 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -142,8 +142,8 @@ the cpu field to the processor id.
:Architectures: ARM64
-2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
------------------------------------------------------------------------------
+2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_{VTIMER,PTIMER,HVTIMER,HPTIMER}
+-----------------------------------------------------------------------
:Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
pointer to an int
@@ -159,10 +159,12 @@ A value describing the architected timer interrupt number when connected to an
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
attribute overrides the default values (see below).
-============================= ==========================================
-KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
-KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
-============================= ==========================================
+============================== ==========================================
+KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
+KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
+KVM_ARM_VCPU_TIMER_IRQ_HVTIMER The EL2 virtual timer intid (default: 28)
+KVM_ARM_VCPU_TIMER_IRQ_HPTIMER The EL2 physical timer intid (default: 26)
+============================== ==========================================
Setting the same PPI for different timers will prevent the VCPUs from running.
Setting the interrupt number on a VCPU configures all VCPUs created at that
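
As a rough usage sketch (not part of the patch; the helper name and fd are
invented, the constants come from the UAPI headers), the new HVTIMER attribute
is set through the same vcpu device-attribute ioctl as the existing ones:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Sketch: route the EL2 virtual timer to PPI 28 on one vcpu */
    static int set_hvtimer_irq(int vcpu_fd)
    {
            int intid = 28;         /* must be a PPI: 16 <= intid < 32 */
            struct kvm_device_attr attr = {
                    .group = KVM_ARM_VCPU_TIMER_CTRL,
                    .attr  = KVM_ARM_VCPU_TIMER_IRQ_HVTIMER,
                    .addr  = (__u64)(unsigned long)&intid, /* pointer to an int */
            };

            return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
    }
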
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index cbbf70e0f204..0b5ca6e0eb09 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -46,6 +46,8 @@ cpucap_is_possible(const unsigned int cap)
return IS_ENABLED(CONFIG_ARM64_POE);
case ARM64_HAS_GCS:
return IS_ENABLED(CONFIG_ARM64_GCS);
+ case ARM64_HAFT:
+ return IS_ENABLED(CONFIG_ARM64_HAFT);
case ARM64_UNMAP_KERNEL_AT_EL0:
return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0);
case ARM64_WORKAROUND_843419:
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8b4e5a3cd24c..a4d0b77a68d9 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -852,8 +852,7 @@ static inline bool system_supports_gcs(void)
static inline bool system_supports_haft(void)
{
- return IS_ENABLED(CONFIG_ARM64_HAFT) &&
- cpus_have_final_cap(ARM64_HAFT);
+ return cpus_have_final_cap(ARM64_HAFT);
}
static __always_inline bool system_supports_mpam(void)
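
For context (a sketch, not from the patch): cpus_have_final_cap() already
folds in cpucap_is_possible(), so once ARM64_HAFT is listed there (see the
cpucaps.h hunk above) the removed IS_ENABLED() test was redundant. Roughly,
with CONFIG_ARM64_HAFT=n the call now expands to:

    /* Sketch of the constant folding the compiler performs */
    static inline bool sketch_supports_haft(void)
    {
            /* via cpucap_is_possible(ARM64_HAFT) */
            if (!IS_ENABLED(CONFIG_ARM64_HAFT))
                    return false;   /* whole function folds to false */

            return cpus_have_final_cap(ARM64_HAFT);
    }
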
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 488f8e751349..6f3f4142e214 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -122,6 +122,7 @@
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
+#define QCOM_CPU_PART_ORYON_X1 0x001
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004
@@ -198,6 +199,7 @@
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
+#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 4ef52d7245bb..25e162651750 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -154,7 +154,7 @@
/* Coprocessor traps */
.macro __init_el2_cptr
__check_hvhe .LnVHE_\@, x1
- mov x0, #CPACR_ELx_FPEN
+ mov x0, #CPACR_EL1_FPEN
msr cpacr_el1, x0
b .Lskip_set_cptr_\@
.LnVHE_\@:
@@ -332,7 +332,7 @@
// (h)VHE case
mrs x0, cpacr_el1 // Disable SVE traps
- orr x0, x0, #CPACR_ELx_ZEN
+ orr x0, x0, #CPACR_EL1_ZEN
msr cpacr_el1, x0
b .Lskip_set_cptr_\@
@@ -353,7 +353,7 @@
// (h)VHE case
mrs x0, cpacr_el1 // Disable SME traps
- orr x0, x0, #CPACR_ELx_SMEN
+ orr x0, x0, #CPACR_EL1_SMEN
msr cpacr_el1, x0
b .Lskip_set_cptr_sme_\@
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3e0f0de1d2da..8d94a6c0ed5c 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -300,7 +300,7 @@
#define CPTR_EL2_TSM (1 << 12)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
#define CPTR_EL2_TZ (1 << 8)
-#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
+#define CPTR_NVHE_EL2_RES1 (BIT(13) | BIT(9) | GENMASK(7, 0))
#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
GENMASK(29, 21) | \
GENMASK(19, 14) | \
@@ -391,8 +391,6 @@
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
-#define CPACR_EL1_TTA (1 << 28)
-
#define kvm_mode_names \
{ PSR_MODE_EL0t, "EL0t" }, \
{ PSR_MODE_EL1t, "EL1t" }, \
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 002088c6e297..bec227f9500a 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -53,8 +53,7 @@
enum __kvm_host_smccc_func {
/* Hypercalls available only prior to pKVM finalisation */
/* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
- __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
- __KVM_HOST_SMCCC_FUNC___pkvm_init,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
__KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
__KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector,
__KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs,
@@ -256,8 +255,6 @@ extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_gic_config(void);
extern void __vgic_v3_init_lrs(void);
-extern u64 __kvm_get_mdcr_el2(void);
-
#define __KVM_EXTABLE(from, to) \
" .pushsection __kvm_ex_table, \"a\"\n" \
" .align 3\n" \
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index cf811009a33c..55ddc1352373 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -556,13 +556,13 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
({ \
u64 cptr = 0; \
\
- if ((set) & CPACR_ELx_FPEN) \
+ if ((set) & CPACR_EL1_FPEN) \
cptr |= CPTR_EL2_TFP; \
- if ((set) & CPACR_ELx_ZEN) \
+ if ((set) & CPACR_EL1_ZEN) \
cptr |= CPTR_EL2_TZ; \
- if ((set) & CPACR_ELx_SMEN) \
+ if ((set) & CPACR_EL1_SMEN) \
cptr |= CPTR_EL2_TSM; \
- if ((clr) & CPACR_ELx_TTA) \
+ if ((clr) & CPACR_EL1_TTA) \
cptr |= CPTR_EL2_TTA; \
if ((clr) & CPTR_EL2_TAM) \
cptr |= CPTR_EL2_TAM; \
@@ -576,13 +576,13 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
({ \
u64 cptr = 0; \
\
- if ((clr) & CPACR_ELx_FPEN) \
+ if ((clr) & CPACR_EL1_FPEN) \
cptr |= CPTR_EL2_TFP; \
- if ((clr) & CPACR_ELx_ZEN) \
+ if ((clr) & CPACR_EL1_ZEN) \
cptr |= CPTR_EL2_TZ; \
- if ((clr) & CPACR_ELx_SMEN) \
+ if ((clr) & CPACR_EL1_SMEN) \
cptr |= CPTR_EL2_TSM; \
- if ((set) & CPACR_ELx_TTA) \
+ if ((set) & CPACR_EL1_TTA) \
cptr |= CPTR_EL2_TTA; \
if ((set) & CPTR_EL2_TAM) \
cptr |= CPTR_EL2_TAM; \
@@ -595,13 +595,13 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
#define cpacr_clear_set(clr, set) \
do { \
BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \
- BUILD_BUG_ON((clr) & CPACR_ELx_E0POE); \
- __build_check_all_or_none((clr), CPACR_ELx_FPEN); \
- __build_check_all_or_none((set), CPACR_ELx_FPEN); \
- __build_check_all_or_none((clr), CPACR_ELx_ZEN); \
- __build_check_all_or_none((set), CPACR_ELx_ZEN); \
- __build_check_all_or_none((clr), CPACR_ELx_SMEN); \
- __build_check_all_or_none((set), CPACR_ELx_SMEN); \
+ BUILD_BUG_ON((clr) & CPACR_EL1_E0POE); \
+ __build_check_all_or_none((clr), CPACR_EL1_FPEN); \
+ __build_check_all_or_none((set), CPACR_EL1_FPEN); \
+ __build_check_all_or_none((clr), CPACR_EL1_ZEN); \
+ __build_check_all_or_none((set), CPACR_EL1_ZEN); \
+ __build_check_all_or_none((clr), CPACR_EL1_SMEN); \
+ __build_check_all_or_none((set), CPACR_EL1_SMEN); \
\
if (has_vhe() || has_hvhe()) \
sysreg_clear_set(cpacr_el1, clr, set); \
@@ -619,40 +619,40 @@ static __always_inline void kvm_write_cptr_el2(u64 val)
write_sysreg(val, cptr_el2);
}
-static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
+/* Resets the value of cptr_el2 when returning to the host. */
+static __always_inline void __kvm_reset_cptr_el2(struct kvm *kvm)
{
u64 val;
if (has_vhe()) {
- val = (CPACR_ELx_FPEN | CPACR_EL1_ZEN_EL1EN);
+ val = (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN);
if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN_EL1EN;
} else if (has_hvhe()) {
- val = CPACR_ELx_FPEN;
+ val = CPACR_EL1_FPEN;
- if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
- val |= CPACR_ELx_ZEN;
+ if (!kvm_has_sve(kvm) || !guest_owns_fp_regs())
+ val |= CPACR_EL1_ZEN;
if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_ELx_SMEN;
+ val |= CPACR_EL1_SMEN;
} else {
val = CPTR_NVHE_EL2_RES1;
- if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
+ if (kvm_has_sve(kvm) && guest_owns_fp_regs())
val |= CPTR_EL2_TZ;
- if (cpus_have_final_cap(ARM64_SME))
- val &= ~CPTR_EL2_TSM;
+ if (!cpus_have_final_cap(ARM64_SME))
+ val |= CPTR_EL2_TSM;
}
- return val;
-}
-
-static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
-{
- u64 val = kvm_get_reset_cptr_el2(vcpu);
-
kvm_write_cptr_el2(val);
}
+#ifdef __KVM_NVHE_HYPERVISOR__
+#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2(kern_hyp_va((v)->kvm))
+#else
+#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2((v)->kvm)
+#endif
+
/*
* Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
* format if E2H isn't set.
@@ -685,7 +685,7 @@ static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu,
#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \
(!vcpu_has_nv(vcpu) ? false : \
____cptr_xen_trap_enabled(vcpu, \
- SYS_FIELD_GET(CPACR_ELx, xen, \
+ SYS_FIELD_GET(CPACR_EL1, xen, \
vcpu_sanitised_cptr_el2(vcpu))))
static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu)
@@ -697,9 +697,4 @@ static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu)
{
return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN);
}
-
-static inline void kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
-{
- vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
-}
#endif /* __ARM64_KVM_EMULATE_H__ */
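
A minimal caller sketch (invented function, not in the patch) showing the
renamed constants with the cpacr_clear_set() helper above; the all-or-none
build checks require FPEN to be passed as the full two-bit field:

    /* Sketch: stop trapping FP/SIMD for the current context */
    static inline void sketch_enable_fpsimd(void)
    {
            /* CPACR_EL1_FPEN covers both the EL0 and EL1 enable bits */
            cpacr_clear_set(0, CPACR_EL1_FPEN);
    }
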
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f23f4ea9ec8b..04cd488f6787 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -332,6 +332,8 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7
/* Fine-Grained UNDEF initialised */
#define KVM_ARCH_FLAG_FGU_INITIALIZED 8
+ /* SVE exposed to guest */
+#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9
unsigned long flags;
/* VM-wide vCPU feature set */
@@ -491,7 +493,6 @@ enum vcpu_sysreg {
VBAR_EL2, /* Vector Base Address Register (EL2) */
RVBAR_EL2, /* Reset Vector Base Address Register */
CONTEXTIDR_EL2, /* Context ID Register (EL2) */
- CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
CNTHP_CTL_EL2,
CNTHP_CVAL_EL2,
@@ -502,6 +503,7 @@ enum vcpu_sysreg {
MARKER(__SANITISED_REG_START__),
TCR2_EL2, /* Extended Translation Control Register (EL2) */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
+ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
/* Any VNCR-capable reg goes after this point */
MARKER(__VNCR_START__),
@@ -611,6 +613,12 @@ struct cpu_sve_state {
* field.
*/
struct kvm_host_data {
+#define KVM_HOST_DATA_FLAG_HAS_SPE 0
+#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
+#define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED 2
+#define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED 3
+ unsigned long flags;
+
struct kvm_cpu_context host_ctxt;
/*
@@ -643,7 +651,7 @@ struct kvm_host_data {
* host_debug_state contains the host registers which are
* saved and restored during world switches.
*/
- struct {
+ struct {
/* {Break,watch}point registers */
struct kvm_guest_debug_arch regs;
/* Statistical profiling extension */
@@ -653,6 +661,13 @@ struct kvm_host_data {
/* Values of trap registers for the host before guest entry. */
u64 mdcr_el2;
} host_debug_state;
+
+ /* Number of programmable event counters (PMCR_EL0.N) for this CPU */
+ unsigned int nr_event_counters;
+
+ /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */
+ unsigned int debug_brps;
+ unsigned int debug_wrps;
};
struct kvm_host_psci_config {
@@ -709,7 +724,6 @@ struct kvm_vcpu_arch {
u64 hcr_el2;
u64 hcrx_el2;
u64 mdcr_el2;
- u64 cptr_el2;
/* Exception Information */
struct kvm_vcpu_fault_info fault;
@@ -740,31 +754,22 @@ struct kvm_vcpu_arch {
*
* external_debug_state contains the debug values we want to debug the
* guest. This is set via the KVM_SET_GUEST_DEBUG ioctl.
- *
- * debug_ptr points to the set of debug registers that should be loaded
- * onto the hardware when running the guest.
*/
- struct kvm_guest_debug_arch *debug_ptr;
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
+ u64 external_mdscr_el1;
+
+ enum {
+ VCPU_DEBUG_FREE,
+ VCPU_DEBUG_HOST_OWNED,
+ VCPU_DEBUG_GUEST_OWNED,
+ } debug_owner;
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
struct kvm_pmu pmu;
- /*
- * Guest registers we preserve during guest debugging.
- *
- * These shadow registers are updated by the kvm_handle_sys_reg
- * trap handler if the guest accesses or updates them while we
- * are using guest debug.
- */
- struct {
- u32 mdscr_el1;
- bool pstate_ss;
- } guest_debug_preserved;
-
/* vcpu power state */
struct kvm_mp_state mp_state;
spinlock_t mp_state_lock;
@@ -867,14 +872,10 @@ struct kvm_vcpu_arch {
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
-/* SVE exposed to guest */
-#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0))
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0))
/* SVE config completed */
#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
-/* PTRAUTH exposed to guest */
-#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
-/* KVM_ARM_VCPU_INIT completed */
-#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3))
/* Exception pending */
#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
@@ -910,29 +911,21 @@ struct kvm_vcpu_arch {
#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
-/* Guest debug is live */
-#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4))
-/* Save SPE context if active */
-#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
-/* Save TRBE context if active */
-#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
-
-/* SVE enabled for host EL0 */
-#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
-/* SME enabled for EL0 */
-#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
+
/* Physical CPU not in supported_cpus */
-#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
+#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(0))
/* WFIT instruction trapped */
-#define IN_WFIT __vcpu_single_flag(sflags, BIT(3))
+#define IN_WFIT __vcpu_single_flag(sflags, BIT(1))
/* vcpu system registers loaded on physical CPU */
-#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
-/* Software step state is Active-pending */
-#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
+#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(2))
+/* Software step state is Active-pending for external debug */
+#define HOST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(3))
+/* Software step state is Active pending for guest debug */
+#define GUEST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(4))
/* PMUSERENR for the guest EL0 is on physical CPU */
-#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
+#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(5))
/* WFI instruction trapped */
-#define IN_WFI __vcpu_single_flag(sflags, BIT(7))
+#define IN_WFI __vcpu_single_flag(sflags, BIT(6))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -963,14 +956,21 @@ struct kvm_vcpu_arch {
KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP)
-#define vcpu_has_sve(vcpu) (system_supports_sve() && \
- vcpu_get_flag(vcpu, GUEST_HAS_SVE))
+#define kvm_has_sve(kvm) (system_supports_sve() && \
+ test_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &(kvm)->arch.flags))
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+#define vcpu_has_sve(vcpu) kvm_has_sve(kern_hyp_va((vcpu)->kvm))
+#else
+#define vcpu_has_sve(vcpu) kvm_has_sve((vcpu)->kvm)
+#endif
#ifdef CONFIG_ARM64_PTR_AUTH
#define vcpu_has_ptrauth(vcpu) \
((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
- vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
+ (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || \
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
#else
#define vcpu_has_ptrauth(vcpu) false
#endif
@@ -1311,6 +1311,13 @@ DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
&this_cpu_ptr_hyp_sym(kvm_host_data)->f)
#endif
+#define host_data_test_flag(flag) \
+ (test_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)))
+#define host_data_set_flag(flag) \
+ set_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
+#define host_data_clear_flag(flag) \
+ clear_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
+
/* Check whether the FP regs are owned by the guest */
static inline bool guest_owns_fp_regs(void)
{
@@ -1336,15 +1343,22 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-void kvm_arm_init_debug(void);
-void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+void kvm_init_host_debug_data(void);
+void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu);
+void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu);
+void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val);
#define kvm_vcpu_os_lock_enabled(vcpu) \
(!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))
+#define kvm_debug_regs_in_use(vcpu) \
+ ((vcpu)->arch.debug_owner != VCPU_DEBUG_FREE)
+#define kvm_host_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_HOST_OWNED)
+#define kvm_guest_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_GUEST_OWNED)
+
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
@@ -1371,10 +1385,6 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
return (!has_vhe() && attr->exclude_host);
}
-/* Flags for host debug state */
-void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
-void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
-
#ifdef CONFIG_KVM
void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u64 clr);
@@ -1426,6 +1436,7 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
return test_bit(feature, ka->vcpu_features);
}
+#define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f))
#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED)
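
An illustrative sketch (function names invented) of the new host_data flag
accessors, which token-paste the short name onto KVM_HOST_DATA_FLAG_ and
replace the old per-vcpu sflags:

    /* Sketch: record and query SPE presence for this physical CPU */
    static void sketch_mark_spe(void)
    {
            host_data_set_flag(HAS_SPE);    /* KVM_HOST_DATA_FLAG_HAS_SPE */
    }

    static bool sketch_has_spe(void)
    {
            return host_data_test_flag(HAS_SPE);
    }
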
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 233e65522716..bc0f2f4fa1ba 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -33,14 +33,14 @@ static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2)
{
- u64 cpacr_el1 = CPACR_ELx_RES1;
+ u64 cpacr_el1 = CPACR_EL1_RES1;
if (cptr_el2 & CPTR_EL2_TTA)
- cpacr_el1 |= CPACR_ELx_TTA;
+ cpacr_el1 |= CPACR_EL1_TTA;
if (!(cptr_el2 & CPTR_EL2_TFP))
- cpacr_el1 |= CPACR_ELx_FPEN;
+ cpacr_el1 |= CPACR_EL1_FPEN;
if (!(cptr_el2 & CPTR_EL2_TZ))
- cpacr_el1 |= CPACR_ELx_ZEN;
+ cpacr_el1 |= CPACR_EL1_ZEN;
cpacr_el1 |= cptr_el2 & (CPTR_EL2_TCPAC | CPTR_EL2_TAM);
@@ -64,6 +64,7 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
}
extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
+extern bool forward_debug_exception(struct kvm_vcpu *vcpu);
extern void kvm_init_nested(struct kvm *kvm);
extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 65f988b6fe0d..eb65f12e81d9 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -20,6 +20,31 @@ int pkvm_init_host_vm(struct kvm *kvm);
int pkvm_create_hyp_vm(struct kvm *kvm);
void pkvm_destroy_hyp_vm(struct kvm *kvm);
+/*
+ * This functions as an allow-list of protected VM capabilities.
+ * Features not explicitly allowed by this function are denied.
+ */
+static inline bool kvm_pvm_ext_allowed(long ext)
+{
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ case KVM_CAP_MAX_VCPU_ID:
+ case KVM_CAP_MSI_DEVID:
+ case KVM_CAP_ARM_VM_IPA_SIZE:
+ case KVM_CAP_ARM_PMU_V3:
+ case KVM_CAP_ARM_SVE:
+ case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+ case KVM_CAP_ARM_PTRAUTH_GENERIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
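
The caller pattern this enables (mirroring the arm.c hunks further down) is a
simple gate in front of capability reporting; a sketch:

    /* Sketch: hide disallowed capabilities from protected VMs */
    static int sketch_check_extension(struct kvm *kvm, long ext)
    {
            if (kvm && kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(ext))
                    return 0;

            /* ... normal per-extension handling ... */
            return 1;
    }
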
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index b8303a83c0bf..2ed33737c7a9 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -477,6 +477,7 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
+#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
@@ -484,14 +485,17 @@
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
+#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0)
#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2)
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
+#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
+#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a78f247029ae..7ce555862895 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -787,5 +787,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
#endif
{
+ .desc = "Broken CNTVOFF_EL2",
+ .capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF,
+ ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) {
+ MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1),
+ {}
+ })),
+ },
+ {
}
};
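
At runtime the entry above matches affected cores by MIDR; a standalone sketch
(helper name invented) of the same check:

    /* Sketch: does the current CPU match the Oryon erratum list? */
    static bool sketch_is_oryon_x1(void)
    {
            static const struct midr_range affected[] = {
                    MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1),
                    {},
            };

            return is_midr_in_range_list(read_cpuid_id(), affected);
    }
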
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6ce71f444ed8..13de0c7af053 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1004,17 +1004,16 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
/* Override was valid */
ftr_new = tmp;
str = "forced";
- } else if (ftr_ovr == tmp) {
+ } else {
/* Override was the safe value */
str = "already set";
}
- if (str)
- pr_warn("%s[%d:%d]: %s to %llx\n",
- reg->name,
- ftrp->shift + ftrp->width - 1,
- ftrp->shift, str,
- tmp & (BIT(ftrp->width) - 1));
+ pr_warn("%s[%d:%d]: %s to %llx\n",
+ reg->name,
+ ftrp->shift + ftrp->width - 1,
+ ftrp->shift, str,
+ tmp & (BIT(ftrp->width) - 1));
} else if ((ftr_mask & reg->override->val) == ftr_mask) {
reg->override->val &= ~ftr_mask;
pr_warn("%s[%d:%d]: impossible override, ignored\n",
@@ -2376,8 +2375,8 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
#ifdef CONFIG_ARM64_POE
static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused)
{
- sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1x_E0POE);
- sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_E0POE);
+ sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1_E0POE);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_E0POE);
}
#endif
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 8f5422ed1b75..ef3a69cc398e 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -105,6 +105,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
+/* Static key which is set if CNTVOFF_EL2 is unusable */
+KVM_NVHE_ALIAS(broken_cntvoff_key);
+
/* EL2 exception handling */
KVM_NVHE_ALIAS(__start___kvm_ex_table);
KVM_NVHE_ALIAS(__stop___kvm_ex_table);
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 1215df590418..d3d243366536 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -30,6 +30,7 @@ static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);
static const u8 default_ppi[] = {
[TIMER_PTIMER] = 30,
@@ -101,21 +102,6 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
}
}
-static u64 timer_get_offset(struct arch_timer_context *ctxt)
-{
- u64 offset = 0;
-
- if (!ctxt)
- return 0;
-
- if (ctxt->offset.vm_offset)
- offset += *ctxt->offset.vm_offset;
- if (ctxt->offset.vcpu_offset)
- offset += *ctxt->offset.vcpu_offset;
-
- return offset;
-}
-
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
struct kvm_vcpu *vcpu = ctxt->vcpu;
@@ -441,11 +427,30 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu)
regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}
+static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
+{
+ /*
+ * Paper over NV2 brokenness by publishing the interrupt status
+ * bit. This still results in a poor quality of emulation (guest
+ * writes will have no effect until the next exit).
+ *
+ * But hey, it's fast, right?
+ */
+ if (is_hyp_ctxt(ctx->vcpu) &&
+ (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
+ unsigned long val = timer_get_ctl(ctx);
+ __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
+ timer_set_ctl(ctx, val);
+ }
+}
+
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
struct arch_timer_context *timer_ctx)
{
int ret;
+ kvm_timer_update_status(timer_ctx, new_level);
+
timer_ctx->irq.level = new_level;
trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
timer_ctx->irq.level);
@@ -471,6 +476,8 @@ static void timer_emulate(struct arch_timer_context *ctx)
return;
}
+ kvm_timer_update_status(ctx, should_fire);
+
/*
* If the timer can fire now, we don't need to have a soft timer
* scheduled for the future. If the timer cannot fire at all,
@@ -513,7 +520,12 @@ static void timer_save_state(struct arch_timer_context *ctx)
case TIMER_VTIMER:
case TIMER_HVTIMER:
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
- timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
+ cval = read_sysreg_el0(SYS_CNTV_CVAL);
+
+ if (has_broken_cntvoff())
+ cval -= timer_get_offset(ctx);
+
+ timer_set_cval(ctx, cval);
/* Disable the timer */
write_sysreg_el0(0, SYS_CNTV_CTL);
@@ -618,8 +630,15 @@ static void timer_restore_state(struct arch_timer_context *ctx)
case TIMER_VTIMER:
case TIMER_HVTIMER:
- set_cntvoff(timer_get_offset(ctx));
- write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
+ cval = timer_get_cval(ctx);
+ offset = timer_get_offset(ctx);
+ if (has_broken_cntvoff()) {
+ set_cntvoff(0);
+ cval += offset;
+ } else {
+ set_cntvoff(offset);
+ }
+ write_sysreg_el0(cval, SYS_CNTV_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
break;
@@ -762,7 +781,7 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
- bool tpt, tpc;
+ bool tvt, tpt, tvc, tpc, tvt02, tpt02;
u64 clr, set;
/*
@@ -777,7 +796,29 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
* within this function, reality kicks in and we start adding
* traps based on emulation requirements.
*/
- tpt = tpc = false;
+ tvt = tpt = tvc = tpc = false;
+ tvt02 = tpt02 = false;
+
+ /*
+ * NV2 badly breaks the timer semantics by redirecting accesses to
+ * the EL1 timer state to memory, so let's call ECV to the rescue if
+ * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
+ *
+ * The treatment slightly varies depending whether we run a nVHE or
+ * VHE guest: nVHE will use the _EL0 registers directly, while VHE
+ * will use the _EL02 accessors. This translates in different trap
+ * bits.
+ *
+ * None of the trapping is required when running in non-HYP context,
+ * unless required by the L1 hypervisor settings once we advertise
+ * ECV+NV in the guest, or that we need trapping for other reasons.
+ */
+ if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
+ if (vcpu_el2_e2h_is_set(vcpu))
+ tvt02 = tpt02 = true;
+ else
+ tvt = tpt = true;
+ }
/*
* We have two possibility to deal with a physical offset:
@@ -793,9 +834,20 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
tpt = tpc = true;
/*
+ * For the poor sods that could not correctly subtract one value
+ * from another, trap the full virtual timer and counter.
+ */
+ if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
+ tvt = tvc = true;
+
+ /*
* Apply the enable bits that the guest hypervisor has requested for
* its own guest. We can only add traps that wouldn't have been set
* above.
+ * Implementation choices: we do not support NV when E2H=0 in the
+ * guest, and we don't support configuration where E2H is writable
+ * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
+ * not both). This simplifies the handling of the EL1NV* bits.
*/
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
@@ -806,6 +858,9 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
+
+ tpt02 |= (val & CNTHCTL_EL1NVPCT);
+ tvt02 |= (val & CNTHCTL_EL1NVVCT);
}
/*
@@ -817,6 +872,10 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
+ assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
+ assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
+ assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
+ assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);
/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
sysreg_clear_set(cnthctl_el2, clr, set);
@@ -905,6 +964,54 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_blocking(vcpu);
}
+void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
+{
+ /*
+ * When NV2 is on, guest hypervisors have their EL1 timer register
+ * accesses redirected to the VNCR page. Any guest action taken on
+ * the timer is postponed until the next exit, leading to a very
+ * poor quality of emulation.
+ *
+ * This is an unmitigated disaster, only papered over by FEAT_ECV,
+ * which allows trapping of the timer registers even with NV2.
+ * Still, this is worse than FEAT_NV on its own. Meh.
+ */
+ if (!vcpu_el2_e2h_is_set(vcpu)) {
+ if (cpus_have_final_cap(ARM64_HAS_ECV))
+ return;
+
+ /*
+ * A non-VHE guest hypervisor doesn't have any direct access
+ * to its timers: the EL2 registers trap (and the HW is
+ * fully emulated), while the EL0 registers access memory
+ * despite the access being notionally direct. Boo.
+ *
+ * We update the hardware timer registers with the
+ * latest value written by the guest to the VNCR page
+ * and let the hardware take care of the rest.
+ */
+ write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL);
+ write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
+ write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL);
+ write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
+ } else {
+ /*
+ * For a VHE guest hypervisor, the EL2 state is directly
+ * stored in the host EL1 timers, while the emulated EL0
+ * state is stored in the VNCR page. The latter could have
+ * been updated behind our back, and we must reset the
+ * emulation of the timers.
+ */
+ struct timer_map map;
+ get_timer_map(vcpu, &map);
+
+ soft_timer_cancel(&map.emul_vtimer->hrtimer);
+ soft_timer_cancel(&map.emul_ptimer->hrtimer);
+ timer_emulate(map.emul_vtimer);
+ timer_emulate(map.emul_ptimer);
+ }
+}
+
/*
* With a userspace irqchip we have to check if the guest de-asserted the
* timer and if so, unmask the timer irq signal on the host interrupt
@@ -1363,6 +1470,37 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
return 0;
}
+static void kvm_timer_handle_errata(void)
+{
+ u64 mmfr0, mmfr1, mmfr4;
+
+ /*
+ * CNTVOFF_EL2 is broken on some implementations. For those, we trap
+ * all virtual timer/counter accesses, requiring FEAT_ECV.
+ *
+ * However, a hypervisor supporting nesting is likely to mitigate the
+ * erratum at L0, and not require other levels to mitigate it (which
+ * would otherwise be a terrible performance sink due to trap
+ * amplification).
+ *
+ * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
+ * and that NV is likely not to (because of limitations of the
+ * architecture), only enable the workaround when FEAT_VHE and
+ * FEAT_E2H0 are both detected. Time will tell if this actually holds.
+ */
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
+ if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
+ !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
+ SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
+ (has_vhe() || has_hvhe()) &&
+ cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
+ static_branch_enable(&broken_cntvoff_key);
+ kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
+ }
+}
+
int __init kvm_timer_hyp_init(bool has_gic)
{
struct arch_timer_kvm_info *info;
@@ -1431,6 +1569,7 @@ int __init kvm_timer_hyp_init(bool has_gic)
goto out_free_vtimer_irq;
}
+ kvm_timer_handle_errata();
return 0;
out_free_ptimer_irq:
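
The save/restore hunks above reduce to rebasing CVAL by the virtual offset
whenever CNTVOFF_EL2 must be forced to zero: virtual count = physical count -
CNTVOFF, so the timer fires when pct >= cval + offset. A standalone sketch of
that arithmetic, with invented names:

    /* Sketch: CVAL rebasing when the HW offset cannot be trusted */
    static u64 sketch_save_cval(u64 hw_cval, u64 offset)
    {
            return hw_cval - offset;        /* back to the guest's view */
    }

    static u64 sketch_restore_cval(u64 guest_cval, u64 offset)
    {
            return guest_cval + offset;     /* HW runs with CNTVOFF == 0 */
    }
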
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9bcbc7b8ed38..0772670a3334 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -80,31 +80,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
-/*
- * This functions as an allow-list of protected VM capabilities.
- * Features not explicitly allowed by this function are denied.
- */
-static bool pkvm_ext_allowed(struct kvm *kvm, long ext)
-{
- switch (ext) {
- case KVM_CAP_IRQCHIP:
- case KVM_CAP_ARM_PSCI:
- case KVM_CAP_ARM_PSCI_0_2:
- case KVM_CAP_NR_VCPUS:
- case KVM_CAP_MAX_VCPUS:
- case KVM_CAP_MAX_VCPU_ID:
- case KVM_CAP_MSI_DEVID:
- case KVM_CAP_ARM_VM_IPA_SIZE:
- case KVM_CAP_ARM_PMU_V3:
- case KVM_CAP_ARM_SVE:
- case KVM_CAP_ARM_PTRAUTH_ADDRESS:
- case KVM_CAP_ARM_PTRAUTH_GENERIC:
- return true;
- default:
- return false;
- }
-}
-
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
@@ -113,7 +88,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
if (cap->flags)
return -EINVAL;
- if (kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, cap->cap))
+ if (kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(cap->cap))
return -EINVAL;
switch (cap->cap) {
@@ -311,7 +286,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
- if (kvm && kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, ext))
+ if (kvm && kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(ext))
return 0;
switch (ext) {
@@ -476,8 +451,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_pmu_vcpu_init(vcpu);
- kvm_arm_reset_debug_ptr(vcpu);
-
kvm_arm_pvtime_vcpu_init(&vcpu->arch);
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
@@ -605,6 +578,7 @@ nommu:
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
+ kvm_vcpu_load_debug(vcpu);
if (has_vhe())
kvm_vcpu_load_vhe(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
@@ -624,8 +598,6 @@ nommu:
vcpu_set_pauth_traps(vcpu);
- kvm_arch_vcpu_load_debug_state_flags(vcpu);
-
if (is_protected_kvm_enabled()) {
kvm_call_hyp_nvhe(__pkvm_vcpu_load,
vcpu->kvm->arch.pkvm.handle,
@@ -646,7 +618,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_call_hyp_nvhe(__pkvm_vcpu_put);
}
- kvm_arch_vcpu_put_debug_state_flags(vcpu);
+ kvm_vcpu_put_debug(vcpu);
kvm_arch_vcpu_put_fp(vcpu);
if (has_vhe())
kvm_vcpu_put_vhe(vcpu);
@@ -829,8 +801,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
kvm_init_mpidr_data(kvm);
- kvm_arm_vcpu_init_debug(vcpu);
-
if (likely(irqchip_in_kernel(kvm))) {
/*
* Map the VGIC hardware resources before running a vcpu the
@@ -1208,7 +1178,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
continue;
}
- kvm_arm_setup_debug(vcpu);
kvm_arch_vcpu_ctxflush_fp(vcpu);
/**************************************************************
@@ -1225,8 +1194,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* Back from guest
*************************************************************/
- kvm_arm_clear_debug(vcpu);
-
/*
* We must sync the PMU state before the vgic state so
* that the vgic can properly sample the updated state of the
@@ -1249,6 +1216,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_timer_sync_user(vcpu);
+ if (is_hyp_ctxt(vcpu))
+ kvm_timer_sync_nested(vcpu);
+
kvm_arch_vcpu_ctxsync_fp(vcpu);
/*
@@ -1592,7 +1562,6 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
}
vcpu_reset_hcr(vcpu);
- vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
/*
* Handle the "start in power-off" case.
@@ -2130,6 +2099,7 @@ static void cpu_set_hyp_vector(void)
static void cpu_hyp_init_context(void)
{
kvm_init_host_cpu_context(host_data_ptr(host_ctxt));
+ kvm_init_host_debug_data();
if (!is_kernel_in_hyp_mode())
cpu_init_hyp_mode();
@@ -2138,7 +2108,6 @@ static void cpu_hyp_init_context(void)
static void cpu_hyp_init_features(void)
{
cpu_set_hyp_vector();
- kvm_arm_init_debug();
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 3d7eb395e33d..3a96c96816e9 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -111,7 +111,7 @@ static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
case TR_EL10:
return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
- (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE);
+ (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE);
default:
BUG();
}
@@ -140,8 +140,8 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
}
val = __vcpu_sys_reg(vcpu, TCR2_EL1);
- wi->poe = val & TCR2_EL1x_POE;
- wi->e0poe = val & TCR2_EL1x_E0POE;
+ wi->poe = val & TCR2_EL1_POE;
+ wi->e0poe = val & TCR2_EL1_E0POE;
}
}
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index ce8886122ed3..d921e7f7bd59 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -3,7 +3,8 @@
* Debug and Guest Debug support
*
* Copyright (C) 2015 - Linaro Ltd
- * Author: Alex Bennée <alex.bennee@linaro.org>
+ * Authors: Alex Bennée <alex.bennee@linaro.org>
+ * Oliver Upton <oliver.upton@linux.dev>
*/
#include <linux/kvm_host.h>
@@ -14,72 +15,6 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
-#include "trace.h"
-
-/* These are the bits of MDSCR_EL1 we may manipulate */
-#define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \
- DBG_MDSCR_KDE | \
- DBG_MDSCR_MDE)
-
-static DEFINE_PER_CPU(u64, mdcr_el2);
-
-/*
- * save/restore_guest_debug_regs
- *
- * For some debug operations we need to tweak some guest registers. As
- * a result we need to save the state of those registers before we
- * make those modifications.
- *
- * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
- * after we have restored the preserved value to the main context.
- *
- * When single-step is enabled by userspace, we tweak PSTATE.SS on every
- * guest entry. Preserve PSTATE.SS so we can restore the original value
- * for the vcpu after the single-step is disabled.
- */
-static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
-{
- u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
-
- vcpu->arch.guest_debug_preserved.mdscr_el1 = val;
-
- trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
- vcpu->arch.guest_debug_preserved.mdscr_el1);
-
- vcpu->arch.guest_debug_preserved.pstate_ss =
- (*vcpu_cpsr(vcpu) & DBG_SPSR_SS);
-}
-
-static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
-{
- u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1;
-
- vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);
-
- trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
- vcpu_read_sys_reg(vcpu, MDSCR_EL1));
-
- if (vcpu->arch.guest_debug_preserved.pstate_ss)
- *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
- else
- *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
-}
-
-/**
- * kvm_arm_init_debug - grab what we need for debug
- *
- * Currently the sole task of this function is to retrieve the initial
- * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has
- * presumably been set-up by some knowledgeable bootcode.
- *
- * It is called once per-cpu during CPU hyp initialisation.
- */
-
-void kvm_arm_init_debug(void)
-{
- __this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2));
-}
-
/**
* kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value
*
@@ -95,11 +30,14 @@ void kvm_arm_init_debug(void)
*/
static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
{
+ preempt_disable();
+
/*
* This also clears MDCR_EL2_E2PB_MASK and MDCR_EL2_E2TB_MASK
* to disable guest access to the profiling and trace buffers
*/
- vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
+ vcpu->arch.mdcr_el2 = FIELD_PREP(MDCR_EL2_HPMN,
+ *host_data_ptr(nr_event_counters));
vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
MDCR_EL2_TPMS |
MDCR_EL2_TTRF |
@@ -113,233 +51,171 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
/*
- * Trap debug register access when one of the following is true:
- * - Userspace is using the hardware to debug the guest
- * (KVM_GUESTDBG_USE_HW is set).
- * - The guest is not using debug (DEBUG_DIRTY clear).
- * - The guest has enabled the OS Lock (debug exceptions are blocked).
+ * Trap debug registers if the guest doesn't have ownership of them.
*/
- if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
- !vcpu_get_flag(vcpu, DEBUG_DIRTY) ||
- kvm_vcpu_os_lock_enabled(vcpu))
+ if (!kvm_guest_owns_debug_regs(vcpu))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
- trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
-}
+ /* Write MDCR_EL2 directly if we're already at EL2 */
+ if (has_vhe())
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-/**
- * kvm_arm_vcpu_init_debug - setup vcpu debug traps
- *
- * @vcpu: the vcpu pointer
- *
- * Set vcpu initial mdcr_el2 value.
- */
-void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
- kvm_arm_setup_mdcr_el2(vcpu);
preempt_enable();
}
-/**
- * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
- * @vcpu: the vcpu pointer
- */
-
-void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
+void kvm_init_host_debug_data(void)
{
- vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state;
+ u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+
+ if (cpuid_feature_extract_signed_field(dfr0, ID_AA64DFR0_EL1_PMUVer_SHIFT) > 0)
+ *host_data_ptr(nr_event_counters) = FIELD_GET(ARMV8_PMU_PMCR_N,
+ read_sysreg(pmcr_el0));
+
+ *host_data_ptr(debug_brps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0);
+ *host_data_ptr(debug_wrps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0);
+
+ if (has_vhe())
+ return;
+
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
+ !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
+ host_data_set_flag(HAS_SPE);
+
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
+ !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
+ host_data_set_flag(HAS_TRBE);
}
-/**
- * kvm_arm_setup_debug - set up debug related stuff
+/*
+ * Configures the 'external' MDSCR_EL1 value for the guest, i.e. when the host
+ * has taken over MDSCR_EL1.
*
- * @vcpu: the vcpu pointer
+ * - Userspace is single-stepping the guest, and MDSCR_EL1.SS is forced to 1.
*
- * This is called before each entry into the hypervisor to setup any
- * debug related registers.
+ * - Userspace is using the breakpoint/watchpoint registers to debug the
+ * guest, and MDSCR_EL1.MDE is forced to 1.
*
- * Additionally, KVM only traps guest accesses to the debug registers if
- * the guest is not actively using them (see the DEBUG_DIRTY
- * flag on vcpu->arch.iflags). Since the guest must not interfere
- * with the hardware state when debugging the guest, we must ensure that
- * trapping is enabled whenever we are debugging the guest using the
- * debug registers.
+ * - The guest has enabled the OS Lock, and KVM is forcing MDSCR_EL1.MDE to 0,
+ * masking all debug exceptions affected by the OS Lock.
*/
-
-void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
+static void setup_external_mdscr(struct kvm_vcpu *vcpu)
{
- unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2;
+ /*
+ * Use the guest's MDSCR_EL1 as a starting point, since there are
+ * several other features controlled by MDSCR_EL1 that are not relevant
+ * to the host.
+ *
+ * Clear the bits that KVM may use which also satisfies emulation of
+ * the OS Lock as MDSCR_EL1.MDE is cleared.
+ */
+ u64 mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1) & ~(MDSCR_EL1_SS |
+ MDSCR_EL1_MDE |
+ MDSCR_EL1_KDE);
- trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ mdscr |= MDSCR_EL1_SS;
- kvm_arm_setup_mdcr_el2(vcpu);
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW)
+ mdscr |= MDSCR_EL1_MDE | MDSCR_EL1_KDE;
+
+ vcpu->arch.external_mdscr_el1 = mdscr;
+}
- /* Check if we need to use the debug registers. */
+void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu)
+{
+ u64 mdscr;
+
+ /* Must be called before kvm_vcpu_load_vhe() */
+ KVM_BUG_ON(vcpu_get_flag(vcpu, SYSREGS_ON_CPU), vcpu->kvm);
+
+ /*
+ * Determine which of the possible debug states we're in:
+ *
+ * - VCPU_DEBUG_HOST_OWNED: KVM has taken ownership of the guest's
+ * breakpoint/watchpoint registers, or needs to use MDSCR_EL1 to do
+ * software step or emulate the effects of the OS Lock being enabled.
+ *
+ * - VCPU_DEBUG_GUEST_OWNED: The guest has debug exceptions enabled, and
+ * the breakpoint/watchpoint registers need to be loaded eagerly.
+ *
+ * - VCPU_DEBUG_FREE: Neither of the above apply, no breakpoint/watchpoint
+ * context needs to be loaded on the CPU.
+ */
if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
- /* Save guest debug state */
- save_guest_debug_regs(vcpu);
+ vcpu->arch.debug_owner = VCPU_DEBUG_HOST_OWNED;
+ setup_external_mdscr(vcpu);
/*
- * Single Step (ARM ARM D2.12.3 The software step state
- * machine)
- *
- * If we are doing Single Step we need to manipulate
- * the guest's MDSCR_EL1.SS and PSTATE.SS. Once the
- * step has occurred the hypervisor will trap the
- * debug exception and we return to userspace.
- *
- * If the guest attempts to single step its userspace
- * we would have to deal with a trapped exception
- * while in the guest kernel. Because this would be
- * hard to unwind we suppress the guest's ability to
- * do so by masking MDSCR_EL.SS.
- *
- * This confuses guest debuggers which use
- * single-step behind the scenes but everything
- * returns to normal once the host is no longer
- * debugging the system.
+ * Steal the guest's single-step state machine if userspace wants
+ * single-step the guest.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- /*
- * If the software step state at the last guest exit
- * was Active-pending, we don't set DBG_SPSR_SS so
- * that the state is maintained (to not run another
- * single-step until the pending Software Step
- * exception is taken).
- */
- if (!vcpu_get_flag(vcpu, DBG_SS_ACTIVE_PENDING))
+ if (*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
+ vcpu_clear_flag(vcpu, GUEST_SS_ACTIVE_PENDING);
+ else
+ vcpu_set_flag(vcpu, GUEST_SS_ACTIVE_PENDING);
+
+ if (!vcpu_get_flag(vcpu, HOST_SS_ACTIVE_PENDING))
*vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
else
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
-
- mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- mdscr |= DBG_MDSCR_SS;
- vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
- } else {
- mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- mdscr &= ~DBG_MDSCR_SS;
- vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
}
+ } else {
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
-
- /*
- * HW Breakpoints and watchpoints
- *
- * We simply switch the debug_ptr to point to our new
- * external_debug_state which has been populated by the
- * debug ioctl. The existing DEBUG_DIRTY mechanism ensures
- * the registers are updated on the world switch.
- */
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
- /* Enable breakpoints/watchpoints */
- mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- mdscr |= DBG_MDSCR_MDE;
- vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
-
- vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
- vcpu_set_flag(vcpu, DEBUG_DIRTY);
-
- trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
- &vcpu->arch.debug_ptr->dbg_bcr[0],
- &vcpu->arch.debug_ptr->dbg_bvr[0]);
-
- trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
- &vcpu->arch.debug_ptr->dbg_wcr[0],
- &vcpu->arch.debug_ptr->dbg_wvr[0]);
-
- /*
- * The OS Lock blocks debug exceptions in all ELs when it is
- * enabled. If the guest has enabled the OS Lock, constrain its
- * effects to the guest. Emulate the behavior by clearing
- * MDSCR_EL1.MDE. In so doing, we ensure that host debug
- * exceptions are unaffected by guest configuration of the OS
- * Lock.
- */
- } else if (kvm_vcpu_os_lock_enabled(vcpu)) {
- mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
- mdscr &= ~DBG_MDSCR_MDE;
- vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
- }
+ if (mdscr & (MDSCR_EL1_KDE | MDSCR_EL1_MDE))
+ vcpu->arch.debug_owner = VCPU_DEBUG_GUEST_OWNED;
+ else
+ vcpu->arch.debug_owner = VCPU_DEBUG_FREE;
}
- BUG_ON(!vcpu->guest_debug &&
- vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state);
-
- /* If KDE or MDE are set, perform a full save/restore cycle. */
- if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
- vcpu_set_flag(vcpu, DEBUG_DIRTY);
-
- /* Write mdcr_el2 changes since vcpu_load on VHE systems */
- if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
- write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-
- trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
+ kvm_arm_setup_mdcr_el2(vcpu);
}
-void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
+void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu)
{
- trace_kvm_arm_clear_debug(vcpu->guest_debug);
+ if (likely(!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ return;
/*
- * Restore the guest's debug registers if we were using them.
+ * Save the host's software step state and restore the guest's before
+ * potentially returning to userspace.
*/
- if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS))
- /*
- * Mark the vcpu as ACTIVE_PENDING
- * until Software Step exception is taken.
- */
- vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING);
- }
-
- restore_guest_debug_regs(vcpu);
-
- /*
- * If we were using HW debug we need to restore the
- * debug_ptr to the guest debug state.
- */
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
- kvm_arm_reset_debug_ptr(vcpu);
-
- trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
- &vcpu->arch.debug_ptr->dbg_bcr[0],
- &vcpu->arch.debug_ptr->dbg_bvr[0]);
+ if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS))
+ vcpu_set_flag(vcpu, HOST_SS_ACTIVE_PENDING);
+ else
+ vcpu_clear_flag(vcpu, HOST_SS_ACTIVE_PENDING);
- trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
- &vcpu->arch.debug_ptr->dbg_wcr[0],
- &vcpu->arch.debug_ptr->dbg_wvr[0]);
- }
- }
+ if (vcpu_get_flag(vcpu, GUEST_SS_ACTIVE_PENDING))
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+ else
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
}
-void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
+/*
+ * Updates ownership of the debug registers after a trapped guest access to a
+ * breakpoint/watchpoint register. Host ownership of the debug registers is of
+ * strictly higher priority, and it is the responsibility of the VMM to emulate
+ * guest debug exceptions in this configuration.
+ */
+void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu)
{
- u64 dfr0;
-
- /* For VHE, there is nothing to do */
- if (has_vhe())
+ if (kvm_host_owns_debug_regs(vcpu))
return;
- dfr0 = read_sysreg(id_aa64dfr0_el1);
- /*
- * If SPE is present on this CPU and is available at current EL,
- * we may need to check if the host state needs to be saved.
- */
- if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
- !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(PMBIDR_EL1_P_SHIFT)))
- vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
-
- /* Check if we have TRBE implemented and available at the host */
- if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
- !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
- vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
+ vcpu->arch.debug_owner = VCPU_DEBUG_GUEST_OWNED;
+ kvm_arm_setup_mdcr_el2(vcpu);
}
-void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
+void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val)
{
- vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
- vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
+ if (val & OSLAR_EL1_OSLK)
+ __vcpu_sys_reg(vcpu, OSLSR_EL1) |= OSLSR_EL1_OSLK;
+ else
+ __vcpu_sys_reg(vcpu, OSLSR_EL1) &= ~OSLSR_EL1_OSLK;
+
+ preempt_disable();
+ kvm_arch_vcpu_put(vcpu);
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ preempt_enable();
}
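
A plausible shape for the ownership predicates this series leans on, given the tri-state debug_owner seen above (a sketch; the real helpers live in kvm_host.h and may differ):

static inline bool kvm_host_owns_debug_regs(struct kvm_vcpu *vcpu)
{
	/* Host (VMM) ownership takes strict priority over the guest's. */
	return vcpu->arch.debug_owner == VCPU_DEBUG_HOST_OWNED;
}

static inline bool kvm_debug_regs_in_use(struct kvm_vcpu *vcpu)
{
	/* Any owner but VCPU_DEBUG_FREE forces a full save/restore cycle. */
	return vcpu->arch.debug_owner != VCPU_DEBUG_FREE;
}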
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 1ffbfd1c3cf2..607d37bab70b 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -89,6 +89,9 @@ enum cgt_group_id {
CGT_HCRX_EnFPM,
CGT_HCRX_TCR2En,
+ CGT_CNTHCTL_EL1TVT,
+ CGT_CNTHCTL_EL1TVCT,
+
CGT_ICH_HCR_TC,
CGT_ICH_HCR_TALL0,
CGT_ICH_HCR_TALL1,
@@ -124,6 +127,8 @@ enum cgt_group_id {
__COMPLEX_CONDITIONS__,
CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__,
CGT_CNTHCTL_EL1PTEN,
+ CGT_CNTHCTL_EL1NVPCT,
+ CGT_CNTHCTL_EL1NVVCT,
CGT_CPTR_TTA,
CGT_MDCR_HPMN,
@@ -393,6 +398,18 @@ static const struct trap_bits coarse_trap_bits[] = {
.mask = HCRX_EL2_TCR2En,
.behaviour = BEHAVE_FORWARD_RW,
},
+ [CGT_CNTHCTL_EL1TVT] = {
+ .index = CNTHCTL_EL2,
+ .value = CNTHCTL_EL1TVT,
+ .mask = CNTHCTL_EL1TVT,
+ .behaviour = BEHAVE_FORWARD_RW,
+ },
+ [CGT_CNTHCTL_EL1TVCT] = {
+ .index = CNTHCTL_EL2,
+ .value = CNTHCTL_EL1TVCT,
+ .mask = CNTHCTL_EL1TVCT,
+ .behaviour = BEHAVE_FORWARD_READ,
+ },
[CGT_ICH_HCR_TC] = {
.index = ICH_HCR_EL2,
.value = ICH_HCR_TC,
@@ -487,6 +504,32 @@ static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu)
return BEHAVE_FORWARD_RW;
}
+static bool is_nested_nv2_guest(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ val = __vcpu_sys_reg(vcpu, HCR_EL2);
+ return ((val & (HCR_E2H | HCR_TGE | HCR_NV2 | HCR_NV1 | HCR_NV)) == (HCR_E2H | HCR_NV2 | HCR_NV));
+}
+
+static enum trap_behaviour check_cnthctl_el1nvpct(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_nv2_guest(vcpu) ||
+ !(__vcpu_sys_reg(vcpu, CNTHCTL_EL2) & CNTHCTL_EL1NVPCT))
+ return BEHAVE_HANDLE_LOCALLY;
+
+ return BEHAVE_FORWARD_RW;
+}
+
+static enum trap_behaviour check_cnthctl_el1nvvct(struct kvm_vcpu *vcpu)
+{
+ if (!is_nested_nv2_guest(vcpu) ||
+ !(__vcpu_sys_reg(vcpu, CNTHCTL_EL2) & CNTHCTL_EL1NVVCT))
+ return BEHAVE_HANDLE_LOCALLY;
+
+ return BEHAVE_FORWARD_RW;
+}
+
static enum trap_behaviour check_cptr_tta(struct kvm_vcpu *vcpu)
{
u64 val = __vcpu_sys_reg(vcpu, CPTR_EL2);
@@ -494,7 +537,7 @@ static enum trap_behaviour check_cptr_tta(struct kvm_vcpu *vcpu)
if (!vcpu_el2_e2h_is_set(vcpu))
val = translate_cptr_el2_to_cpacr_el1(val);
- if (val & CPACR_ELx_TTA)
+ if (val & CPACR_EL1_TTA)
return BEHAVE_FORWARD_RW;
return BEHAVE_HANDLE_LOCALLY;
@@ -534,6 +577,8 @@ static enum trap_behaviour check_mdcr_hpmn(struct kvm_vcpu *vcpu)
static const complex_condition_check ccc[] = {
CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten),
CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten),
+ CCC(CGT_CNTHCTL_EL1NVPCT, check_cnthctl_el1nvpct),
+ CCC(CGT_CNTHCTL_EL1NVVCT, check_cnthctl_el1nvvct),
CCC(CGT_CPTR_TTA, check_cptr_tta),
CCC(CGT_MDCR_HPMN, check_mdcr_hpmn),
};
@@ -850,11 +895,15 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SYS_CNTHP_CVAL_EL2, CGT_HCR_NV),
SR_RANGE_TRAP(SYS_CNTHV_TVAL_EL2,
SYS_CNTHV_CVAL_EL2, CGT_HCR_NV),
- /* All _EL02, _EL12 registers */
+	/* All _EL02, _EL12 registers up to CNTKCTL_EL12 */
SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0),
sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV),
SR_RANGE_TRAP(sys_reg(3, 5, 12, 0, 0),
- sys_reg(3, 5, 14, 15, 7), CGT_HCR_NV),
+ sys_reg(3, 5, 14, 1, 0), CGT_HCR_NV),
+ SR_TRAP(SYS_CNTP_CTL_EL02, CGT_CNTHCTL_EL1NVPCT),
+ SR_TRAP(SYS_CNTP_CVAL_EL02, CGT_CNTHCTL_EL1NVPCT),
+ SR_TRAP(SYS_CNTV_CTL_EL02, CGT_CNTHCTL_EL1NVVCT),
+ SR_TRAP(SYS_CNTV_CVAL_EL02, CGT_CNTHCTL_EL1NVVCT),
SR_TRAP(OP_AT_S1E2R, CGT_HCR_NV),
SR_TRAP(OP_AT_S1E2W, CGT_HCR_NV),
SR_TRAP(OP_AT_S12E1R, CGT_HCR_NV),
@@ -1184,6 +1233,11 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_CNTP_CTL_EL0, CGT_CNTHCTL_EL1PTEN),
SR_TRAP(SYS_CNTPCT_EL0, CGT_CNTHCTL_EL1PCTEN),
SR_TRAP(SYS_CNTPCTSS_EL0, CGT_CNTHCTL_EL1PCTEN),
+ SR_TRAP(SYS_CNTV_TVAL_EL0, CGT_CNTHCTL_EL1TVT),
+ SR_TRAP(SYS_CNTV_CVAL_EL0, CGT_CNTHCTL_EL1TVT),
+ SR_TRAP(SYS_CNTV_CTL_EL0, CGT_CNTHCTL_EL1TVT),
+ SR_TRAP(SYS_CNTVCT_EL0, CGT_CNTHCTL_EL1TVCT),
+ SR_TRAP(SYS_CNTVCTSS_EL0, CGT_CNTHCTL_EL1TVCT),
SR_TRAP(SYS_FPMR, CGT_HCRX_EnFPM),
/*
* IMPDEF choice:
@@ -2345,14 +2399,14 @@ inject:
return true;
}
-static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+static bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg, u64 control_bit)
{
bool control_bit_set;
if (!vcpu_has_nv(vcpu))
return false;
- control_bit_set = __vcpu_sys_reg(vcpu, HCR_EL2) & control_bit;
+ control_bit_set = __vcpu_sys_reg(vcpu, reg) & control_bit;
if (!is_hyp_ctxt(vcpu) && control_bit_set) {
kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return true;
@@ -2360,9 +2414,24 @@ static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit)
return false;
}
+static bool forward_hcr_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+{
+ return __forward_traps(vcpu, HCR_EL2, control_bit);
+}
+
bool forward_smc_trap(struct kvm_vcpu *vcpu)
{
- return forward_traps(vcpu, HCR_TSC);
+ return forward_hcr_traps(vcpu, HCR_TSC);
+}
+
+static bool forward_mdcr_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+{
+ return __forward_traps(vcpu, MDCR_EL2, control_bit);
+}
+
+bool forward_debug_exception(struct kvm_vcpu *vcpu)
+{
+ return forward_mdcr_traps(vcpu, MDCR_EL2_TDE);
}
static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
@@ -2406,7 +2475,7 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
* Forward this trap to the virtual EL2 if the virtual
* HCR_EL2.NV bit is set and this is coming from !EL2.
*/
- if (forward_traps(vcpu, HCR_NV))
+ if (forward_hcr_traps(vcpu, HCR_NV))
return;
spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
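
With __forward_traps() now parameterised on the register, extending trap forwarding to another vEL2 control register reduces to a thin wrapper. A hypothetical example (the register/bit pairing is purely illustrative):

static bool forward_cnthctl_traps(struct kvm_vcpu *vcpu, u64 control_bit)
{
	/* Same pattern as forward_hcr_traps()/forward_mdcr_traps() */
	return __forward_traps(vcpu, CNTHCTL_EL2, control_bit);
}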
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index ea5484ce1f3b..4d3d1a2eb157 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -65,14 +65,14 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
*host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
*host_data_ptr(fpmr_ptr) = kern_hyp_va(&current->thread.uw.fpmr);
- vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
+ host_data_clear_flag(HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
- vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
+ host_data_set_flag(HOST_SVE_ENABLED);
if (system_supports_sme()) {
- vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
+ host_data_clear_flag(HOST_SME_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
- vcpu_set_flag(vcpu, HOST_SME_ENABLED);
+ host_data_set_flag(HOST_SME_ENABLED);
/*
* If PSTATE.SM is enabled then save any pending FP
@@ -168,8 +168,8 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
*/
if (has_vhe() && system_supports_sme()) {
/* Also restore EL0 state seen on entry */
- if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
- sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN);
+ if (host_data_test_flag(HOST_SME_ENABLED))
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_SMEN);
else
sysreg_clear_set(CPACR_EL1,
CPACR_EL1_SMEN_EL0EN,
@@ -227,7 +227,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
* for EL0. To avoid spurious traps, restore the trap state
* seen by kvm_arch_vcpu_load_fp():
*/
- if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
+ if (host_data_test_flag(HOST_SVE_ENABLED))
sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
else
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 12dad841f2a5..2196979a24a3 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -917,31 +917,24 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- int ret = 0;
-
trace_kvm_set_guest_debug(vcpu, dbg->control);
- if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
- ret = -EINVAL;
- goto out;
- }
-
- if (dbg->control & KVM_GUESTDBG_ENABLE) {
- vcpu->guest_debug = dbg->control;
-
- /* Hardware assisted Break and Watch points */
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
- vcpu->arch.external_debug_state = dbg->arch;
- }
+ if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
+ return -EINVAL;
- } else {
- /* If not enabled clear all flags */
+ if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
vcpu->guest_debug = 0;
- vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+ vcpu_clear_flag(vcpu, HOST_SS_ACTIVE_PENDING);
+ return 0;
}
-out:
- return ret;
+ vcpu->guest_debug = dbg->control;
+
+ /* Hardware assisted Break and Watch points */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW)
+ vcpu->arch.external_debug_state = dbg->arch;
+
+ return 0;
}
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index d7c2990e7c9e..512d152233ff 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -183,6 +183,9 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
u64 esr = kvm_vcpu_get_esr(vcpu);
+ if (!vcpu->guest_debug && forward_debug_exception(vcpu))
+ return 1;
+
run->exit_reason = KVM_EXIT_DEBUG;
run->debug.arch.hsr = lower_32_bits(esr);
run->debug.arch.hsr_high = upper_32_bits(esr);
@@ -193,7 +196,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
run->debug.arch.far = vcpu->arch.fault.far_el2;
break;
case ESR_ELx_EC_SOFTSTP_LOW:
- vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
break;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index d00093699aaf..502a5b73ee70 100644
--- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -88,15 +88,26 @@
default: write_debug(ptr[0], reg, 0); \
}
+static struct kvm_guest_debug_arch *__vcpu_debug_regs(struct kvm_vcpu *vcpu)
+{
+ switch (vcpu->arch.debug_owner) {
+ case VCPU_DEBUG_FREE:
+ WARN_ON_ONCE(1);
+ fallthrough;
+ case VCPU_DEBUG_GUEST_OWNED:
+ return &vcpu->arch.vcpu_debug_state;
+ case VCPU_DEBUG_HOST_OWNED:
+ return &vcpu->arch.external_debug_state;
+ }
+
+ return NULL;
+}
+
static void __debug_save_state(struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt)
{
- u64 aa64dfr0;
- int brps, wrps;
-
- aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
- brps = (aa64dfr0 >> 12) & 0xf;
- wrps = (aa64dfr0 >> 20) & 0xf;
+ int brps = *host_data_ptr(debug_brps);
+ int wrps = *host_data_ptr(debug_wrps);
save_debug(dbg->dbg_bcr, dbgbcr, brps);
save_debug(dbg->dbg_bvr, dbgbvr, brps);
@@ -109,13 +120,8 @@ static void __debug_save_state(struct kvm_guest_debug_arch *dbg,
static void __debug_restore_state(struct kvm_guest_debug_arch *dbg,
struct kvm_cpu_context *ctxt)
{
- u64 aa64dfr0;
- int brps, wrps;
-
- aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
-
- brps = (aa64dfr0 >> 12) & 0xf;
- wrps = (aa64dfr0 >> 20) & 0xf;
+ int brps = *host_data_ptr(debug_brps);
+ int wrps = *host_data_ptr(debug_wrps);
restore_debug(dbg->dbg_bcr, dbgbcr, brps);
restore_debug(dbg->dbg_bvr, dbgbvr, brps);
@@ -132,13 +138,13 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (!kvm_debug_regs_in_use(vcpu))
return;
host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
host_dbg = host_data_ptr(host_debug_state.regs);
- guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+ guest_dbg = __vcpu_debug_regs(vcpu);
__debug_save_state(host_dbg, host_ctxt);
__debug_restore_state(guest_dbg, guest_ctxt);
@@ -151,18 +157,16 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
struct kvm_guest_debug_arch *host_dbg;
struct kvm_guest_debug_arch *guest_dbg;
- if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (!kvm_debug_regs_in_use(vcpu))
return;
host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
host_dbg = host_data_ptr(host_debug_state.regs);
- guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+ guest_dbg = __vcpu_debug_regs(vcpu);
__debug_save_state(guest_dbg, guest_ctxt);
__debug_restore_state(host_dbg, host_ctxt);
-
- vcpu_clear_flag(vcpu, DEBUG_DIRTY);
}
#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 34f53707892d..f838a45665f2 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -419,9 +419,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* First disable enough traps to allow us to update the registers */
if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
- cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
else
- cpacr_clear_set(0, CPACR_ELx_FPEN);
+ cpacr_clear_set(0, CPACR_EL1_FPEN);
isb();
/* Write out the host state if it's in the registers */
@@ -501,7 +501,12 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
-static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
+static inline u64 compute_counter_value(struct arch_timer_context *ctxt)
+{
+ return arch_timer_read_cntpct_el0() - timer_get_offset(ctxt);
+}
+
+static bool kvm_handle_cntxct(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctxt;
u32 sysreg;
@@ -511,18 +516,19 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
* We only get here for 64bit guests, 32bit guests will hit
* the long and winding road all the way to the standard
* handling. Yes, it sucks to be irrelevant.
+ *
+ * Also, we only deal with non-hypervisor context here (either
+ * an EL1 guest, or a non-HYP context of an EL2 guest).
*/
+ if (is_hyp_ctxt(vcpu))
+ return false;
+
sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
switch (sysreg) {
case SYS_CNTPCT_EL0:
case SYS_CNTPCTSS_EL0:
if (vcpu_has_nv(vcpu)) {
- if (is_hyp_ctxt(vcpu)) {
- ctxt = vcpu_hptimer(vcpu);
- break;
- }
-
/* Check for guest hypervisor trapping */
val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
if (!vcpu_el2_e2h_is_set(vcpu))
@@ -534,16 +540,23 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
ctxt = vcpu_ptimer(vcpu);
break;
+ case SYS_CNTVCT_EL0:
+ case SYS_CNTVCTSS_EL0:
+ if (vcpu_has_nv(vcpu)) {
+ /* Check for guest hypervisor trapping */
+ val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+
+ if (val & CNTHCTL_EL1TVCT)
+ return false;
+ }
+
+ ctxt = vcpu_vtimer(vcpu);
+ break;
default:
return false;
}
- val = arch_timer_read_cntpct_el0();
-
- if (ctxt->offset.vm_offset)
- val -= *kern_hyp_va(ctxt->offset.vm_offset);
- if (ctxt->offset.vcpu_offset)
- val -= *kern_hyp_va(ctxt->offset.vcpu_offset);
+ val = compute_counter_value(ctxt);
vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
__kvm_skip_instr(vcpu);
@@ -588,7 +601,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
__vgic_v3_perform_cpuif_access(vcpu) == 1)
return true;
- if (kvm_hyp_handle_cntpct(vcpu))
+ if (kvm_handle_cntxct(vcpu))
return true;
return false;
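
compute_counter_value() folds in the per-VM and per-vCPU offsets that the removed lines handled inline; a sketch of the accessor it presumes, assuming the hyp flavour still runs the offset pointers through kern_hyp_va() as the deleted code did:

static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
	u64 offset = 0;

	/* Mirrors the removed inline handling of both offset sources */
	if (ctxt->offset.vm_offset)
		offset += *kern_hyp_va(ctxt->offset.vm_offset);
	if (ctxt->offset.vcpu_offset)
		offset += *kern_hyp_va(ctxt->offset.vcpu_offset);

	return offset;
}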
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index a651c43ad679..76ff095c6b6e 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -18,9 +18,34 @@
static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt);
+static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+ if (!vcpu)
+ vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+ return vcpu;
+}
+
+static inline bool ctxt_is_guest(struct kvm_cpu_context *ctxt)
+{
+ return host_data_ptr(host_ctxt) != ctxt;
+}
+
+static inline u64 *ctxt_mdscr_el1(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt);
+
+ if (ctxt_is_guest(ctxt) && kvm_host_owns_debug_regs(vcpu))
+ return &vcpu->arch.external_mdscr_el1;
+
+ return &ctxt_sys_reg(ctxt, MDSCR_EL1);
+}
+
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
- ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
+ *ctxt_mdscr_el1(ctxt) = read_sysreg(mdscr_el1);
// POR_EL0 can affect uaccess, so must be saved/restored early.
if (ctxt_has_s1poe(ctxt))
@@ -33,16 +58,6 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
}
-static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt)
-{
- struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
-
- if (!vcpu)
- vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
-
- return vcpu;
-}
-
static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
{
struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt);
@@ -139,7 +154,7 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
- write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
+ write_sysreg(*ctxt_mdscr_el1(ctxt), mdscr_el1);
// POR_EL0 can affect uaccess, so must be saved/restored early.
if (ctxt_has_s1poe(ctxt))
@@ -283,7 +298,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
__vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
__vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
- if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (has_vhe() || kvm_debug_regs_in_use(vcpu))
__vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
}
@@ -300,7 +315,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
- if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
+ if (has_vhe() || kvm_debug_regs_in_use(vcpu))
write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
}
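
The net effect of the ctxt_mdscr_el1() redirection: while the host owns the debug registers, hardware MDSCR_EL1 is staged through external_mdscr_el1 and the guest's shadow copy in the sysreg file is left untouched. An illustration with an invented helper name:

static u64 hw_mdscr_source(struct kvm_vcpu *vcpu)
{
	/* The value that actually reaches hardware on guest entry */
	if (kvm_host_owns_debug_regs(vcpu))
		return vcpu->arch.external_mdscr_el1;

	return __vcpu_sys_reg(vcpu, MDSCR_EL1);
}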
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
deleted file mode 100644
index f957890c7e38..000000000000
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2021 Google LLC
- * Author: Fuad Tabba <tabba@google.com>
- */
-
-#ifndef __ARM64_KVM_FIXED_CONFIG_H__
-#define __ARM64_KVM_FIXED_CONFIG_H__
-
-#include <asm/sysreg.h>
-
-/*
- * This file contains definitions for features to be allowed or restricted for
- * guest virtual machines, depending on the mode KVM is running in and on the
- * type of guest that is running.
- *
- * The ALLOW masks represent a bitmask of feature fields that are allowed
- * without any restrictions as long as they are supported by the system.
- *
- * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
- * features that are restricted to support at most the specified feature.
- *
- * If a feature field is not present in either, than it is not supported.
- *
- * The approach taken for protected VMs is to allow features that are:
- * - Needed by common Linux distributions (e.g., floating point)
- * - Trivial to support, e.g., supporting the feature does not introduce or
- * require tracking of additional state in KVM
- * - Cannot be trapped or prevent the guest from using anyway
- */
-
-/*
- * Allow for protected VMs:
- * - Floating-point and Advanced SIMD
- * - Data Independent Timing
- * - Spectre/Meltdown Mitigation
- */
-#define PVM_ID_AA64PFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) \
- )
-
-/*
- * Restrict to the following *unsigned* features for protected VMs:
- * - AArch64 guests only (no support for AArch32 guests):
- * AArch32 adds complexity in trap handling, emulation, condition codes,
- * etc...
- * - RAS (v1)
- * Supported by KVM
- */
-#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP) | \
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP) | \
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP) | \
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP) | \
- SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, RAS, IMP) \
- )
-
-/*
- * Allow for protected VMs:
- * - Branch Target Identification
- * - Speculative Store Bypassing
- */
-#define PVM_ID_AA64PFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
- ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
- )
-
-#define PVM_ID_AA64PFR2_ALLOW 0ULL
-
-/*
- * Allow for protected VMs:
- * - Mixed-endian
- * - Distinction between Secure and Non-secure Memory
- * - Mixed-endian at EL0 only
- * - Non-context synchronizing exception entry and exit
- */
-#define PVM_ID_AA64MMFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \
- )
-
-/*
- * Restrict to the following *unsigned* features for protected VMs:
- * - 40-bit IPA
- * - 16-bit ASID
- */
-#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
- )
-
-/*
- * Allow for protected VMs:
- * - Hardware translation table updates to Access flag and Dirty state
- * - Number of VMID bits from CPU
- * - Hierarchical Permission Disables
- * - Privileged Access Never
- * - SError interrupt exceptions from speculative reads
- * - Enhanced Translation Synchronization
- * - Control for cache maintenance permission
- */
-#define PVM_ID_AA64MMFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \
- )
-
-/*
- * Allow for protected VMs:
- * - Common not Private translations
- * - User Access Override
- * - IESB bit in the SCTLR_ELx registers
- * - Unaligned single-copy atomicity and atomic functions
- * - ESR_ELx.EC value on an exception by read access to feature ID space
- * - TTL field in address operations.
- * - Break-before-make sequences when changing translation block size
- * - E0PDx mechanism
- */
-#define PVM_ID_AA64MMFR2_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
- )
-
-#define PVM_ID_AA64MMFR3_ALLOW (0ULL)
-
-/*
- * No support for Scalable Vectors for protected VMs:
- * Requires additional support from KVM, e.g., context-switching and
- * trapping at EL2
- */
-#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
-
-/*
- * No support for debug, including breakpoints, and watchpoints for protected
- * VMs:
- * The Arm architecture mandates support for at least the Armv8 debug
- * architecture, which would include at least 2 hardware breakpoints and
- * watchpoints. Providing that support to protected guests adds
- * considerable state and complexity. Therefore, the reserved value of 0 is
- * used for debug-related fields.
- */
-#define PVM_ID_AA64DFR0_ALLOW (0ULL)
-#define PVM_ID_AA64DFR1_ALLOW (0ULL)
-
-/*
- * No support for implementation defined features.
- */
-#define PVM_ID_AA64AFR0_ALLOW (0ULL)
-#define PVM_ID_AA64AFR1_ALLOW (0ULL)
-
-/*
- * No restrictions on instructions implemented in AArch64.
- */
-#define PVM_ID_AA64ISAR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
- )
-
-/* Restrict pointer authentication to the basic version. */
-#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \
- )
-
-#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \
- )
-
-#define PVM_ID_AA64ISAR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \
- )
-
-#define PVM_ID_AA64ISAR2_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \
- )
-
-u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
-bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
-bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
-int kvm_check_pvm_sysreg_table(void);
-
-#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 0cc2a429f1fb..e42bf68c8848 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -53,6 +53,8 @@ struct pkvm_hyp_vm {
struct pkvm_hyp_vcpu *vcpus[];
};
+extern hyp_spinlock_t vm_table_lock;
+
static inline struct pkvm_hyp_vm *
pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
{
@@ -86,4 +88,9 @@ struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle);
struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle);
void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm);
+bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
+bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
+void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu);
+int kvm_check_pvm_sysreg_table(void);
+
#endif /* __ARM64_KVM_NVHE_PKVM_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 53efda0235cf..858bb38e273f 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -82,10 +82,10 @@ static void __debug_restore_trace(u64 trfcr_el1)
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
+ if (host_data_test_flag(HAS_SPE))
__debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
/* Disable and flush Self-Hosted Trace generation */
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
+ if (host_data_test_flag(HAS_TRBE))
__debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
}
@@ -96,9 +96,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
+ if (host_data_test_flag(HAS_SPE))
__debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
- if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
+ if (host_data_test_flag(HAS_TRBE))
__debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
}
@@ -106,8 +106,3 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
__debug_switch_to_host_common(vcpu);
}
-
-u64 __kvm_get_mdcr_el2(void)
-{
- return read_sysreg(mdcr_el2);
-}
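
SPE/TRBE tracking moves from per-vCPU flags to per-CPU host data; a minimal sketch of the flag helpers this assumes (the real kvm_host.h macros may differ in detail):

#define host_data_set_flag(nr)		\
	set_bit(KVM_HOST_DATA_FLAG_##nr, host_data_ptr(flags))
#define host_data_clear_flag(nr)	\
	clear_bit(KVM_HOST_DATA_FLAG_##nr, host_data_ptr(flags))
#define host_data_test_flag(nr)		\
	test_bit(KVM_HOST_DATA_FLAG_##nr, host_data_ptr(flags))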
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 258d572eed62..5c134520e180 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -68,7 +68,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
if (!guest_owns_fp_regs())
return;
- cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
isb();
if (vcpu_has_sve(vcpu))
@@ -110,8 +110,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
- hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr);
-
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3;
@@ -434,11 +432,6 @@ static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
__vgic_v3_init_lrs();
}
-static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
-{
- cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
-}
-
static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
@@ -554,7 +547,6 @@ typedef void (*hcall_t)(struct kvm_cpu_context *);
static const hcall_t host_hcall[] = {
/* ___kvm_hyp_init */
- HANDLE_FUNC(__kvm_get_mdcr_el2),
HANDLE_FUNC(__pkvm_init),
HANDLE_FUNC(__pkvm_create_private_mapping),
HANDLE_FUNC(__pkvm_cpu_set_vector),
@@ -660,7 +652,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
handle_host_smc(host_ctxt);
break;
case ESR_ELx_EC_SVE:
- cpacr_clear_set(0, CPACR_ELx_ZEN);
+ cpacr_clear_set(0, CPACR_EL1_ZEN);
isb();
sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
SYS_ZCR_EL2);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 2c618f2f2769..3927fe52a3dd 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -9,7 +9,6 @@
#include <asm/kvm_emulate.h>
-#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
@@ -29,233 +28,155 @@ unsigned int kvm_host_sve_max_vl;
*/
static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu);
-/*
- * Set trap register values based on features in ID_AA64PFR0.
- */
-static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
+static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
- u64 hcr_set = HCR_RW;
- u64 hcr_clear = 0;
- u64 cptr_set = 0;
- u64 cptr_clear = 0;
-
- /* Protected KVM does not support AArch32 guests. */
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL0_IMP);
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL1_IMP);
-
- /*
- * Linux guests assume support for floating-point and Advanced SIMD. Do
- * not change the trapping behavior for these from the KVM default.
- */
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP),
- PVM_ID_AA64PFR0_ALLOW));
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
- PVM_ID_AA64PFR0_ALLOW));
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
if (has_hvhe())
- hcr_set |= HCR_E2H;
+ vcpu->arch.hcr_el2 |= HCR_E2H;
- /* Trap RAS unless all current versions are supported */
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
- ID_AA64PFR0_EL1_RAS_V1P1) {
- hcr_set |= HCR_TERR | HCR_TEA;
- hcr_clear |= HCR_FIEN;
+ if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
+ /* route synchronous external abort exceptions to EL2 */
+ vcpu->arch.hcr_el2 |= HCR_TEA;
+ /* trap error record accesses */
+ vcpu->arch.hcr_el2 |= HCR_TERR;
}
- /* Trap AMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
- hcr_clear |= HCR_AMVOFFEN;
- cptr_set |= CPTR_EL2_TAM;
- }
+ if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
+ vcpu->arch.hcr_el2 |= HCR_FWB;
- /* Trap SVE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
- if (has_hvhe())
- cptr_clear |= CPACR_ELx_ZEN;
- else
- cptr_set |= CPTR_EL2_TZ;
- }
+ if (cpus_have_final_cap(ARM64_HAS_EVT) &&
+ !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
+ vcpu->arch.hcr_el2 |= HCR_TID4;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TID2;
- vcpu->arch.hcr_el2 |= hcr_set;
- vcpu->arch.hcr_el2 &= ~hcr_clear;
- vcpu->arch.cptr_el2 |= cptr_set;
- vcpu->arch.cptr_el2 &= ~cptr_clear;
+ if (vcpu_has_ptrauth(vcpu))
+ vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;
}
-/*
- * Set trap register values based on features in ID_AA64PFR1.
- */
-static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
+static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu)
{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
- u64 hcr_set = 0;
- u64 hcr_clear = 0;
+ struct kvm *kvm = vcpu->kvm;
+ u64 val = vcpu->arch.hcr_el2;
- /* Memory Tagging: Trap and Treat as Untagged if not supported. */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) {
- hcr_set |= HCR_TID5;
- hcr_clear |= HCR_DCT | HCR_ATA;
+ /* No support for AArch32. */
+ val |= HCR_RW;
+
+ /*
+ * Always trap:
+ * - Feature id registers: to control features exposed to guests
+ * - Implementation-defined features
+ */
+ val |= HCR_TACR | HCR_TIDCP | HCR_TID3 | HCR_TID1;
+
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
+ val |= HCR_TERR | HCR_TEA;
+ val &= ~(HCR_FIEN);
}
- vcpu->arch.hcr_el2 |= hcr_set;
- vcpu->arch.hcr_el2 &= ~hcr_clear;
-}
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
+ val &= ~(HCR_AMVOFFEN);
-/*
- * Set trap register values based on features in ID_AA64DFR0.
- */
-static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
-{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
- u64 mdcr_set = 0;
- u64 mdcr_clear = 0;
- u64 cptr_set = 0;
-
- /* Trap/constrain PMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
- mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
- mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
- MDCR_EL2_HPMN_MASK;
+ if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) {
+ val |= HCR_TID5;
+ val &= ~(HCR_DCT | HCR_ATA);
}
- /* Trap Debug */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids))
- mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
+ val |= HCR_TLOR;
+
+ vcpu->arch.hcr_el2 = val;
+}
- /* Trap OS Double Lock */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids))
- mdcr_set |= MDCR_EL2_TDOSA;
+static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 val = vcpu->arch.mdcr_el2;
- /* Trap SPE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) {
- mdcr_set |= MDCR_EL2_TPMS;
- mdcr_clear |= MDCR_EL2_E2PB_MASK;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) {
+ val |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
+ val &= ~(MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK);
}
- /* Trap Trace Filter */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
- mdcr_set |= MDCR_EL2_TTRF;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, IMP))
+ val |= MDCR_EL2_TDRA | MDCR_EL2_TDA;
+
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP))
+ val |= MDCR_EL2_TDOSA;
- /* Trap Trace */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) {
- if (has_hvhe())
- cptr_set |= CPACR_EL1_TTA;
- else
- cptr_set |= CPTR_EL2_TTA;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) {
+ val |= MDCR_EL2_TPMS;
+ val &= ~MDCR_EL2_E2PB_MASK;
}
- /* Trap External Trace */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
- mdcr_clear |= MDCR_EL2_E2TB_MASK;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
+ val |= MDCR_EL2_TTRF;
- vcpu->arch.mdcr_el2 |= mdcr_set;
- vcpu->arch.mdcr_el2 &= ~mdcr_clear;
- vcpu->arch.cptr_el2 |= cptr_set;
-}
-
-/*
- * Set trap register values based on features in ID_AA64MMFR0.
- */
-static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
-{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
- u64 mdcr_set = 0;
+ if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP))
+ val |= MDCR_EL2_E2TB_MASK;
/* Trap Debug Communications Channel registers */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids))
- mdcr_set |= MDCR_EL2_TDCC;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
+ val |= MDCR_EL2_TDCC;
- vcpu->arch.mdcr_el2 |= mdcr_set;
+ vcpu->arch.mdcr_el2 = val;
}
/*
- * Set trap register values based on features in ID_AA64MMFR1.
+ * Check that cpu features that are neither trapped nor supported are not
+ * enabled for protected VMs.
*/
-static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
+static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu)
{
- const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
- u64 hcr_set = 0;
-
- /* Trap LOR */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids))
- hcr_set |= HCR_TLOR;
+ struct kvm *kvm = vcpu->kvm;
- vcpu->arch.hcr_el2 |= hcr_set;
-}
-
-/*
- * Set baseline trap register values.
- */
-static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
-{
- const u64 hcr_trap_feat_regs = HCR_TID3;
- const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;
+ /* Protected KVM does not support AArch32 guests. */
+ if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) ||
+ kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32))
+ return -EINVAL;
/*
- * Always trap:
- * - Feature id registers: to control features exposed to guests
- * - Implementation-defined features
+ * Linux guests assume support for floating-point and Advanced SIMD. Do
+ * not change the trapping behavior for these from the KVM default.
*/
- vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;
-
- /* Clear res0 and set res1 bits to trap potential new features. */
- vcpu->arch.hcr_el2 &= ~(HCR_RES0);
- vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
- if (!has_hvhe()) {
- vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
- vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
- }
-}
-
-static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
-
- if (has_hvhe())
- vcpu->arch.hcr_el2 |= HCR_E2H;
-
- if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
- /* route synchronous external abort exceptions to EL2 */
- vcpu->arch.hcr_el2 |= HCR_TEA;
- /* trap error record accesses */
- vcpu->arch.hcr_el2 |= HCR_TERR;
- }
-
- if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
- vcpu->arch.hcr_el2 |= HCR_FWB;
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, FP, IMP) ||
+ !kvm_has_feat(kvm, ID_AA64PFR0_EL1, AdvSIMD, IMP))
+ return -EINVAL;
- if (cpus_have_final_cap(ARM64_HAS_EVT) &&
- !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
- vcpu->arch.hcr_el2 |= HCR_TID4;
- else
- vcpu->arch.hcr_el2 |= HCR_TID2;
+ /* No SME support in KVM right now. Check to catch if it changes. */
+ if (kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP))
+ return -EINVAL;
- if (vcpu_has_ptrauth(vcpu))
- vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+ return 0;
}
/*
* Initialize trap register values in protected mode.
*/
-static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
+static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
{
- vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
+ struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
+ int ret;
+
vcpu->arch.mdcr_el2 = 0;
pkvm_vcpu_reset_hcr(vcpu);
- if ((!vcpu_is_protected(vcpu)))
- return;
+	if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ return 0;
+
+ ret = pkvm_check_pvm_cpu_features(vcpu);
+ if (ret)
+ return ret;
- pvm_init_trap_regs(vcpu);
- pvm_init_traps_aa64pfr0(vcpu);
- pvm_init_traps_aa64pfr1(vcpu);
- pvm_init_traps_aa64dfr0(vcpu);
- pvm_init_traps_aa64mmfr0(vcpu);
- pvm_init_traps_aa64mmfr1(vcpu);
+ pvm_init_traps_hcr(vcpu);
+ pvm_init_traps_mdcr(vcpu);
+
+ return 0;
}
/*
@@ -276,10 +197,10 @@ static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
/*
* Spinlock for protecting state related to the VM table. Protects writes
- * to 'vm_table' and 'nr_table_entries' as well as reads and writes to
- * 'last_hyp_vcpu_lookup'.
+ * to 'vm_table', 'nr_table_entries', and other per-vm state on initialization.
+ * Also protects reads and writes to 'last_hyp_vcpu_lookup'.
*/
-static DEFINE_HYP_SPINLOCK(vm_table_lock);
+DEFINE_HYP_SPINLOCK(vm_table_lock);
/*
* The table of VM entries for protected VMs in hyp.
@@ -391,10 +312,16 @@ struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle)
static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm)
{
struct kvm *kvm = &hyp_vm->kvm;
+ unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags);
DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);
+ if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
+ set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
+
/* No restrictions for non-protected VMs. */
if (!kvm_vm_is_protected(kvm)) {
+ hyp_vm->kvm.arch.flags = host_arch_flags;
+
bitmap_copy(kvm->arch.vcpu_features,
host_kvm->arch.vcpu_features,
KVM_VCPU_MAX_FEATURES);
@@ -403,50 +330,26 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);
- /*
- * For protected VMs, always allow:
- * - CPU starting in poweroff state
- * - PSCI v0.2
- */
- set_bit(KVM_ARM_VCPU_POWER_OFF, allowed_features);
set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);
- /*
- * Check if remaining features are allowed:
- * - Performance Monitoring
- * - Scalable Vectors
- * - Pointer Authentication
- */
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), PVM_ID_AA64DFR0_ALLOW))
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3))
set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), PVM_ID_AA64PFR0_ALLOW))
- set_bit(KVM_ARM_VCPU_SVE, allowed_features);
-
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED) &&
- FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED))
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS))
set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI), PVM_ID_AA64ISAR1_ALLOW) &&
- FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA), PVM_ID_AA64ISAR1_ALLOW))
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC))
set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);
+ if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) {
+ set_bit(KVM_ARM_VCPU_SVE, allowed_features);
+ kvm->arch.flags |= host_arch_flags & BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE);
+ }
+
bitmap_and(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features,
allowed_features, KVM_VCPU_MAX_FEATURES);
}
-static void pkvm_vcpu_init_ptrauth(struct pkvm_hyp_vcpu *hyp_vcpu)
-{
- struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
-
- if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
- vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)) {
- kvm_vcpu_enable_ptrauth(vcpu);
- } else {
- vcpu_clear_flag(&hyp_vcpu->vcpu, GUEST_HAS_PTRAUTH);
- }
-}
-
static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
{
if (host_vcpu)
@@ -469,6 +372,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
hyp_vm->kvm.created_vcpus = nr_vcpus;
hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
+ hyp_vm->kvm.arch.flags = 0;
pkvm_init_features_from_host(hyp_vm, host_kvm);
}
@@ -476,10 +380,8 @@ static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *
{
struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
- if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
- vcpu_clear_flag(vcpu, GUEST_HAS_SVE);
+ if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
- }
}
static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
@@ -507,9 +409,14 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+ if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
+ kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
+
+ ret = pkvm_vcpu_init_traps(hyp_vcpu);
+ if (ret)
+ goto done;
+
pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
- pkvm_vcpu_init_ptrauth(hyp_vcpu);
- pkvm_vcpu_init_traps(&hyp_vcpu->vcpu);
done:
if (ret)
unpin_host_vcpu(host_vcpu);
@@ -754,8 +661,6 @@ unlock:
return ret;
}
- hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
-
return 0;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 7e04d1c2a03d..d62bcb5634a2 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -12,7 +12,6 @@
#include <nvhe/early_alloc.h>
#include <nvhe/ffa.h>
-#include <nvhe/fixed_config.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index cc69106734ca..6c846d033d24 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -26,7 +26,6 @@
#include <asm/debug-monitors.h>
#include <asm/processor.h>
-#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
/* Non-VHE specific context */
@@ -36,33 +35,46 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
-static void __activate_traps(struct kvm_vcpu *vcpu)
+static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
{
- u64 val;
+ u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */
- ___activate_traps(vcpu, vcpu->arch.hcr_el2);
- __activate_traps_common(vcpu);
+ if (has_hvhe()) {
+ val |= CPACR_EL1_TTA;
- val = vcpu->arch.cptr_el2;
- val |= CPTR_EL2_TAM; /* Same bit irrespective of E2H */
- val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
- if (cpus_have_final_cap(ARM64_SME)) {
- if (has_hvhe())
- val &= ~CPACR_ELx_SMEN;
- else
- val |= CPTR_EL2_TSM;
- }
+ if (guest_owns_fp_regs()) {
+ val |= CPACR_EL1_FPEN;
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ }
+ } else {
+ val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
- if (!guest_owns_fp_regs()) {
- if (has_hvhe())
- val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN);
- else
- val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
+ /*
+ * Always trap SME since it's not supported in KVM.
+ * TSM is RES1 if SME isn't implemented.
+ */
+ val |= CPTR_EL2_TSM;
- __activate_traps_fpsimd32(vcpu);
+ if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
+ val |= CPTR_EL2_TZ;
+
+ if (!guest_owns_fp_regs())
+ val |= CPTR_EL2_TFP;
}
+ if (!guest_owns_fp_regs())
+ __activate_traps_fpsimd32(vcpu);
+
kvm_write_cptr_el2(val);
+}
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ ___activate_traps(vcpu, vcpu->arch.hcr_el2);
+ __activate_traps_common(vcpu);
+ __activate_cptr_traps(vcpu);
+
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
@@ -192,7 +204,7 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
/* Re-enable SVE traps if not supported for the guest vcpu. */
if (!vcpu_has_sve(vcpu))
- cpacr_clear_set(CPACR_ELx_ZEN, 0);
+ cpacr_clear_set(CPACR_EL1_ZEN, 0);
} else {
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
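
__activate_cptr_traps() builds the value in CPACR_EL1 layout for hVHE and in CPTR_EL2 layout otherwise, so the write primitive is assumed to pick the destination register accordingly, roughly (a sketch, not the verbatim kvm_emulate.h definition):

static __always_inline void kvm_write_cptr_el2(u64 val)
{
	if (has_vhe() || has_hvhe())
		write_sysreg(val, cpacr_el1);	/* CPACR_EL1 layout */
	else
		write_sysreg(val, cptr_el2);	/* CPTR_EL2 layout */
}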
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 2860548d4250..1ddd9ed3cbb3 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -11,7 +11,7 @@
#include <hyp/adjust_pc.h>
-#include <nvhe/fixed_config.h>
+#include <nvhe/pkvm.h>
#include "../../sys_regs.h"
@@ -28,222 +28,255 @@ u64 id_aa64mmfr1_el1_sys_val;
u64 id_aa64mmfr2_el1_sys_val;
u64 id_aa64smfr0_el1_sys_val;
-/*
- * Inject an unknown/undefined exception to an AArch64 guest while most of its
- * sysregs are live.
- */
-static void inject_undef64(struct kvm_vcpu *vcpu)
-{
- u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
-
- *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
- *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
-
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
-
- __kvm_adjust_pc(vcpu);
-
- write_sysreg_el1(esr, SYS_ESR);
- write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
- write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
- write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
-}
-
-/*
- * Returns the restricted features values of the feature register based on the
- * limitations in restrict_fields.
- * A feature id field value of 0b0000 does not impose any restrictions.
- * Note: Use only for unsigned feature field values.
- */
-static u64 get_restricted_features_unsigned(u64 sys_reg_val,
- u64 restrict_fields)
-{
- u64 value = 0UL;
- u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+struct pvm_ftr_bits {
+ bool sign;
+ u8 shift;
+ u8 width;
+ u8 max_val;
+ bool (*vm_supported)(const struct kvm *kvm);
+};
- /*
- * According to the Arm Architecture Reference Manual, feature fields
- * use increasing values to indicate increases in functionality.
- * Iterate over the restricted feature fields and calculate the minimum
- * unsigned value between the one supported by the system, and what the
- * value is being restricted to.
- */
- while (sys_reg_val && restrict_fields) {
- value |= min(sys_reg_val & mask, restrict_fields & mask);
- sys_reg_val &= ~mask;
- restrict_fields &= ~mask;
- mask <<= ARM64_FEATURE_FIELD_BITS;
+#define __MAX_FEAT_FUNC(id, fld, max, func, sgn) \
+ { \
+ .sign = sgn, \
+ .shift = id##_##fld##_SHIFT, \
+ .width = id##_##fld##_WIDTH, \
+ .max_val = id##_##fld##_##max, \
+ .vm_supported = func, \
}
- return value;
-}
-
-/*
- * Functions that return the value of feature id registers for protected VMs
- * based on allowed features, system features, and KVM support.
- */
+#define MAX_FEAT_FUNC(id, fld, max, func) \
+ __MAX_FEAT_FUNC(id, fld, max, func, id##_##fld##_SIGNED)
-static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
-{
- u64 set_mask = 0;
- u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
+#define MAX_FEAT(id, fld, max) \
+ MAX_FEAT_FUNC(id, fld, max, NULL)
- set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
+#define MAX_FEAT_ENUM(id, fld, max) \
+ __MAX_FEAT_FUNC(id, fld, max, NULL, false)
- return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
-}
+#define FEAT_END { .width = 0, }
-static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
+static bool vm_has_ptrauth(const struct kvm *kvm)
{
- const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
- u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
-
- if (!kvm_has_mte(kvm))
- allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
-
- return id_aa64pfr1_el1_sys_val & allow_mask;
-}
-
-static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for Scalable Vectors, therefore, hyp has no sanitized
- * copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL);
- return 0;
-}
-
-static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for debug, including breakpoints, and watchpoints,
- * therefore, pKVM has no sanitized copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL);
- return 0;
-}
-
-static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for debug, therefore, hyp has no sanitized copy of the
- * feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL);
- return 0;
-}
+ if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
+ return false;
-static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu)
-{
- /*
- * No support for implementation defined features, therefore, hyp has no
- * sanitized copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL);
- return 0;
+ return (cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) ||
+ cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) &&
+ kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC);
}
-static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu)
+static bool vm_has_sve(const struct kvm *kvm)
{
- /*
- * No support for implementation defined features, therefore, hyp has no
- * sanitized copy of the feature id register.
- */
- BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL);
- return 0;
+ return system_supports_sve() && kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_SVE);
}
-static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu)
-{
- return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW;
-}
+/*
+ * Definitions for features to be allowed or restricted for protected guests.
+ *
+ * Each field in the masks represents the highest supported value for the
+ * feature. If a feature field is not present, it is not supported. Moreover,
+ * these are used to generate the guest's view of the feature registers.
+ *
+ * The approach for protected VMs is to at least support features that are:
+ * - Needed by common Linux distributions (e.g., floating point)
+ * - Trivial to support, e.g., supporting the feature does not introduce or
+ * require tracking of additional state in KVM
+ * - Cannot be trapped or prevent the guest from using anyway
+ */
-static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
-{
- u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
+static const struct pvm_ftr_bits pvmid_aa64pfr0[] = {
+ MAX_FEAT(ID_AA64PFR0_EL1, EL0, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, EL1, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, EL2, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, EL3, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, FP, FP16),
+ MAX_FEAT(ID_AA64PFR0_EL1, AdvSIMD, FP16),
+ MAX_FEAT(ID_AA64PFR0_EL1, GIC, IMP),
+ MAX_FEAT_FUNC(ID_AA64PFR0_EL1, SVE, IMP, vm_has_sve),
+ MAX_FEAT(ID_AA64PFR0_EL1, RAS, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, DIT, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, CSV2, IMP),
+ MAX_FEAT(ID_AA64PFR0_EL1, CSV3, IMP),
+ FEAT_END
+};
- if (!vcpu_has_ptrauth(vcpu))
- allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) |
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI));
+static const struct pvm_ftr_bits pvmid_aa64pfr1[] = {
+ MAX_FEAT(ID_AA64PFR1_EL1, BT, IMP),
+ MAX_FEAT(ID_AA64PFR1_EL1, SSBS, SSBS2),
+ MAX_FEAT_ENUM(ID_AA64PFR1_EL1, MTE_frac, NI),
+ FEAT_END
+};
- return id_aa64isar1_el1_sys_val & allow_mask;
-}
+static const struct pvm_ftr_bits pvmid_aa64mmfr0[] = {
+ MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, PARANGE, 40),
+ MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, ASIDBITS, 16),
+ MAX_FEAT(ID_AA64MMFR0_EL1, BIGEND, IMP),
+ MAX_FEAT(ID_AA64MMFR0_EL1, SNSMEM, IMP),
+ MAX_FEAT(ID_AA64MMFR0_EL1, BIGENDEL0, IMP),
+ MAX_FEAT(ID_AA64MMFR0_EL1, EXS, IMP),
+ FEAT_END
+};
-static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu)
-{
- u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW;
+static const struct pvm_ftr_bits pvmid_aa64mmfr1[] = {
+ MAX_FEAT(ID_AA64MMFR1_EL1, HAFDBS, DBM),
+ MAX_FEAT_ENUM(ID_AA64MMFR1_EL1, VMIDBits, 16),
+ MAX_FEAT(ID_AA64MMFR1_EL1, HPDS, HPDS2),
+ MAX_FEAT(ID_AA64MMFR1_EL1, PAN, PAN3),
+ MAX_FEAT(ID_AA64MMFR1_EL1, SpecSEI, IMP),
+ MAX_FEAT(ID_AA64MMFR1_EL1, ETS, IMP),
+ MAX_FEAT(ID_AA64MMFR1_EL1, CMOW, IMP),
+ FEAT_END
+};
- if (!vcpu_has_ptrauth(vcpu))
- allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
+static const struct pvm_ftr_bits pvmid_aa64mmfr2[] = {
+ MAX_FEAT(ID_AA64MMFR2_EL1, CnP, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, UAO, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, IESB, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, AT, IMP),
+ MAX_FEAT_ENUM(ID_AA64MMFR2_EL1, IDS, 0x18),
+ MAX_FEAT(ID_AA64MMFR2_EL1, TTL, IMP),
+ MAX_FEAT(ID_AA64MMFR2_EL1, BBM, 2),
+ MAX_FEAT(ID_AA64MMFR2_EL1, E0PD, IMP),
+ FEAT_END
+};
- return id_aa64isar2_el1_sys_val & allow_mask;
-}
+static const struct pvm_ftr_bits pvmid_aa64isar1[] = {
+ MAX_FEAT(ID_AA64ISAR1_EL1, DPB, DPB2),
+ MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, APA, PAuth, vm_has_ptrauth),
+ MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, API, PAuth, vm_has_ptrauth),
+ MAX_FEAT(ID_AA64ISAR1_EL1, JSCVT, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, FCMA, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, LRCPC, LRCPC3),
+ MAX_FEAT(ID_AA64ISAR1_EL1, GPA, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, GPI, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, FRINTTS, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, SB, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX),
+ MAX_FEAT(ID_AA64ISAR1_EL1, BF16, EBF16),
+ MAX_FEAT(ID_AA64ISAR1_EL1, DGH, IMP),
+ MAX_FEAT(ID_AA64ISAR1_EL1, I8MM, IMP),
+ FEAT_END
+};
-static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
-{
- u64 set_mask;
+static const struct pvm_ftr_bits pvmid_aa64isar2[] = {
+ MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, GPA3, IMP, vm_has_ptrauth),
+ MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, APA3, PAuth, vm_has_ptrauth),
+ MAX_FEAT(ID_AA64ISAR2_EL1, ATS1A, IMP),
+ FEAT_END
+};
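For reference, a minimal sketch of the table entry type the pvmid_* arrays rely on, inferred from how get_restricted_features() walks them below; the real definition lives elsewhere in this patch and may differ:

    struct pvm_ftr_bits {
            bool    sign;                   /* ID field is signed */
            u8      shift;                  /* bit position of the field */
            u8      width;                  /* field width; 0 terminates the array */
            u64     max_val;                /* maximum value exposed to the pVM */
            bool    (*vm_supported)(const struct kvm *);    /* optional per-VM gate */
    };

    /* FEAT_END presumably expands to an all-zero sentinel: */
    #define FEAT_END        { .width = 0, }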
- set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val,
- PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED);
+/*
+ * None of the features in ID_AA64DFR0_EL1 or ID_AA64MMFR4_EL1 is supported.
+ * However, both have Not-Implemented values that are non-zero. Define them
+ * so they can be used when getting the value of these registers.
+ */
+#define ID_AA64DFR0_EL1_NONZERO_NI \
+( \
+ SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI) | \
+ SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, MTPMU, NI) \
+)
- return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask;
-}
+#define ID_AA64MMFR4_EL1_NONZERO_NI \
+ SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, E2H0, NI)
-static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu)
+/*
+ * Returns the value of a feature register based on the system register
+ * value, the vCPU's support for the relevant features, and the
+ * additional restrictions for protected VMs.
+ */
+static u64 get_restricted_features(const struct kvm_vcpu *vcpu,
+ u64 sys_reg_val,
+ const struct pvm_ftr_bits restrictions[])
{
- return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW;
-}
+ u64 val = 0UL;
+ int i;
+
+ for (i = 0; restrictions[i].width != 0; i++) {
+ bool (*vm_supported)(const struct kvm *) = restrictions[i].vm_supported;
+ bool sign = restrictions[i].sign;
+ int shift = restrictions[i].shift;
+ int width = restrictions[i].width;
+ u64 min_signed = (1UL << width) - 1UL;
+ u64 sign_bit = 1UL << (width - 1);
+ u64 mask = GENMASK_ULL(width + shift - 1, shift);
+ u64 sys_val = (sys_reg_val & mask) >> shift;
+ u64 pvm_max = restrictions[i].max_val;
+
+ if (vm_supported && !vm_supported(vcpu->kvm))
+ val |= (sign ? min_signed : 0) << shift;
+ else if (sign && (sys_val >= sign_bit || pvm_max >= sign_bit))
+ val |= max(sys_val, pvm_max) << shift;
+ else
+ val |= min(sys_val, pvm_max) << shift;
+ }
-static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu)
-{
- return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW;
+ return val;
}
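A worked example of the signed-field handling above, illustrative only: for a signed 4-bit field whose Not-Implemented encoding is 0xf (-1), taking max() of the raw field values selects the more negative, i.e. more restrictive, signed value.

    /* One loop iteration for a hypothetical signed field (width 4, shift 8): */
    u64 sys_val  = 0x0;             /* host reports signed 0 (implemented) */
    u64 pvm_max  = 0xf;             /* pVM policy: NI, sign bit set */
    u64 sign_bit = 1UL << 3;
    /* pvm_max >= sign_bit, so the signed branch applies: */
    u64 field = max(sys_val, pvm_max) << 8; /* 0xf00: NI is exposed */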
-/* Read a sanitized cpufeature ID register by its encoding */
-u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
+static u64 pvm_calc_id_reg(const struct kvm_vcpu *vcpu, u32 id)
{
switch (id) {
case SYS_ID_AA64PFR0_EL1:
- return get_pvm_id_aa64pfr0(vcpu);
+ return get_restricted_features(vcpu, id_aa64pfr0_el1_sys_val, pvmid_aa64pfr0);
case SYS_ID_AA64PFR1_EL1:
- return get_pvm_id_aa64pfr1(vcpu);
- case SYS_ID_AA64ZFR0_EL1:
- return get_pvm_id_aa64zfr0(vcpu);
- case SYS_ID_AA64DFR0_EL1:
- return get_pvm_id_aa64dfr0(vcpu);
- case SYS_ID_AA64DFR1_EL1:
- return get_pvm_id_aa64dfr1(vcpu);
- case SYS_ID_AA64AFR0_EL1:
- return get_pvm_id_aa64afr0(vcpu);
- case SYS_ID_AA64AFR1_EL1:
- return get_pvm_id_aa64afr1(vcpu);
+ return get_restricted_features(vcpu, id_aa64pfr1_el1_sys_val, pvmid_aa64pfr1);
case SYS_ID_AA64ISAR0_EL1:
- return get_pvm_id_aa64isar0(vcpu);
+ return id_aa64isar0_el1_sys_val;
case SYS_ID_AA64ISAR1_EL1:
- return get_pvm_id_aa64isar1(vcpu);
+ return get_restricted_features(vcpu, id_aa64isar1_el1_sys_val, pvmid_aa64isar1);
case SYS_ID_AA64ISAR2_EL1:
- return get_pvm_id_aa64isar2(vcpu);
+ return get_restricted_features(vcpu, id_aa64isar2_el1_sys_val, pvmid_aa64isar2);
case SYS_ID_AA64MMFR0_EL1:
- return get_pvm_id_aa64mmfr0(vcpu);
+ return get_restricted_features(vcpu, id_aa64mmfr0_el1_sys_val, pvmid_aa64mmfr0);
case SYS_ID_AA64MMFR1_EL1:
- return get_pvm_id_aa64mmfr1(vcpu);
+ return get_restricted_features(vcpu, id_aa64mmfr1_el1_sys_val, pvmid_aa64mmfr1);
case SYS_ID_AA64MMFR2_EL1:
- return get_pvm_id_aa64mmfr2(vcpu);
+ return get_restricted_features(vcpu, id_aa64mmfr2_el1_sys_val, pvmid_aa64mmfr2);
+ case SYS_ID_AA64DFR0_EL1:
+ return ID_AA64DFR0_EL1_NONZERO_NI;
+ case SYS_ID_AA64MMFR4_EL1:
+ return ID_AA64MMFR4_EL1_NONZERO_NI;
default:
/* Unhandled ID register, RAZ */
return 0;
}
}
+/*
+ * Inject an unknown/undefined exception into an AArch64 guest while most of
+ * its sysregs are live.
+ */
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+ u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
+
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+
+ __kvm_adjust_pc(vcpu);
+
+ write_sysreg_el1(esr, SYS_ESR);
+ write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+}
+
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
struct sys_reg_desc const *r)
{
- return pvm_read_id_reg(vcpu, reg_to_encoding(r));
+ struct kvm *kvm = vcpu->kvm;
+ u32 reg = reg_to_encoding(r);
+
+ if (WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)))
+ return 0;
+
+ if (reg >= sys_reg(3, 0, 0, 1, 0) && reg <= sys_reg(3, 0, 0, 7, 7))
+ return kvm->arch.id_regs[IDREG_IDX(reg)];
+
+ return 0;
}
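The range check above spans the architected ID space (op0=3, op1=0, CRn=0, CRm=1..7, Op2=0..7); IDREG_IDX presumably flattens (CRm, Op2) into a dense array index, along the lines of this assumed sketch:

    /* Assumed shape of IDREG_IDX (the real macro lives in kvm_host.h): */
    #define IDREG_IDX(id)   (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))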
/* Handler to RAZ/WI sysregs */
@@ -271,13 +304,6 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
return false;
}
- /*
- * No support for AArch32 guests, therefore, pKVM has no sanitized copy
- * of AArch32 feature id registers.
- */
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_EL1_IMP);
-
return pvm_access_raz_wi(vcpu, p, r);
}
@@ -449,6 +475,30 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
};
/*
+ * Initializes the feature registers for protected VMs.
+ */
+void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_arch *ka = &kvm->arch;
+ u32 r;
+
+ hyp_assert_lock_held(&vm_table_lock);
+
+ if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
+ return;
+
+ /*
+ * Initialize only the AArch64 ID registers, since AArch32 isn't supported
+ * for protected VMs.
+ */
+ for (r = sys_reg(3, 0, 0, 4, 0); r <= sys_reg(3, 0, 0, 7, 7); r += sys_reg(0, 0, 0, 0, 1))
+ ka->id_regs[IDREG_IDX(r)] = pvm_calc_id_reg(vcpu, r);
+
+ set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
+}
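The loop increment deserves a note: in the packed MRS/MSR encoding, Op2 occupies bits 7:5 and CRm bits 11:8, so the increment below is one step of the Op2 field, with the carry out of Op2 advancing CRm.

    /*
     * sys_reg(0, 0, 0, 0, 1) expands to (1 << Op2_shift): adding it steps
     * Op2 (bits 7:5), and the carry out of Op2 lands in CRm (bits 11:8),
     * so the loop covers CRm 4..7 / Op2 0..7 in encoding order.
     */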
+
+/*
* Checks that the sysreg table is unique and in-order.
*
* Returns 0 if the table is consistent, or 1 otherwise.
diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
index 3aaab20ae5b4..ff176f4ce7de 100644
--- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c
@@ -22,15 +22,16 @@ void __kvm_timer_set_cntvoff(u64 cntvoff)
*/
void __timer_disable_traps(struct kvm_vcpu *vcpu)
{
- u64 val, shift = 0;
+ u64 set, clr, shift = 0;
if (has_hvhe())
shift = 10;
/* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
- write_sysreg(val, cnthctl_el2);
+ set = (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
+ clr = CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
+
+ sysreg_clear_set(cnthctl_el2, clr, set);
}
/*
@@ -58,5 +59,12 @@ void __timer_enable_traps(struct kvm_vcpu *vcpu)
set <<= 10;
}
+ /*
+ * Trap the virtual counter/timer if we have a broken cntvoff
+ * implementation.
+ */
+ if (has_broken_cntvoff())
+ set |= CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
+
sysreg_clear_set(cnthctl_el2, clr, set);
}
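Both hunks rely on sysreg_clear_set(); behaviourally it is a read-modify-write that skips the MSR when nothing changes, roughly:

    /* Behavioural sketch of sysreg_clear_set(cnthctl_el2, clr, set): */
    u64 old = read_sysreg(cnthctl_el2);
    u64 new = (old & ~clr) | set;
    if (new != old)                 /* avoid a pointless sysreg write */
            write_sysreg(new, cnthctl_el2);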
diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c
index 289689b2682d..0100339b09e0 100644
--- a/arch/arm64/kvm/hyp/vhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c
@@ -19,8 +19,3 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
__debug_switch_to_host_common(vcpu);
}
-
-u64 __kvm_get_mdcr_el2(void)
-{
- return read_sysreg(mdcr_el2);
-}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 80581b1c3995..b5b9dbaf1fdd 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -77,12 +77,12 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
 * VHE (HCR.E2H == 1), which allows us to use the CPTR_EL2.TAM
 * shift value here for trapping the AMU accesses.
*/
- u64 val = CPACR_ELx_TTA | CPTR_EL2_TAM;
+ u64 val = CPACR_EL1_TTA | CPTR_EL2_TAM;
if (guest_owns_fp_regs()) {
- val |= CPACR_ELx_FPEN;
+ val |= CPACR_EL1_FPEN;
if (vcpu_has_sve(vcpu))
- val |= CPACR_ELx_ZEN;
+ val |= CPACR_EL1_ZEN;
} else {
__activate_traps_fpsimd32(vcpu);
}
@@ -122,13 +122,13 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
* hypervisor has traps enabled to dispel any illusion of something more
* complicated taking place.
*/
- if (!(SYS_FIELD_GET(CPACR_ELx, FPEN, cptr) & BIT(0)))
- val &= ~CPACR_ELx_FPEN;
- if (!(SYS_FIELD_GET(CPACR_ELx, ZEN, cptr) & BIT(0)))
- val &= ~CPACR_ELx_ZEN;
+ if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0)))
+ val &= ~CPACR_EL1_FPEN;
+ if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0)))
+ val &= ~CPACR_EL1_ZEN;
if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
- val |= cptr & CPACR_ELx_E0POE;
+ val |= cptr & CPACR_EL1_E0POE;
val |= cptr & CPTR_EL2_TCPAC;
@@ -256,6 +256,110 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL;
}
+static u64 compute_emulated_cntx_ctl_el0(struct kvm_vcpu *vcpu,
+ enum vcpu_sysreg reg)
+{
+ unsigned long ctl;
+ u64 cval, cnt;
+ bool stat;
+
+ switch (reg) {
+ case CNTP_CTL_EL0:
+ cval = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ ctl = __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
+ cnt = compute_counter_value(vcpu_ptimer(vcpu));
+ break;
+ case CNTV_CTL_EL0:
+ cval = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ ctl = __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
+ cnt = compute_counter_value(vcpu_vtimer(vcpu));
+ break;
+ default:
+ BUG();
+ }
+
+ stat = cval <= cnt;
+ __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &ctl, stat);
+
+ return ctl;
+}
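The __assign_bit() line implements the architected ISTATUS semantics (ARCH_TIMER_CTRL_IT_STAT is bit 2); an equivalent open-coded form:

    /* ISTATUS is set exactly when the timer condition is met: */
    if (cval <= cnt)
            ctl |= ARCH_TIMER_CTRL_IT_STAT;
    else
            ctl &= ~ARCH_TIMER_CTRL_IT_STAT;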
+
+static bool kvm_hyp_handle_timer(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 esr, val;
+
+ /*
+ * Having FEAT_ECV allows for a better quality of timer emulation.
+ * However, this comes at a huge cost in terms of traps. Try to
+ * satisfy reads from the guest's hypervisor context without
+ * returning to the kernel if we can.
+ */
+ if (!is_hyp_ctxt(vcpu))
+ return false;
+
+ esr = kvm_vcpu_get_esr(vcpu);
+ if ((esr & ESR_ELx_SYS64_ISS_DIR_MASK) != ESR_ELx_SYS64_ISS_DIR_READ)
+ return false;
+
+ switch (esr_sys64_to_sysreg(esr)) {
+ case SYS_CNTP_CTL_EL02:
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
+ break;
+ case SYS_CNTP_CTL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu))
+ val = read_sysreg_el0(SYS_CNTP_CTL);
+ else
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0);
+ break;
+ case SYS_CNTP_CVAL_EL02:
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ break;
+ case SYS_CNTP_CVAL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ val = read_sysreg_el0(SYS_CNTP_CVAL);
+
+ if (!has_cntpoff())
+ val -= timer_get_offset(vcpu_hptimer(vcpu));
+ } else {
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ }
+ break;
+ case SYS_CNTPCT_EL0:
+ case SYS_CNTPCTSS_EL0:
+ val = compute_counter_value(vcpu_hptimer(vcpu));
+ break;
+ case SYS_CNTV_CTL_EL02:
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
+ break;
+ case SYS_CNTV_CTL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu))
+ val = read_sysreg_el0(SYS_CNTV_CTL);
+ else
+ val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0);
+ break;
+ case SYS_CNTV_CVAL_EL02:
+ val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ break;
+ case SYS_CNTV_CVAL_EL0:
+ if (vcpu_el2_e2h_is_set(vcpu))
+ val = read_sysreg_el0(SYS_CNTV_CVAL);
+ else
+ val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
+ break;
+ case SYS_CNTVCT_EL0:
+ case SYS_CNTVCTSS_EL0:
+ val = compute_counter_value(vcpu_hvtimer(vcpu));
+ break;
+ default:
+ return false;
+ }
+
+ vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
+ __kvm_skip_instr(vcpu);
+
+ return true;
+}
+
static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
{
u64 esr = kvm_vcpu_get_esr(vcpu);
@@ -409,6 +513,9 @@ static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code))
return true;
+ if (kvm_hyp_handle_timer(vcpu, exit_code))
+ return true;
+
if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code))
return true;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 9b36218b48de..9895372fb3b6 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1021,8 +1021,8 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= HCR_NV2;
if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, IMP))
res0 |= (HCR_AT | HCR_NV1 | HCR_NV);
- if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
- __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
+ if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
+ kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
res0 |= (HCR_API | HCR_APK);
if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TME, IMP))
res0 |= BIT(39);
@@ -1078,8 +1078,8 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
/* HFG[RW]TR_EL2 */
res0 = res1 = 0;
- if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
- __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
+ if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
+ kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
res0 |= (HFGxTR_EL2_APDAKey | HFGxTR_EL2_APDBKey |
HFGxTR_EL2_APGAKey | HFGxTR_EL2_APIAKey |
HFGxTR_EL2_APIBKey);
@@ -1271,6 +1271,21 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= MDCR_EL2_EnSTEPOP;
set_sysreg_masks(kvm, MDCR_EL2, res0, res1);
+ /* CNTHCTL_EL2 */
+ res0 = GENMASK(63, 20);
+ res1 = 0;
+ if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RME, IMP))
+ res0 |= CNTHCTL_CNTPMASK | CNTHCTL_CNTVMASK;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, CNTPOFF)) {
+ res0 |= CNTHCTL_ECV;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, IMP))
+ res0 |= (CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT |
+ CNTHCTL_EL1NVPCT | CNTHCTL_EL1NVVCT);
+ }
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP))
+ res0 |= GENMASK(11, 8);
+ set_sysreg_masks(kvm, CNTHCTL_EL2, res0, res1);
+
return 0;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 470524b31951..803e11b0dc8f 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -85,7 +85,7 @@ static void kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
* KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
* kvm_arm_vcpu_finalize(), which freezes the configuration.
*/
- vcpu_set_flag(vcpu, GUEST_HAS_SVE);
+ set_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &vcpu->kvm->arch.flags);
}
/*
@@ -211,10 +211,6 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_vcpu_reset_sve(vcpu);
}
- if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
- vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC))
- kvm_vcpu_enable_ptrauth(vcpu);
-
if (vcpu_el1_is_32bit(vcpu))
pstate = VCPU_RESET_PSTATE_SVC;
else if (vcpu_has_nv(vcpu))
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 634ff18a59a1..ef045f69e7d0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -570,17 +570,10 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- u64 oslsr;
-
if (!p->is_write)
return read_from_write_only(vcpu, p, r);
- /* Forward the OSLK bit to OSLSR */
- oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~OSLSR_EL1_OSLK;
- if (p->regval & OSLAR_EL1_OSLK)
- oslsr |= OSLSR_EL1_OSLK;
-
- __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr;
+ kvm_debug_handle_oslar(vcpu, p->regval);
return true;
}
@@ -621,43 +614,13 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
}
}
-/*
- * We want to avoid world-switching all the DBG registers all the
- * time:
- *
- * - If we've touched any debug register, it is likely that we're
- * going to touch more of them. It then makes sense to disable the
- * traps and start doing the save/restore dance
- * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
- * then mandatory to save/restore the registers, as the guest
- * depends on them.
- *
- * For this, we use a DIRTY bit, indicating the guest has modified the
- * debug registers, used as follow:
- *
- * On guest entry:
- * - If the dirty bit is set (because we're coming back from trapping),
- * disable the traps, save host registers, restore guest registers.
- * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
- * set the dirty bit, disable the traps, save host registers,
- * restore guest registers.
- * - Otherwise, enable the traps
- *
- * On guest exit:
- * - If the dirty bit is set, save guest registers, restore host
- * registers and clear the dirty bit. This ensure that the host can
- * now use the debug registers.
- */
static bool trap_debug_regs(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
access_rw(vcpu, p, r);
- if (p->is_write)
- vcpu_set_flag(vcpu, DEBUG_DIRTY);
-
- trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
+ kvm_debug_set_guest_ownership(vcpu);
return true;
}
@@ -666,9 +629,6 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
*
 * A 32 bit write to a debug register leaves the top bits alone
* A 32 bit read from a debug register only returns the bottom bits
- *
- * All writes will set the DEBUG_DIRTY flag to ensure the hyp code
- * switches between host and guest values in future.
*/
static void reg_to_dbg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
@@ -683,8 +643,6 @@ static void reg_to_dbg(struct kvm_vcpu *vcpu,
val &= ~mask;
val |= (p->regval & (mask >> shift)) << shift;
*dbg_reg = val;
-
- vcpu_set_flag(vcpu, DEBUG_DIRTY);
}
static void dbg_to_reg(struct kvm_vcpu *vcpu,
@@ -698,152 +656,79 @@ static void dbg_to_reg(struct kvm_vcpu *vcpu,
p->regval = (*dbg_reg & mask) >> shift;
}
-static bool trap_bvr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
+static u64 *demux_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
-
- if (p->is_write)
- reg_to_dbg(vcpu, p, rd, dbg_reg);
- else
- dbg_to_reg(vcpu, p, rd, dbg_reg);
-
- trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
-
- return true;
-}
+ struct kvm_guest_debug_arch *dbg = &vcpu->arch.vcpu_debug_state;
-static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 val)
-{
- vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = val;
- return 0;
-}
-
-static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 *val)
-{
- *val = vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
- return 0;
+ switch (rd->Op2) {
+ case 0b100:
+ return &dbg->dbg_bvr[rd->CRm];
+ case 0b101:
+ return &dbg->dbg_bcr[rd->CRm];
+ case 0b110:
+ return &dbg->dbg_wvr[rd->CRm];
+ case 0b111:
+ return &dbg->dbg_wcr[rd->CRm];
+ default:
+ KVM_BUG_ON(1, vcpu->kvm);
+ return NULL;
+ }
}
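The demux works because the four debug register banks only differ in Op2: DBGBVRn_EL1, DBGBCRn_EL1, DBGWVRn_EL1 and DBGWCRn_EL1 are all encoded as op0=2, op1=0, CRn=0, CRm=n, with Op2 = 4, 5, 6 and 7 respectively, so a single handler keyed on rd->Op2 replaces the four per-bank accessors removed below.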
-static u64 reset_bvr(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
+static bool trap_dbg_wb_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
{
- vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
- return rd->val;
-}
+ u64 *reg = demux_wb_reg(vcpu, rd);
-static bool trap_bcr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
-{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+ if (!reg)
+ return false;
if (p->is_write)
- reg_to_dbg(vcpu, p, rd, dbg_reg);
+ reg_to_dbg(vcpu, p, rd, reg);
else
- dbg_to_reg(vcpu, p, rd, dbg_reg);
-
- trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
+ dbg_to_reg(vcpu, p, rd, reg);
+ kvm_debug_set_guest_ownership(vcpu);
return true;
}
-static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 val)
-{
- vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = val;
- return 0;
-}
-
-static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 *val)
+static int set_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 val)
{
- *val = vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
- return 0;
-}
-
-static u64 reset_bcr(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
-{
- vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
- return rd->val;
-}
-
-static bool trap_wvr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
-{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
+ u64 *reg = demux_wb_reg(vcpu, rd);
- if (p->is_write)
- reg_to_dbg(vcpu, p, rd, dbg_reg);
- else
- dbg_to_reg(vcpu, p, rd, dbg_reg);
-
- trace_trap_reg(__func__, rd->CRm, p->is_write,
- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]);
-
- return true;
-}
+ if (!reg)
+ return -EINVAL;
-static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 val)
-{
- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = val;
+ *reg = val;
return 0;
}
-static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 *val)
-{
- *val = vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
- return 0;
-}
-
-static u64 reset_wvr(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
-{
- vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
- return rd->val;
-}
-
-static bool trap_wcr(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *rd)
+static int get_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 *val)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
-
- if (p->is_write)
- reg_to_dbg(vcpu, p, rd, dbg_reg);
- else
- dbg_to_reg(vcpu, p, rd, dbg_reg);
+ u64 *reg = demux_wb_reg(vcpu, rd);
- trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
-
- return true;
-}
+ if (!reg)
+ return -EINVAL;
-static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 val)
-{
- vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = val;
+ *val = *reg;
return 0;
}
-static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 *val)
+static u64 reset_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
{
- *val = vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
- return 0;
-}
+ u64 *reg = demux_wb_reg(vcpu, rd);
-static u64 reset_wcr(struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
-{
- vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val;
+ /*
+ * Bail out early if we couldn't find storage for the register; the
+ * KVM_BUG_ON() in demux_wb_reg() will prevent this VM from ever
+ * being run.
+ */
+ if (!reg)
+ return 0;
+
+ *reg = rd->val;
return rd->val;
}
@@ -1350,13 +1235,17 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
{ SYS_DESC(SYS_DBGBVRn_EL1(n)), \
- trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \
+ trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \
+ get_dbg_wb_reg, set_dbg_wb_reg }, \
{ SYS_DESC(SYS_DBGBCRn_EL1(n)), \
- trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \
+ trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \
+ get_dbg_wb_reg, set_dbg_wb_reg }, \
{ SYS_DESC(SYS_DBGWVRn_EL1(n)), \
- trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \
+ trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \
+ get_dbg_wb_reg, set_dbg_wb_reg }, \
{ SYS_DESC(SYS_DBGWCRn_EL1(n)), \
- trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
+ trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \
+ get_dbg_wb_reg, set_dbg_wb_reg }
#define PMU_SYS_REG(name) \
SYS_DESC(SYS_##name), .reset = reset_pmu_reg, \
@@ -1410,26 +1299,146 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
switch (reg) {
case SYS_CNTP_TVAL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HPTIMER;
+ else
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
+ case SYS_CNTV_TVAL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HVTIMER;
+ else
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
case SYS_AARCH32_CNTP_TVAL:
+ case SYS_CNTP_TVAL_EL02:
tmr = TIMER_PTIMER;
treg = TIMER_REG_TVAL;
break;
+
+ case SYS_CNTV_TVAL_EL02:
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
+ case SYS_CNTHP_TVAL_EL2:
+ tmr = TIMER_HPTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
+ case SYS_CNTHV_TVAL_EL2:
+ tmr = TIMER_HVTIMER;
+ treg = TIMER_REG_TVAL;
+ break;
+
case SYS_CNTP_CTL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HPTIMER;
+ else
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
+ case SYS_CNTV_CTL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HVTIMER;
+ else
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
case SYS_AARCH32_CNTP_CTL:
+ case SYS_CNTP_CTL_EL02:
tmr = TIMER_PTIMER;
treg = TIMER_REG_CTL;
break;
+
+ case SYS_CNTV_CTL_EL02:
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
+ case SYS_CNTHP_CTL_EL2:
+ tmr = TIMER_HPTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
+ case SYS_CNTHV_CTL_EL2:
+ tmr = TIMER_HVTIMER;
+ treg = TIMER_REG_CTL;
+ break;
+
case SYS_CNTP_CVAL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HPTIMER;
+ else
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
+ case SYS_CNTV_CVAL_EL0:
+ if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu))
+ tmr = TIMER_HVTIMER;
+ else
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
case SYS_AARCH32_CNTP_CVAL:
+ case SYS_CNTP_CVAL_EL02:
tmr = TIMER_PTIMER;
treg = TIMER_REG_CVAL;
break;
+
+ case SYS_CNTV_CVAL_EL02:
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
+ case SYS_CNTHP_CVAL_EL2:
+ tmr = TIMER_HPTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
+ case SYS_CNTHV_CVAL_EL2:
+ tmr = TIMER_HVTIMER;
+ treg = TIMER_REG_CVAL;
+ break;
+
case SYS_CNTPCT_EL0:
case SYS_CNTPCTSS_EL0:
+ if (is_hyp_ctxt(vcpu))
+ tmr = TIMER_HPTIMER;
+ else
+ tmr = TIMER_PTIMER;
+ treg = TIMER_REG_CNT;
+ break;
+
case SYS_AARCH32_CNTPCT:
+ case SYS_AARCH32_CNTPCTSS:
tmr = TIMER_PTIMER;
treg = TIMER_REG_CNT;
break;
+
+ case SYS_CNTVCT_EL0:
+ case SYS_CNTVCTSS_EL0:
+ if (is_hyp_ctxt(vcpu))
+ tmr = TIMER_HVTIMER;
+ else
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CNT;
+ break;
+
+ case SYS_AARCH32_CNTVCT:
+ case SYS_AARCH32_CNTVCTSS:
+ tmr = TIMER_VTIMER;
+ treg = TIMER_REG_CNT;
+ break;
+
default:
print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
return undef_access(vcpu, p, r);
@@ -1599,7 +1608,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
if (!vcpu_has_ptrauth(vcpu))
val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
- if (!cpus_have_final_cap(ARM64_HAS_WFXT))
+ if (!cpus_have_final_cap(ARM64_HAS_WFXT) ||
+ has_broken_cntvoff())
val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
break;
case SYS_ID_AA64MMFR2_EL1:
@@ -2920,11 +2930,17 @@ static const struct sys_reg_desc sys_reg_descs[] = {
AMU_AMEVTYPER1_EL0(15),
{ SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTVCT_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
{ SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_CTL_EL0), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer },
+
/* PMEVCNTRn_EL0 */
PMU_PMEVCNTR_EL0(0),
PMU_PMEVCNTR_EL0(1),
@@ -3076,9 +3092,24 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0),
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
+ { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer },
+ EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0),
+ EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0),
+
+ { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer },
+ EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0),
+ EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0),
{ SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },
+ { SYS_DESC(SYS_CNTP_TVAL_EL02), access_arch_timer },
+ { SYS_DESC(SYS_CNTP_CTL_EL02), access_arch_timer },
+ { SYS_DESC(SYS_CNTP_CVAL_EL02), access_arch_timer },
+
+ { SYS_DESC(SYS_CNTV_TVAL_EL02), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_CTL_EL02), access_arch_timer },
+ { SYS_DESC(SYS_CNTV_CVAL_EL02), access_arch_timer },
+
EL2_REG(SP_EL2, NULL, reset_unknown, 0),
};
@@ -3590,18 +3621,20 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
* None of the other registers share their location, so treat them as
* if they were 64bit.
*/
-#define DBG_BCR_BVR_WCR_WVR(n) \
- /* DBGBVRn */ \
- { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \
- /* DBGBCRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \
- /* DBGWVRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \
- /* DBGWCRn */ \
- { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n }
-
-#define DBGBXVR(n) \
- { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n }
+#define DBG_BCR_BVR_WCR_WVR(n) \
+ /* DBGBVRn */ \
+ { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), \
+ trap_dbg_wb_reg, NULL, n }, \
+ /* DBGBCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_dbg_wb_reg, NULL, n }, \
+ /* DBGWVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_dbg_wb_reg, NULL, n }, \
+ /* DBGWCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_dbg_wb_reg, NULL, n }
+
+#define DBGBXVR(n) \
+ { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), \
+ trap_dbg_wb_reg, NULL, n }
/*
* Trapped cp14 registers. We generally ignore most of the external
@@ -3898,9 +3931,11 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
+ { SYS_DESC(SYS_AARCH32_CNTVCT), access_arch_timer },
{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
{ SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer },
+ { SYS_DESC(SYS_AARCH32_CNTVCTSS), access_arch_timer },
};
static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h
index 064a58c19f48..f85415db7713 100644
--- a/arch/arm64/kvm/trace_handle_exit.h
+++ b/arch/arm64/kvm/trace_handle_exit.h
@@ -46,38 +46,6 @@ TRACE_EVENT(kvm_hvc_arm64,
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
-TRACE_EVENT(kvm_arm_setup_debug,
- TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
- TP_ARGS(vcpu, guest_debug),
-
- TP_STRUCT__entry(
- __field(struct kvm_vcpu *, vcpu)
- __field(__u32, guest_debug)
- ),
-
- TP_fast_assign(
- __entry->vcpu = vcpu;
- __entry->guest_debug = guest_debug;
- ),
-
- TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
-);
-
-TRACE_EVENT(kvm_arm_clear_debug,
- TP_PROTO(__u32 guest_debug),
- TP_ARGS(guest_debug),
-
- TP_STRUCT__entry(
- __field(__u32, guest_debug)
- ),
-
- TP_fast_assign(
- __entry->guest_debug = guest_debug;
- ),
-
- TP_printk("flags: 0x%08x", __entry->guest_debug)
-);
-
/*
* The dreg32 name is a leftover from a distant past. This will really
* output a 64bit value...
@@ -99,49 +67,6 @@ TRACE_EVENT(kvm_arm_set_dreg32,
TP_printk("%s: 0x%llx", __entry->name, __entry->value)
);
-TRACE_DEFINE_SIZEOF(__u64);
-
-TRACE_EVENT(kvm_arm_set_regset,
- TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
- TP_ARGS(type, len, control, value),
- TP_STRUCT__entry(
- __field(const char *, name)
- __field(int, len)
- __array(u64, ctrls, 16)
- __array(u64, values, 16)
- ),
- TP_fast_assign(
- __entry->name = type;
- __entry->len = len;
- memcpy(__entry->ctrls, control, len << 3);
- memcpy(__entry->values, value, len << 3);
- ),
- TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
- __print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
- __print_array(__entry->values, __entry->len, sizeof(__u64)))
-);
-
-TRACE_EVENT(trap_reg,
- TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
- TP_ARGS(fn, reg, is_write, write_value),
-
- TP_STRUCT__entry(
- __field(const char *, fn)
- __field(int, reg)
- __field(bool, is_write)
- __field(u64, write_value)
- ),
-
- TP_fast_assign(
- __entry->fn = fn;
- __entry->reg = reg;
- __entry->is_write = is_write;
- __entry->write_value = write_value;
- ),
-
- TP_printk("%s %s reg %d (0x%016llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
-);
-
TRACE_EVENT(kvm_handle_sys_reg,
TP_PROTO(unsigned long hsr),
TP_ARGS(hsr),
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index b8edc5765441..fb30c8804f87 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -501,7 +501,7 @@ alternative_else_nop_endif
#ifdef CONFIG_ARM64_HAFT
cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT
b.lt 1f
- orr tcr2, tcr2, TCR2_EL1x_HAFT
+ orr tcr2, tcr2, TCR2_EL1_HAFT
#endif /* CONFIG_ARM64_HAFT */
1:
#endif /* CONFIG_ARM64_HW_AFDBM */
@@ -532,7 +532,8 @@ alternative_else_nop_endif
#undef PTE_MAYBE_NG
#undef PTE_MAYBE_SHARED
- orr tcr2, tcr2, TCR2_EL1x_PIE
+ orr tcr2, tcr2, TCR2_EL1_PIE
+ msr REG_TCR2_EL1, x0
.Lskip_indirection:
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index eb17f59e543c..1e65f2fb45bd 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -105,6 +105,7 @@ WORKAROUND_CLEAN_CACHE
WORKAROUND_DEVICE_LOAD_ACQUIRE
WORKAROUND_NVIDIA_CARMEL_CNP
WORKAROUND_QCOM_FALKOR_E1003
+WORKAROUND_QCOM_ORYON_CNTVOFF
WORKAROUND_REPEAT_TLBI
WORKAROUND_SPECULATIVE_AT
WORKAROUND_SPECULATIVE_SSBS
diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk
index d1254a056114..1a2afc9fdd42 100755
--- a/arch/arm64/tools/gen-sysreg.awk
+++ b/arch/arm64/tools/gen-sysreg.awk
@@ -206,7 +206,7 @@ END {
# Currently this is effectively a comment, in future we may want to emit
# defines for the fields.
-/^Fields/ && block_current() == "Sysreg" {
+(/^Fields/ || /^Mapping/) && block_current() == "Sysreg" {
expect_fields(2)
if (next_bit != 63)
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index b081b54d6d22..40a9e4e2cae6 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -24,8 +24,16 @@
# ...
# EndEnum
-# Alternatively if multiple registers share the same layout then
-# a SysregFields block can be used to describe the shared layout
+# For VHE aliases (*_EL12, *_EL02) of system registers, a Mapping
+# entry describes the register the alias actually accesses:
+
+# Sysreg <name_EL12> <op0> <op1> <crn> <crm> <op2>
+# Mapping <name_EL1>
+# EndSysreg
+
+# Where multiple system registers are not VHE aliases but share a
+# common layout, a SysregFields block can be used to describe the
+# shared layout:
# SysregFields <fieldsname>
# <field>
@@ -1978,7 +1986,7 @@ Field 1 A
Field 0 M
EndSysreg
-SysregFields CPACR_ELx
+Sysreg CPACR_EL1 3 0 1 0 2
Res0 63:30
Field 29 E0POE
Field 28 TTA
@@ -1989,10 +1997,6 @@ Field 21:20 FPEN
Res0 19:18
Field 17:16 ZEN
Res0 15:0
-EndSysregFields
-
-Sysreg CPACR_EL1 3 0 1 0 2
-Fields CPACR_ELx
EndSysreg
Sysreg SMPRI_EL1 3 0 1 2 4
@@ -2947,23 +2951,23 @@ Field 63:0 PhysicalOffset
EndSysreg
Sysreg CPACR_EL12 3 5 1 0 2
-Fields CPACR_ELx
+Mapping CPACR_EL1
EndSysreg
Sysreg ZCR_EL12 3 5 1 2 0
-Fields ZCR_ELx
+Mapping ZCR_EL1
EndSysreg
Sysreg SMCR_EL12 3 5 1 2 6
-Fields SMCR_ELx
+Mapping SMCR_EL1
EndSysreg
Sysreg GCSCR_EL12 3 5 2 5 0
-Fields GCSCR_ELx
+Mapping GCSCR_EL1
EndSysreg
Sysreg GCSPR_EL12 3 5 2 5 1
-Fields GCSPR_ELx
+Mapping GCSPR_EL1
EndSysreg
Sysreg FAR_EL12 3 5 6 0 0
@@ -2975,7 +2979,7 @@ Fields MPAM1_ELx
EndSysreg
Sysreg CONTEXTIDR_EL12 3 5 13 0 1
-Fields CONTEXTIDR_ELx
+Mapping CONTEXTIDR_EL1
EndSysreg
SysregFields TTBRx_EL1
@@ -2992,7 +2996,7 @@ Sysreg TTBR1_EL1 3 0 2 0 1
Fields TTBRx_EL1
EndSysreg
-SysregFields TCR2_EL1x
+Sysreg TCR2_EL1 3 0 2 0 3
Res0 63:16
Field 15 DisCH1
Field 14 DisCH0
@@ -3006,14 +3010,10 @@ Field 3 POE
Field 2 E0POE
Field 1 PIE
Field 0 PnCH
-EndSysregFields
-
-Sysreg TCR2_EL1 3 0 2 0 3
-Fields TCR2_EL1x
EndSysreg
Sysreg TCR2_EL12 3 5 2 0 3
-Fields TCR2_EL1x
+Mapping TCR2_EL1
EndSysreg
Sysreg TCR2_EL2 3 4 2 0 3
@@ -3084,7 +3084,7 @@ Fields PIRx_ELx
EndSysreg
Sysreg PIRE0_EL12 3 5 10 2 2
-Fields PIRx_ELx
+Mapping PIRE0_EL1
EndSysreg
Sysreg PIRE0_EL2 3 4 10 2 2
@@ -3096,7 +3096,7 @@ Fields PIRx_ELx
EndSysreg
Sysreg PIR_EL12 3 5 10 2 3
-Fields PIRx_ELx
+Mapping PIR_EL1
EndSysreg
Sysreg PIR_EL2 3 4 10 2 3
@@ -3116,7 +3116,7 @@ Fields PIRx_ELx
EndSysreg
Sysreg POR_EL12 3 5 10 2 4
-Fields PIRx_ELx
+Mapping POR_EL1
EndSysreg
Sysreg S2POR_EL1 3 0 10 2 5
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index cbbc9a6dc571..ce6521ad04d1 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -22,6 +22,12 @@
#define CNTHCTL_EVNTDIR (1 << 3)
#define CNTHCTL_EVNTI (0xF << 4)
#define CNTHCTL_ECV (1 << 12)
+#define CNTHCTL_EL1TVT (1 << 13)
+#define CNTHCTL_EL1TVCT (1 << 14)
+#define CNTHCTL_EL1NVPCT (1 << 15)
+#define CNTHCTL_EL1NVVCT (1 << 16)
+#define CNTHCTL_CNTVMASK (1 << 18)
+#define CNTHCTL_CNTPMASK (1 << 19)
enum arch_timer_reg {
ARCH_TIMER_REG_CTRL,
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index fd650a8789b9..681cf0c8b9df 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -98,6 +98,7 @@ int __init kvm_timer_hyp_init(bool has_gic);
int kvm_timer_enable(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_timer_sync_nested(struct kvm_vcpu *vcpu);
void kvm_timer_sync_user(struct kvm_vcpu *vcpu);
bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu);
void kvm_timer_update_run(struct kvm_vcpu *vcpu);
@@ -150,9 +151,31 @@ void kvm_timer_cpu_down(void);
/* CNTKCTL_EL1 valid bits as of DDI0487J.a */
#define CNTKCTL_VALID_BITS (BIT(17) | GENMASK_ULL(9, 0))
+DECLARE_STATIC_KEY_FALSE(broken_cntvoff_key);
+
+static inline bool has_broken_cntvoff(void)
+{
+ return static_branch_unlikely(&broken_cntvoff_key);
+}
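A sketch of how this key presumably gets flipped, tied to the WORKAROUND_QCOM_ORYON_CNTVOFF capability added above; the actual enable site lives in the timer init code elsewhere in this patch and may differ:

    /* Hypothetical enable site, e.g. during kvm_timer_hyp_init(): */
    if (cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF))
            static_branch_enable(&broken_cntvoff_key);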
+
static inline bool has_cntpoff(void)
{
return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}
+static inline u64 timer_get_offset(struct arch_timer_context *ctxt)
+{
+ u64 offset = 0;
+
+ if (!ctxt)
+ return 0;
+
+ if (ctxt->offset.vm_offset)
+ offset += *ctxt->offset.vm_offset;
+ if (ctxt->offset.vcpu_offset)
+ offset += *ctxt->offset.vcpu_offset;
+
+ return offset;
+}
+
#endif