summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/arch_timer.c77
-rw-r--r--arch/arm64/kvm/arm.c16
-rw-r--r--arch/arm64/kvm/emulate-nested.c203
-rw-r--r--arch/arm64/kvm/fpsimd.c3
-rw-r--r--arch/arm64/kvm/guest.c6
-rw-r--r--arch/arm64/kvm/handle_exit.c47
-rw-r--r--arch/arm64/kvm/hyp/exception.c48
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h21
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S1
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c1
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c43
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c26
-rw-r--r--arch/arm64/kvm/hypercalls.c2
-rw-r--r--arch/arm64/kvm/inject_fault.c61
-rw-r--r--arch/arm64/kvm/mmu.c34
-rw-r--r--arch/arm64/kvm/nested.c161
-rw-r--r--arch/arm64/kvm/pvtime.c8
-rw-r--r--arch/arm64/kvm/reset.c17
-rw-r--r--arch/arm64/kvm/sys_regs.c455
-rw-r--r--arch/arm64/kvm/sys_regs.h14
-rw-r--r--arch/arm64/kvm/trace_arm.h59
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c13
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c9
25 files changed, 1121 insertions, 208 deletions
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 5e33c2d4645a..c0c050e53157 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -14,7 +14,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
- arch_timer.o trng.o vmid.o \
+ arch_timer.o trng.o vmid.o emulate-nested.o nested.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 23346585a294..00610477ec7b 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -428,14 +428,17 @@ static void timer_emulate(struct arch_timer_context *ctx)
* scheduled for the future. If the timer cannot fire at all,
* then we also don't need a soft timer.
*/
- if (!kvm_timer_irq_can_fire(ctx)) {
- soft_timer_cancel(&ctx->hrtimer);
+ if (should_fire || !kvm_timer_irq_can_fire(ctx))
return;
- }
soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}
+static void set_cntvoff(u64 cntvoff)
+{
+ kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
+}
+
static void timer_save_state(struct arch_timer_context *ctx)
{
struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
@@ -459,6 +462,22 @@ static void timer_save_state(struct arch_timer_context *ctx)
write_sysreg_el0(0, SYS_CNTV_CTL);
isb();
+ /*
+ * The kernel may decide to run userspace after
+ * calling vcpu_put, so we reset cntvoff to 0 to
+ * ensure a consistent read between user accesses to
+ * the virtual counter and kernel access to the
+ * physical counter of non-VHE case.
+ *
+ * For VHE, the virtual counter uses a fixed virtual
+ * offset of zero, so no need to zero CNTVOFF_EL2
+ * register, but this is actually useful when switching
+ * between EL1/vEL2 with NV.
+ *
+ * Do it unconditionally, as this is either unavoidable
+ * or dirt cheap.
+ */
+ set_cntvoff(0);
break;
case TIMER_PTIMER:
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
@@ -532,6 +551,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
switch (index) {
case TIMER_VTIMER:
+ set_cntvoff(timer_get_offset(ctx));
write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
@@ -552,11 +572,6 @@ out:
local_irq_restore(flags);
}
-static void set_cntvoff(u64 cntvoff)
-{
- kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
-}
-
static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
int r;
@@ -631,8 +646,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_load_nogic(vcpu);
}
- set_cntvoff(timer_get_offset(map.direct_vtimer));
-
kvm_timer_unblocking(vcpu);
timer_restore_state(map.direct_vtimer);
@@ -688,15 +701,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (kvm_vcpu_is_blocking(vcpu))
kvm_timer_blocking(vcpu);
-
- /*
- * The kernel may decide to run userspace after calling vcpu_put, so
- * we reset cntvoff to 0 to ensure a consistent read between user
- * accesses to the virtual counter and kernel access to the physical
- * counter of non-VHE case. For VHE, the virtual counter uses a fixed
- * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
- */
- set_cntvoff(0);
}
/*
@@ -934,14 +938,22 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
enum kvm_arch_timers tmr,
enum kvm_arch_timer_regs treg)
{
+ struct arch_timer_context *timer;
+ struct timer_map map;
u64 val;
+ get_timer_map(vcpu, &map);
+ timer = vcpu_get_timer(vcpu, tmr);
+
+ if (timer == map.emul_ptimer)
+ return kvm_arm_timer_read(vcpu, timer, treg);
+
preempt_disable();
- kvm_timer_vcpu_put(vcpu);
+ timer_save_state(timer);
- val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg);
+ val = kvm_arm_timer_read(vcpu, timer, treg);
- kvm_timer_vcpu_load(vcpu);
+ timer_restore_state(timer);
preempt_enable();
return val;
@@ -975,13 +987,22 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
enum kvm_arch_timer_regs treg,
u64 val)
{
- preempt_disable();
- kvm_timer_vcpu_put(vcpu);
-
- kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val);
+ struct arch_timer_context *timer;
+ struct timer_map map;
- kvm_timer_vcpu_load(vcpu);
- preempt_enable();
+ get_timer_map(vcpu, &map);
+ timer = vcpu_get_timer(vcpu, tmr);
+ if (timer == map.emul_ptimer) {
+ soft_timer_cancel(&timer->hrtimer);
+ kvm_arm_timer_write(vcpu, timer, treg, val);
+ timer_emulate(timer);
+ } else {
+ preempt_disable();
+ timer_save_state(timer);
+ kvm_arm_timer_write(vcpu, timer, treg, val);
+ timer_restore_state(timer);
+ preempt_enable();
+ }
}
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 698787ed87e9..3bd732eaf087 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -136,7 +136,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto err_unshare_kvm;
- if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
+ if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) {
ret = -ENOMEM;
goto err_unshare_kvm;
}
@@ -1899,6 +1899,7 @@ static void kvm_hyp_init_symbols(void)
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
+ kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
kvm_nvhe_sym(__icache_flags) = __icache_flags;
kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
}
@@ -1921,9 +1922,7 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
return 0;
}
-/**
- * Inits Hyp-mode on all online CPUs
- */
+/* Inits Hyp-mode on all online CPUs */
static int __init init_hyp_mode(void)
{
u32 hyp_va_bits;
@@ -2199,9 +2198,7 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
kvm_arm_resume_guest(irqfd->kvm);
}
-/**
- * Initialize Hyp-mode and memory mappings on all CPUs.
- */
+/* Initialize Hyp-mode and memory mappings on all CPUs */
static __init int kvm_arm_init(void)
{
int err;
@@ -2325,6 +2322,11 @@ static int __init early_kvm_mode_cfg(char *arg)
return 0;
}
+ if (strcmp(arg, "nested") == 0 && !WARN_ON(!is_kernel_in_hyp_mode())) {
+ kvm_mode = KVM_MODE_NV;
+ return 0;
+ }
+
return -EINVAL;
}
early_param("kvm-arm.mode", early_kvm_mode_cfg);
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
new file mode 100644
index 000000000000..b96662029fb1
--- /dev/null
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2016 - Linaro and Columbia University
+ * Author: Jintack Lim <jintack.lim@linaro.org>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
+
+#include "hyp/include/hyp/adjust_pc.h"
+
+#include "trace.h"
+
+static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
+{
+ u64 mode = spsr & PSR_MODE_MASK;
+
+ /*
+ * Possible causes for an Illegal Exception Return from EL2:
+ * - trying to return to EL3
+ * - trying to return to an illegal M value
+ * - trying to return to a 32bit EL
+ * - trying to return to EL1 with HCR_EL2.TGE set
+ */
+ if (mode == PSR_MODE_EL3t || mode == PSR_MODE_EL3h ||
+ mode == 0b00001 || (mode & BIT(1)) ||
+ (spsr & PSR_MODE32_BIT) ||
+ (vcpu_el2_tge_is_set(vcpu) && (mode == PSR_MODE_EL1t ||
+ mode == PSR_MODE_EL1h))) {
+ /*
+ * The guest is playing with our nerves. Preserve EL, SP,
+ * masks, flags from the existing PSTATE, and set IL.
+ * The HW will then generate an Illegal State Exception
+ * immediately after ERET.
+ */
+ spsr = *vcpu_cpsr(vcpu);
+
+ spsr &= (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT |
+ PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT |
+ PSR_MODE_MASK | PSR_MODE32_BIT);
+ spsr |= PSR_IL_BIT;
+ }
+
+ return spsr;
+}
+
+void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
+{
+ u64 spsr, elr, mode;
+ bool direct_eret;
+
+ /*
+ * Going through the whole put/load motions is a waste of time
+ * if this is a VHE guest hypervisor returning to its own
+ * userspace, or the hypervisor performing a local exception
+ * return. No need to save/restore registers, no need to
+ * switch S2 MMU. Just do the canonical ERET.
+ */
+ spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
+ spsr = kvm_check_illegal_exception_return(vcpu, spsr);
+
+ mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ direct_eret = (mode == PSR_MODE_EL0t &&
+ vcpu_el2_e2h_is_set(vcpu) &&
+ vcpu_el2_tge_is_set(vcpu));
+ direct_eret |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
+
+ if (direct_eret) {
+ *vcpu_pc(vcpu) = vcpu_read_sys_reg(vcpu, ELR_EL2);
+ *vcpu_cpsr(vcpu) = spsr;
+ trace_kvm_nested_eret(vcpu, *vcpu_pc(vcpu), spsr);
+ return;
+ }
+
+ preempt_disable();
+ kvm_arch_vcpu_put(vcpu);
+
+ elr = __vcpu_sys_reg(vcpu, ELR_EL2);
+
+ trace_kvm_nested_eret(vcpu, elr, spsr);
+
+ /*
+ * Note that the current exception level is always the virtual EL2,
+ * since we set HCR_EL2.NV bit only when entering the virtual EL2.
+ */
+ *vcpu_pc(vcpu) = elr;
+ *vcpu_cpsr(vcpu) = spsr;
+
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ preempt_enable();
+}
+
+static void kvm_inject_el2_exception(struct kvm_vcpu *vcpu, u64 esr_el2,
+ enum exception_type type)
+{
+ trace_kvm_inject_nested_exception(vcpu, esr_el2, type);
+
+ switch (type) {
+ case except_type_sync:
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
+ vcpu_write_sys_reg(vcpu, esr_el2, ESR_EL2);
+ break;
+ case except_type_irq:
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_IRQ);
+ break;
+ default:
+ WARN_ONCE(1, "Unsupported EL2 exception injection %d\n", type);
+ }
+}
+
+/*
+ * Emulate taking an exception to EL2.
+ * See ARM ARM J8.1.2 AArch64.TakeException()
+ */
+static int kvm_inject_nested(struct kvm_vcpu *vcpu, u64 esr_el2,
+ enum exception_type type)
+{
+ u64 pstate, mode;
+ bool direct_inject;
+
+ if (!vcpu_has_nv(vcpu)) {
+ kvm_err("Unexpected call to %s for the non-nesting configuration\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ /*
+ * As for ERET, we can avoid doing too much on the injection path by
+ * checking that we either took the exception from a VHE host
+ * userspace or from vEL2. In these cases, there is no change in
+ * translation regime (or anything else), so let's do as little as
+ * possible.
+ */
+ pstate = *vcpu_cpsr(vcpu);
+ mode = pstate & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ direct_inject = (mode == PSR_MODE_EL0t &&
+ vcpu_el2_e2h_is_set(vcpu) &&
+ vcpu_el2_tge_is_set(vcpu));
+ direct_inject |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
+
+ if (direct_inject) {
+ kvm_inject_el2_exception(vcpu, esr_el2, type);
+ return 1;
+ }
+
+ preempt_disable();
+
+ /*
+ * We may have an exception or PC update in the EL0/EL1 context.
+ * Commit it before entering EL2.
+ */
+ __kvm_adjust_pc(vcpu);
+
+ kvm_arch_vcpu_put(vcpu);
+
+ kvm_inject_el2_exception(vcpu, esr_el2, type);
+
+ /*
+ * A hard requirement is that a switch between EL1 and EL2
+ * contexts has to happen between a put/load, so that we can
+ * pick the correct timer and interrupt configuration, among
+ * other things.
+ *
+ * Make sure the exception actually took place before we load
+ * the new context.
+ */
+ __kvm_adjust_pc(vcpu);
+
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ preempt_enable();
+
+ return 1;
+}
+
+int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2)
+{
+ return kvm_inject_nested(vcpu, esr_el2, except_type_sync);
+}
+
+int kvm_inject_nested_irq(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Do not inject an irq if the:
+ * - Current exception level is EL2, and
+ * - virtual HCR_EL2.TGE == 0
+ * - virtual HCR_EL2.IMO == 0
+ *
+ * See Table D1-17 "Physical interrupt target and masking when EL3 is
+ * not implemented and EL2 is implemented" in ARM DDI 0487C.a.
+ */
+
+ if (vcpu_is_el2(vcpu) && !vcpu_el2_tge_is_set(vcpu) &&
+ !(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_IMO))
+ return 1;
+
+ /* esr_el2 value doesn't matter for exits due to irqs. */
+ return kvm_inject_nested(vcpu, 0, except_type_irq);
+}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 02dd7e9ebd39..1279949599b5 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -143,7 +143,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.st = &vcpu->arch.ctxt.fp_regs;
fp_state.sve_state = vcpu->arch.sve_state;
fp_state.sve_vl = vcpu->arch.sve_max_vl;
- fp_state.za_state = NULL;
+ fp_state.sme_state = NULL;
fp_state.svcr = &vcpu->arch.svcr;
fp_state.fp_type = &vcpu->arch.fp_type;
@@ -184,6 +184,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
sysreg_clear_set(CPACR_EL1,
CPACR_EL1_SMEN_EL0EN,
CPACR_EL1_SMEN_EL1EN);
+ isb();
}
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index cf4c495a4321..07444fa22888 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -24,6 +24,7 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
#include <asm/sigcontext.h>
#include "trace.h"
@@ -253,6 +254,11 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!vcpu_el1_is_32bit(vcpu))
return -EINVAL;
break;
+ case PSR_MODE_EL2h:
+ case PSR_MODE_EL2t:
+ if (!vcpu_has_nv(vcpu))
+ return -EINVAL;
+ fallthrough;
case PSR_MODE_EL0t:
case PSR_MODE_EL1t:
case PSR_MODE_EL1h:
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e778eefcf214..a798c0b4d717 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -16,6 +16,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
#include <asm/debug-monitors.h>
#include <asm/stacktrace/nvhe.h>
#include <asm/traps.h>
@@ -41,6 +42,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
+ /* Forward hvc instructions to the virtual EL2 if the guest has EL2. */
+ if (vcpu_has_nv(vcpu)) {
+ if (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_HCD)
+ kvm_inject_undefined(vcpu);
+ else
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+
+ return 1;
+ }
+
ret = kvm_hvc_call_handler(vcpu);
if (ret < 0) {
vcpu_set_reg(vcpu, 0, ~0UL);
@@ -52,6 +63,8 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
static int handle_smc(struct kvm_vcpu *vcpu)
{
+ int ret;
+
/*
* "If an SMC instruction executed at Non-secure EL1 is
* trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
@@ -59,10 +72,30 @@ static int handle_smc(struct kvm_vcpu *vcpu)
*
* We need to advance the PC after the trap, as it would
* otherwise return to the same address...
+ *
+ * Only handle SMCs from the virtual EL2 with an immediate of zero and
+ * skip it otherwise.
*/
- vcpu_set_reg(vcpu, 0, ~0UL);
+ if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) {
+ vcpu_set_reg(vcpu, 0, ~0UL);
+ kvm_incr_pc(vcpu);
+ return 1;
+ }
+
+ /*
+ * If imm is zero then it is likely an SMCCC call.
+ *
+ * Note that on ARMv8.3, even if EL3 is not implemented, SMC executed
+ * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than
+ * being treated as UNDEFINED.
+ */
+ ret = kvm_hvc_call_handler(vcpu);
+ if (ret < 0)
+ vcpu_set_reg(vcpu, 0, ~0UL);
+
kvm_incr_pc(vcpu);
- return 1;
+
+ return ret;
}
/*
@@ -196,6 +229,15 @@ static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
return 1;
}
+static int kvm_handle_eret(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
+ return kvm_handle_ptrauth(vcpu);
+
+ kvm_emulate_nested_eret(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
@@ -211,6 +253,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_SMC64] = handle_smc,
[ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
[ESR_ELx_EC_SVE] = handle_sve,
+ [ESR_ELx_EC_ERET] = kvm_handle_eret,
[ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 791d3de76771..424a5107cddb 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -14,6 +14,7 @@
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
#if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__)
#error Hypervisor code only!
@@ -23,7 +24,9 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
u64 val;
- if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+ if (unlikely(vcpu_has_nv(vcpu)))
+ return vcpu_read_sys_reg(vcpu, reg);
+ else if (__vcpu_read_sys_reg_from_cpu(reg, &val))
return val;
return __vcpu_sys_reg(vcpu, reg);
@@ -31,18 +34,25 @@ static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
- if (__vcpu_write_sys_reg_to_cpu(val, reg))
- return;
-
- __vcpu_sys_reg(vcpu, reg) = val;
+ if (unlikely(vcpu_has_nv(vcpu)))
+ vcpu_write_sys_reg(vcpu, val, reg);
+ else if (!__vcpu_write_sys_reg_to_cpu(val, reg))
+ __vcpu_sys_reg(vcpu, reg) = val;
}
-static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
+static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode,
+ u64 val)
{
- if (has_vhe())
+ if (unlikely(vcpu_has_nv(vcpu))) {
+ if (target_mode == PSR_MODE_EL1h)
+ vcpu_write_sys_reg(vcpu, val, SPSR_EL1);
+ else
+ vcpu_write_sys_reg(vcpu, val, SPSR_EL2);
+ } else if (has_vhe()) {
write_sysreg_el1(val, SYS_SPSR);
- else
+ } else {
__vcpu_sys_reg(vcpu, SPSR_EL1) = val;
+ }
}
static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
@@ -101,6 +111,11 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
__vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
break;
+ case PSR_MODE_EL2h:
+ vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL2);
+ sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+ __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL2);
+ break;
default:
/* Don't do that */
BUG();
@@ -153,7 +168,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= target_mode;
*vcpu_cpsr(vcpu) = new;
- __vcpu_write_spsr(vcpu, old);
+ __vcpu_write_spsr(vcpu, target_mode, old);
}
/*
@@ -323,11 +338,20 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
break;
+
+ case unpack_vcpu_flag(EXCEPT_AA64_EL2_SYNC):
+ enter_exception64(vcpu, PSR_MODE_EL2h, except_type_sync);
+ break;
+
+ case unpack_vcpu_flag(EXCEPT_AA64_EL2_IRQ):
+ enter_exception64(vcpu, PSR_MODE_EL2h, except_type_irq);
+ break;
+
default:
/*
- * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ}
- * will be implemented at some point. Everything
- * else gets silently ignored.
+ * Only EL1_SYNC and EL2_{SYNC,IRQ} makes
+ * sense so far. Everything else gets silently
+ * ignored.
*/
break;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index baa5b9b3dde5..699ea1f8d409 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -39,7 +39,6 @@ static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
- ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR);
ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0);
@@ -95,7 +94,6 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2);
- write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1);
if (has_vhe() ||
!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
@@ -156,9 +154,26 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR);
}
+/* Read the VCPU state's PSTATE, but translate (v)EL2 to EL1. */
+static inline u64 to_hw_pstate(const struct kvm_cpu_context *ctxt)
+{
+ u64 mode = ctxt->regs.pstate & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ switch (mode) {
+ case PSR_MODE_EL2t:
+ mode = PSR_MODE_EL1t;
+ break;
+ case PSR_MODE_EL2h:
+ mode = PSR_MODE_EL1h;
+ break;
+ }
+
+ return (ctxt->regs.pstate & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
+}
+
static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
{
- u64 pstate = ctxt->regs.pstate;
+ u64 pstate = to_hw_pstate(ctxt);
u64 mode = pstate & PSR_AA32_MODE_MASK;
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index c953fb4b9a13..a6d67c2bb5ae 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -183,6 +183,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
/* Initialize EL2 CPU state to sane values. */
init_el2_state // Clobbers x0..x2
+ finalise_el2_state
/* Enable MMU, set vectors and stack. */
mov x0, x28
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 0f9ac25afdf4..08d2b004f4b7 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -26,6 +26,7 @@ u64 id_aa64isar2_el1_sys_val;
u64 id_aa64mmfr0_el1_sys_val;
u64 id_aa64mmfr1_el1_sys_val;
u64 id_aa64mmfr2_el1_sys_val;
+u64 id_aa64smfr0_el1_sys_val;
/*
* Inject an unknown/undefined exception to an AArch64 guest while most of its
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index b11cf2c618a6..3d61bd3e591d 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -168,6 +168,25 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
return walker->cb(ctx, visit);
}
+static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
+ int r)
+{
+ /*
+ * Visitor callbacks return EAGAIN when the conditions that led to a
+ * fault are no longer reflected in the page tables due to a race to
+ * update a PTE. In the context of a fault handler this is interpreted
+ * as a signal to retry guest execution.
+ *
+ * Ignore the return code altogether for walkers outside a fault handler
+ * (e.g. write protecting a range of memory) and chug along with the
+ * page table walk.
+ */
+ if (r == -EAGAIN)
+ return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT);
+
+ return !r;
+}
+
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level);
@@ -200,7 +219,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
table = kvm_pte_table(ctx.old, level);
}
- if (ret)
+ if (!kvm_pgtable_walk_continue(data->walker, ret))
goto out;
if (!table) {
@@ -211,13 +230,16 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops);
ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1);
- if (ret)
+ if (!kvm_pgtable_walk_continue(data->walker, ret))
goto out;
if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST)
ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST);
out:
+ if (kvm_pgtable_walk_continue(data->walker, ret))
+ return 0;
+
return ret;
}
@@ -584,12 +606,14 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
lvls = 2;
vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
+#ifdef CONFIG_ARM64_HW_AFDBM
/*
* Enable the Hardware Access Flag management, unconditionally
* on all CPUs. The features is RES0 on CPUs without the support
* and must be ignored by the CPUs.
*/
vtcr |= VTCR_EL2_HA;
+#endif /* CONFIG_ARM64_HW_AFDBM */
/* Set the vmid bits */
vtcr |= (get_vmid_bits(mmfr1) == 16) ?
@@ -1026,7 +1050,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
if (!kvm_pte_valid(ctx->old))
- return 0;
+ return -EAGAIN;
data->level = ctx->level;
data->pte = pte;
@@ -1094,9 +1118,15 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
kvm_pte_t pte = 0;
- stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
- &pte, NULL, 0);
- dsb(ishst);
+ int ret;
+
+ ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
+ &pte, NULL,
+ KVM_PGTABLE_WALK_HANDLE_FAULT |
+ KVM_PGTABLE_WALK_SHARED);
+ if (!ret)
+ dsb(ishst);
+
return pte;
}
@@ -1141,6 +1171,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
+ KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
if (!ret)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 1a97391fedd2..cd3f3117bf16 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -40,7 +40,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
___activate_traps(vcpu);
val = read_sysreg(cpacr_el1);
- val |= CPACR_EL1_TTA;
+ val |= CPACR_ELx_TTA;
val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
@@ -120,6 +120,25 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
+ /*
+ * If we were in HYP context on entry, adjust the PSTATE view
+ * so that the usual helpers work correctly.
+ */
+ if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) {
+ u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ switch (mode) {
+ case PSR_MODE_EL1t:
+ mode = PSR_MODE_EL2t;
+ break;
+ case PSR_MODE_EL1h:
+ mode = PSR_MODE_EL2h;
+ break;
+ }
+
+ *vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT);
+ *vcpu_cpsr(vcpu) |= mode;
+ }
}
/* Switch to the guest for VHE systems running in EL2 */
@@ -154,6 +173,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
+ if (is_hyp_ctxt(vcpu))
+ vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT);
+ else
+ vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT);
+
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index c9f401fa01a9..64c086c02c60 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -198,7 +198,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
break;
case ARM_SMCCC_HV_PV_TIME_ST:
gpa = kvm_init_stolen_time(vcpu);
- if (gpa != GPA_INVALID)
+ if (gpa != INVALID_GPA)
val[0] = gpa;
break;
case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index f32f4a2a347f..64c3aec0d937 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -12,17 +12,55 @@
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
#include <asm/esr.h>
+static void pend_sync_exception(struct kvm_vcpu *vcpu)
+{
+ /* If not nesting, EL1 is the only possible exception target */
+ if (likely(!vcpu_has_nv(vcpu))) {
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+ return;
+ }
+
+ /*
+ * With NV, we need to pick between EL1 and EL2. Note that we
+ * never deal with a nesting exception here, hence never
+ * changing context, and the exception itself can be delayed
+ * until the next entry.
+ */
+ switch(*vcpu_cpsr(vcpu) & PSR_MODE_MASK) {
+ case PSR_MODE_EL2h:
+ case PSR_MODE_EL2t:
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
+ break;
+ case PSR_MODE_EL1h:
+ case PSR_MODE_EL1t:
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+ break;
+ case PSR_MODE_EL0t:
+ if (vcpu_el2_tge_is_set(vcpu))
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL2_SYNC);
+ else
+ kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static bool match_target_el(struct kvm_vcpu *vcpu, unsigned long target)
+{
+ return (vcpu_get_flag(vcpu, EXCEPT_MASK) == target);
+}
+
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u64 esr = 0;
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
-
- vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
+ pend_sync_exception(vcpu);
/*
* Build an {i,d}abort, depending on the level and the
@@ -43,14 +81,22 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
if (!is_iabt)
esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
- vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1);
+ esr |= ESR_ELx_FSC_EXTABT;
+
+ if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC))) {
+ vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+ } else {
+ vcpu_write_sys_reg(vcpu, addr, FAR_EL2);
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
+ }
}
static void inject_undef64(struct kvm_vcpu *vcpu)
{
u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
- kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
+ pend_sync_exception(vcpu);
/*
* Build an unknown exception, depending on the instruction
@@ -59,7 +105,10 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
if (kvm_vcpu_trap_il_is32bit(vcpu))
esr |= ESR_ELx_IL;
- vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+ if (match_target_el(vcpu, unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC)))
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
+ else
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL2);
}
#define DFSR_FSC_EXTABT_LPAE 0x10
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 01352f5838a0..7113587222ff 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -46,16 +46,17 @@ static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end)
* long will also starve other vCPUs. We have to also make sure that the page
* tables are not freed while we released the lock.
*/
-static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
+static int stage2_apply_range(struct kvm_s2_mmu *mmu, phys_addr_t addr,
phys_addr_t end,
int (*fn)(struct kvm_pgtable *, u64, u64),
bool resched)
{
+ struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
int ret;
u64 next;
do {
- struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+ struct kvm_pgtable *pgt = mmu->pgt;
if (!pgt)
return -EINVAL;
@@ -71,8 +72,8 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
return ret;
}
-#define stage2_apply_range_resched(kvm, addr, end, fn) \
- stage2_apply_range(kvm, addr, end, fn, true)
+#define stage2_apply_range_resched(mmu, addr, end, fn) \
+ stage2_apply_range(mmu, addr, end, fn, true)
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
@@ -235,7 +236,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
lockdep_assert_held_write(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
- WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
+ WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap,
may_block));
}
@@ -250,7 +251,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
- stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_flush);
+ stage2_apply_range_resched(&kvm->arch.mmu, addr, end, kvm_pgtable_stage2_flush);
}
/**
@@ -934,8 +935,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
*/
static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
- struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
- stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect);
+ stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect);
}
/**
@@ -1383,7 +1383,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
else
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
__pfn_to_phys(pfn), prot,
- memcache, KVM_PGTABLE_WALK_SHARED);
+ memcache,
+ KVM_PGTABLE_WALK_HANDLE_FAULT |
+ KVM_PGTABLE_WALK_SHARED);
/* Mark the page dirty only if the fault is handled successfully */
if (writable && !ret) {
@@ -1401,20 +1403,18 @@ out_unlock:
/* Resolve the access fault by making the page young again. */
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
- pte_t pte;
- kvm_pte_t kpte;
+ kvm_pte_t pte;
struct kvm_s2_mmu *mmu;
trace_kvm_access_fault(fault_ipa);
- write_lock(&vcpu->kvm->mmu_lock);
+ read_lock(&vcpu->kvm->mmu_lock);
mmu = vcpu->arch.hw_mmu;
- kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
- write_unlock(&vcpu->kvm->mmu_lock);
+ pte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
+ read_unlock(&vcpu->kvm->mmu_lock);
- pte = __pte(kpte);
- if (pte_valid(pte))
- kvm_set_pfn_accessed(pte_pfn(pte));
+ if (kvm_pte_valid(pte))
+ kvm_set_pfn_accessed(kvm_pte_to_pfn(pte));
}
/**
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
new file mode 100644
index 000000000000..315354d27978
--- /dev/null
+++ b/arch/arm64/kvm/nested.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017 - Columbia University and Linaro Ltd.
+ * Author: Jintack Lim <jintack.lim@linaro.org>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
+#include <asm/sysreg.h>
+
+#include "sys_regs.h"
+
+/* Protection against the sysreg repainting madness... */
+#define NV_FTR(r, f) ID_AA64##r##_EL1_##f
+
+/*
+ * Our emulated CPU doesn't support all the possible features. For the
+ * sake of simplicity (and probably mental sanity), wipe out a number
+ * of feature bits we don't intend to support for the time being.
+ * This list should get updated as new features get added to the NV
+ * support, and new extension to the architecture.
+ */
+void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 id = reg_to_encoding(r);
+ u64 val, tmp;
+
+ val = p->regval;
+
+ switch (id) {
+ case SYS_ID_AA64ISAR0_EL1:
+ /* Support everything but TME, O.S. and Range TLBIs */
+ val &= ~(NV_FTR(ISAR0, TLB) |
+ NV_FTR(ISAR0, TME));
+ break;
+
+ case SYS_ID_AA64ISAR1_EL1:
+ /* Support everything but PtrAuth and Spec Invalidation */
+ val &= ~(GENMASK_ULL(63, 56) |
+ NV_FTR(ISAR1, SPECRES) |
+ NV_FTR(ISAR1, GPI) |
+ NV_FTR(ISAR1, GPA) |
+ NV_FTR(ISAR1, API) |
+ NV_FTR(ISAR1, APA));
+ break;
+
+ case SYS_ID_AA64PFR0_EL1:
+ /* No AMU, MPAM, S-EL2, RAS or SVE */
+ val &= ~(GENMASK_ULL(55, 52) |
+ NV_FTR(PFR0, AMU) |
+ NV_FTR(PFR0, MPAM) |
+ NV_FTR(PFR0, SEL2) |
+ NV_FTR(PFR0, RAS) |
+ NV_FTR(PFR0, SVE) |
+ NV_FTR(PFR0, EL3) |
+ NV_FTR(PFR0, EL2) |
+ NV_FTR(PFR0, EL1));
+ /* 64bit EL1/EL2/EL3 only */
+ val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
+ val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
+ val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
+ break;
+
+ case SYS_ID_AA64PFR1_EL1:
+ /* Only support SSBS */
+ val &= NV_FTR(PFR1, SSBS);
+ break;
+
+ case SYS_ID_AA64MMFR0_EL1:
+ /* Hide ECV, FGT, ExS, Secure Memory */
+ val &= ~(GENMASK_ULL(63, 43) |
+ NV_FTR(MMFR0, TGRAN4_2) |
+ NV_FTR(MMFR0, TGRAN16_2) |
+ NV_FTR(MMFR0, TGRAN64_2) |
+ NV_FTR(MMFR0, SNSMEM));
+
+ /* Disallow unsupported S2 page sizes */
+ switch (PAGE_SIZE) {
+ case SZ_64K:
+ val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
+ fallthrough;
+ case SZ_16K:
+ val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
+ fallthrough;
+ case SZ_4K:
+ /* Support everything */
+ break;
+ }
+ /*
+ * Since we can't support a guest S2 page size smaller than
+ * the host's own page size (due to KVM only populating its
+ * own S2 using the kernel's page size), advertise the
+ * limitation using FEAT_GTG.
+ */
+ switch (PAGE_SIZE) {
+ case SZ_4K:
+ val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
+ fallthrough;
+ case SZ_16K:
+ val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
+ fallthrough;
+ case SZ_64K:
+ val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);
+ break;
+ }
+ /* Cap PARange to 48bits */
+ tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
+ if (tmp > 0b0101) {
+ val &= ~NV_FTR(MMFR0, PARANGE);
+ val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
+ }
+ break;
+
+ case SYS_ID_AA64MMFR1_EL1:
+ val &= (NV_FTR(MMFR1, PAN) |
+ NV_FTR(MMFR1, LO) |
+ NV_FTR(MMFR1, HPDS) |
+ NV_FTR(MMFR1, VH) |
+ NV_FTR(MMFR1, VMIDBits));
+ break;
+
+ case SYS_ID_AA64MMFR2_EL1:
+ val &= ~(NV_FTR(MMFR2, EVT) |
+ NV_FTR(MMFR2, BBM) |
+ NV_FTR(MMFR2, TTL) |
+ GENMASK_ULL(47, 44) |
+ NV_FTR(MMFR2, ST) |
+ NV_FTR(MMFR2, CCIDX) |
+ NV_FTR(MMFR2, VARange));
+
+ /* Force TTL support */
+ val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
+ break;
+
+ case SYS_ID_AA64DFR0_EL1:
+ /* Only limited support for PMU, Debug, BPs and WPs */
+ val &= (NV_FTR(DFR0, PMUVer) |
+ NV_FTR(DFR0, WRPs) |
+ NV_FTR(DFR0, BRPs) |
+ NV_FTR(DFR0, DebugVer));
+
+ /* Cap Debug to ARMv8.1 */
+ tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
+ if (tmp > 0b0111) {
+ val &= ~NV_FTR(DFR0, DebugVer);
+ val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
+ }
+ break;
+
+ default:
+ /* Unknown register, just wipe it clean */
+ val = 0;
+ break;
+ }
+
+ p->regval = val;
+}
diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c
index 78a09f7a6637..4ceabaa4c30b 100644
--- a/arch/arm64/kvm/pvtime.c
+++ b/arch/arm64/kvm/pvtime.c
@@ -19,7 +19,7 @@ void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
u64 steal = 0;
int idx;
- if (base == GPA_INVALID)
+ if (base == INVALID_GPA)
return;
idx = srcu_read_lock(&kvm->srcu);
@@ -40,7 +40,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
switch (feature) {
case ARM_SMCCC_HV_PV_TIME_FEATURES:
case ARM_SMCCC_HV_PV_TIME_ST:
- if (vcpu->arch.steal.base != GPA_INVALID)
+ if (vcpu->arch.steal.base != INVALID_GPA)
val = SMCCC_RET_SUCCESS;
break;
}
@@ -54,7 +54,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu->kvm;
u64 base = vcpu->arch.steal.base;
- if (base == GPA_INVALID)
+ if (base == INVALID_GPA)
return base;
/*
@@ -89,7 +89,7 @@ int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
return -EFAULT;
if (!IS_ALIGNED(ipa, 64))
return -EINVAL;
- if (vcpu->arch.steal.base != GPA_INVALID)
+ if (vcpu->arch.steal.base != INVALID_GPA)
return -EEXIST;
/* Check the address is in a valid memslot */
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 2bc74739a6df..49a3257dec46 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -27,6 +27,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
#include <asm/virt.h>
/* Maximum phys_shift supported for any VM on this host */
@@ -38,6 +39,9 @@ static u32 __ro_after_init kvm_ipa_limit;
#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
PSR_F_BIT | PSR_D_BIT)
+#define VCPU_RESET_PSTATE_EL2 (PSR_MODE_EL2h | PSR_A_BIT | PSR_I_BIT | \
+ PSR_F_BIT | PSR_D_BIT)
+
#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
@@ -157,6 +161,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
if (sve_state)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
kfree(sve_state);
+ kfree(vcpu->arch.ccsidr);
}
static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
@@ -220,6 +225,10 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
if (kvm_has_mte(kvm) && is32bit)
return -EINVAL;
+ /* NV is incompatible with AArch32 */
+ if (vcpu_has_nv(vcpu) && is32bit)
+ return -EINVAL;
+
if (is32bit)
set_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
@@ -272,6 +281,12 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
if (loaded)
kvm_arch_vcpu_put(vcpu);
+ /* Disallow NV+SVE for the time being */
+ if (vcpu_has_nv(vcpu) && vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
ret = kvm_vcpu_enable_sve(vcpu);
@@ -294,6 +309,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
default:
if (vcpu_el1_is_32bit(vcpu)) {
pstate = VCPU_RESET_PSTATE_SVC;
+ } else if (vcpu_has_nv(vcpu)) {
+ pstate = VCPU_RESET_PSTATE_EL2;
} else {
pstate = VCPU_RESET_PSTATE_EL1;
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 46d161fe08d3..53749d3a0996 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -11,6 +11,7 @@
#include <linux/bitfield.h>
#include <linux/bsearch.h>
+#include <linux/cacheinfo.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/printk.h>
@@ -24,6 +25,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
#include <asm/perf_event.h>
#include <asm/sysreg.h>
@@ -78,28 +80,112 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
__vcpu_write_sys_reg_to_cpu(val, reg))
return;
- __vcpu_sys_reg(vcpu, reg) = val;
+ __vcpu_sys_reg(vcpu, reg) = val;
}
-/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
-static u32 __ro_after_init cache_levels;
-
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
#define CSSELR_MAX 14
+/*
+ * Returns the minimum line size for the selected cache, expressed as
+ * Log2(bytes).
+ */
+static u8 get_min_cache_line_size(bool icache)
+{
+ u64 ctr = read_sanitised_ftr_reg(SYS_CTR_EL0);
+ u8 field;
+
+ if (icache)
+ field = SYS_FIELD_GET(CTR_EL0, IminLine, ctr);
+ else
+ field = SYS_FIELD_GET(CTR_EL0, DminLine, ctr);
+
+ /*
+ * Cache line size is represented as Log2(words) in CTR_EL0.
+ * Log2(bytes) can be derived with the following:
+ *
+ * Log2(words) + 2 = Log2(bytes / 4) + 2
+ * = Log2(bytes) - 2 + 2
+ * = Log2(bytes)
+ */
+ return field + 2;
+}
+
/* Which cache CCSIDR represents depends on CSSELR value. */
-static u32 get_ccsidr(u32 csselr)
+static u32 get_ccsidr(struct kvm_vcpu *vcpu, u32 csselr)
{
- u32 ccsidr;
+ u8 line_size;
- /* Make sure noone else changes CSSELR during this! */
- local_irq_disable();
- write_sysreg(csselr, csselr_el1);
- isb();
- ccsidr = read_sysreg(ccsidr_el1);
- local_irq_enable();
+ if (vcpu->arch.ccsidr)
+ return vcpu->arch.ccsidr[csselr];
- return ccsidr;
+ line_size = get_min_cache_line_size(csselr & CSSELR_EL1_InD);
+
+ /*
+ * Fabricate a CCSIDR value as the overriding value does not exist.
+ * The real CCSIDR value will not be used as it can vary by the
+ * physical CPU which the vcpu currently resides in.
+ *
+ * The line size is determined with get_min_cache_line_size(), which
+ * should be valid for all CPUs even if they have different cache
+ * configuration.
+ *
+ * The associativity bits are cleared, meaning the geometry of all data
+ * and unified caches (which are guaranteed to be PIPT and thus
+ * non-aliasing) are 1 set and 1 way.
+ * Guests should not be doing cache operations by set/way at all, and
+ * for this reason, we trap them and attempt to infer the intent, so
+ * that we can flush the entire guest's address space at the appropriate
+ * time. The exposed geometry minimizes the number of the traps.
+ * [If guests should attempt to infer aliasing properties from the
+ * geometry (which is not permitted by the architecture), they would
+ * only do so for virtually indexed caches.]
+ *
+ * We don't check if the cache level exists as it is allowed to return
+ * an UNKNOWN value if not.
+ */
+ return SYS_FIELD_PREP(CCSIDR_EL1, LineSize, line_size - 4);
+}
+
+static int set_ccsidr(struct kvm_vcpu *vcpu, u32 csselr, u32 val)
+{
+ u8 line_size = FIELD_GET(CCSIDR_EL1_LineSize, val) + 4;
+ u32 *ccsidr = vcpu->arch.ccsidr;
+ u32 i;
+
+ if ((val & CCSIDR_EL1_RES0) ||
+ line_size < get_min_cache_line_size(csselr & CSSELR_EL1_InD))
+ return -EINVAL;
+
+ if (!ccsidr) {
+ if (val == get_ccsidr(vcpu, csselr))
+ return 0;
+
+ ccsidr = kmalloc_array(CSSELR_MAX, sizeof(u32), GFP_KERNEL_ACCOUNT);
+ if (!ccsidr)
+ return -ENOMEM;
+
+ for (i = 0; i < CSSELR_MAX; i++)
+ ccsidr[i] = get_ccsidr(vcpu, i);
+
+ vcpu->arch.ccsidr = ccsidr;
+ }
+
+ ccsidr[csselr] = val;
+
+ return 0;
+}
+
+static bool access_rw(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ vcpu_write_sys_reg(vcpu, p->regval, r->reg);
+ else
+ p->regval = vcpu_read_sys_reg(vcpu, r->reg);
+
+ return true;
}
/*
@@ -260,6 +346,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
+static bool trap_undef(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
/*
* ARMv8.1 mandates at least a trivial LORegion implementation, where all the
* RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
@@ -370,12 +464,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- if (p->is_write) {
- vcpu_write_sys_reg(vcpu, p->regval, r->reg);
+ access_rw(vcpu, p, r);
+ if (p->is_write)
vcpu_set_flag(vcpu, DEBUG_DIRTY);
- } else {
- p->regval = vcpu_read_sys_reg(vcpu, r->reg);
- }
trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
@@ -1049,7 +1140,9 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
treg = TIMER_REG_CVAL;
break;
default:
- BUG();
+ print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
+ kvm_inject_undefined(vcpu);
+ return false;
}
if (p->is_write)
@@ -1155,6 +1248,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon),
pmuver_to_perfmon(vcpu_pmuver(vcpu)));
break;
+ case SYS_ID_AA64MMFR2_EL1:
+ val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
+ break;
+ case SYS_ID_MMFR4_EL1:
+ val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
+ break;
}
return val;
@@ -1205,6 +1304,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
return write_to_read_only(vcpu, p, r);
p->regval = read_id_reg(vcpu, r);
+ if (vcpu_has_nv(vcpu))
+ access_nested_id_reg(vcpu, p, r);
+
return true;
}
@@ -1385,10 +1487,78 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write)
return write_to_read_only(vcpu, p, r);
- p->regval = read_sysreg(clidr_el1);
+ p->regval = __vcpu_sys_reg(vcpu, r->reg);
return true;
}
+/*
+ * Fabricate a CLIDR_EL1 value instead of using the real value, which can vary
+ * by the physical CPU which the vcpu currently resides in.
+ */
+static void reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+ u64 clidr;
+ u8 loc;
+
+ if ((ctr_el0 & CTR_EL0_IDC)) {
+ /*
+ * Data cache clean to the PoU is not required so LoUU and LoUIS
+ * will not be set and a unified cache, which will be marked as
+ * LoC, will be added.
+ *
+ * If not DIC, let the unified cache L2 so that an instruction
+ * cache can be added as L1 later.
+ */
+ loc = (ctr_el0 & CTR_EL0_DIC) ? 1 : 2;
+ clidr = CACHE_TYPE_UNIFIED << CLIDR_CTYPE_SHIFT(loc);
+ } else {
+ /*
+ * Data cache clean to the PoU is required so let L1 have a data
+ * cache and mark it as LoUU and LoUIS. As L1 has a data cache,
+ * it can be marked as LoC too.
+ */
+ loc = 1;
+ clidr = 1 << CLIDR_LOUU_SHIFT;
+ clidr |= 1 << CLIDR_LOUIS_SHIFT;
+ clidr |= CACHE_TYPE_DATA << CLIDR_CTYPE_SHIFT(1);
+ }
+
+ /*
+ * Instruction cache invalidation to the PoU is required so let L1 have
+ * an instruction cache. If L1 already has a data cache, it will be
+ * CACHE_TYPE_SEPARATE.
+ */
+ if (!(ctr_el0 & CTR_EL0_DIC))
+ clidr |= CACHE_TYPE_INST << CLIDR_CTYPE_SHIFT(1);
+
+ clidr |= loc << CLIDR_LOC_SHIFT;
+
+ /*
+ * Add tag cache unified to data cache. Allocation tags and data are
+ * unified in a cache line so that it looks valid even if there is only
+ * one cache line.
+ */
+ if (kvm_has_mte(vcpu->kvm))
+ clidr |= 2 << CLIDR_TTYPE_SHIFT(loc);
+
+ __vcpu_sys_reg(vcpu, r->reg) = clidr;
+}
+
+static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ u64 val)
+{
+ u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+ u64 idc = !CLIDR_LOC(val) || (!CLIDR_LOUIS(val) && !CLIDR_LOUU(val));
+
+ if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
+ return -EINVAL;
+
+ __vcpu_sys_reg(vcpu, rd->reg) = val;
+
+ return 0;
+}
+
static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
@@ -1410,22 +1580,10 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return write_to_read_only(vcpu, p, r);
csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1);
- p->regval = get_ccsidr(csselr);
+ csselr &= CSSELR_EL1_Level | CSSELR_EL1_InD;
+ if (csselr < CSSELR_MAX)
+ p->regval = get_ccsidr(vcpu, csselr);
- /*
- * Guests should not be doing cache operations by set/way at all, and
- * for this reason, we trap them and attempt to infer the intent, so
- * that we can flush the entire guest's address space at the appropriate
- * time.
- * To prevent this trapping from causing performance problems, let's
- * expose the geometry of all data and unified caches (which are
- * guaranteed to be PIPT and thus non-aliasing) as 1 set and 1 way.
- * [If guests should attempt to infer aliasing properties from the
- * geometry (which is not permitted by the architecture), they would
- * only do so for virtually indexed caches.]
- */
- if (!(csselr & 1)) // data or unified cache
- p->regval &= ~GENMASK(27, 3);
return true;
}
@@ -1446,6 +1604,44 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
.visibility = mte_visibility, \
}
+static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (vcpu_has_nv(vcpu))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+#define EL2_REG(name, acc, rst, v) { \
+ SYS_DESC(SYS_##name), \
+ .access = acc, \
+ .reset = rst, \
+ .reg = name, \
+ .visibility = el2_visibility, \
+ .val = v, \
+}
+
+/*
+ * EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when
+ * HCR_EL2.E2H==1, and only in the sysreg table for convenience of
+ * handling traps. Given that, they are always hidden from userspace.
+ */
+static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return REG_HIDDEN_USER;
+}
+
+#define EL12_REG(name, acc, rst, v) { \
+ SYS_DESC(SYS_##name##_EL12), \
+ .access = acc, \
+ .reset = rst, \
+ .reg = name##_EL1, \
+ .val = v, \
+ .visibility = elx2_visibility, \
+}
+
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define ID_SANITISED(name) { \
SYS_DESC(SYS_##name), \
@@ -1490,6 +1686,42 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
.visibility = raz_visibility, \
}
+static bool access_sp_el1(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ __vcpu_sys_reg(vcpu, SP_EL1) = p->regval;
+ else
+ p->regval = __vcpu_sys_reg(vcpu, SP_EL1);
+
+ return true;
+}
+
+static bool access_elr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ vcpu_write_sys_reg(vcpu, p->regval, ELR_EL1);
+ else
+ p->regval = vcpu_read_sys_reg(vcpu, ELR_EL1);
+
+ return true;
+}
+
+static bool access_spsr(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval;
+ else
+ p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1);
+
+ return true;
+}
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -1646,6 +1878,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
PTRAUTH_KEY(APDB),
PTRAUTH_KEY(APGA),
+ { SYS_DESC(SYS_SPSR_EL1), access_spsr},
+ { SYS_DESC(SYS_ELR_EL1), access_elr},
+
{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
@@ -1693,7 +1928,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_LORC_EL1), trap_loregion },
{ SYS_DESC(SYS_LORID_EL1), trap_loregion },
- { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
+ { SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
{ SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
@@ -1717,7 +1952,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
{ SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr },
- { SYS_DESC(SYS_CLIDR_EL1), access_clidr },
+ { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1,
+ .set_user = set_clidr },
+ { SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
{ SYS_DESC(SYS_CTR_EL0), access_ctr },
@@ -1913,9 +2150,67 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper,
.reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
+ EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0),
+ EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0),
+ EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
+ EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
+ EL2_REG(HCR_EL2, access_rw, reset_val, 0),
+ EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
+ EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_EL2_DEFAULT ),
+ EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
+ EL2_REG(HACR_EL2, access_rw, reset_val, 0),
+
+ EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
+ EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
+ EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
+ EL2_REG(VTTBR_EL2, access_rw, reset_val, 0),
+ EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
+
{ SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+ EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
+ EL2_REG(ELR_EL2, access_rw, reset_val, 0),
+ { SYS_DESC(SYS_SP_EL1), access_sp_el1},
+
{ SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 },
+ EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
+ EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
+ EL2_REG(ESR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 },
+
+ EL2_REG(FAR_EL2, access_rw, reset_val, 0),
+ EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
+
+ EL2_REG(MAIR_EL2, access_rw, reset_val, 0),
+ EL2_REG(AMAIR_EL2, access_rw, reset_val, 0),
+
+ EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
+ EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
+ { SYS_DESC(SYS_RMR_EL2), trap_undef },
+
+ EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
+ EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
+
+ EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0),
+ EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
+
+ EL12_REG(SCTLR, access_vm_reg, reset_val, 0x00C50078),
+ EL12_REG(CPACR, access_rw, reset_val, 0),
+ EL12_REG(TTBR0, access_vm_reg, reset_unknown, 0),
+ EL12_REG(TTBR1, access_vm_reg, reset_unknown, 0),
+ EL12_REG(TCR, access_vm_reg, reset_val, 0),
+ { SYS_DESC(SYS_SPSR_EL12), access_spsr},
+ { SYS_DESC(SYS_ELR_EL12), access_elr},
+ EL12_REG(AFSR0, access_vm_reg, reset_unknown, 0),
+ EL12_REG(AFSR1, access_vm_reg, reset_unknown, 0),
+ EL12_REG(ESR, access_vm_reg, reset_unknown, 0),
+ EL12_REG(FAR, access_vm_reg, reset_unknown, 0),
+ EL12_REG(MAIR, access_vm_reg, reset_unknown, 0),
+ EL12_REG(AMAIR, access_vm_reg, reset_amair_el1, 0),
+ EL12_REG(VBAR, access_rw, reset_val, 0),
+ EL12_REG(CONTEXTIDR, access_vm_reg, reset_val, 0),
+ EL12_REG(CNTKCTL, access_rw, reset_val, 0),
+
+ EL2_REG(SP_EL2, NULL, reset_unknown, 0),
};
static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
@@ -2219,6 +2514,10 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
{ Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
+
+ /* CCSIDR2 */
+ { Op1(1), CRn( 0), CRm( 0), Op2(2), undef_access },
+
{ Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 },
};
@@ -2724,7 +3023,6 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
FUNCTION_INVARIANT(midr_el1)
FUNCTION_INVARIANT(revidr_el1)
-FUNCTION_INVARIANT(clidr_el1)
FUNCTION_INVARIANT(aidr_el1)
static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
@@ -2736,7 +3034,6 @@ static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = {
{ SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
{ SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
- { SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 },
{ SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
};
@@ -2773,33 +3070,7 @@ static int set_invariant_sys_reg(u64 id, u64 __user *uaddr)
return 0;
}
-static bool is_valid_cache(u32 val)
-{
- u32 level, ctype;
-
- if (val >= CSSELR_MAX)
- return false;
-
- /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
- level = (val >> 1);
- ctype = (cache_levels >> (level * 3)) & 7;
-
- switch (ctype) {
- case 0: /* No cache */
- return false;
- case 1: /* Instruction cache only */
- return (val & 1);
- case 2: /* Data cache only */
- case 4: /* Unified cache */
- return !(val & 1);
- case 3: /* Separate instruction and data caches */
- return true;
- default: /* Reserved: we can't know instruction or data. */
- return false;
- }
-}
-
-static int demux_c15_get(u64 id, void __user *uaddr)
+static int demux_c15_get(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
{
u32 val;
u32 __user *uval = uaddr;
@@ -2815,16 +3086,16 @@ static int demux_c15_get(u64 id, void __user *uaddr)
return -ENOENT;
val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
- if (!is_valid_cache(val))
+ if (val >= CSSELR_MAX)
return -ENOENT;
- return put_user(get_ccsidr(val), uval);
+ return put_user(get_ccsidr(vcpu, val), uval);
default:
return -ENOENT;
}
}
-static int demux_c15_set(u64 id, void __user *uaddr)
+static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr)
{
u32 val, newval;
u32 __user *uval = uaddr;
@@ -2840,16 +3111,13 @@ static int demux_c15_set(u64 id, void __user *uaddr)
return -ENOENT;
val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
- if (!is_valid_cache(val))
+ if (val >= CSSELR_MAX)
return -ENOENT;
if (get_user(newval, uval))
return -EFAULT;
- /* This is also invariant: you can't change it. */
- if (newval != get_ccsidr(val))
- return -EINVAL;
- return 0;
+ return set_ccsidr(vcpu, val, newval);
default:
return -ENOENT;
}
@@ -2864,7 +3132,7 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
int ret;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
- if (!r)
+ if (!r || sysreg_hidden_user(vcpu, r))
return -ENOENT;
if (r->get_user) {
@@ -2886,7 +3154,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
int err;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
- return demux_c15_get(reg->id, uaddr);
+ return demux_c15_get(vcpu, reg->id, uaddr);
err = get_invariant_sys_reg(reg->id, uaddr);
if (err != -ENOENT)
@@ -2908,7 +3176,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
return -EFAULT;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
- if (!r)
+ if (!r || sysreg_hidden_user(vcpu, r))
return -ENOENT;
if (sysreg_user_write_ignore(vcpu, r))
@@ -2930,7 +3198,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
int err;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
- return demux_c15_set(reg->id, uaddr);
+ return demux_c15_set(vcpu, reg->id, uaddr);
err = set_invariant_sys_reg(reg->id, uaddr);
if (err != -ENOENT)
@@ -2942,13 +3210,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
static unsigned int num_demux_regs(void)
{
- unsigned int i, count = 0;
-
- for (i = 0; i < CSSELR_MAX; i++)
- if (is_valid_cache(i))
- count++;
-
- return count;
+ return CSSELR_MAX;
}
static int write_demux_regids(u64 __user *uindices)
@@ -2958,8 +3220,6 @@ static int write_demux_regids(u64 __user *uindices)
val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
for (i = 0; i < CSSELR_MAX; i++) {
- if (!is_valid_cache(i))
- continue;
if (put_user(val | i, uindices))
return -EFAULT;
uindices++;
@@ -3002,7 +3262,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
if (!(rd->reg || rd->get_user))
return 0;
- if (sysreg_hidden(vcpu, rd))
+ if (sysreg_hidden_user(vcpu, rd))
return 0;
if (!copy_reg_to_user(rd, uind))
@@ -3061,7 +3321,6 @@ int __init kvm_sys_reg_table_init(void)
{
bool valid = true;
unsigned int i;
- struct sys_reg_desc clidr;
/* Make sure tables are unique and in order. */
valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
@@ -3078,23 +3337,5 @@ int __init kvm_sys_reg_table_init(void)
for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
- /*
- * CLIDR format is awkward, so clean it up. See ARM B4.1.20:
- *
- * If software reads the Cache Type fields from Ctype1
- * upwards, once it has seen a value of 0b000, no caches
- * exist at further-out levels of the hierarchy. So, for
- * example, if Ctype3 is the first Cache Type field with a
- * value of 0b000, the values of Ctype4 to Ctype7 must be
- * ignored.
- */
- get_clidr_el1(NULL, &clidr); /* Ugly... */
- cache_levels = clidr.val;
- for (i = 0; i < 7; i++)
- if (((cache_levels >> (i*3)) & 7) == 0)
- break;
- /* Clear all higher bits. */
- cache_levels &= (1 << (i*3))-1;
-
return 0;
}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index e4ebb3a379fd..6b11f2cc7146 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -85,8 +85,9 @@ struct sys_reg_desc {
};
#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */
-#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */
-#define REG_USER_WI (1 << 2) /* WI from userspace only */
+#define REG_HIDDEN_USER (1 << 1) /* hidden from userspace only */
+#define REG_RAZ (1 << 2) /* RAZ from userspace and guest */
+#define REG_USER_WI (1 << 3) /* WI from userspace only */
static __printf(2, 3)
inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -152,6 +153,15 @@ static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
return sysreg_visibility(vcpu, r) & REG_HIDDEN;
}
+static inline bool sysreg_hidden_user(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ if (likely(!r->visibility))
+ return false;
+
+ return r->visibility(vcpu, r) & (REG_HIDDEN | REG_HIDDEN_USER);
+}
+
static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h
index 33e4e7dd2719..f3e46a976125 100644
--- a/arch/arm64/kvm/trace_arm.h
+++ b/arch/arm64/kvm/trace_arm.h
@@ -2,6 +2,7 @@
#if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_ARM_ARM64_KVM_H
+#include <asm/kvm_emulate.h>
#include <kvm/arm_arch_timer.h>
#include <linux/tracepoint.h>
@@ -301,6 +302,64 @@ TRACE_EVENT(kvm_timer_emulate,
__entry->timer_idx, __entry->should_fire)
);
+TRACE_EVENT(kvm_nested_eret,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned long elr_el2,
+ unsigned long spsr_el2),
+ TP_ARGS(vcpu, elr_el2, spsr_el2),
+
+ TP_STRUCT__entry(
+ __field(struct kvm_vcpu *, vcpu)
+ __field(unsigned long, elr_el2)
+ __field(unsigned long, spsr_el2)
+ __field(unsigned long, target_mode)
+ __field(unsigned long, hcr_el2)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->elr_el2 = elr_el2;
+ __entry->spsr_el2 = spsr_el2;
+ __entry->target_mode = spsr_el2 & (PSR_MODE_MASK | PSR_MODE32_BIT);
+ __entry->hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+ ),
+
+ TP_printk("elr_el2: 0x%lx spsr_el2: 0x%08lx (M: %s) hcr_el2: %lx",
+ __entry->elr_el2, __entry->spsr_el2,
+ __print_symbolic(__entry->target_mode, kvm_mode_names),
+ __entry->hcr_el2)
+);
+
+TRACE_EVENT(kvm_inject_nested_exception,
+ TP_PROTO(struct kvm_vcpu *vcpu, u64 esr_el2, int type),
+ TP_ARGS(vcpu, esr_el2, type),
+
+ TP_STRUCT__entry(
+ __field(struct kvm_vcpu *, vcpu)
+ __field(unsigned long, esr_el2)
+ __field(int, type)
+ __field(unsigned long, spsr_el2)
+ __field(unsigned long, pc)
+ __field(unsigned long, source_mode)
+ __field(unsigned long, hcr_el2)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->esr_el2 = esr_el2;
+ __entry->type = type;
+ __entry->spsr_el2 = *vcpu_cpsr(vcpu);
+ __entry->pc = *vcpu_pc(vcpu);
+ __entry->source_mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
+ __entry->hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+ ),
+
+ TP_printk("%s: esr_el2 0x%lx elr_el2: 0x%lx spsr_el2: 0x%08lx (M: %s) hcr_el2: %lx",
+ __print_symbolic(__entry->type, kvm_exception_type_names),
+ __entry->esr_el2, __entry->pc, __entry->spsr_el2,
+ __print_symbolic(__entry->source_mode, kvm_mode_names),
+ __entry->hcr_el2)
+);
+
#endif /* _TRACE_ARM_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 6c7f6ae21ec0..cd134db41a57 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -570,7 +570,7 @@ int kvm_vgic_hyp_init(void)
if (ret)
return ret;
- if (!has_mask)
+ if (!has_mask && !kvm_vgic_global_state.maint_irq)
return 0;
ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index b32d434c1d4a..e67b3b2c8044 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -473,9 +473,10 @@ int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
* active state can be overwritten when the VCPU's state is synced coming back
* from the guest.
*
- * For shared interrupts as well as GICv3 private interrupts, we have to
- * stop all the VCPUs because interrupts can be migrated while we don't hold
- * the IRQ locks and we don't want to be chasing moving targets.
+ * For shared interrupts as well as GICv3 private interrupts accessed from the
+ * non-owning CPU, we have to stop all the VCPUs because interrupts can be
+ * migrated while we don't hold the IRQ locks and we don't want to be chasing
+ * moving targets.
*
* For GICv2 private interrupts we don't have to do anything because
* userspace accesses to the VGIC state already require all VCPUs to be
@@ -484,7 +485,8 @@ int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
*/
static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
{
- if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
+ if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
+ vcpu != kvm_get_running_vcpu()) ||
intid >= VGIC_NR_PRIVATE_IRQS)
kvm_arm_halt_guest(vcpu->kvm);
}
@@ -492,7 +494,8 @@ static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
/* See vgic_access_active_prepare */
static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid)
{
- if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
+ if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
+ vcpu != kvm_get_running_vcpu()) ||
intid >= VGIC_NR_PRIVATE_IRQS)
kvm_arm_resume_guest(vcpu->kvm);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 684bdfaad4a9..469d816f356f 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -3,6 +3,7 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
+#include <linux/kstrtox.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <kvm/arm_vgic.h>
@@ -584,25 +585,25 @@ DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap);
static int __init early_group0_trap_cfg(char *buf)
{
- return strtobool(buf, &group0_trap);
+ return kstrtobool(buf, &group0_trap);
}
early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg);
static int __init early_group1_trap_cfg(char *buf)
{
- return strtobool(buf, &group1_trap);
+ return kstrtobool(buf, &group1_trap);
}
early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg);
static int __init early_common_trap_cfg(char *buf)
{
- return strtobool(buf, &common_trap);
+ return kstrtobool(buf, &common_trap);
}
early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);
static int __init early_gicv4_enable(char *buf)
{
- return strtobool(buf, &gicv4_enable);
+ return kstrtobool(buf, &gicv4_enable);
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);