summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2018-06-01 20:17:22 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2018-06-01 20:17:22 +0300
commit5eec43a1fa2a7ec5225411c97538fa582d36f579 (patch)
tree22928042a707851ab4359eb45e7bda04a374f4d3
parent75025cc9d13f2093bb1ee4388dbaae3182c97bab (diff)
parente25028c8ded011d19f9a11164807507c94febc01 (diff)
downloadlinux-5eec43a1fa2a7ec5225411c97538fa582d36f579.tar.xz
Merge tag 'kvmarm-for-v4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/ARM updates for 4.18:
- Lazy context-switching of FPSIMD registers on arm64
- Allow virtual redistributors to be part of two or more MMIO ranges
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic-v3.txt30
-rw-r--r--arch/arm/include/asm/kvm_host.h10
-rw-r--r--arch/arm/include/uapi/asm/kvm.h1
-rw-r--r--arch/arm64/Kconfig7
-rw-r--r--arch/arm64/include/asm/cpufeature.h29
-rw-r--r--arch/arm64/include/asm/fpsimd.h21
-rw-r--r--arch/arm64/include/asm/kvm_asm.h8
-rw-r--r--arch/arm64/include/asm/kvm_host.h45
-rw-r--r--arch/arm64/include/asm/processor.h15
-rw-r--r--arch/arm64/include/asm/thread_info.h13
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h1
-rw-r--r--arch/arm64/kernel/fpsimd.c177
-rw-r--r--arch/arm64/kernel/ptrace.c1
-rw-r--r--arch/arm64/kvm/Kconfig1
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/debug.c8
-rw-r--r--arch/arm64/kvm/fpsimd.c110
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c6
-rw-r--r--arch/arm64/kvm/hyp/entry.S43
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S19
-rw-r--r--arch/arm64/kvm/hyp/switch.c124
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c9
-rw-r--r--include/kvm/arm_vgic.h17
-rw-r--r--include/linux/kvm_host.h9
-rw-r--r--include/linux/sched.h6
-rw-r--r--include/linux/thread_info.h11
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/arm/arm.c15
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c100
-rw-r--r--virt/kvm/arm/vgic/vgic-kvm-device.c53
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c112
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c99
-rw-r--r--virt/kvm/arm/vgic/vgic.h42
-rw-r--r--virt/kvm/kvm_main.c7
35 files changed, 809 insertions, 349 deletions
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
index 9293b45abdb9..2408ab720ef7 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
@@ -27,16 +27,42 @@ Groups:
VCPU and all of the redistributor pages are contiguous.
Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
This address needs to be 64K aligned.
+
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
+ The attribute data pointed to by kvm_device_attr.addr is a __u64 value:
+ bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
+ values: | count | base | flags | index
+ - index encodes the unique redistributor region index
+ - flags: reserved for future use, currently 0
+ - base field encodes bits [51:16] of the guest physical base address
+ of the first redistributor in the region.
+ - count encodes the number of redistributors in the region. Must be
+ greater than 0.
+ There are two 64K pages for each redistributor in the region and
+ redistributors are laid out contiguously within the region. Regions
+ are filled with redistributors in index order. The sum of all
+ region count fields must be greater than or equal to the number of
+ VCPUs. Redistributor regions must be registered in increasing
+ index order, starting from index 0.
+ The characteristics of a specific redistributor region can be read
+ by presetting the index field in the attr data.
+ Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+
+ It is invalid to mix calls with KVM_VGIC_V3_ADDR_TYPE_REDIST and
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
+
Errors:
-E2BIG: Address outside of addressable IPA range
- -EINVAL: Incorrectly aligned address
+ -EINVAL: Incorrectly aligned address, bad redistributor region
+ count/index, mixed redistributor region attribute usage
-EEXIST: Address already configured
+ -ENOENT: Attempt to read the characteristics of a non-existent
+ redistributor region
-ENXIO: The group or attribute is unknown/unsupported for this device
or hardware support is missing.
-EFAULT: Invalid user pointer for attr->addr.
-
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
Attributes:
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c7c28c885a19..f079a2039c8a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -280,6 +280,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+static inline bool kvm_arch_check_sve_has_vhe(void) { return true; }
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
@@ -303,8 +304,13 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
-/* All host FP/SIMD state is restored on guest exit, so nothing to save: */
-static inline void kvm_fpsimd_flush_cpu_state(void) {}
+/*
+ * VFP/NEON switching is all done by the hyp switch code, so no need to
+ * coordinate with host context handling for this state:
+ */
+static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_vhe_guest_enter(void) {}
static inline void kvm_arm_vhe_guest_exit(void) {}
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index caae4843cb70..16e006f708ca 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -91,6 +91,7 @@ struct kvm_regs {
#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
#define KVM_VGIC_ITS_ADDR_TYPE 4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5
#define KVM_VGIC_V3_DIST_SIZE SZ_64K
#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index eb2cf4938f6d..b0d3820081c8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1130,6 +1130,7 @@ endmenu
config ARM64_SVE
bool "ARM Scalable Vector Extension support"
default y
+ depends on !KVM || ARM64_VHE
help
The Scalable Vector Extension (SVE) is an extension to the AArch64
execution state which complements and extends the SIMD functionality
@@ -1155,6 +1156,12 @@ config ARM64_SVE
booting the kernel. If unsure and you are not observing these
symptoms, you should assume that it is safe to say Y.
+ CPUs that support SVE are architecturally required to support the
+ Virtualization Host Extensions (VHE), so the kernel makes no
+ provision for supporting SVE alongside KVM without VHE enabled.
+ Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
+ KVM in the same kernel image.
+
config ARM64_MODULE_PLTS
bool
select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 09b0f2a80c8f..0a6b7133195e 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -11,9 +11,7 @@
#include <asm/cpucaps.h>
#include <asm/cputype.h>
-#include <asm/fpsimd.h>
#include <asm/hwcap.h>
-#include <asm/sigcontext.h>
#include <asm/sysreg.h>
/*
@@ -510,33 +508,6 @@ static inline bool system_supports_sve(void)
cpus_have_const_cap(ARM64_SVE);
}
-/*
- * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
- * vector length.
- *
- * Use only if SVE is present.
- * This function clobbers the SVE vector length.
- */
-static inline u64 read_zcr_features(void)
-{
- u64 zcr;
- unsigned int vq_max;
-
- /*
- * Set the maximum possible VL, and write zeroes to all other
- * bits to see if they stick.
- */
- sve_kernel_enable(NULL);
- write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
-
- zcr = read_sysreg_s(SYS_ZCR_EL1);
- zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
- vq_max = sve_vq_from_vl(sve_get_vl());
- zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
- return zcr;
-}
-
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index aa7162ae93e3..fa92747a49c8 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -18,6 +18,8 @@
#include <asm/ptrace.h>
#include <asm/errno.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
#ifndef __ASSEMBLY__
@@ -41,6 +43,8 @@ struct task_struct;
extern void fpsimd_save_state(struct user_fpsimd_state *state);
extern void fpsimd_load_state(struct user_fpsimd_state *state);
+extern void fpsimd_save(void);
+
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
@@ -49,12 +53,27 @@ extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
+extern void fpsimd_bind_task_to_cpu(void);
+extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state);
+
extern void fpsimd_flush_task_state(struct task_struct *target);
+extern void fpsimd_flush_cpu_state(void);
extern void sve_flush_cpu_state(void);
/* Maximum VL that SVE VL-agnostic software can transparently support */
#define SVE_VL_ARCH_MAX 0x100
+/* Offset of FFR in the SVE register dump */
+static inline size_t sve_ffr_offset(int vl)
+{
+ return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
+}
+
+static inline void *sve_pffr(struct thread_struct *thread)
+{
+ return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl);
+}
+
extern void sve_save_state(void *state, u32 *pfpsr);
extern void sve_load_state(void const *state, u32 const *pfpsr,
unsigned long vq_minus_1);
@@ -63,6 +82,8 @@ extern unsigned int sve_get_vl(void);
struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern u64 read_zcr_features(void);
+
extern int __ro_after_init sve_max_vl;
#ifdef CONFIG_ARM64_SVE
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index f6648a3e4152..821a7032c0f7 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -30,19 +30,19 @@
/* The hyp-stub will return this for any kvm_call_hyp() call */
#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
-#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
-#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+#ifndef __ASSEMBLY__
+
+#include <linux/mm.h>
/* Translate a kernel address of @sym into its equivalent linear mapping */
#define kvm_ksym_ref(sym) \
({ \
void *val = &sym; \
if (!is_kernel_in_hyp_mode()) \
- val = phys_to_virt((u64)&sym - kimage_voffset); \
+ val = lm_alias(&sym); \
val; \
})
-#ifndef __ASSEMBLY__
struct kvm;
struct kvm_vcpu;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 469de8acd06f..a4ca202ff3f2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
+#include <asm/thread_info.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -216,8 +217,8 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* Guest debug state */
- u64 debug_flags;
+ /* Miscellaneous vcpu state flags */
+ u64 flags;
/*
* We maintain more than a single set of debug registers to support
@@ -238,6 +239,10 @@ struct kvm_vcpu_arch {
/* Pointer to host CPU context */
kvm_cpu_context_t *host_cpu_context;
+
+ struct thread_info *host_thread_info; /* hyp VA */
+ struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
+
struct {
/* {Break,watch}point registers */
struct kvm_guest_debug_arch regs;
@@ -293,6 +298,12 @@ struct kvm_vcpu_arch {
bool sysregs_loaded_on_cpu;
};
+/* vcpu_arch flags field values: */
+#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
+#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
+#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
+#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */
+
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
/*
@@ -394,6 +405,19 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
}
+static inline bool kvm_arch_check_sve_has_vhe(void)
+{
+ /*
+ * The Arm architecture specifies that implementation of SVE
+ * requires VHE also to be implemented. The KVM code for arm64
+ * relies on this when SVE is present:
+ */
+ if (system_supports_sve())
+ return has_vhe();
+ else
+ return true;
+}
+
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
@@ -420,15 +444,18 @@ static inline void __cpu_init_stage2(void)
"PARange is %d bits, unsupported configuration!", parange);
}
-/*
- * All host FP/SIMD state is restored on guest exit, so nothing needs
- * doing here except in the SVE case:
-*/
-static inline void kvm_fpsimd_flush_cpu_state(void)
+/* Guest/host FPSIMD coordination helpers */
+int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+
+#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
+static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
- if (system_supports_sve())
- sve_flush_cpu_state();
+ return kvm_arch_vcpu_run_map_fp(vcpu);
}
+#endif
static inline void kvm_arm_vhe_guest_enter(void)
{
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 767598932549..c99e657fdd57 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -156,7 +156,9 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
/* Sync TPIDR_EL0 back to thread_struct for current */
void tls_preserve_current_state(void);
-#define INIT_THREAD { }
+#define INIT_THREAD { \
+ .fpsimd_cpu = NR_CPUS, \
+}
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
{
@@ -244,6 +246,17 @@ void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused);
void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused);
void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused);
+/*
+ * Not at the top of the file due to a direct #include cycle between
+ * <asm/fpsimd.h> and <asm/processor.h>. Deferring this #include
+ * ensures that contents of processor.h are visible to fpsimd.h even if
+ * processor.h is included first.
+ *
+ * These prctl helpers are the only things in this file that require
+ * fpsimd.h. The core code expects them to be in this header.
+ */
+#include <asm/fpsimd.h>
+
/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
#define SVE_SET_VL(arg) sve_set_current_vl(arg)
#define SVE_GET_VL() sve_get_current_vl()
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 740aa03c5f0d..af271f9a6c9f 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -45,12 +45,6 @@ struct thread_info {
int preempt_count; /* 0 => preemptable, <0 => bug */
};
-#define INIT_THREAD_INFO(tsk) \
-{ \
- .preempt_count = INIT_PREEMPT_COUNT, \
- .addr_limit = KERNEL_DS, \
-}
-
#define thread_saved_pc(tsk) \
((unsigned long)(tsk->thread.cpu_context.pc))
#define thread_saved_sp(tsk) \
@@ -117,5 +111,12 @@ void arch_release_task_struct(struct task_struct *tsk);
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_NOHZ)
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .flags = _TIF_FOREIGN_FPSTATE, \
+ .preempt_count = INIT_PREEMPT_COUNT, \
+ .addr_limit = KERNEL_DS, \
+}
+
#endif /* __KERNEL__ */
#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 04b3256f8e6d..4e76630dd655 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -91,6 +91,7 @@ struct kvm_regs {
#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
#define KVM_VGIC_ITS_ADDR_TYPE 4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5
#define KVM_VGIC_V3_DIST_SIZE SZ_64K
#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 87a35364e750..7074c4cd0e0e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -37,12 +37,14 @@
#include <linux/sched/task_stack.h>
#include <linux/signal.h>
#include <linux/slab.h>
+#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <asm/esr.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
+#include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
@@ -118,7 +120,6 @@
*/
struct fpsimd_last_state_struct {
struct user_fpsimd_state *st;
- bool sve_in_use;
};
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -159,19 +160,6 @@ static void sve_free(struct task_struct *task)
__sve_free(task);
}
-
-/* Offset of FFR in the SVE register dump */
-static size_t sve_ffr_offset(int vl)
-{
- return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
-}
-
-static void *sve_pffr(struct task_struct *task)
-{
- return (char *)task->thread.sve_state +
- sve_ffr_offset(task->thread.sve_vl);
-}
-
static void change_cpacr(u64 val, u64 mask)
{
u64 cpacr = read_sysreg(CPACR_EL1);
@@ -252,31 +240,24 @@ static void task_fpsimd_load(void)
WARN_ON(!in_softirq() && !irqs_disabled());
if (system_supports_sve() && test_thread_flag(TIF_SVE))
- sve_load_state(sve_pffr(current),
+ sve_load_state(sve_pffr(&current->thread),
&current->thread.uw.fpsimd_state.fpsr,
sve_vq_from_vl(current->thread.sve_vl) - 1);
else
fpsimd_load_state(&current->thread.uw.fpsimd_state);
-
- if (system_supports_sve()) {
- /* Toggle SVE trapping for userspace if needed */
- if (test_thread_flag(TIF_SVE))
- sve_user_enable();
- else
- sve_user_disable();
-
- /* Serialised by exception return to user */
- }
}
/*
- * Ensure current's FPSIMD/SVE storage in thread_struct is up to date
- * with respect to the CPU registers.
+ * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
+ * date with respect to the CPU registers.
*
* Softirqs (and preemption) must be disabled.
*/
-static void task_fpsimd_save(void)
+void fpsimd_save(void)
{
+ struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
+ /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
+
WARN_ON(!in_softirq() && !irqs_disabled());
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -291,10 +272,9 @@ static void task_fpsimd_save(void)
return;
}
- sve_save_state(sve_pffr(current),
- &current->thread.uw.fpsimd_state.fpsr);
+ sve_save_state(sve_pffr(&current->thread), &st->fpsr);
} else
- fpsimd_save_state(&current->thread.uw.fpsimd_state);
+ fpsimd_save_state(st);
}
}
@@ -598,7 +578,7 @@ int sve_set_vector_length(struct task_struct *task,
if (task == current) {
local_bh_disable();
- task_fpsimd_save();
+ fpsimd_save();
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
@@ -618,10 +598,8 @@ int sve_set_vector_length(struct task_struct *task,
task->thread.sve_vl = vl;
out:
- if (flags & PR_SVE_VL_INHERIT)
- set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
- else
- clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
+ update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT,
+ flags & PR_SVE_VL_INHERIT);
return 0;
}
@@ -765,6 +743,33 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
isb();
}
+/*
+ * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
+ * vector length.
+ *
+ * Use only if SVE is present.
+ * This function clobbers the SVE vector length.
+ */
+u64 read_zcr_features(void)
+{
+ u64 zcr;
+ unsigned int vq_max;
+
+ /*
+ * Set the maximum possible VL, and write zeroes to all other
+ * bits to see if they stick.
+ */
+ sve_kernel_enable(NULL);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
+
+ zcr = read_sysreg_s(SYS_ZCR_EL1);
+ zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
+ vq_max = sve_vq_from_vl(sve_get_vl());
+ zcr |= vq_max - 1; /* set LEN field to maximum effective value */
+
+ return zcr;
+}
+
void __init sve_setup(void)
{
u64 zcr;
@@ -839,7 +844,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
local_bh_disable();
- task_fpsimd_save();
+ fpsimd_save();
fpsimd_to_sve(current);
/* Force ret_to_user to reload the registers: */
@@ -892,31 +897,25 @@ asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
void fpsimd_thread_switch(struct task_struct *next)
{
+ bool wrong_task, wrong_cpu;
+
if (!system_supports_fpsimd())
return;
+
+ /* Save unsaved fpsimd state, if any: */
+ fpsimd_save();
+
/*
- * Save the current FPSIMD state to memory, but only if whatever is in
- * the registers is in fact the most recent userland FPSIMD state of
- * 'current'.
+ * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+ * state. For kernel threads, FPSIMD registers are never loaded
+ * and wrong_task and wrong_cpu will always be true.
*/
- if (current->mm)
- task_fpsimd_save();
+ wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+ &next->thread.uw.fpsimd_state;
+ wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
- if (next->mm) {
- /*
- * If we are switching to a task whose most recent userland
- * FPSIMD state is already in the registers of *this* cpu,
- * we can skip loading the state from memory. Otherwise, set
- * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
- * upon the next return to userland.
- */
- if (__this_cpu_read(fpsimd_last_state.st) ==
- &next->thread.uw.fpsimd_state
- && next->thread.fpsimd_cpu == smp_processor_id())
- clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
- else
- set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
- }
+ update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+ wrong_task || wrong_cpu);
}
void fpsimd_flush_thread(void)
@@ -982,7 +981,7 @@ void fpsimd_preserve_current_state(void)
return;
local_bh_disable();
- task_fpsimd_save();
+ fpsimd_save();
local_bh_enable();
}
@@ -1002,14 +1001,33 @@ void fpsimd_signal_preserve_current_state(void)
* Associate current's FPSIMD context with this cpu
* Preemption must be disabled when calling this function.
*/
-static void fpsimd_bind_to_cpu(void)
+void fpsimd_bind_task_to_cpu(void)
{
struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state);
last->st = &current->thread.uw.fpsimd_state;
- last->sve_in_use = test_thread_flag(TIF_SVE);
current->thread.fpsimd_cpu = smp_processor_id();
+
+ if (system_supports_sve()) {
+ /* Toggle SVE trapping for userspace if needed */
+ if (test_thread_flag(TIF_SVE))
+ sve_user_enable();
+ else
+ sve_user_disable();
+
+ /* Serialised by exception return to user */
+ }
+}
+
+void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
+{
+ struct fpsimd_last_state_struct *last =
+ this_cpu_ptr(&fpsimd_last_state);
+
+ WARN_ON(!in_softirq() && !irqs_disabled());
+
+ last->st = st;
}
/*
@@ -1026,7 +1044,7 @@ void fpsimd_restore_current_state(void)
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
task_fpsimd_load();
- fpsimd_bind_to_cpu();
+ fpsimd_bind_task_to_cpu();
}
local_bh_enable();
@@ -1049,9 +1067,9 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
fpsimd_to_sve(current);
task_fpsimd_load();
+ fpsimd_bind_task_to_cpu();
- if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_bind_to_cpu();
+ clear_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
}
@@ -1064,29 +1082,12 @@ void fpsimd_flush_task_state(struct task_struct *t)
t->thread.fpsimd_cpu = NR_CPUS;
}
-static inline void fpsimd_flush_cpu_state(void)
+void fpsimd_flush_cpu_state(void)
{
__this_cpu_write(fpsimd_last_state.st, NULL);
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
}
-/*
- * Invalidate any task SVE state currently held in this CPU's regs.
- *
- * This is used to prevent the kernel from trying to reuse SVE register data
- * that is detroyed by KVM guest enter/exit. This function should go away when
- * KVM SVE support is implemented. Don't use it for anything else.
- */
-#ifdef CONFIG_ARM64_SVE
-void sve_flush_cpu_state(void)
-{
- struct fpsimd_last_state_struct const *last =
- this_cpu_ptr(&fpsimd_last_state);
-
- if (last->st && last->sve_in_use)
- fpsimd_flush_cpu_state();
-}
-#endif /* CONFIG_ARM64_SVE */
-
#ifdef CONFIG_KERNEL_MODE_NEON
DEFINE_PER_CPU(bool, kernel_neon_busy);
@@ -1120,11 +1121,8 @@ void kernel_neon_begin(void)
__this_cpu_write(kernel_neon_busy, true);
- /* Save unsaved task fpsimd state, if any: */
- if (current->mm) {
- task_fpsimd_save();
- set_thread_flag(TIF_FOREIGN_FPSTATE);
- }
+ /* Save unsaved fpsimd state, if any: */
+ fpsimd_save();
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
@@ -1246,13 +1244,10 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
{
switch (cmd) {
case CPU_PM_ENTER:
- if (current->mm)
- task_fpsimd_save();
+ fpsimd_save();
fpsimd_flush_cpu_state();
break;
case CPU_PM_EXIT:
- if (current->mm)
- set_thread_flag(TIF_FOREIGN_FPSTATE);
break;
case CPU_PM_ENTER_FAILED:
default:
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 7ff81fed46e1..78889c4546d7 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -44,6 +44,7 @@
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
+#include <asm/fpsimd.h>
#include <asm/pgtable.h>
#include <asm/stacktrace.h>
#include <asm/syscall.h>
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a2e3a5af1113..47b23bf617c7 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -39,6 +39,7 @@ config KVM
select HAVE_KVM_IRQ_ROUTING
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
+ select HAVE_KVM_VCPU_RUN_PID_CHANGE
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 93afff91cb7c..0f2a135ba15b 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -19,7 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
-kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index a1f4ebdfe6d3..00d422336a45 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -103,7 +103,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
*
* Additionally, KVM only traps guest accesses to the debug registers if
* the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
- * flag on vcpu->arch.debug_flags). Since the guest must not interfere
+ * flag on vcpu->arch.flags). Since the guest must not interfere
* with the hardware state when debugging the guest, we must ensure that
* trapping is enabled whenever we are debugging the guest using the
* debug registers.
@@ -111,7 +111,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
{
- bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
+ bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY);
unsigned long mdscr;
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
@@ -184,7 +184,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
- vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
trap_debug = true;
trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
@@ -206,7 +206,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
/* If KDE or MDE are set, perform a full save/restore cycle. */
if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
- vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
new file mode 100644
index 000000000000..dc6ecfa5a2d2
--- /dev/null
+++ b/arch/arm64/kvm/fpsimd.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/arm64/kvm/fpsimd.c: Guest/host FPSIMD context coordination helpers
+ *
+ * Copyright 2018 Arm Limited
+ * Author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <linux/bottom_half.h>
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * Called on entry to KVM_RUN unless this vcpu previously ran at least
+ * once and the most recent prior KVM_RUN for this vcpu was called from
+ * the same task as current (highly likely).
+ *
+ * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu),
+ * such that on entering hyp the relevant parts of current are already
+ * mapped.
+ */
+int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ struct thread_info *ti = &current->thread_info;
+ struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
+
+ /*
+ * Make sure the host task thread flags and fpsimd state are
+ * visible to hyp:
+ */
+ ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP);
+ if (ret)
+ goto error;
+
+ ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP);
+ if (ret)
+ goto error;
+
+ vcpu->arch.host_thread_info = kern_hyp_va(ti);
+ vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
+error:
+ return ret;
+}
+
+/*
+ * Prepare vcpu for saving the host's FPSIMD state and loading the guest's.
+ * The actual loading is done by the FPSIMD access trap taken to hyp.
+ *
+ * Here, we just set the correct metadata to indicate that the FPSIMD
+ * state in the cpu regs (if any) belongs to current on the host.
+ *
+ * TIF_SVE is backed up here, since it may get clobbered with guest state.
+ * This flag is restored by kvm_arch_vcpu_put_fp(vcpu).
+ */
+void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
+{
+ BUG_ON(!current->mm);
+
+ vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
+ vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+ if (test_thread_flag(TIF_SVE))
+ vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
+}
+
+/*
+ * If the guest FPSIMD state was loaded, update the host's context
+ * tracking data to mark the CPU FPSIMD regs as dirty and belonging to
+ * vcpu, so that they will be written back if the kernel clobbers them
+ * due to kernel-mode NEON before re-entry into the guest.
+ */
+void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(!irqs_disabled());
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+ fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs);
+ clear_thread_flag(TIF_FOREIGN_FPSTATE);
+ clear_thread_flag(TIF_SVE);
+ }
+}
+
+/*
+ * Write back the vcpu FPSIMD regs if they are dirty, and invalidate the
+ * cpu FPSIMD regs so that they can't be spuriously reused if this vcpu
+ * disappears and another task or vcpu appears that recycles the same
+ * struct fpsimd_state.
+ */
+void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
+{
+ local_bh_disable();
+
+ update_thread_flag(TIF_SVE,
+ vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+ /* Clean guest FP state to memory and invalidate cpu view */
+ fpsimd_save();
+ fpsimd_flush_cpu_state();
+ } else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ /* Ensure user trap controls are correctly restored */
+ fpsimd_bind_task_to_cpu();
+ }
+
+ local_bh_enable();
+}
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 3e717f66f011..50009766e5e5 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -163,7 +163,7 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
if (!has_vhe())
__debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
- if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+ if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
@@ -185,7 +185,7 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
if (!has_vhe())
__debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
- if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+ if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
return;
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
@@ -196,7 +196,7 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
__debug_save_state(vcpu, guest_dbg, guest_ctxt);
__debug_restore_state(vcpu, host_dbg, host_ctxt);
- vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
}
u32 __hyp_text __kvm_get_mdcr_el2(void)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e41a161d313a..fad1e164fe48 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -166,46 +166,3 @@ abort_guest_exit_end:
orr x0, x0, x5
1: ret
ENDPROC(__guest_exit)
-
-ENTRY(__fpsimd_guest_restore)
- // x0: esr
- // x1: vcpu
- // x2-x29,lr: vcpu regs
- // vcpu x0-x1 on the stack
- stp x2, x3, [sp, #-16]!
- stp x4, lr, [sp, #-16]!
-
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- mrs x2, cptr_el2
- bic x2, x2, #CPTR_EL2_TFP
- msr cptr_el2, x2
-alternative_else
- mrs x2, cpacr_el1
- orr x2, x2, #CPACR_EL1_FPEN
- msr cpacr_el1, x2
-alternative_endif
- isb
-
- mov x3, x1
-
- ldr x0, [x3, #VCPU_HOST_CONTEXT]
- kern_hyp_va x0
- add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
- bl __fpsimd_save_state
-
- add x2, x3, #VCPU_CONTEXT
- add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
- bl __fpsimd_restore_state
-
- // Skip restoring fpexc32 for AArch64 guests
- mrs x1, hcr_el2
- tbnz x1, #HCR_RW_SHIFT, 1f
- ldr x4, [x3, #VCPU_FPEXC32_EL2]
- msr fpexc32_el2, x4
-1:
- ldp x4, lr, [sp], #16
- ldp x2, x3, [sp], #16
- ldp x0, x1, [sp], #16
-
- eret
-ENDPROC(__fpsimd_guest_restore)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index bffece27b5c1..753b9d213651 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -113,25 +113,6 @@ el1_hvc_guest:
el1_trap:
get_vcpu_ptr x1, x0
-
- mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
- /*
- * x0: ESR_EC
- * x1: vcpu pointer
- */
-
- /*
- * We trap the first access to the FP/SIMD to save the host context
- * and restore the guest context lazily.
- * If FP/SIMD is not implemented, handle the trap and inject an
- * undefined instruction exception to the guest.
- */
-alternative_if_not ARM64_HAS_NO_FPSIMD
- cmp x0, #ESR_ELx_EC_FP_ASIMD
- b.eq __fpsimd_guest_restore
-alternative_else_nop_endif
-
mov x0, #ARM_EXCEPTION_TRAP
b __guest_exit
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index d9645236e474..2d45bd719a5d 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -21,21 +21,25 @@
#include <kvm/arm_psci.h>
+#include <asm/cpufeature.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
+#include <asm/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
-static bool __hyp_text __fpsimd_enabled_nvhe(void)
+/*
+ * Check whether the FP regs were dirtied while in the host-side run loop:
+ * if the host thread has _TIF_FOREIGN_FPSTATE set, both
+ * KVM_ARM64_FP_ENABLED and KVM_ARM64_FP_HOST are cleared from the vcpu
+ * flags.
+ *
+ * Returns true if KVM_ARM64_FP_ENABLED is (still) set afterwards; the
+ * activate_traps helpers keep FP/SIMD trapping enabled when this returns
+ * false.
+ */
+static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
{
- return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
-}
+ if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+ KVM_ARM64_FP_HOST);
-static bool fpsimd_enabled_vhe(void)
-{
- return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
+ return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
}
/* Save the 32-bit only FPSIMD system register state */
@@ -92,7 +96,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_EL1_TTA;
- val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+ val &= ~CPACR_EL1_ZEN;
+ if (!update_fp_enabled(vcpu))
+ val &= ~CPACR_EL1_FPEN;
+
write_sysreg(val, cpacr_el1);
write_sysreg(kvm_get_hyp_vector(), vbar_el1);
@@ -105,7 +112,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
__activate_traps_common(vcpu);
val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
+ if (!update_fp_enabled(vcpu))
+ val |= CPTR_EL2_TFP;
+
write_sysreg(val, cptr_el2);
}
@@ -318,6 +328,50 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
}
}
+/*
+ * Handle a guest trap on an FP/SIMD access (called from
+ * fixup_guest_exit()): stop trapping FP/SIMD, save the host's register
+ * state if the registers still hold it (KVM_ARM64_FP_HOST), then load the
+ * guest's state and mark it as loaded (KVM_ARM64_FP_ENABLED).
+ *
+ * Always returns true, i.e. the exit is fixed up and the guest is resumed.
+ */
+static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
+{
+	struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
+
+	/* Lift the trap: set FPEN with VHE, clear TFP without it. */
+	if (!has_vhe()) {
+		u64 cptr = read_sysreg(cptr_el2);
+
+		write_sysreg(cptr & ~(u64)CPTR_EL2_TFP, cptr_el2);
+	} else {
+		u64 cpacr = read_sysreg(cpacr_el1);
+
+		write_sysreg(cpacr | CPACR_EL1_FPEN, cpacr_el1);
+	}
+
+	isb();
+
+	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+		/*
+		 * In the SVE case, VHE is assumed: it is enforced by
+		 * Kconfig and kvm_arch_init().
+		 */
+		bool host_sve = system_supports_sve() &&
+				(vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+
+		if (!host_sve) {
+			__fpsimd_save_state(host_fpsimd);
+		} else {
+			struct thread_struct *thread;
+
+			thread = container_of(host_fpsimd, struct thread_struct,
+					      uw.fpsimd_state);
+			sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr);
+		}
+
+		/* The regs no longer hold host state. */
+		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+	}
+
+	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+
+	/* Skip restoring fpexc32 for AArch64 guests (HCR_RW set) */
+	if (!(read_sysreg(hcr_el2) & HCR_RW))
+		write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
+			     fpexc32_el2);
+
+	vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+
+	return true;
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -334,11 +388,23 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
* same PC once the SError has been injected, and replay the
* trapping instruction.
*/
- if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+ if (*exit_code != ARM_EXCEPTION_TRAP)
+ goto exit;
+
+ /*
+ * We trap the first access to the FP/SIMD to save the host context
+ * and restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an
+ * undefined instruction exception to the guest.
+ */
+ if (system_supports_fpsimd() &&
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
+ return __hyp_switch_fpsimd(vcpu);
+
+ if (!__populate_fault_info(vcpu))
return true;
- if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
- *exit_code == ARM_EXCEPTION_TRAP) {
+ if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
@@ -350,12 +416,8 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (valid) {
int ret = __vgic_v2_perform_cpuif_access(vcpu);
- if (ret == 1) {
- if (__skip_instr(vcpu))
- return true;
- else
- *exit_code = ARM_EXCEPTION_TRAP;
- }
+ if (ret == 1 && __skip_instr(vcpu))
+ return true;
if (ret == -1) {
/* Promote an illegal access to an
@@ -368,23 +430,21 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
*exit_code = ARM_EXCEPTION_EL1_SERROR;
}
+
+ goto exit;
}
}
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- *exit_code == ARM_EXCEPTION_TRAP &&
(kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
int ret = __vgic_v3_perform_cpuif_access(vcpu);
- if (ret == 1) {
- if (__skip_instr(vcpu))
- return true;
- else
- *exit_code = ARM_EXCEPTION_TRAP;
- }
+ if (ret == 1 && __skip_instr(vcpu))
+ return true;
}
+exit:
/* Return to the host kernel and handle the exit */
return false;
}
@@ -394,7 +454,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
- bool fp_enabled;
u64 exit_code;
host_ctxt = vcpu->arch.host_cpu_context;
@@ -416,19 +475,14 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
- fp_enabled = fpsimd_enabled_vhe();
-
sysreg_save_guest_state_vhe(guest_ctxt);
__deactivate_traps(vcpu);
sysreg_restore_host_state_vhe(host_ctxt);
- if (fp_enabled) {
- __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
- __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
__fpsimd_save_fpexc32(vcpu);
- }
__debug_switch_to_host(vcpu);
@@ -440,7 +494,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
- bool fp_enabled;
u64 exit_code;
vcpu = kern_hyp_va(vcpu);
@@ -472,8 +525,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
- fp_enabled = __fpsimd_enabled_nvhe();
-
__sysreg_save_state_nvhe(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_disable_traps(vcpu);
@@ -484,11 +535,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(host_ctxt);
- if (fp_enabled) {
- __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
- __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
__fpsimd_save_fpexc32(vcpu);
- }
/*
* This must come after restoring the host sysregs, since a non-VHE
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index b3894df6bf1a..35bc16832efe 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -196,7 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
- if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
}
@@ -218,7 +218,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
- if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 6e3b969391fd..a4363735d3f8 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -31,7 +31,6 @@
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
@@ -338,7 +337,7 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
{
if (p->is_write) {
vcpu_write_sys_reg(vcpu, p->regval, r->reg);
- vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
p->regval = vcpu_read_sys_reg(vcpu, r->reg);
}
@@ -369,7 +368,7 @@ static void reg_to_dbg(struct kvm_vcpu *vcpu,
}
*dbg_reg = val;
- vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
}
static void dbg_to_reg(struct kvm_vcpu *vcpu,
@@ -1441,7 +1440,7 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
{
if (p->is_write) {
vcpu_cp14(vcpu, r->reg) = p->regval;
- vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
p->regval = vcpu_cp14(vcpu, r->reg);
}
@@ -1473,7 +1472,7 @@ static bool trap_xvr(struct kvm_vcpu *vcpu,
val |= p->regval << 32;
*dbg_reg = val;
- vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
p->regval = *dbg_reg >> 32;
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index e7efe12a81bd..cfdd2484cc42 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -28,7 +28,7 @@
#include <linux/irqchip/arm-gic-v4.h>
-#define VGIC_V3_MAX_CPUS 255
+#define VGIC_V3_MAX_CPUS 512
#define VGIC_V2_MAX_CPUS 8
#define VGIC_NR_IRQS_LEGACY 256
#define VGIC_NR_SGIS 16
@@ -201,6 +201,14 @@ struct vgic_its {
struct vgic_state_iter;
+/* One GICv3 redistributor region, linked on struct vgic_dist's rd_regions. */
+struct vgic_redist_region {
+ u32 index; /* region index; the region list is kept sorted by it */
+ gpa_t base; /* base address of the region */
+ u32 count; /* number of redistributors or 0 if single region */
+ u32 free_index; /* index of the next free redistributor */
+ struct list_head list; /* list linkage (see rd_regions) */
+};
+
struct vgic_dist {
bool in_kernel;
bool ready;
@@ -220,10 +228,7 @@ struct vgic_dist {
/* either a GICv2 CPU interface */
gpa_t vgic_cpu_base;
/* or a number of GICv3 redistributor regions */
- struct {
- gpa_t vgic_redist_base;
- gpa_t vgic_redist_free_offset;
- };
+ struct list_head rd_regions;
};
/* distributor enabled */
@@ -311,6 +316,7 @@ struct vgic_cpu {
*/
struct vgic_io_device rd_iodev;
struct vgic_io_device sgi_iodev;
+ struct vgic_redist_region *rdreg;
/* Contains the attributes and gpa of the LPI pending tables. */
u64 pendbaser;
@@ -332,7 +338,6 @@ void kvm_vgic_early_init(struct kvm *kvm);
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
int kvm_vgic_create(struct kvm *kvm, u32 type);
void kvm_vgic_destroy(struct kvm *kvm);
-void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_vgic_map_resources(struct kvm *kvm);
int kvm_vgic_hyp_init(void);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 14e710d639c7..b81769a5a2b7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1273,4 +1273,13 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end);
+#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
+/* Only declared here; the definition lives outside this header. */
+int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
+#else
+/* Default stub when the option is not set: does nothing and returns 0. */
+static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
+
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c2413703f45d..ff289ae6b787 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1618,6 +1618,12 @@ static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
clear_ti_thread_flag(task_thread_info(tsk), flag);
}
+/* Set (@value true) or clear (@value false) thread flag @flag of @tsk. */
+static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag,
+					  bool value)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+
+	update_ti_thread_flag(ti, flag, value);
+}
+
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index cf2862bd134a..8d8821b3689a 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -60,6 +60,15 @@ static inline void clear_ti_thread_flag(struct thread_info *ti, int flag)
clear_bit(flag, (unsigned long *)&ti->flags);
}
+/* Set thread flag @flag in @ti when @value is true, clear it otherwise. */
+static inline void update_ti_thread_flag(struct thread_info *ti, int flag,
+					 bool value)
+{
+	if (!value) {
+		clear_ti_thread_flag(ti, flag);
+		return;
+	}
+
+	set_ti_thread_flag(ti, flag);
+}
+
static inline int test_and_set_ti_thread_flag(struct thread_info *ti, int flag)
{
return test_and_set_bit(flag, (unsigned long *)&ti->flags);
@@ -79,6 +88,8 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
set_ti_thread_flag(current_thread_info(), flag)
#define clear_thread_flag(flag) \
clear_ti_thread_flag(current_thread_info(), flag)
+/* Set or clear @flag on the current thread, depending on @value. */
+#define update_thread_flag(flag, value) \
+ update_ti_thread_flag(current_thread_info(), flag, value)
#define test_and_set_thread_flag(flag) \
test_and_set_ti_thread_flag(current_thread_info(), flag)
#define test_and_clear_thread_flag(flag) \
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index cca7e065a075..72143cfaf6ec 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -54,3 +54,6 @@ config HAVE_KVM_IRQ_BYPASS
config HAVE_KVM_VCPU_ASYNC_IOCTL
bool
+
+config HAVE_KVM_VCPU_RUN_PID_CHANGE
+ bool
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index a4c1b76240df..126b98fbf9ba 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -16,6 +16,7 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
+#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
@@ -41,6 +42,7 @@
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
@@ -290,7 +292,6 @@ out:
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
- kvm_vgic_vcpu_early_init(vcpu);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
@@ -363,10 +364,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
kvm_vcpu_load_sysregs(vcpu);
+ kvm_arch_vcpu_load_fp(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ kvm_arch_vcpu_put_fp(vcpu);
kvm_vcpu_put_sysregs(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
@@ -678,9 +681,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
preempt_disable();
- /* Flush FP/SIMD state that can't survive guest entry/exit */
- kvm_fpsimd_flush_cpu_state();
-
kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
@@ -778,6 +778,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (static_branch_unlikely(&userspace_irqchip_in_use))
kvm_timer_sync_hwstate(vcpu);
+ kvm_arch_vcpu_ctxsync_fp(vcpu);
+
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
@@ -1570,6 +1572,11 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
+ if (!kvm_arch_check_sve_has_vhe()) {
+ kvm_pr_unimpl("SVE system without VHE unsupported. Broken cpu?");
+ return -ENODEV;
+ }
+
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
if (ret < 0) {
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index e07156c30323..2673efce65f3 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -44,7 +44,7 @@
*
* CPU Interface:
*
- * - kvm_vgic_vcpu_early_init(): initialization of static data that
+ * - kvm_vgic_vcpu_init(): initialization of static data that
* doesn't depend on any sizing information or emulation type. No
* allocation is allowed there.
*/
@@ -67,46 +67,6 @@ void kvm_vgic_early_init(struct kvm *kvm)
spin_lock_init(&dist->lpi_list_lock);
}
-/**
- * kvm_vgic_vcpu_early_init() - Initialize static VGIC VCPU data structures
- * @vcpu: The VCPU whose VGIC data structures whould be initialized
- *
- * Only do initialization, but do not actually enable the VGIC CPU interface
- * yet.
- */
-void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- int i;
-
- INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
- spin_lock_init(&vgic_cpu->ap_list_lock);
-
- /*
- * Enable and configure all SGIs to be edge-triggered and
- * configure all PPIs as level-triggered.
- */
- for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
- struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
-
- INIT_LIST_HEAD(&irq->ap_list);
- spin_lock_init(&irq->irq_lock);
- irq->intid = i;
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu;
- irq->targets = 1U << vcpu->vcpu_id;
- kref_init(&irq->refcount);
- if (vgic_irq_is_sgi(i)) {
- /* SGIs */
- irq->enabled = 1;
- irq->config = VGIC_CONFIG_EDGE;
- } else {
- /* PPIs */
- irq->config = VGIC_CONFIG_LEVEL;
- }
- }
-}
-
/* CREATION */
/**
@@ -167,8 +127,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm->arch.vgic.vgic_model = type;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
- kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
- kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
+
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
+ kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+ else
+ INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
out_unlock:
for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
@@ -221,13 +184,50 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
}
/**
- * kvm_vgic_vcpu_init() - Register VCPU-specific KVM iodevs
+ * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
+ * structures and register VCPU-specific KVM iodevs
+ *
* @vcpu: pointer to the VCPU being created and initialized
+ *
+ * Only do initialization, but do not actually enable the
+ * VGIC CPU interface
*/
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
{
- int ret = 0;
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int ret = 0;
+ int i;
+
+ vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF;
+
+ INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ spin_lock_init(&vgic_cpu->ap_list_lock);
+
+ /*
+ * Enable and configure all SGIs to be edge-triggered and
+ * configure all PPIs as level-triggered.
+ */
+ for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
+ struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
+
+ INIT_LIST_HEAD(&irq->ap_list);
+ spin_lock_init(&irq->irq_lock);
+ irq->intid = i;
+ irq->vcpu = NULL;
+ irq->target_vcpu = vcpu;
+ irq->targets = 1U << vcpu->vcpu_id;
+ kref_init(&irq->refcount);
+ if (vgic_irq_is_sgi(i)) {
+ /* SGIs */
+ irq->enabled = 1;
+ irq->config = VGIC_CONFIG_EDGE;
+ } else {
+ /* PPIs */
+ irq->config = VGIC_CONFIG_LEVEL;
+ }
+ }
if (!irqchip_in_kernel(vcpu->kvm))
return 0;
@@ -303,13 +303,23 @@ out:
+/*
+ * Tear down the distributor state of @kvm: mark it not ready and not
+ * initialized, free the SPI array, free every redistributor region (GICv3
+ * model only) and tear down GICv4 state when direct MSIs are supported.
+ */
static void kvm_vgic_dist_destroy(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_redist_region *rdreg, *next;
dist->ready = false;
dist->initialized = false;
kfree(dist->spis);
+ dist->spis = NULL; /* do not leave a pointer to freed memory behind */
dist->nr_spis = 0;
+ /* Only the GICv3 model keeps a redistributor region list to free */
+ if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
+ list_del(&rdreg->list);
+ kfree(rdreg);
+ }
+ INIT_LIST_HEAD(&dist->rd_regions);
+ }
+
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
}
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index 10ae6f394b71..6ada2432e37c 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -66,6 +66,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
int r = 0;
struct vgic_dist *vgic = &kvm->arch.vgic;
phys_addr_t *addr_ptr, alignment;
+ u64 undef_value = VGIC_ADDR_UNDEF;
mutex_lock(&kvm->lock);
switch (type) {
@@ -84,16 +85,61 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
addr_ptr = &vgic->vgic_dist_base;
alignment = SZ_64K;
break;
-	case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+	case KVM_VGIC_V3_ADDR_TYPE_REDIST: {
+		struct vgic_redist_region *rdreg;
+
 		r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
 		if (r)
 			break;
 		if (write) {
-			r = vgic_v3_set_redist_base(kvm, *addr);
+			/* Legacy API: single region, index 0, count 0 */
+			r = vgic_v3_set_redist_base(kvm, 0, *addr, 0);
 			goto out;
 		}
-		addr_ptr = &vgic->vgic_redist_base;
+		/*
+		 * list_first_entry() never returns NULL: on an empty list it
+		 * yields a pointer computed from the list head itself. Use
+		 * the _or_null variant so that reading the address before
+		 * any region has been set reports VGIC_ADDR_UNDEF instead of
+		 * dereferencing a bogus region.
+		 */
+		rdreg = list_first_entry_or_null(&vgic->rd_regions,
+						 struct vgic_redist_region,
+						 list);
+		if (!rdreg)
+			addr_ptr = &undef_value;
+		else
+			addr_ptr = &rdreg->base;
 		break;
+	}
+ case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION:
+ {
+ struct vgic_redist_region *rdreg;
+ u8 index;
+
+ r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (r)
+ break;
+
+ index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK;
+
+ if (write) {
+ gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK;
+ u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK)
+ >> KVM_VGIC_V3_RDIST_COUNT_SHIFT;
+ u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK)
+ >> KVM_VGIC_V3_RDIST_FLAGS_SHIFT;
+
+ if (!count || flags)
+ r = -EINVAL;
+ else
+ r = vgic_v3_set_redist_base(kvm, index,
+ base, count);
+ goto out;
+ }
+
+ rdreg = vgic_v3_rdist_region_from_index(kvm, index);
+ if (!rdreg) {
+ r = -ENOENT;
+ goto out;
+ }
+
+ *addr = index;
+ *addr |= rdreg->base;
+ *addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT;
+ goto out;
+ }
default:
r = -ENODEV;
}
@@ -665,6 +711,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
switch (attr->attr) {
case KVM_VGIC_V3_ADDR_TYPE_DIST:
case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+ case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION:
return 0;
}
break;
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 671fe81f8e1d..287784095b5b 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -184,12 +184,17 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
int target_vcpu_id = vcpu->vcpu_id;
+ gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
+ (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
u64 value;
value = (u64)(mpidr & GENMASK(23, 0)) << 32;
value |= ((target_vcpu_id & 0xffff) << 8);
- if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
+
+ if (addr == last_rdist_typer)
value |= GICR_TYPER_LAST;
if (vgic_has_its(vcpu->kvm))
value |= GICR_TYPER_PLPIS;
@@ -580,24 +585,32 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
struct vgic_dist *vgic = &kvm->arch.vgic;
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
+ struct vgic_redist_region *rdreg;
gpa_t rd_base, sgi_base;
int ret;
+ if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
+ return 0;
+
/*
* We may be creating VCPUs before having set the base address for the
* redistributor region, in which case we will come back to this
* function for all VCPUs when the base address is set. Just return
* without doing any work for now.
*/
- if (IS_VGIC_ADDR_UNDEF(vgic->vgic_redist_base))
+ rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions);
+ if (!rdreg)
return 0;
if (!vgic_v3_check_base(kvm))
return -EINVAL;
- rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset;
+ vgic_cpu->rdreg = rdreg;
+
+ rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
sgi_base = rd_base + SZ_64K;
kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
@@ -631,7 +644,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
goto out;
}
- vgic->vgic_redist_free_offset += 2 * SZ_64K;
+ rdreg->free_index++;
out:
mutex_unlock(&kvm->slots_lock);
return ret;
@@ -670,23 +683,96 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
return ret;
}
-int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr)
+/**
+ * vgic_v3_insert_redist_region - Insert a new redistributor region
+ *
+ * Performs various checks before inserting the rdist region in the list.
+ * Those tests depend on whether the size of the rdist region is known
+ * (ie. count != 0). The list is sorted by rdist region index: a new region
+ * is only accepted with index 0 on an empty list, or with the index
+ * following that of the last region, and is always appended at the tail.
+ *
+ * @kvm: kvm handle
+ * @index: redist region index
+ * @base: base of the new rdist region
+ * @count: number of redistributors the region is made of (0 in the old style
+ * single region, whose size is induced from the number of vcpus)
+ *
+ * Return 0 on success, < 0 otherwise: -EINVAL when one of the consistency
+ * checks fails, -ENOMEM when the region cannot be allocated, or the error
+ * returned by vgic_check_ioaddr().
+ */
+static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
+ gpa_t base, uint32_t count)
{
- struct vgic_dist *vgic = &kvm->arch.vgic;
+ struct vgic_dist *d = &kvm->arch.vgic;
+ struct vgic_redist_region *rdreg;
+ struct list_head *rd_regions = &d->rd_regions;
+ size_t size = count * KVM_VGIC_V3_REDIST_SIZE;
int ret;
- /* vgic_check_ioaddr makes sure we don't do this twice */
- ret = vgic_check_ioaddr(kvm, &vgic->vgic_redist_base, addr, SZ_64K);
- if (ret)
- return ret;
+ /* a legacy (count == 0) region cannot be added once any region is set */
+ if (!count && !list_empty(rd_regions))
+ return -EINVAL;
- vgic->vgic_redist_base = addr;
- if (!vgic_v3_check_base(kvm)) {
- vgic->vgic_redist_base = VGIC_ADDR_UNDEF;
+ /* does the region cross the end of memory (address wrap-around)? */
+ if (base + size < base)
return -EINVAL;
+
+ if (list_empty(rd_regions)) {
+ if (index != 0)
+ return -EINVAL;
+ } else {
+ rdreg = list_last_entry(rd_regions,
+ struct vgic_redist_region, list);
+ if (index != rdreg->index + 1)
+ return -EINVAL;
+
+ /* Cannot add an explicitly sized region after a legacy region */
+ if (!rdreg->count)
+ return -EINVAL;
}
/*
+ * For legacy single-region redistributor regions (!count),
+ * check that the redistributor region does not overlap with the
+ * distributor's address space.
+ */
+ if (!count && !IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
+ vgic_dist_overlap(kvm, base, size))
+ return -EINVAL;
+
+ /* collision with any other rdist region? */
+ if (vgic_v3_rdist_overlap(kvm, base, size))
+ return -EINVAL;
+
+ rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL);
+ if (!rdreg)
+ return -ENOMEM;
+
+ rdreg->base = VGIC_ADDR_UNDEF; /* value vgic_check_ioaddr() is handed as the current address */
+
+ ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K);
+ if (ret)
+ goto free;
+
+ rdreg->base = base;
+ rdreg->count = count;
+ rdreg->free_index = 0;
+ rdreg->index = index;
+
+ list_add_tail(&rdreg->list, rd_regions);
+ return 0;
+free:
+ kfree(rdreg);
+ return ret;
+}
+
+int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
+{
+ int ret;
+
+ ret = vgic_v3_insert_redist_region(kvm, index, addr, count);
+ if (ret)
+ return ret;
+
+ /*
* Register iodevs for each existing VCPU. Adding more VCPUs
* afterwards will register the iodevs when needed.
*/
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index bdcf8e7a6161..ff7dc890941a 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -419,6 +419,29 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
return 0;
}
+/**
+ * vgic_v3_rdist_overlap - test a range against every existing
+ * redistributor region
+ *
+ * @kvm: kvm handle
+ * @base: base of the range to test
+ * @size: size of the range to test
+ *
+ * Return: true if the range intersects at least one region on the
+ * rd_regions list, false otherwise
+ */
+bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size)
+{
+	struct list_head *regions = &kvm->arch.vgic.rd_regions;
+	struct vgic_redist_region *rdreg;
+
+	list_for_each_entry(rdreg, regions, list) {
+		/* Range ends at or below this region's base: no overlap. */
+		if (base + size <= rdreg->base)
+			continue;
+
+		/* Range starts at or above this region's end: no overlap. */
+		if (base >= rdreg->base + vgic_v3_rd_region_size(kvm, rdreg))
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
+
/*
* Check for overlapping regions and for regions crossing the end of memory
* for base addresses which have already been set.
@@ -426,41 +449,83 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
bool vgic_v3_check_base(struct kvm *kvm)
{
struct vgic_dist *d = &kvm->arch.vgic;
- gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
-
- redist_size *= atomic_read(&kvm->online_vcpus);
+ struct vgic_redist_region *rdreg;
+ /* A distributor base that is set must not wrap around the address space */
if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
return false;
- if (!IS_VGIC_ADDR_UNDEF(d->vgic_redist_base) &&
- d->vgic_redist_base + redist_size < d->vgic_redist_base)
- return false;
+ /* Neither may any registered redistributor region */
+ list_for_each_entry(rdreg, &d->rd_regions, list) {
+ if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) <
+ rdreg->base)
+ return false;
+ }
- /* Both base addresses must be set to check if they overlap */
- if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) ||
- IS_VGIC_ADDR_UNDEF(d->vgic_redist_base))
+ /* No distributor base set yet: no overlap check to perform */
+ if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base))
return true;
- if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base)
- return true;
- if (d->vgic_redist_base + redist_size <= d->vgic_dist_base)
- return true;
+ /* Valid only if the distributor overlaps no redistributor region */
+ return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base,
+ KVM_VGIC_V3_DIST_SIZE);
+}
- return false;
+/**
+ * vgic_v3_rdist_free_slot - Find a registered rdist region that still has
+ * room for one more redistributor.
+ *
+ * @rd_regions: redistributor region list head
+ *
+ * A redistributor region maps n redistributors, n = region size / (2 x 64kB).
+ * Stride between redistributors is 0 and regions are filled in the index order.
+ *
+ * Return: the first region on the list that is not full, or NULL when
+ * there is no such region.
+ */
+struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions)
+{
+	struct vgic_redist_region *rdreg;
+
+	list_for_each_entry(rdreg, rd_regions, list) {
+		if (vgic_v3_redist_region_full(rdreg))
+			continue;
+
+		return rdreg;
+	}
+
+	return NULL;
 }
+/**
+ * vgic_v3_rdist_region_from_index - Look up a redistributor region by index
+ *
+ * @kvm: kvm handle
+ * @index: index of the wanted region
+ *
+ * Return: the region on the VM's rd_regions list whose index equals @index,
+ * or NULL when there is none.
+ */
+struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
+							   u32 index)
+{
+	struct vgic_redist_region *rdreg;
+
+	list_for_each_entry(rdreg, &kvm->arch.vgic.rd_regions, list) {
+		if (rdreg->index != index)
+			continue;
+
+		return rdreg;
+	}
+
+	return NULL;
+}
+
+
int vgic_v3_map_resources(struct kvm *kvm)
{
- int ret = 0;
struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int ret = 0;
+ int c;
if (vgic_ready(kvm))
goto out;
- if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
- IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
+ kvm_debug("vcpu %d redistributor base not set\n", c);
+ ret = -ENXIO;
+ goto out;
+ }
+ }
+
+ if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
kvm_err("Need to set vgic distributor addresses first\n");
ret = -ENXIO;
goto out;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 32c25d42c93f..6879cf48652a 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -96,6 +96,13 @@
/* we only support 64 kB translation table page size */
#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16)
+#define KVM_VGIC_V3_RDIST_INDEX_MASK GENMASK_ULL(11, 0)
+#define KVM_VGIC_V3_RDIST_FLAGS_MASK GENMASK_ULL(15, 12)
+#define KVM_VGIC_V3_RDIST_FLAGS_SHIFT 12
+#define KVM_VGIC_V3_RDIST_BASE_MASK GENMASK_ULL(51, 16)
+#define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52)
+#define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52
+
/* Requires the irq_lock to be held by the caller. */
static inline bool irq_is_pending(struct vgic_irq *irq)
{
@@ -215,7 +222,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
int vgic_v3_save_pending_tables(struct kvm *kvm);
-int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr);
+int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count);
int vgic_register_redist_iodev(struct kvm_vcpu *vcpu);
bool vgic_v3_check_base(struct kvm *kvm);
@@ -265,6 +272,39 @@ static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
}
}
+static inline bool
+vgic_v3_redist_region_full(struct vgic_redist_region *region)
+{
+ if (!region->count)
+ return false;
+
+ return (region->free_index >= region->count);
+}
+
+struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rdregs);
+
+static inline size_t
+vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
+{
+ if (!rdreg->count)
+ return atomic_read(&kvm->online_vcpus) * KVM_VGIC_V3_REDIST_SIZE;
+ else
+ return rdreg->count * KVM_VGIC_V3_REDIST_SIZE;
+}
+
+struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
+ u32 index);
+
+bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
+
+static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
+{
+ struct vgic_dist *d = &kvm->arch.vgic;
+
+ return (base + size > d->vgic_dist_base) &&
+ (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
+}
+
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b125d94307d2..c5f6a552e486 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2568,8 +2568,13 @@ static long kvm_vcpu_ioctl(struct file *filp,
oldpid = rcu_access_pointer(vcpu->pid);
if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
/* The thread running this VCPU changed. */
- struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+ struct pid *newpid;
+ r = kvm_arch_vcpu_run_pid_change(vcpu);
+ if (r)
+ break;
+
+ newpid = get_task_pid(current, PIDTYPE_PID);
rcu_assign_pointer(vcpu->pid, newpid);
if (oldpid)
synchronize_rcu();