summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill Deacon <will@kernel.org>2026-06-14 14:16:59 +0300
committerWill Deacon <will@kernel.org>2026-06-14 14:16:59 +0300
commit35d2b77d8dd76cfccf54cc0c6453584ea4f31224 (patch)
tree3f3850b5a08a10d3bacf8e8aecb2b67585cb4894
parent0fccc93585c11e594920e5d203d152e89bf16687 (diff)
parent987ec51e18419cc0ebf6f6fa1cfbfd149eca443d (diff)
downloadlinux-35d2b77d8dd76cfccf54cc0c6453584ea4f31224.tar.xz
Merge branch 'for-next/fpsimd-cleanups' into for-next/core
* for-next/fpsimd-cleanups: arm64: fpsimd: Remove <asm/fpsimdmacros.h> arm64: fpsimd: Move SME save/restore inline arm64: fpsimd: Move sve_flush_live() inline arm64: fpsimd: Move SVE save/restore inline arm64: fpsimd: Use opaque type for SME state arm64: fpsimd: Use opaque type for SVE state arm64: fpsimd: Move fpsimd save/restore inline arm64: fpsimd: Split FPSR/FPCR from SVE save/restore arm64: sysreg: Add FPCR and FPSR arm64: fpsimd: Move sve_get_vl() and sme_get_vl() inline arm64: fpsimd: Use assembler for baseline SME instructions arm64: fpsimd: Use assembler for SVE instructions arm64: fpsimd: Remove sve_set_vq() and sme_set_vq() arm64: fpsimd: Fold sve_init_regs() into do_sve_acc() KVM: arm64: pkvm: Remove struct cpu_sve_state KVM: arm64: pkvm: Save host FPMR in host cpu context KVM: arm64: Don't override FFR save/restore argument KVM: arm64: Don't include <asm/fpsimdmacros.h> arm64: fpsimd: Fix type mismatch in sme_{save,load}_state() arm64: fpsimd: Fix type mismatch in sve_{save,load}_state()
-rw-r--r--arch/arm64/Kconfig5
-rw-r--r--arch/arm64/include/asm/fpsimd.h374
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h357
-rw-r--r--arch/arm64/include/asm/kvm_host.h27
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h5
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h3
-rw-r--r--arch/arm64/include/asm/processor.h7
-rw-r--r--arch/arm64/kernel/Makefile2
-rw-r--r--arch/arm64/kernel/entry-common.c8
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S134
-rw-r--r--arch/arm64/kernel/fpsimd.c90
-rw-r--r--arch/arm64/kvm/arm.c16
-rw-r--r--arch/arm64/kvm/guest.c4
-rw-r--r--arch/arm64/kvm/hyp/entry.S1
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S33
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h23
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c20
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c4
-rw-r--r--arch/arm64/kvm/hyp/vhe/Makefile2
-rw-r--r--arch/arm64/tools/sysreg45
21 files changed, 485 insertions, 677 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c053f012c6a6..3af035b73878 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2305,10 +2305,15 @@ config ARM64_SVE
booting the kernel. If unsure and you are not observing these
symptoms, you should assume that it is safe to say Y.
+config AS_HAS_SME
+ # Supported by LLVM 13+ and binutils 2.38+
+ def_bool $(as-instr,.arch_extension sme)
+
config ARM64_SME
bool "ARM Scalable Matrix Extension support"
default y
depends on ARM64_SVE
+ depends on AS_HAS_SME
help
The Scalable Matrix Extension (SME) is an extension to the AArch64
execution state which utilises a substantial subset of the SVE
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index d9d00b45ab11..a67d5774e672 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -22,6 +22,11 @@
#include <linux/stddef.h>
#include <linux/types.h>
+/*
+ * Assembler directives placed at the start of the inline asm blocks in this
+ * header so that the FP/SIMD, SVE or SME extension is enabled in the
+ * assembler for the instructions that follow.
+ */
+#define __FPSIMD_PREAMBLE ".arch_extension fp\n" \
+ ".arch_extension simd\n"
+#define __SVE_PREAMBLE ".arch_extension sve\n"
+#define __SME_PREAMBLE ".arch_extension sme\n"
+
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
#define VFP_FPSCR_CTRL_MASK 0x07f79f00
@@ -71,8 +76,82 @@ static inline void cpacr_restore(unsigned long cpacr)
struct task_struct;
-extern void fpsimd_save_state(struct user_fpsimd_state *state);
-extern void fpsimd_load_state(struct user_fpsimd_state *state);
+/* Read the live FPSR and FPCR system registers and store them in @state. */
+static inline void fpsimd_save_common(struct user_fpsimd_state *state)
+{
+ state->fpsr = read_sysreg_s(SYS_FPSR);
+ state->fpcr = read_sysreg_s(SYS_FPCR);
+}
+
+/*
+ * Restore FPSR and FPCR from @state.
+ *
+ * Writes to FPCR may be self-synchronising, so only write the register when
+ * the saved value differs from what is currently live. FPSR is always
+ * written.
+ */
+static inline void fpsimd_load_common(const struct user_fpsimd_state *state)
+{
+ write_sysreg_s(state->fpsr, SYS_FPSR);
+ if (read_sysreg_s(SYS_FPCR) != state->fpcr)
+ write_sysreg_s(state->fpcr, SYS_FPCR);
+}
+
+/*
+ * Store the 32 FP/SIMD registers Q0-Q31 into state->vregs, 16 bytes per
+ * register and in register order. FPSR and FPCR are not touched here.
+ */
+static inline void fpsimd_save_vregs(struct user_fpsimd_state *state)
+{
+ /* Report the write of the whole vregs array to the instrumentation hook. */
+ instrument_write(state->vregs, sizeof(state->vregs));
+ asm volatile(
+ __FPSIMD_PREAMBLE
+ " stp q0, q1, [%[vregs], #16 * 0]\n"
+ " stp q2, q3, [%[vregs], #16 * 2]\n"
+ " stp q4, q5, [%[vregs], #16 * 4]\n"
+ " stp q6, q7, [%[vregs], #16 * 6]\n"
+ " stp q8, q9, [%[vregs], #16 * 8]\n"
+ " stp q10, q11, [%[vregs], #16 * 10]\n"
+ " stp q12, q13, [%[vregs], #16 * 12]\n"
+ " stp q14, q15, [%[vregs], #16 * 14]\n"
+ " stp q16, q17, [%[vregs], #16 * 16]\n"
+ " stp q18, q19, [%[vregs], #16 * 18]\n"
+ " stp q20, q21, [%[vregs], #16 * 20]\n"
+ " stp q22, q23, [%[vregs], #16 * 22]\n"
+ " stp q24, q25, [%[vregs], #16 * 24]\n"
+ " stp q26, q27, [%[vregs], #16 * 26]\n"
+ " stp q28, q29, [%[vregs], #16 * 28]\n"
+ " stp q30, q31, [%[vregs], #16 * 30]\n"
+ : "=Q" (state->vregs) /* memory output: the asm writes state->vregs */
+ : [vregs] "r" (state->vregs) /* base address used by every STP */
+ );
+}
+
+/*
+ * Load the 32 FP/SIMD registers Q0-Q31 from state->vregs, 16 bytes per
+ * register and in register order. FPSR and FPCR are not touched here.
+ */
+static inline void fpsimd_load_vregs(const struct user_fpsimd_state *state)
+{
+ /* Report the read of the whole vregs array to the instrumentation hook. */
+ instrument_read(state->vregs, sizeof(state->vregs));
+ asm volatile(
+ __FPSIMD_PREAMBLE
+ " ldp q0, q1, [%[vregs], #16 * 0]\n"
+ " ldp q2, q3, [%[vregs], #16 * 2]\n"
+ " ldp q4, q5, [%[vregs], #16 * 4]\n"
+ " ldp q6, q7, [%[vregs], #16 * 6]\n"
+ " ldp q8, q9, [%[vregs], #16 * 8]\n"
+ " ldp q10, q11, [%[vregs], #16 * 10]\n"
+ " ldp q12, q13, [%[vregs], #16 * 12]\n"
+ " ldp q14, q15, [%[vregs], #16 * 14]\n"
+ " ldp q16, q17, [%[vregs], #16 * 16]\n"
+ " ldp q18, q19, [%[vregs], #16 * 18]\n"
+ " ldp q20, q21, [%[vregs], #16 * 20]\n"
+ " ldp q22, q23, [%[vregs], #16 * 22]\n"
+ " ldp q24, q25, [%[vregs], #16 * 24]\n"
+ " ldp q26, q27, [%[vregs], #16 * 26]\n"
+ " ldp q28, q29, [%[vregs], #16 * 28]\n"
+ " ldp q30, q31, [%[vregs], #16 * 30]\n"
+ :
+ : "Q" (state->vregs), /* memory input: the asm reads state->vregs */
+ [vregs] "r" (state->vregs) /* base address used by every LDP */
+ );
+}
+
+/*
+ * Save the complete FPSIMD state into @state: the vector registers first,
+ * then FPSR/FPCR.
+ */
+static inline void fpsimd_save_state(struct user_fpsimd_state *state)
+{
+ fpsimd_save_vregs(state);
+ fpsimd_save_common(state);
+}
+
+/*
+ * Restore the complete FPSIMD state from @state: the vector registers first,
+ * then FPSR/FPCR.
+ */
+static inline void fpsimd_load_state(const struct user_fpsimd_state *state)
+{
+ fpsimd_load_vregs(state);
+ fpsimd_load_common(state);
+}
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
@@ -83,8 +162,8 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
struct cpu_fp_state {
struct user_fpsimd_state *st;
- void *sve_state;
- void *sme_state;
+ struct arm64_sve_state *sve_state;
+ struct arm64_sme_state *sme_state;
u64 *svcr;
u64 *fpmr;
unsigned int sve_vl;
@@ -116,40 +195,166 @@ extern void task_smstop_sm(struct task_struct *task);
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100
-/* Offset of FFR in the SVE register dump */
-static inline size_t sve_ffr_offset(int vl)
+/*
+ * Return a pointer to the ZT register storage inside thread->sme_state,
+ * computed from the thread's configured SME vector length.
+ */
+static inline void *thread_zt_state(struct thread_struct *thread)
{
- return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
+ /* The ZT register state is stored immediately after the ZA state */
+ unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
+ /* sme_state is a typed (opaque) pointer; cast so the offset is in bytes. */
+ return (void *)thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
}
-static inline void *sve_pffr(struct thread_struct *thread)
+/* Return the result of "RDVL #1": the current vector length in bytes. */
+static inline unsigned int sve_get_vl(void)
{
unsigned int vl;
- if (system_supports_sme() && thread_sm_enabled(thread))
- vl = thread_get_sme_vl(thread);
- else
- vl = thread_get_sve_vl(thread);
+ asm volatile(
+ __SVE_PREAMBLE
+ " rdvl %x[vl], #1\n" /* %x: use the X-register name for the 32-bit variable */
+ : [vl] "=r" (vl)
+ );
+
+ return vl;
+}
+
+/*
+ * Build an assembler ".irp" loop that emits @asm_str once for each Z register
+ * number (0-31) or P register number (0-15). @idx_str names the .irp symbol;
+ * inside @asm_str it is referenced with a leading backslash (written "\\" in
+ * the C string) and is replaced by the register number on each iteration.
+ */
+#define FOR_EACH_Z_REG(idx_str, asm_str) \
+ " .irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \
+ asm_str "\n" \
+ " .endr\n"
+
+#define FOR_EACH_P_REG(idx_str, asm_str) \
+ " .irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n" \
+ asm_str "\n" \
+ " .endr\n"
+
+/*
+ * Store Z0-Z31 at the start of @state; register n goes to vector-length slot
+ * n ("#n, MUL VL"). @vl is only used to size the instrumentation hook, the
+ * stores themselves use the hardware vector length.
+ */
+static inline void __sve_save_z(struct arm64_sve_state *state, unsigned long vl)
+{
+ instrument_write(state, SVE_NUM_ZREGS * vl);
+ asm volatile(
+ __SVE_PREAMBLE
+ FOR_EACH_Z_REG("n", "str z\\n, [%[zregs], #\\n, MUL VL]")
+ :
+ : [zregs] "r" (state)
+ : "memory" /* the asm writes memory that is not listed as an output */
+ );
+}
+
+/*
+ * Load Z0-Z31 from the start of @state; register n comes from vector-length
+ * slot n ("#n, MUL VL"). @vl is only used to size the instrumentation hook,
+ * the loads themselves use the hardware vector length.
+ */
+static inline void __sve_load_z(const struct arm64_sve_state *state, unsigned long vl)
+{
+ instrument_read(state, SVE_NUM_ZREGS * vl);
+ asm volatile(
+ __SVE_PREAMBLE
+ FOR_EACH_Z_REG("n", "ldr z\\n, [%[zregs], #\\n, MUL VL]")
+ :
+ : [zregs] "r" (state)
+ : "memory"
+ );
+}
+
+/*
+ * Store the predicate registers P0-P15 and the FFR slot into @state.
+ *
+ * The P registers are placed after the SVE_NUM_ZREGS vectors of @vl bytes,
+ * and the FFR slot follows the SVE_NUM_PREGS predicate slots of @vl / 8 bytes.
+ * If @ffr is true the live FFR is saved, otherwise an all-false predicate is
+ * written to the FFR slot.
+ */
+static inline void __sve_save_p(struct arm64_sve_state *state, unsigned long vl, bool ffr)
+{
+ void *pregs = (void *)state + SVE_NUM_ZREGS * vl;
+ unsigned long pl = vl / 8; /* size of one predicate slot in bytes */
+ void *pffr = pregs + SVE_NUM_PREGS * pl;
+
+ instrument_write(pregs, SVE_NUM_PREGS * pl);
+ asm volatile(
+ __SVE_PREAMBLE
+ FOR_EACH_P_REG("n", "str p\\n, [%[pregs], #\\n, MUL VL]\n")
+ :
+ : [pregs] "r" (pregs)
+ : "memory"
+ );
+
+ instrument_write(pffr, pl);
+ /*
+ * Both branches use P0 as a scratch register to write the FFR slot and
+ * then reload P0 from its saved copy at the start of pregs.
+ */
+ if (ffr) {
+ asm volatile(
+ __SVE_PREAMBLE
+ " rdffr p0.b\n"
+ " str p0, [%[pffr]]\n"
+ " ldr p0, [%[pregs]]\n"
+ :
+ : [pregs] "r" (pregs),
+ [pffr] "r" (pffr)
+ : "memory"
+ );
+ } else {
+ asm volatile(
+ __SVE_PREAMBLE
+ " pfalse p0.b\n"
+ " str p0, [%[pffr]]\n"
+ " ldr p0, [%[pregs]]\n"
+ :
+ : [pregs] "r" (pregs),
+ [pffr] "r" (pffr)
+ : "memory"
+ );
+ }
+}
+
+/*
+ * Load the predicate registers P0-P15 from @state and, if @ffr is true, the
+ * FFR as well. Uses the same layout as __sve_save_p(): P registers after the
+ * SVE_NUM_ZREGS vectors of @vl bytes, FFR slot after the predicate slots.
+ */
+static inline void __sve_load_p(const struct arm64_sve_state *state, unsigned long vl, bool ffr)
+{
+ const void *pregs = (const void *)state + SVE_NUM_ZREGS * vl;
+ unsigned long pl = vl / 8; /* size of one predicate slot in bytes */
+ const void *pffr = pregs + SVE_NUM_PREGS * pl;
+
+ /*
+ * Restore FFR first: it goes through P0 as a scratch register, and P0
+ * receives its real value in the predicate load that follows.
+ */
+ if (ffr) {
+ instrument_read(pffr, pl);
+ asm volatile(
+ __SVE_PREAMBLE
+ " ldr p0, [%[pffr]]\n"
+ " wrffr p0.b\n"
+ :
+ : [pffr] "r" (pffr)
+ : "memory"
+ );
+ }
- return (char *)thread->sve_state + sve_ffr_offset(vl);
+ instrument_read(pregs, SVE_NUM_PREGS * pl);
+ asm volatile(
+ __SVE_PREAMBLE
+ FOR_EACH_P_REG("n", "ldr p\\n, [%[pregs], #\\n, MUL VL]\n")
+ :
+ : [pregs] "r" (pregs)
+ : "memory"
+ );
}
-static inline void *thread_zt_state(struct thread_struct *thread)
+/*
+ * Save the Z and P registers (and the FFR slot, see @ffr) into @state using
+ * the vector length reported by sve_get_vl(). FPSR/FPCR are not saved here.
+ */
+static inline void sve_save_state(struct arm64_sve_state *state, bool ffr)
{
- /* The ZT register state is stored immediately after the ZA state */
- unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
- return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
+ unsigned long vl = sve_get_vl();
+ __sve_save_z(state, vl);
+ __sve_save_p(state, vl, ffr);
}
-extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
-extern void sve_load_state(void const *state, u32 const *pfpsr,
- int restore_ffr);
-extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
-extern unsigned int sve_get_vl(void);
-extern void sve_set_vq(unsigned long vq_minus_1);
-extern void sme_set_vq(unsigned long vq_minus_1);
-extern void sme_save_state(void *state, int zt);
-extern void sme_load_state(void const *state, int zt);
+/*
+ * Load the Z and P registers (and FFR if @ffr is true) from @state using the
+ * vector length reported by sve_get_vl(). FPSR/FPCR are not restored here.
+ */
+static inline void sve_load_state(const struct arm64_sve_state *state, bool ffr)
+{
+ unsigned long vl = sve_get_vl();
+ __sve_load_z(state, vl);
+ __sve_load_p(state, vl, ffr);
+}
+
+/*
+ * Zero all SVE registers except for the first 128 bits of each vector.
+ *
+ * The caller must ensure that the VL has been configured and the CPU must be
+ * in non-streaming mode.
+ */
+static inline void sve_flush_live(void)
+{
+ unsigned long vl = sve_get_vl();
+
+ /*
+ * With a 128-bit vector length there is no Z state beyond the V
+ * registers, so the Z flush is skipped. Otherwise a self-move of each
+ * V register keeps the low 128 bits and zeroes the rest of the vector.
+ */
+ if (vl > sizeof(__uint128_t)) {
+ asm volatile(
+ __FPSIMD_PREAMBLE
+ FOR_EACH_Z_REG("n", "mov v\\n\\().16b, v\\n\\().16b")
+ );
+ }
+
+ /* Set every predicate to all-false, then write the now-false P0 to FFR. */
+ asm volatile(
+ __SVE_PREAMBLE
+ FOR_EACH_P_REG("n", "pfalse p\\n\\().b")
+ " wrffr p0.b\n"
+ );
+}
struct arm64_cpu_capabilities;
extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
@@ -402,8 +607,20 @@ static inline int sme_max_virtualisable_vl(void)
return vec_max_virtualisable_vl(ARM64_VEC_SME);
}
+/* Return the result of "RDSVL #1": the streaming vector length in bytes. */
+static inline unsigned int sme_get_vl(void)
+{
+ unsigned int vl;
+
+ asm volatile(
+ __SME_PREAMBLE
+ " rdsvl %x[vl], #1\n" /* %x: use the X-register name for the 32-bit variable */
+ : [vl] "=r" (vl)
+ );
+
+ return vl;
+}
+
extern void sme_alloc(struct task_struct *task, bool flush);
-extern unsigned int sme_get_vl(void);
extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
extern void sme_suspend_exit(void);
@@ -418,6 +635,106 @@ static inline size_t __sme_state_size(unsigned int sme_vl)
return size;
}
+/*
+ * Store the ZA array into @state, one ZA array vector of @svl bytes per loop
+ * iteration; vector v is written at byte offset v * @svl, @svl vectors in
+ * total.
+ */
+static inline void __sme_save_za(struct arm64_sme_state *state, unsigned long svl)
+{
+ /*
+ * The <Wv> argument to LDR/STR (array vector) can only encode W12-W15.
+ * The "Ucj" constraint exists for this, but is only supported by GCC
+ * 14.1.0+ and LLVM 18.1.0+.
+ */
+ register unsigned int v asm ("w12");
+
+ instrument_write(state, svl * svl);
+ for (v = 0; v < svl; v++) {
+ void *pav = (void *)state + v * svl; /* destination of vector v */
+
+ asm volatile(
+ __SME_PREAMBLE
+ " str za[%w[v], #0], [%[pav]]\n"
+ :
+ : [v] "r" (v),
+ [pav] "r" (pav)
+ : "memory"
+ );
+ }
+}
+
+/*
+ * Load the ZA array from @state, one ZA array vector of @svl bytes per loop
+ * iteration; vector v is read from byte offset v * @svl, @svl vectors in
+ * total.
+ */
+static inline void __sme_load_za(const struct arm64_sme_state *state, unsigned long svl)
+{
+ /* See comment in __sme_save_za */
+ register unsigned int v asm ("w12");
+
+ instrument_read(state, svl * svl);
+ for (v = 0; v < svl; v++) {
+ /* This path only reads @state, so keep the const qualifier. */
+ const void *pav = (const void *)state + v * svl;
+
+ asm volatile(
+ __SME_PREAMBLE
+ " ldr za[%w[v], #0], [%[pav]]\n"
+ :
+ : [v] "r" (v),
+ [pav] "r" (pav)
+ : "memory"
+ );
+ }
+}
+
+/*
+ * Store ZT0 (64 bytes) into @state, directly after the @svl * @svl bytes used
+ * for the ZA array.
+ */
+static inline void __sme_save_zt(struct arm64_sme_state *state, unsigned long svl)
+{
+ void *pzt = (void *)state + svl * svl;
+
+ instrument_write(pzt, 64);
+ asm volatile(
+ __DEFINE_ASM_GPR_NUMS
+ /*
+ * STR ZT0, [<Xn|SP>]
+ * Supported by binutils 2.41+.
+ * Supported by LLVM 16+
+ */
+ /* Hand-encoded: the base register number is OR-ed in at bit 5. */
+ " .inst 0xe13f8000 | ((.L__gpr_num_%[pzt]) << 5)\n"
+ :
+ : [pzt] "r" (pzt)
+ : "memory"
+ );
+}
+
+/*
+ * Load ZT0 (64 bytes) from @state, directly after the @svl * @svl bytes used
+ * for the ZA array.
+ */
+static inline void __sme_load_zt(const struct arm64_sme_state *state, unsigned long svl)
+{
+ /* This path only reads @state, so keep the const qualifier. */
+ const void *pzt = (const void *)state + svl * svl;
+
+ instrument_read(pzt, 64);
+ asm volatile(
+ __DEFINE_ASM_GPR_NUMS
+ /*
+ * LDR ZT0, [<Xn|SP>]
+ * Supported by binutils 2.41+.
+ * Supported by LLVM 16+
+ */
+ /* Hand-encoded: the base register number is OR-ed in at bit 5. */
+ " .inst 0xe11f8000 | ((.L__gpr_num_%[pzt]) << 5)\n"
+ :
+ : [pzt] "r" (pzt)
+ : "memory"
+ );
+}
+
+/*
+ * Save the ZA array into @state using the vector length reported by
+ * sme_get_vl(), followed by ZT0 when @zt is true.
+ */
+static inline void sme_save_state(struct arm64_sme_state *state, bool zt)
+{
+ unsigned long svl = sme_get_vl();
+
+ __sme_save_za(state, svl);
+ if (zt)
+ __sme_save_zt(state, svl);
+}
+
+/*
+ * Load the ZA array from @state using the vector length reported by
+ * sme_get_vl(), followed by ZT0 when @zt is true.
+ */
+static inline void sme_load_state(const struct arm64_sme_state *state, bool zt)
+{
+ unsigned long svl = sme_get_vl();
+
+ __sme_load_za(state, svl);
+ if (zt)
+ __sme_load_zt(state, svl);
+}
+
/*
* Return how many bytes of memory are required to store the full SME
* specific state for task, given task's currently configured vector
@@ -474,6 +791,9 @@ static inline size_t sme_state_size(struct task_struct const *task)
return 0;
}
+/*
+ * Stub variants (presumably the !CONFIG_ARM64_SME branch of this header -
+ * TODO confirm against the full file): BUILD_BUG() makes the build fail if a
+ * call to either function is not optimised away.
+ */
+static inline void sme_save_state(struct arm64_sme_state *state, bool zt) { BUILD_BUG(); }
+static inline void sme_load_state(const struct arm64_sme_state *state, bool zt) { BUILD_BUG(); }
+
static inline void sme_enter_from_user_mode(void) { }
static inline void sme_exit_to_user_mode(void) { }
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
deleted file mode 100644
index cda81d009c9b..000000000000
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ /dev/null
@@ -1,357 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * FP/SIMD state saving and restoring macros
- *
- * Copyright (C) 2012 ARM Ltd.
- * Author: Catalin Marinas <catalin.marinas@arm.com>
- */
-
-#include <asm/assembler.h>
-
-.macro fpsimd_save state, tmpnr
- stp q0, q1, [\state, #16 * 0]
- stp q2, q3, [\state, #16 * 2]
- stp q4, q5, [\state, #16 * 4]
- stp q6, q7, [\state, #16 * 6]
- stp q8, q9, [\state, #16 * 8]
- stp q10, q11, [\state, #16 * 10]
- stp q12, q13, [\state, #16 * 12]
- stp q14, q15, [\state, #16 * 14]
- stp q16, q17, [\state, #16 * 16]
- stp q18, q19, [\state, #16 * 18]
- stp q20, q21, [\state, #16 * 20]
- stp q22, q23, [\state, #16 * 22]
- stp q24, q25, [\state, #16 * 24]
- stp q26, q27, [\state, #16 * 26]
- stp q28, q29, [\state, #16 * 28]
- stp q30, q31, [\state, #16 * 30]!
- mrs x\tmpnr, fpsr
- str w\tmpnr, [\state, #16 * 2]
- mrs x\tmpnr, fpcr
- str w\tmpnr, [\state, #16 * 2 + 4]
-.endm
-
-.macro fpsimd_restore_fpcr state, tmp
- /*
- * Writes to fpcr may be self-synchronising, so avoid restoring
- * the register if it hasn't changed.
- */
- mrs \tmp, fpcr
- cmp \tmp, \state
- b.eq 9999f
- msr fpcr, \state
-9999:
-.endm
-
-/* Clobbers \state */
-.macro fpsimd_restore state, tmpnr
- ldp q0, q1, [\state, #16 * 0]
- ldp q2, q3, [\state, #16 * 2]
- ldp q4, q5, [\state, #16 * 4]
- ldp q6, q7, [\state, #16 * 6]
- ldp q8, q9, [\state, #16 * 8]
- ldp q10, q11, [\state, #16 * 10]
- ldp q12, q13, [\state, #16 * 12]
- ldp q14, q15, [\state, #16 * 14]
- ldp q16, q17, [\state, #16 * 16]
- ldp q18, q19, [\state, #16 * 18]
- ldp q20, q21, [\state, #16 * 20]
- ldp q22, q23, [\state, #16 * 22]
- ldp q24, q25, [\state, #16 * 24]
- ldp q26, q27, [\state, #16 * 26]
- ldp q28, q29, [\state, #16 * 28]
- ldp q30, q31, [\state, #16 * 30]!
- ldr w\tmpnr, [\state, #16 * 2]
- msr fpsr, x\tmpnr
- ldr w\tmpnr, [\state, #16 * 2 + 4]
- fpsimd_restore_fpcr x\tmpnr, \state
-.endm
-
-/* Sanity-check macros to help avoid encoding garbage instructions */
-
-.macro _check_general_reg nr
- .if (\nr) < 0 || (\nr) > 30
- .error "Bad register number \nr."
- .endif
-.endm
-
-.macro _sve_check_zreg znr
- .if (\znr) < 0 || (\znr) > 31
- .error "Bad Scalable Vector Extension vector register number \znr."
- .endif
-.endm
-
-.macro _sve_check_preg pnr
- .if (\pnr) < 0 || (\pnr) > 15
- .error "Bad Scalable Vector Extension predicate register number \pnr."
- .endif
-.endm
-
-.macro _check_num n, min, max
- .if (\n) < (\min) || (\n) > (\max)
- .error "Number \n out of range [\min,\max]"
- .endif
-.endm
-
-.macro _sme_check_wv v
- .if (\v) < 12 || (\v) > 15
- .error "Bad vector select register \v."
- .endif
-.endm
-
-/* SVE instruction encodings for non-SVE-capable assemblers */
-/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
-
-/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_str_v nz, nxbase, offset=0
- _sve_check_zreg \nz
- _check_general_reg \nxbase
- _check_num (\offset), -0x100, 0xff
- .inst 0xe5804000 \
- | (\nz) \
- | ((\nxbase) << 5) \
- | (((\offset) & 7) << 10) \
- | (((\offset) & 0x1f8) << 13)
-.endm
-
-/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_ldr_v nz, nxbase, offset=0
- _sve_check_zreg \nz
- _check_general_reg \nxbase
- _check_num (\offset), -0x100, 0xff
- .inst 0x85804000 \
- | (\nz) \
- | ((\nxbase) << 5) \
- | (((\offset) & 7) << 10) \
- | (((\offset) & 0x1f8) << 13)
-.endm
-
-/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_str_p np, nxbase, offset=0
- _sve_check_preg \np
- _check_general_reg \nxbase
- _check_num (\offset), -0x100, 0xff
- .inst 0xe5800000 \
- | (\np) \
- | ((\nxbase) << 5) \
- | (((\offset) & 7) << 10) \
- | (((\offset) & 0x1f8) << 13)
-.endm
-
-/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
-.macro _sve_ldr_p np, nxbase, offset=0
- _sve_check_preg \np
- _check_general_reg \nxbase
- _check_num (\offset), -0x100, 0xff
- .inst 0x85800000 \
- | (\np) \
- | ((\nxbase) << 5) \
- | (((\offset) & 7) << 10) \
- | (((\offset) & 0x1f8) << 13)
-.endm
-
-/* RDVL X\nx, #\imm */
-.macro _sve_rdvl nx, imm
- _check_general_reg \nx
- _check_num (\imm), -0x20, 0x1f
- .inst 0x04bf5000 \
- | (\nx) \
- | (((\imm) & 0x3f) << 5)
-.endm
-
-/* RDFFR (unpredicated): RDFFR P\np.B */
-.macro _sve_rdffr np
- _sve_check_preg \np
- .inst 0x2519f000 \
- | (\np)
-.endm
-
-/* WRFFR P\np.B */
-.macro _sve_wrffr np
- _sve_check_preg \np
- .inst 0x25289000 \
- | ((\np) << 5)
-.endm
-
-/* PFALSE P\np.B */
-.macro _sve_pfalse np
- _sve_check_preg \np
- .inst 0x2518e400 \
- | (\np)
-.endm
-
-/* SME instruction encodings for non-SME-capable assemblers */
-/* (pre binutils 2.38/LLVM 13) */
-
-/* RDSVL X\nx, #\imm */
-.macro _sme_rdsvl nx, imm
- _check_general_reg \nx
- _check_num (\imm), -0x20, 0x1f
- .inst 0x04bf5800 \
- | (\nx) \
- | (((\imm) & 0x3f) << 5)
-.endm
-
-/*
- * STR (vector from ZA array):
- * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
- */
-.macro _sme_str_zav nw, nxbase, offset=0
- _sme_check_wv \nw
- _check_general_reg \nxbase
- _check_num (\offset), -0x100, 0xff
- .inst 0xe1200000 \
- | (((\nw) & 3) << 13) \
- | ((\nxbase) << 5) \
- | ((\offset) & 7)
-.endm
-
-/*
- * LDR (vector to ZA array):
- * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
- */
-.macro _sme_ldr_zav nw, nxbase, offset=0
- _sme_check_wv \nw
- _check_general_reg \nxbase
- _check_num (\offset), -0x100, 0xff
- .inst 0xe1000000 \
- | (((\nw) & 3) << 13) \
- | ((\nxbase) << 5) \
- | ((\offset) & 7)
-.endm
-
-/*
- * LDR (ZT0)
- *
- * LDR ZT0, nx
- */
-.macro _ldr_zt nx
- _check_general_reg \nx
- .inst 0xe11f8000 \
- | (\nx << 5)
-.endm
-
-/*
- * STR (ZT0)
- *
- * STR ZT0, nx
- */
-.macro _str_zt nx
- _check_general_reg \nx
- .inst 0xe13f8000 \
- | (\nx << 5)
-.endm
-
-.macro __for from:req, to:req
- .if (\from) == (\to)
- _for__body %\from
- .else
- __for %\from, %((\from) + ((\to) - (\from)) / 2)
- __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
- .endif
-.endm
-
-.macro _for var:req, from:req, to:req, insn:vararg
- .macro _for__body \var:req
- .noaltmacro
- \insn
- .altmacro
- .endm
-
- .altmacro
- __for \from, \to
- .noaltmacro
-
- .purgem _for__body
-.endm
-
-/* Update ZCR_EL1.LEN with the new VQ */
-.macro sve_load_vq xvqminus1, xtmp, xtmp2
- mrs_s \xtmp, SYS_ZCR_EL1
- bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK
- orr \xtmp2, \xtmp2, \xvqminus1
- cmp \xtmp2, \xtmp
- b.eq 921f
- msr_s SYS_ZCR_EL1, \xtmp2 //self-synchronising
-921:
-.endm
-
-/* Update SMCR_EL1.LEN with the new VQ */
-.macro sme_load_vq xvqminus1, xtmp, xtmp2
- mrs_s \xtmp, SYS_SMCR_EL1
- bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
- orr \xtmp2, \xtmp2, \xvqminus1
- cmp \xtmp2, \xtmp
- b.eq 921f
- msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
-921:
-.endm
-
-/* Preserve the first 128-bits of Znz and zero the rest. */
-.macro _sve_flush_z nz
- _sve_check_zreg \nz
- mov v\nz\().16b, v\nz\().16b
-.endm
-
-.macro sve_flush_z
- _for n, 0, 31, _sve_flush_z \n
-.endm
-.macro sve_flush_p
- _for n, 0, 15, _sve_pfalse \n
-.endm
-.macro sve_flush_ffr
- _sve_wrffr 0
-.endm
-
-.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
- _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
- _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
- cbz \save_ffr, 921f
- _sve_rdffr 0
- b 922f
-921:
- _sve_pfalse 0 // Zero out FFR
-922:
- _sve_str_p 0, \nxbase
- _sve_ldr_p 0, \nxbase, -16
- mrs x\nxtmp, fpsr
- str w\nxtmp, [\xpfpsr]
- mrs x\nxtmp, fpcr
- str w\nxtmp, [\xpfpsr, #4]
-.endm
-
-.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
- _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
- cbz \restore_ffr, 921f
- _sve_ldr_p 0, \nxbase
- _sve_wrffr 0
-921:
- _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16
-
- ldr w\nxtmp, [\xpfpsr]
- msr fpsr, x\nxtmp
- ldr w\nxtmp, [\xpfpsr, #4]
- msr fpcr, x\nxtmp
-.endm
-
-.macro sme_save_za nxbase, xvl, nw
- mov w\nw, #0
-
-423:
- _sme_str_zav \nw, \nxbase
- add x\nxbase, x\nxbase, \xvl
- add x\nw, x\nw, #1
- cmp \xvl, x\nw
- bne 423b
-.endm
-
-.macro sme_load_za nxbase, xvl, nw
- mov w\nw, #0
-
-423:
- _sme_ldr_zav \nw, \nxbase
- add x\nxbase, x\nxbase, \xvl
- add x\nw, x\nw, #1
- cmp \xvl, x\nw
- bne 423b
-.endm
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 851f6171751c..639f9dd967f9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -735,20 +735,6 @@ struct kvm_cpu_context {
u64 *vncr_array;
};
-struct cpu_sve_state {
- __u64 zcr_el1;
-
- /*
- * Ordering is important since __sve_save_state/__sve_restore_state
- * relies on it.
- */
- __u32 fpsr;
- __u32 fpcr;
-
- /* Must be SVE_VQ_BYTES (128 bit) aligned. */
- __u8 sve_regs[];
-};
-
/*
* This structure is instantiated on a per-CPU basis, and contains
* data that is:
@@ -774,12 +760,9 @@ struct kvm_host_data {
/*
* Hyp VA.
- * sve_state is only used in pKVM and if system_supports_sve().
+ * sve_regs is only used in pKVM and if system_supports_sve().
*/
- struct cpu_sve_state *sve_state;
-
- /* Used by pKVM only. */
- u64 fpmr;
+ struct arm64_sve_state *sve_regs;
/* Ownership of the FP regs */
enum {
@@ -873,7 +856,7 @@ struct kvm_vcpu_arch {
* floating point code saves the register state of a task it
* records which view it saved in fp_type.
*/
- void *sve_state;
+ struct arm64_sve_state *sve_state;
enum fp_type fp_type;
unsigned int sve_max_vl;
@@ -1117,10 +1100,6 @@ struct kvm_vcpu_arch {
#define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8))
-/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
-#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
- sve_ffr_offset((vcpu)->arch.sve_max_vl))
-
#define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl)
#define vcpu_sve_zcr_elx(vcpu) \
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 8d06b62e7188..ad19de1d0654 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -121,11 +121,6 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
#endif
-void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
-void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr);
-void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr);
-
u64 __guest_enter(struct kvm_vcpu *vcpu);
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 2954b311128c..74fedd9c5ff0 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -188,8 +188,7 @@ static inline size_t pkvm_host_sve_state_size(void)
if (!system_supports_sve())
return 0;
- return size_add(sizeof(struct cpu_sve_state),
- SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
+ return SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl));
}
struct pkvm_mapping {
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index e30c4c8e3a7a..c2a627f39314 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -130,6 +130,9 @@ enum fp_type {
FP_STATE_SVE,
};
+struct arm64_sve_state; /* Opaque type */
+struct arm64_sme_state; /* Opaque type */
+
struct cpu_context {
unsigned long x19;
unsigned long x20;
@@ -164,8 +167,8 @@ struct thread_struct {
enum fp_type fp_type; /* registers FPSIMD or SVE? */
unsigned int fpsimd_cpu;
- void *sve_state; /* SVE registers, if any */
- void *sme_state; /* ZA and ZT state, if any */
+ struct arm64_sve_state *sve_state; /* SVE registers, if any */
+ struct arm64_sme_state *sme_state; /* ZA and ZT state, if any */
unsigned int vl[ARM64_VEC_MAX]; /* vector length */
unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
unsigned long fault_address; /* fault info */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 74b76bb70452..d2690c3ec528 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -27,7 +27,7 @@ KCOV_INSTRUMENT_idle.o := n
# Object file lists.
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
- entry-common.o entry-fpsimd.o process.o ptrace.o \
+ entry-common.o process.o ptrace.o \
setup.o signal.o sys.o stacktrace.o time.o traps.o \
io.o vdso.o hyp-stub.o psci.o cpu_ops.o \
return_address.o cpuinfo.o cpu_errata.o \
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index cb54335465f6..2352297330e1 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -237,12 +237,8 @@ static inline void fpsimd_syscall_enter(void)
if (!system_supports_sve())
return;
- if (test_thread_flag(TIF_SVE)) {
- unsigned int sve_vq_minus_one;
-
- sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
- sve_flush_live(true, sve_vq_minus_one);
- }
+ if (test_thread_flag(TIF_SVE))
+ sve_flush_live();
/*
* Any live non-FPSIMD SVE state has been zeroed. Allow
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
deleted file mode 100644
index 6325db1a2179..000000000000
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * FP/SIMD state saving and restoring
- *
- * Copyright (C) 2012 ARM Ltd.
- * Author: Catalin Marinas <catalin.marinas@arm.com>
- */
-
-#include <linux/linkage.h>
-
-#include <asm/assembler.h>
-#include <asm/fpsimdmacros.h>
-
-/*
- * Save the FP registers.
- *
- * x0 - pointer to struct fpsimd_state
- */
-SYM_FUNC_START(fpsimd_save_state)
- fpsimd_save x0, 8
- ret
-SYM_FUNC_END(fpsimd_save_state)
-
-/*
- * Load the FP registers.
- *
- * x0 - pointer to struct fpsimd_state
- */
-SYM_FUNC_START(fpsimd_load_state)
- fpsimd_restore x0, 8
- ret
-SYM_FUNC_END(fpsimd_load_state)
-
-#ifdef CONFIG_ARM64_SVE
-
-/*
- * Save the SVE state
- *
- * x0 - pointer to buffer for state
- * x1 - pointer to storage for FPSR
- * x2 - Save FFR if non-zero
- */
-SYM_FUNC_START(sve_save_state)
- sve_save 0, x1, x2, 3
- ret
-SYM_FUNC_END(sve_save_state)
-
-/*
- * Load the SVE state
- *
- * x0 - pointer to buffer for state
- * x1 - pointer to storage for FPSR
- * x2 - Restore FFR if non-zero
- */
-SYM_FUNC_START(sve_load_state)
- sve_load 0, x1, x2, 4
- ret
-SYM_FUNC_END(sve_load_state)
-
-SYM_FUNC_START(sve_get_vl)
- _sve_rdvl 0, 1
- ret
-SYM_FUNC_END(sve_get_vl)
-
-SYM_FUNC_START(sve_set_vq)
- sve_load_vq x0, x1, x2
- ret
-SYM_FUNC_END(sve_set_vq)
-
-/*
- * Zero all SVE registers but the first 128-bits of each vector
- *
- * VQ must already be configured by caller, any further updates of VQ
- * will need to ensure that the register state remains valid.
- *
- * x0 = include FFR?
- * x1 = VQ - 1
- */
-SYM_FUNC_START(sve_flush_live)
- cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state
- sve_flush_z
-1: sve_flush_p
- tbz x0, #0, 2f
- sve_flush_ffr
-2: ret
-SYM_FUNC_END(sve_flush_live)
-
-#endif /* CONFIG_ARM64_SVE */
-
-#ifdef CONFIG_ARM64_SME
-
-SYM_FUNC_START(sme_get_vl)
- _sme_rdsvl 0, 1
- ret
-SYM_FUNC_END(sme_get_vl)
-
-SYM_FUNC_START(sme_set_vq)
- sme_load_vq x0, x1, x2
- ret
-SYM_FUNC_END(sme_set_vq)
-
-/*
- * Save the ZA and ZT state
- *
- * x0 - pointer to buffer for state
- * x1 - number of ZT registers to save
- */
-SYM_FUNC_START(sme_save_state)
- _sme_rdsvl 2, 1 // x2 = VL/8
- sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
-
- cbz x1, 1f
- _str_zt 0
-1:
- ret
-SYM_FUNC_END(sme_save_state)
-
-/*
- * Load the ZA and ZT state
- *
- * x0 - pointer to buffer for state
- * x1 - number of ZT registers to save
- */
-SYM_FUNC_START(sme_load_state)
- _sme_rdsvl 2, 1 // x2 = VL/8
- sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA
-
- cbz x1, 1f
- _ldr_zt 0
-1:
- ret
-SYM_FUNC_END(sme_load_state)
-
-#endif /* CONFIG_ARM64_SME */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 60a45d600b46..25dc5afe9ba0 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -377,8 +377,10 @@ static void task_fpsimd_load(void)
if (!thread_sm_enabled(&current->thread))
WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE));
- if (test_thread_flag(TIF_SVE))
- sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
+ if (test_thread_flag(TIF_SVE)) {
+ unsigned long vq = sve_vq_from_vl(task_get_sve_vl(current));
+ sysreg_clear_set_s(SYS_ZCR_EL1, ZCR_ELx_LEN, vq - 1);
+ }
restore_sve_regs = true;
restore_ffr = true;
@@ -403,8 +405,10 @@ static void task_fpsimd_load(void)
unsigned long sme_vl = task_get_sme_vl(current);
/* Ensure VL is set up for restoring data */
- if (test_thread_flag(TIF_SME))
- sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
+ if (test_thread_flag(TIF_SME)) {
+ unsigned long vq = sve_vq_from_vl(sme_vl);
+ sysreg_clear_set_s(SYS_SMCR_EL1, SMCR_ELx_LEN, vq - 1);
+ }
write_sysreg_s(current->thread.svcr, SYS_SVCR);
@@ -421,9 +425,8 @@ static void task_fpsimd_load(void)
if (restore_sve_regs) {
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
- sve_load_state(sve_pffr(&current->thread),
- &current->thread.uw.fpsimd_state.fpsr,
- restore_ffr);
+ sve_load_state(current->thread.sve_state, restore_ffr);
+ fpsimd_load_common(&current->thread.uw.fpsimd_state);
} else {
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
fpsimd_load_state(&current->thread.uw.fpsimd_state);
@@ -503,9 +506,8 @@ static void fpsimd_save_user_state(void)
return;
}
- sve_save_state((char *)last->sve_state +
- sve_ffr_offset(vl),
- &last->st->fpsr, save_ffr);
+ sve_save_state(last->sve_state, save_ffr);
+ fpsimd_save_common(last->st);
*last->fp_type = FP_STATE_SVE;
} else {
fpsimd_save_state(last->st);
@@ -636,7 +638,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x)
#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
-static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
+static void __fpsimd_to_sve(struct arm64_sve_state *sst,
+ struct user_fpsimd_state const *fst,
unsigned int vq)
{
unsigned int i;
@@ -663,7 +666,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
static inline void fpsimd_to_sve(struct task_struct *task)
{
unsigned int vq;
- void *sst = task->thread.sve_state;
+ struct arm64_sve_state *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
if (!system_supports_sve() && !system_supports_sme())
@@ -687,7 +690,7 @@ static inline void fpsimd_to_sve(struct task_struct *task)
static inline void sve_to_fpsimd(struct task_struct *task)
{
unsigned int vq, vl;
- void const *sst = task->thread.sve_state;
+ const struct arm64_sve_state *sst = task->thread.sve_state;
struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
unsigned int i;
__uint128_t const *p;
@@ -786,7 +789,7 @@ void fpsimd_sync_from_effective_state(struct task_struct *task)
void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task)
{
unsigned int vq;
- void *sst = task->thread.sve_state;
+ struct arm64_sve_state *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
if (task->thread.fp_type != FP_STATE_SVE)
@@ -804,7 +807,8 @@ static int change_live_vector_length(struct task_struct *task,
{
unsigned int sve_vl = task_get_sve_vl(task);
unsigned int sme_vl = task_get_sme_vl(task);
- void *sve_state = NULL, *sme_state = NULL;
+ struct arm64_sve_state *sve_state = NULL;
+ struct arm64_sme_state *sme_state = NULL;
if (type == ARM64_VEC_SME)
sme_vl = vl;
@@ -1293,31 +1297,6 @@ void sme_suspend_exit(void)
#endif /* CONFIG_ARM64_SME */
-static void sve_init_regs(void)
-{
- /*
- * Convert the FPSIMD state to SVE, zeroing all the state that
- * is not shared with FPSIMD. If (as is likely) the current
- * state is live in the registers then do this there and
- * update our metadata for the current task including
- * disabling the trap, otherwise update our in-memory copy.
- * We are guaranteed to not be in streaming mode, we can only
- * take a SVE trap when not in streaming mode and we can't be
- * in streaming mode when taking a SME trap.
- */
- if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
- unsigned long vq_minus_one =
- sve_vq_from_vl(task_get_sve_vl(current)) - 1;
- sve_set_vq(vq_minus_one);
- sve_flush_live(true, vq_minus_one);
- fpsimd_bind_task_to_cpu();
- } else {
- fpsimd_to_sve(current);
- current->thread.fp_type = FP_STATE_SVE;
- fpsimd_flush_task_state(current);
- }
-}
-
/*
* Trapped SVE access
*
@@ -1349,13 +1328,23 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
WARN_ON(1); /* SVE access shouldn't have trapped */
/*
- * Even if the task can have used streaming mode we can only
- * generate SVE access traps in normal SVE mode and
- * transitioning out of streaming mode may discard any
- * streaming mode state. Always clear the high bits to avoid
- * any potential errors tracking what is properly initialised.
+ * Convert the FPSIMD state to SVE. Stale SVE state can be present in
+ * registers or memory, so we must zero all state that is not shared
+ * with FPSIMD.
+ *
+ * SVE traps cannot be taken from streaming mode, so there cannot be
+ * any effective streaming mode SVE state.
*/
- sve_init_regs();
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ unsigned long vq = sve_vq_from_vl(task_get_sve_vl(current));
+ sysreg_clear_set_s(SYS_ZCR_EL1, ZCR_ELx_LEN, vq - 1);
+ sve_flush_live();
+ fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_to_sve(current);
+ current->thread.fp_type = FP_STATE_SVE;
+ fpsimd_flush_task_state(current);
+ }
put_cpu_fpsimd_context();
}
@@ -1479,9 +1468,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
WARN_ON(1);
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
- unsigned long vq_minus_one =
- sve_vq_from_vl(task_get_sme_vl(current)) - 1;
- sme_set_vq(vq_minus_one);
+ unsigned long vq = sve_vq_from_vl(task_get_sme_vl(current));
+ sysreg_clear_set_s(SYS_SMCR_EL1, SMCR_ELx_LEN, vq - 1);
fpsimd_bind_task_to_cpu();
} else {
@@ -1656,8 +1644,8 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
void fpsimd_flush_thread(void)
{
- void *sve_state = NULL;
- void *sme_state = NULL;
+ struct arm64_sve_state *sve_state = NULL;
+ struct arm64_sme_state *sme_state = NULL;
if (!system_supports_fpsimd())
return;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 176cbe8baad3..d84e80b2ce54 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2495,10 +2495,10 @@ static void __init teardown_hyp_mode(void)
continue;
if (free_sve) {
- struct cpu_sve_state *sve_state;
+ struct arm64_sve_state *sve_regs;
- sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
- free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
+ sve_regs = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs;
+ free_pages((unsigned long) sve_regs, pkvm_host_sve_state_order());
}
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
@@ -2623,7 +2623,7 @@ static int init_pkvm_host_sve_state(void)
if (!page)
return -ENOMEM;
- per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = page_address(page);
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs = page_address(page);
}
/*
@@ -2644,11 +2644,11 @@ static void finalize_init_hyp_mode(void)
if (system_supports_sve() && is_protected_kvm_enabled()) {
for_each_possible_cpu(cpu) {
- struct cpu_sve_state *sve_state;
+ struct arm64_sve_state *sve_regs;
- sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
- per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
- kern_hyp_va(sve_state);
+ sve_regs = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs =
+ kern_hyp_va(sve_regs);
}
}
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 332c453b87cf..b01d6622b872 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -500,7 +500,7 @@ static int get_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!kvm_arm_vcpu_sve_finalized(vcpu))
return -EPERM;
- if (copy_to_user(uptr, vcpu->arch.sve_state + region.koffset,
+ if (copy_to_user(uptr, (void *)vcpu->arch.sve_state + region.koffset,
region.klen) ||
clear_user(uptr + region.klen, region.upad))
return -EFAULT;
@@ -526,7 +526,7 @@ static int set_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!kvm_arm_vcpu_sve_finalized(vcpu))
return -EPERM;
- if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr,
+ if (copy_from_user((void *)vcpu->arch.sve_state + region.koffset, uptr,
region.klen))
return -EFAULT;
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 11a10d8f5beb..308100ed25de 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -8,7 +8,6 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
-#include <asm/fpsimdmacros.h>
#include <asm/kvm.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
deleted file mode 100644
index e950875e31ce..000000000000
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/linkage.h>
-
-#include <asm/fpsimdmacros.h>
-
- .text
-
-SYM_FUNC_START(__fpsimd_save_state)
- fpsimd_save x0, 1
- ret
-SYM_FUNC_END(__fpsimd_save_state)
-
-SYM_FUNC_START(__fpsimd_restore_state)
- fpsimd_restore x0, 1
- ret
-SYM_FUNC_END(__fpsimd_restore_state)
-
-SYM_FUNC_START(__sve_restore_state)
- mov x2, #1
- sve_load 0, x1, x2, 3
- ret
-SYM_FUNC_END(__sve_restore_state)
-
-SYM_FUNC_START(__sve_save_state)
- mov x2, #1
- sve_save 0, x1, x2, 3
- ret
-SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 98b2976837b1..1f12c4ba295a 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -467,9 +467,8 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
* vCPU. Start off with the max VL so we can load the SVE state.
*/
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
- __sve_restore_state(vcpu_sve_pffr(vcpu),
- &vcpu->arch.ctxt.fp_regs.fpsr,
- true);
+ sve_load_state(kern_hyp_va(vcpu->arch.sve_state), true);
+ fpsimd_load_common(&vcpu->arch.ctxt.fp_regs);
/*
* The effective VL for a VM could differ from the max VL when running a
@@ -484,13 +483,13 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
static inline void __hyp_sve_save_host(void)
{
- struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+ struct arm64_sve_state *sve_regs = *host_data_ptr(sve_regs);
- sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ ctxt_sys_reg(hctxt, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
- __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
- &sve_state->fpsr,
- true);
+ sve_save_state(sve_regs, true);
+ fpsimd_save_common(&hctxt->fp_regs);
}
static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -554,6 +553,8 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
{
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+
/*
* Non-protected kvm relies on the host restoring its sve state.
* Protected kvm restores the host's sve state as not to reveal that
@@ -562,11 +563,11 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
if (system_supports_sve()) {
__hyp_sve_save_host();
} else {
- __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
+ fpsimd_save_state(&hctxt->fp_regs);
}
if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
- *host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR);
+ ctxt_sys_reg(hctxt, FPMR) = read_sysreg_s(SYS_FPMR);
}
@@ -622,7 +623,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
if (sve_guest)
__hyp_sve_restore_guest(vcpu);
else
- __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
+ fpsimd_load_state(&vcpu->arch.ctxt.fp_regs);
if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR);
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 62cdfbff7562..f57450ebcb49 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -26,7 +26,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
- ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
+ ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
hyp-obj-y += ../../../kernel/smccc-call.o
hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o trace.o events.o
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 73f2e0221e70..676f756e084d 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -35,13 +35,15 @@ static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
* on the VL, so use a consistent (i.e., the maximum) guest VL.
*/
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
- __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
+ sve_save_state(kern_hyp_va(vcpu->arch.sve_state), true);
+ fpsimd_save_common(&vcpu->arch.ctxt.fp_regs);
write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
}
static void __hyp_sve_restore_host(void)
{
- struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+ struct arm64_sve_state *sve_regs = *host_data_ptr(sve_regs);
/*
* On saving/restoring host sve state, always use the maximum VL for
@@ -53,10 +55,9 @@ static void __hyp_sve_restore_host(void)
* need to be revisited.
*/
write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
- __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
- &sve_state->fpsr,
- true);
- write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+ sve_load_state(sve_regs, true);
+ fpsimd_load_common(&hctxt->fp_regs);
+ write_sysreg_el1(ctxt_sys_reg(hctxt, ZCR_EL1), SYS_ZCR);
}
static void fpsimd_sve_flush(void)
@@ -66,6 +67,7 @@ static void fpsimd_sve_flush(void)
static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
{
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
bool has_fpmr;
if (!guest_owns_fp_regs())
@@ -80,7 +82,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
if (vcpu_has_sve(vcpu))
__hyp_sve_save_guest(vcpu);
else
- __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+ fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm));
if (has_fpmr)
@@ -89,10 +91,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
if (system_supports_sve())
__hyp_sve_restore_host();
else
- __fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs));
+ fpsimd_load_state(&hctxt->fp_regs);
if (has_fpmr)
- write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
+ write_sysreg_s(ctxt_sys_reg(hctxt, FPMR), SYS_FPMR);
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index d8e5b563fd3d..e704199048c4 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -82,9 +82,9 @@ static int pkvm_create_host_sve_mappings(void)
for (i = 0; i < hyp_nr_cpus; i++) {
struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
- struct cpu_sve_state *sve_state = host_data->sve_state;
+ struct arm64_sve_state *sve_regs = host_data->sve_regs;
- start = kern_hyp_va(sve_state);
+ start = kern_hyp_va(sve_regs);
end = start + PAGE_ALIGN(pkvm_host_sve_state_size());
ret = pkvm_create_mappings(start, end, PAGE_HYP);
if (ret)
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
index 9695328bbd96..d6b3475145c0 100644
--- a/arch/arm64/kvm/hyp/vhe/Makefile
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
- ../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5-sr.o
+ ../hyp-entry.o ../exception.o ../vgic-v5-sr.o
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 6c3ff14e561e..8b219d656660 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -3790,6 +3790,51 @@ Field 1 ZA
Field 0 SM
EndSysreg
+Sysreg FPCR 3 3 4 4 0
+Res0 63:27
+Field 26 AHP
+Field 25 DN
+Field 24 FZ
+Enum 23:22 RMode
+ 0b00 RN
+ 0b01 RP
+ 0b10 RM
+ 0b11 RZ
+EndEnum
+Field 21:20 Stride
+Field 19 FZ16
+Field 18:16 Len
+Field 15 IDE
+Res0 14
+Field 13 EBF
+Field 12 IXE
+Field 11 UFE
+Field 10 OFE
+Field 9 DZE
+Field 8 IOE
+Res0 7:3
+Field 2 NEP
+Field 1 AH
+Field 0 FIZ
+EndSysreg
+
+Sysreg FPSR 3 3 4 4 1
+Res0 63:32
+Field 31 N
+Field 30 Z
+Field 29 C
+Field 28 V
+Field 27 QC
+Res0 26:8
+Field 7 IDC
+Res0 6:5
+Field 4 IXC
+Field 3 UFC
+Field 2 OFC
+Field 1 DZC
+Field 0 IOC
+EndSysreg
+
Sysreg FPMR 3 3 4 4 2
Res0 63:38
Field 37:32 LSCALE2