diff options
| author | Will Deacon <will@kernel.org> | 2026-06-14 14:16:59 +0300 |
|---|---|---|
| committer | Will Deacon <will@kernel.org> | 2026-06-14 14:16:59 +0300 |
| commit | 35d2b77d8dd76cfccf54cc0c6453584ea4f31224 (patch) | |
| tree | 3f3850b5a08a10d3bacf8e8aecb2b67585cb4894 | |
| parent | 0fccc93585c11e594920e5d203d152e89bf16687 (diff) | |
| parent | 987ec51e18419cc0ebf6f6fa1cfbfd149eca443d (diff) | |
| download | linux-35d2b77d8dd76cfccf54cc0c6453584ea4f31224.tar.xz | |
Merge branch 'for-next/fpsimd-cleanups' into for-next/core
* for-next/fpsimd-cleanups:
arm64: fpsimd: Remove <asm/fpsimdmacros.h>
arm64: fpsimd: Move SME save/restore inline
arm64: fpsimd: Move sve_flush_live() inline
arm64: fpsimd: Move SVE save/restore inline
arm64: fpsimd: Use opaque type for SME state
arm64: fpsimd: Use opaque type for SVE state
arm64: fpsimd: Move fpsimd save/restore inline
arm64: fpsimd: Split FPSR/FPCR from SVE save/restore
arm64: sysreg: Add FPCR and FPSR
arm64: fpsimd: Move sve_get_vl() and sme_get_vl() inline
arm64: fpsimd: Use assembler for baseline SME instructions
arm64: fpsimd: Use assembler for SVE instructions
arm64: fpsimd: Remove sve_set_vq() and sme_set_vq()
arm64: fpsimd: Fold sve_init_regs() into do_sve_acc()
KVM: arm64: pkvm: Remove struct cpu_sve_state
KVM: arm64: pkvm: Save host FPMR in host cpu context
KVM: arm64: Don't override FFR save/restore argument
KVM: arm64: Don't include <asm/fpsimdmacros.h>
arm64: fpsimd: Fix type mismatch in sme_{save,load}_state()
arm64: fpsimd: Fix type mismatch in sve_{save,load}_state()
| -rw-r--r-- | arch/arm64/Kconfig | 5 | ||||
| -rw-r--r-- | arch/arm64/include/asm/fpsimd.h | 374 | ||||
| -rw-r--r-- | arch/arm64/include/asm/fpsimdmacros.h | 357 | ||||
| -rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 27 | ||||
| -rw-r--r-- | arch/arm64/include/asm/kvm_hyp.h | 5 | ||||
| -rw-r--r-- | arch/arm64/include/asm/kvm_pkvm.h | 3 | ||||
| -rw-r--r-- | arch/arm64/include/asm/processor.h | 7 | ||||
| -rw-r--r-- | arch/arm64/kernel/Makefile | 2 | ||||
| -rw-r--r-- | arch/arm64/kernel/entry-common.c | 8 | ||||
| -rw-r--r-- | arch/arm64/kernel/entry-fpsimd.S | 134 | ||||
| -rw-r--r-- | arch/arm64/kernel/fpsimd.c | 90 | ||||
| -rw-r--r-- | arch/arm64/kvm/arm.c | 16 | ||||
| -rw-r--r-- | arch/arm64/kvm/guest.c | 4 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/entry.S | 1 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/fpsimd.S | 33 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/switch.h | 23 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/nvhe/Makefile | 2 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-main.c | 20 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/nvhe/setup.c | 4 | ||||
| -rw-r--r-- | arch/arm64/kvm/hyp/vhe/Makefile | 2 | ||||
| -rw-r--r-- | arch/arm64/tools/sysreg | 45 |
21 files changed, 485 insertions, 677 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c053f012c6a6..3af035b73878 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2305,10 +2305,15 @@ config ARM64_SVE booting the kernel. If unsure and you are not observing these symptoms, you should assume that it is safe to say Y. +config AS_HAS_SME + # Supported by LLVM 13+ and binutils 2.38+ + def_bool $(as-instr,.arch_extension sme) + config ARM64_SME bool "ARM Scalable Matrix Extension support" default y depends on ARM64_SVE + depends on AS_HAS_SME help The Scalable Matrix Extension (SME) is an extension to the AArch64 execution state which utilises a substantial subset of the SVE diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index d9d00b45ab11..a67d5774e672 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -22,6 +22,11 @@ #include <linux/stddef.h> #include <linux/types.h> +#define __FPSIMD_PREAMBLE ".arch_extension fp\n" \ + ".arch_extension simd\n" +#define __SVE_PREAMBLE ".arch_extension sve\n" +#define __SME_PREAMBLE ".arch_extension sme\n" + /* Masks for extracting the FPSR and FPCR from the FPSCR */ #define VFP_FPSCR_STAT_MASK 0xf800009f #define VFP_FPSCR_CTRL_MASK 0x07f79f00 @@ -71,8 +76,82 @@ static inline void cpacr_restore(unsigned long cpacr) struct task_struct; -extern void fpsimd_save_state(struct user_fpsimd_state *state); -extern void fpsimd_load_state(struct user_fpsimd_state *state); +static inline void fpsimd_save_common(struct user_fpsimd_state *state) +{ + state->fpsr = read_sysreg_s(SYS_FPSR); + state->fpcr = read_sysreg_s(SYS_FPCR); +} + +static inline void fpsimd_load_common(const struct user_fpsimd_state *state) +{ + write_sysreg_s(state->fpsr, SYS_FPSR); + write_sysreg_s(state->fpcr, SYS_FPCR); +} + +static inline void fpsimd_save_vregs(struct user_fpsimd_state *state) +{ + instrument_write(state->vregs, sizeof(state->vregs)); + asm volatile( + __FPSIMD_PREAMBLE + " stp q0, q1, [%[vregs], #16 * 0]\n" 
+ " stp q2, q3, [%[vregs], #16 * 2]\n" + " stp q4, q5, [%[vregs], #16 * 4]\n" + " stp q6, q7, [%[vregs], #16 * 6]\n" + " stp q8, q9, [%[vregs], #16 * 8]\n" + " stp q10, q11, [%[vregs], #16 * 10]\n" + " stp q12, q13, [%[vregs], #16 * 12]\n" + " stp q14, q15, [%[vregs], #16 * 14]\n" + " stp q16, q17, [%[vregs], #16 * 16]\n" + " stp q18, q19, [%[vregs], #16 * 18]\n" + " stp q20, q21, [%[vregs], #16 * 20]\n" + " stp q22, q23, [%[vregs], #16 * 22]\n" + " stp q24, q25, [%[vregs], #16 * 24]\n" + " stp q26, q27, [%[vregs], #16 * 26]\n" + " stp q28, q29, [%[vregs], #16 * 28]\n" + " stp q30, q31, [%[vregs], #16 * 30]\n" + : "=Q" (state->vregs) + : [vregs] "r" (state->vregs) + ); +} + +static inline void fpsimd_load_vregs(const struct user_fpsimd_state *state) +{ + instrument_read(state->vregs, sizeof(state->vregs)); + asm volatile( + __FPSIMD_PREAMBLE + " ldp q0, q1, [%[vregs], #16 * 0]\n" + " ldp q2, q3, [%[vregs], #16 * 2]\n" + " ldp q4, q5, [%[vregs], #16 * 4]\n" + " ldp q6, q7, [%[vregs], #16 * 6]\n" + " ldp q8, q9, [%[vregs], #16 * 8]\n" + " ldp q10, q11, [%[vregs], #16 * 10]\n" + " ldp q12, q13, [%[vregs], #16 * 12]\n" + " ldp q14, q15, [%[vregs], #16 * 14]\n" + " ldp q16, q17, [%[vregs], #16 * 16]\n" + " ldp q18, q19, [%[vregs], #16 * 18]\n" + " ldp q20, q21, [%[vregs], #16 * 20]\n" + " ldp q22, q23, [%[vregs], #16 * 22]\n" + " ldp q24, q25, [%[vregs], #16 * 24]\n" + " ldp q26, q27, [%[vregs], #16 * 26]\n" + " ldp q28, q29, [%[vregs], #16 * 28]\n" + " ldp q30, q31, [%[vregs], #16 * 30]\n" + : + : "Q" (state->vregs), + [vregs] "r" (state->vregs) + ); +} + +static inline void fpsimd_save_state(struct user_fpsimd_state *state) +{ + fpsimd_save_vregs(state); + fpsimd_save_common(state); +} + +static inline void fpsimd_load_state(const struct user_fpsimd_state *state) +{ + fpsimd_load_vregs(state); + fpsimd_load_common(state); +} extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); @@ -83,8 +162,8 @@ extern void 
fpsimd_update_current_state(struct user_fpsimd_state const *state); struct cpu_fp_state { struct user_fpsimd_state *st; - void *sve_state; - void *sme_state; + struct arm64_sve_state *sve_state; + struct arm64_sme_state *sme_state; u64 *svcr; u64 *fpmr; unsigned int sve_vl; @@ -116,40 +195,166 @@ extern void task_smstop_sm(struct task_struct *task); /* Maximum VL that SVE/SME VL-agnostic software can transparently support */ #define VL_ARCH_MAX 0x100 -/* Offset of FFR in the SVE register dump */ -static inline size_t sve_ffr_offset(int vl) +static inline void *thread_zt_state(struct thread_struct *thread) { - return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET; + /* The ZT register state is stored immediately after the ZA state */ + unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread)); + return (void *)thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq); } -static inline void *sve_pffr(struct thread_struct *thread) +static inline unsigned int sve_get_vl(void) { unsigned int vl; - if (system_supports_sme() && thread_sm_enabled(thread)) - vl = thread_get_sme_vl(thread); - else - vl = thread_get_sve_vl(thread); + asm volatile( + __SVE_PREAMBLE + " rdvl %x[vl], #1\n" + : [vl] "=r" (vl) + ); + + return vl; +} + +#define FOR_EACH_Z_REG(idx_str, asm_str) \ + " .irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ + asm_str "\n" \ + " .endr\n" + +#define FOR_EACH_P_REG(idx_str, asm_str) \ + " .irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n" \ + asm_str "\n" \ + " .endr\n" + +static inline void __sve_save_z(struct arm64_sve_state *state, unsigned long vl) +{ + instrument_write(state, SVE_NUM_ZREGS * vl); + asm volatile( + __SVE_PREAMBLE + FOR_EACH_Z_REG("n", "str z\\n, [%[zregs], #\\n, MUL VL]") + : + : [zregs] "r" (state) + : "memory" + ); +} + +static inline void __sve_load_z(const struct arm64_sve_state *state, unsigned long vl) +{ + instrument_read(state, SVE_NUM_ZREGS * vl); + asm 
volatile( + __SVE_PREAMBLE + FOR_EACH_Z_REG("n", "ldr z\\n, [%[zregs], #\\n, MUL VL]") + : + : [zregs] "r" (state) + : "memory" + ); +} + +static inline void __sve_save_p(struct arm64_sve_state *state, unsigned long vl, bool ffr) +{ + void *pregs = (void *)state + SVE_NUM_ZREGS * vl; + unsigned long pl = vl / 8; + void *pffr = pregs + SVE_NUM_PREGS * pl; + + instrument_write(pregs, SVE_NUM_PREGS * pl); + asm volatile( + __SVE_PREAMBLE + FOR_EACH_P_REG("n", "str p\\n, [%[pregs], #\\n, MUL VL]\n") + : + : [pregs] "r" (pregs) + : "memory" + ); + + instrument_write(pffr, pl); + if (ffr) { + asm volatile( + __SVE_PREAMBLE + " rdffr p0.b\n" + " str p0, [%[pffr]]\n" + " ldr p0, [%[pregs]]\n" + : + : [pregs] "r" (pregs), + [pffr] "r" (pffr) + : "memory" + ); + } else { + asm volatile( + __SVE_PREAMBLE + " pfalse p0.b\n" + " str p0, [%[pffr]]\n" + " ldr p0, [%[pregs]]\n" + : + : [pregs] "r" (pregs), + [pffr] "r" (pffr) + : "memory" + ); + } +} + +static inline void __sve_load_p(const struct arm64_sve_state *state, unsigned long vl, bool ffr) +{ + const void *pregs = (const void *)state + SVE_NUM_ZREGS * vl; + unsigned long pl = vl / 8; + const void *pffr = pregs + SVE_NUM_PREGS * pl; + + if (ffr) { + instrument_read(pffr, pl); + asm volatile( + __SVE_PREAMBLE + " ldr p0, [%[pffr]]\n" + " wrffr p0.b\n" + : + : [pffr] "r" (pffr) + : "memory" + ); + } - return (char *)thread->sve_state + sve_ffr_offset(vl); + instrument_read(pregs, SVE_NUM_PREGS * pl); + asm volatile( + __SVE_PREAMBLE + FOR_EACH_P_REG("n", "ldr p\\n, [%[pregs], #\\n, MUL VL]\n") + : + : [pregs] "r" (pregs) + : "memory" + ); } -static inline void *thread_zt_state(struct thread_struct *thread) +static inline void sve_save_state(struct arm64_sve_state *state, bool ffr) { - /* The ZT register state is stored immediately after the ZA state */ - unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread)); - return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq); + unsigned long vl = sve_get_vl(); + 
__sve_save_z(state, vl); + __sve_save_p(state, vl, ffr); } -extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); -extern void sve_load_state(void const *state, u32 const *pfpsr, - int restore_ffr); -extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); -extern unsigned int sve_get_vl(void); -extern void sve_set_vq(unsigned long vq_minus_1); -extern void sme_set_vq(unsigned long vq_minus_1); -extern void sme_save_state(void *state, int zt); -extern void sme_load_state(void const *state, int zt); +static inline void sve_load_state(const struct arm64_sve_state *state, bool ffr) +{ + unsigned long vl = sve_get_vl(); + __sve_load_z(state, vl); + __sve_load_p(state, vl, ffr); +} + +/* + * Zero all SVE registers except for the first 128 bits of each vector. + * + * The caller must ensure that the VL has been configured and the CPU must be + * in non-streaming mode. + */ +static inline void sve_flush_live(void) +{ + unsigned long vl = sve_get_vl(); + + if (vl > sizeof(__uint128_t)) { + asm volatile( + __FPSIMD_PREAMBLE + FOR_EACH_Z_REG("n", "mov v\\n\\().16b, v\\n\\().16b") + ); + } + + asm volatile( + __SVE_PREAMBLE + FOR_EACH_P_REG("n", "pfalse p\\n\\().b") + " wrffr p0.b\n" + ); +} struct arm64_cpu_capabilities; extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused); @@ -402,8 +607,20 @@ static inline int sme_max_virtualisable_vl(void) return vec_max_virtualisable_vl(ARM64_VEC_SME); } +static inline unsigned int sme_get_vl(void) +{ + unsigned int vl; + + asm volatile( + __SME_PREAMBLE + " rdsvl %x[vl], #1\n" + : [vl] "=r" (vl) + ); + + return vl; +} + extern void sme_alloc(struct task_struct *task, bool flush); -extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); extern void sme_suspend_exit(void); @@ -418,6 +635,106 @@ static inline size_t __sme_state_size(unsigned int sme_vl) return size; } +static inline void __sme_save_za(struct 
arm64_sme_state *state, unsigned long svl) +{ + /* + * The <Wv> argument to LDR/STR (array vector) can only encode W12-W15. + * The "Ucj" constraint exists for this, but is only supported by GCC + * 14.1.0+ and LLVM 18.1.0+. + */ + register unsigned int v asm ("w12"); + + instrument_write(state, svl * svl); + for (v = 0; v < svl; v++) { + void *pav = (void *)state + v * svl; + + asm volatile( + __SME_PREAMBLE + " str za[%w[v], #0], [%[pav]]\n" + : + : [v] "r" (v), + [pav] "r" (pav) + : "memory" + ); + } +} + +static inline void __sme_load_za(const struct arm64_sme_state *state, unsigned long svl) +{ + /* See comment in __sme_save_za */ + register unsigned int v asm ("w12"); + + instrument_read(state, svl * svl); + for (v = 0; v < svl; v++) { + void *pav = (void *)state + v * svl; + + asm volatile( + __SME_PREAMBLE + " ldr za[%w[v], #0], [%[pav]]\n" + : + : [v] "r" (v), + [pav] "r" (pav) + : "memory" + ); + } +} + +static inline void __sme_save_zt(struct arm64_sme_state *state, unsigned long svl) +{ + void *pzt = (void *)state + svl * svl; + + instrument_write(pzt, 64); + asm volatile( + __DEFINE_ASM_GPR_NUMS + /* + * STR ZT0, [<Xn|SP>] + * Supported by binutils 2.41+. + * Supported by LLVM 16+ + */ + " .inst 0xe13f8000 | ((.L__gpr_num_%[pzt]) << 5)\n" + : + : [pzt] "r" (pzt) + : "memory" + ); +} + +static inline void __sme_load_zt(const struct arm64_sme_state *state, unsigned long svl) +{ + void *pzt = (void *)state + svl * svl; + + instrument_read(pzt, 64); + asm volatile( + __DEFINE_ASM_GPR_NUMS + /* + * LDR ZT0, [<Xn|SP>] + * Supported by binutils 2.41+. 
+ * Supported by LLVM 16+ + */ + " .inst 0xe11f8000 | ((.L__gpr_num_%[pzt]) << 5)\n" + : + : [pzt] "r" (pzt) + : "memory" + ); +} + +static inline void sme_save_state(struct arm64_sme_state *state, bool zt) +{ + unsigned long svl = sme_get_vl(); + + __sme_save_za(state, svl); + if (zt) + __sme_save_zt(state, svl); +} + +static inline void sme_load_state(const struct arm64_sme_state *state, bool zt) +{ + unsigned long svl = sme_get_vl(); + + __sme_load_za(state, svl); + if (zt) + __sme_load_zt(state, svl); +} + /* * Return how many bytes of memory are required to store the full SME * specific state for task, given task's currently configured vector @@ -474,6 +791,9 @@ static inline size_t sme_state_size(struct task_struct const *task) return 0; } +static inline void sme_save_state(struct arm64_sme_state *state, bool zt) { BUILD_BUG(); } +static inline void sme_load_state(const struct arm64_sme_state *state, bool zt) { BUILD_BUG(); } + static inline void sme_enter_from_user_mode(void) { } static inline void sme_exit_to_user_mode(void) { } diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h deleted file mode 100644 index cda81d009c9b..000000000000 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ /dev/null @@ -1,357 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * FP/SIMD state saving and restoring macros - * - * Copyright (C) 2012 ARM Ltd. 
- * Author: Catalin Marinas <catalin.marinas@arm.com> - */ - -#include <asm/assembler.h> - -.macro fpsimd_save state, tmpnr - stp q0, q1, [\state, #16 * 0] - stp q2, q3, [\state, #16 * 2] - stp q4, q5, [\state, #16 * 4] - stp q6, q7, [\state, #16 * 6] - stp q8, q9, [\state, #16 * 8] - stp q10, q11, [\state, #16 * 10] - stp q12, q13, [\state, #16 * 12] - stp q14, q15, [\state, #16 * 14] - stp q16, q17, [\state, #16 * 16] - stp q18, q19, [\state, #16 * 18] - stp q20, q21, [\state, #16 * 20] - stp q22, q23, [\state, #16 * 22] - stp q24, q25, [\state, #16 * 24] - stp q26, q27, [\state, #16 * 26] - stp q28, q29, [\state, #16 * 28] - stp q30, q31, [\state, #16 * 30]! - mrs x\tmpnr, fpsr - str w\tmpnr, [\state, #16 * 2] - mrs x\tmpnr, fpcr - str w\tmpnr, [\state, #16 * 2 + 4] -.endm - -.macro fpsimd_restore_fpcr state, tmp - /* - * Writes to fpcr may be self-synchronising, so avoid restoring - * the register if it hasn't changed. - */ - mrs \tmp, fpcr - cmp \tmp, \state - b.eq 9999f - msr fpcr, \state -9999: -.endm - -/* Clobbers \state */ -.macro fpsimd_restore state, tmpnr - ldp q0, q1, [\state, #16 * 0] - ldp q2, q3, [\state, #16 * 2] - ldp q4, q5, [\state, #16 * 4] - ldp q6, q7, [\state, #16 * 6] - ldp q8, q9, [\state, #16 * 8] - ldp q10, q11, [\state, #16 * 10] - ldp q12, q13, [\state, #16 * 12] - ldp q14, q15, [\state, #16 * 14] - ldp q16, q17, [\state, #16 * 16] - ldp q18, q19, [\state, #16 * 18] - ldp q20, q21, [\state, #16 * 20] - ldp q22, q23, [\state, #16 * 22] - ldp q24, q25, [\state, #16 * 24] - ldp q26, q27, [\state, #16 * 26] - ldp q28, q29, [\state, #16 * 28] - ldp q30, q31, [\state, #16 * 30]! - ldr w\tmpnr, [\state, #16 * 2] - msr fpsr, x\tmpnr - ldr w\tmpnr, [\state, #16 * 2 + 4] - fpsimd_restore_fpcr x\tmpnr, \state -.endm - -/* Sanity-check macros to help avoid encoding garbage instructions */ - -.macro _check_general_reg nr - .if (\nr) < 0 || (\nr) > 30 - .error "Bad register number \nr." 
- .endif -.endm - -.macro _sve_check_zreg znr - .if (\znr) < 0 || (\znr) > 31 - .error "Bad Scalable Vector Extension vector register number \znr." - .endif -.endm - -.macro _sve_check_preg pnr - .if (\pnr) < 0 || (\pnr) > 15 - .error "Bad Scalable Vector Extension predicate register number \pnr." - .endif -.endm - -.macro _check_num n, min, max - .if (\n) < (\min) || (\n) > (\max) - .error "Number \n out of range [\min,\max]" - .endif -.endm - -.macro _sme_check_wv v - .if (\v) < 12 || (\v) > 15 - .error "Bad vector select register \v." - .endif -.endm - -/* SVE instruction encodings for non-SVE-capable assemblers */ -/* (pre binutils 2.28, all kernel capable clang versions support SVE) */ - -/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */ -.macro _sve_str_v nz, nxbase, offset=0 - _sve_check_zreg \nz - _check_general_reg \nxbase - _check_num (\offset), -0x100, 0xff - .inst 0xe5804000 \ - | (\nz) \ - | ((\nxbase) << 5) \ - | (((\offset) & 7) << 10) \ - | (((\offset) & 0x1f8) << 13) -.endm - -/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */ -.macro _sve_ldr_v nz, nxbase, offset=0 - _sve_check_zreg \nz - _check_general_reg \nxbase - _check_num (\offset), -0x100, 0xff - .inst 0x85804000 \ - | (\nz) \ - | ((\nxbase) << 5) \ - | (((\offset) & 7) << 10) \ - | (((\offset) & 0x1f8) << 13) -.endm - -/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */ -.macro _sve_str_p np, nxbase, offset=0 - _sve_check_preg \np - _check_general_reg \nxbase - _check_num (\offset), -0x100, 0xff - .inst 0xe5800000 \ - | (\np) \ - | ((\nxbase) << 5) \ - | (((\offset) & 7) << 10) \ - | (((\offset) & 0x1f8) << 13) -.endm - -/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */ -.macro _sve_ldr_p np, nxbase, offset=0 - _sve_check_preg \np - _check_general_reg \nxbase - _check_num (\offset), -0x100, 0xff - .inst 0x85800000 \ - | (\np) \ - | ((\nxbase) << 5) \ - | (((\offset) & 7) << 10) \ - | (((\offset) & 0x1f8) << 13) -.endm - -/* RDVL X\nx, #\imm */ -.macro 
_sve_rdvl nx, imm - _check_general_reg \nx - _check_num (\imm), -0x20, 0x1f - .inst 0x04bf5000 \ - | (\nx) \ - | (((\imm) & 0x3f) << 5) -.endm - -/* RDFFR (unpredicated): RDFFR P\np.B */ -.macro _sve_rdffr np - _sve_check_preg \np - .inst 0x2519f000 \ - | (\np) -.endm - -/* WRFFR P\np.B */ -.macro _sve_wrffr np - _sve_check_preg \np - .inst 0x25289000 \ - | ((\np) << 5) -.endm - -/* PFALSE P\np.B */ -.macro _sve_pfalse np - _sve_check_preg \np - .inst 0x2518e400 \ - | (\np) -.endm - -/* SME instruction encodings for non-SME-capable assemblers */ -/* (pre binutils 2.38/LLVM 13) */ - -/* RDSVL X\nx, #\imm */ -.macro _sme_rdsvl nx, imm - _check_general_reg \nx - _check_num (\imm), -0x20, 0x1f - .inst 0x04bf5800 \ - | (\nx) \ - | (((\imm) & 0x3f) << 5) -.endm - -/* - * STR (vector from ZA array): - * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] - */ -.macro _sme_str_zav nw, nxbase, offset=0 - _sme_check_wv \nw - _check_general_reg \nxbase - _check_num (\offset), -0x100, 0xff - .inst 0xe1200000 \ - | (((\nw) & 3) << 13) \ - | ((\nxbase) << 5) \ - | ((\offset) & 7) -.endm - -/* - * LDR (vector to ZA array): - * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] - */ -.macro _sme_ldr_zav nw, nxbase, offset=0 - _sme_check_wv \nw - _check_general_reg \nxbase - _check_num (\offset), -0x100, 0xff - .inst 0xe1000000 \ - | (((\nw) & 3) << 13) \ - | ((\nxbase) << 5) \ - | ((\offset) & 7) -.endm - -/* - * LDR (ZT0) - * - * LDR ZT0, nx - */ -.macro _ldr_zt nx - _check_general_reg \nx - .inst 0xe11f8000 \ - | (\nx << 5) -.endm - -/* - * STR (ZT0) - * - * STR ZT0, nx - */ -.macro _str_zt nx - _check_general_reg \nx - .inst 0xe13f8000 \ - | (\nx << 5) -.endm - -.macro __for from:req, to:req - .if (\from) == (\to) - _for__body %\from - .else - __for %\from, %((\from) + ((\to) - (\from)) / 2) - __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to - .endif -.endm - -.macro _for var:req, from:req, to:req, insn:vararg - .macro _for__body \var:req - .noaltmacro - \insn - .altmacro - 
.endm - - .altmacro - __for \from, \to - .noaltmacro - - .purgem _for__body -.endm - -/* Update ZCR_EL1.LEN with the new VQ */ -.macro sve_load_vq xvqminus1, xtmp, xtmp2 - mrs_s \xtmp, SYS_ZCR_EL1 - bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK - orr \xtmp2, \xtmp2, \xvqminus1 - cmp \xtmp2, \xtmp - b.eq 921f - msr_s SYS_ZCR_EL1, \xtmp2 //self-synchronising -921: -.endm - -/* Update SMCR_EL1.LEN with the new VQ */ -.macro sme_load_vq xvqminus1, xtmp, xtmp2 - mrs_s \xtmp, SYS_SMCR_EL1 - bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK - orr \xtmp2, \xtmp2, \xvqminus1 - cmp \xtmp2, \xtmp - b.eq 921f - msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising -921: -.endm - -/* Preserve the first 128-bits of Znz and zero the rest. */ -.macro _sve_flush_z nz - _sve_check_zreg \nz - mov v\nz\().16b, v\nz\().16b -.endm - -.macro sve_flush_z - _for n, 0, 31, _sve_flush_z \n -.endm -.macro sve_flush_p - _for n, 0, 15, _sve_pfalse \n -.endm -.macro sve_flush_ffr - _sve_wrffr 0 -.endm - -.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp - _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 - _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 - cbz \save_ffr, 921f - _sve_rdffr 0 - b 922f -921: - _sve_pfalse 0 // Zero out FFR -922: - _sve_str_p 0, \nxbase - _sve_ldr_p 0, \nxbase, -16 - mrs x\nxtmp, fpsr - str w\nxtmp, [\xpfpsr] - mrs x\nxtmp, fpcr - str w\nxtmp, [\xpfpsr, #4] -.endm - -.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp - _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 - cbz \restore_ffr, 921f - _sve_ldr_p 0, \nxbase - _sve_wrffr 0 -921: - _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16 - - ldr w\nxtmp, [\xpfpsr] - msr fpsr, x\nxtmp - ldr w\nxtmp, [\xpfpsr, #4] - msr fpcr, x\nxtmp -.endm - -.macro sme_save_za nxbase, xvl, nw - mov w\nw, #0 - -423: - _sme_str_zav \nw, \nxbase - add x\nxbase, x\nxbase, \xvl - add x\nw, x\nw, #1 - cmp \xvl, x\nw - bne 423b -.endm - -.macro sme_load_za nxbase, xvl, nw - mov w\nw, #0 - -423: - _sme_ldr_zav \nw, \nxbase - add x\nxbase, x\nxbase, \xvl - add x\nw, x\nw, #1 - cmp 
\xvl, x\nw - bne 423b -.endm diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 851f6171751c..639f9dd967f9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -735,20 +735,6 @@ struct kvm_cpu_context { u64 *vncr_array; }; -struct cpu_sve_state { - __u64 zcr_el1; - - /* - * Ordering is important since __sve_save_state/__sve_restore_state - * relies on it. - */ - __u32 fpsr; - __u32 fpcr; - - /* Must be SVE_VQ_BYTES (128 bit) aligned. */ - __u8 sve_regs[]; -}; - /* * This structure is instantiated on a per-CPU basis, and contains * data that is: @@ -774,12 +760,9 @@ struct kvm_host_data { /* * Hyp VA. - * sve_state is only used in pKVM and if system_supports_sve(). + * sve_regs is only used in pKVM and if system_supports_sve(). */ - struct cpu_sve_state *sve_state; - - /* Used by pKVM only. */ - u64 fpmr; + struct arm64_sve_state *sve_regs; /* Ownership of the FP regs */ enum { @@ -873,7 +856,7 @@ struct kvm_vcpu_arch { * floating point code saves the register state of a task it * records which view it saved in fp_type. 
*/ - void *sve_state; + struct arm64_sve_state *sve_state; enum fp_type fp_type; unsigned int sve_max_vl; @@ -1117,10 +1100,6 @@ struct kvm_vcpu_arch { #define NESTED_SERROR_PENDING __vcpu_single_flag(sflags, BIT(8)) -/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ -#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ - sve_ffr_offset((vcpu)->arch.sve_max_vl)) - #define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl) #define vcpu_sve_zcr_elx(vcpu) \ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 8d06b62e7188..ad19de1d0654 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -121,11 +121,6 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu); void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); #endif -void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); -void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); -void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr); -void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr); - u64 __guest_enter(struct kvm_vcpu *vcpu); bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 2954b311128c..74fedd9c5ff0 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -188,8 +188,7 @@ static inline size_t pkvm_host_sve_state_size(void) if (!system_supports_sve()) return 0; - return size_add(sizeof(struct cpu_sve_state), - SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl))); + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)); } struct pkvm_mapping { diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index e30c4c8e3a7a..c2a627f39314 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -130,6 +130,9 @@ enum fp_type { FP_STATE_SVE, 
}; +struct arm64_sve_state; /* Opaque type */ +struct arm64_sme_state; /* Opaque type */ + struct cpu_context { unsigned long x19; unsigned long x20; @@ -164,8 +167,8 @@ struct thread_struct { enum fp_type fp_type; /* registers FPSIMD or SVE? */ unsigned int fpsimd_cpu; - void *sve_state; /* SVE registers, if any */ - void *sme_state; /* ZA and ZT state, if any */ + struct arm64_sve_state *sve_state; /* SVE registers, if any */ + struct arm64_sme_state *sme_state; /* ZA and ZT state, if any */ unsigned int vl[ARM64_VEC_MAX]; /* vector length */ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 74b76bb70452..d2690c3ec528 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -27,7 +27,7 @@ KCOV_INSTRUMENT_idle.o := n # Object file lists. obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ - entry-common.o entry-fpsimd.o process.o ptrace.o \ + entry-common.o process.o ptrace.o \ setup.o signal.o sys.o stacktrace.o time.o traps.o \ io.o vdso.o hyp-stub.o psci.o cpu_ops.o \ return_address.o cpuinfo.o cpu_errata.o \ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index cb54335465f6..2352297330e1 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -237,12 +237,8 @@ static inline void fpsimd_syscall_enter(void) if (!system_supports_sve()) return; - if (test_thread_flag(TIF_SVE)) { - unsigned int sve_vq_minus_one; - - sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1; - sve_flush_live(true, sve_vq_minus_one); - } + if (test_thread_flag(TIF_SVE)) + sve_flush_live(); /* * Any live non-FPSIMD SVE state has been zeroed. 
Allow diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S deleted file mode 100644 index 6325db1a2179..000000000000 --- a/arch/arm64/kernel/entry-fpsimd.S +++ /dev/null @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * FP/SIMD state saving and restoring - * - * Copyright (C) 2012 ARM Ltd. - * Author: Catalin Marinas <catalin.marinas@arm.com> - */ - -#include <linux/linkage.h> - -#include <asm/assembler.h> -#include <asm/fpsimdmacros.h> - -/* - * Save the FP registers. - * - * x0 - pointer to struct fpsimd_state - */ -SYM_FUNC_START(fpsimd_save_state) - fpsimd_save x0, 8 - ret -SYM_FUNC_END(fpsimd_save_state) - -/* - * Load the FP registers. - * - * x0 - pointer to struct fpsimd_state - */ -SYM_FUNC_START(fpsimd_load_state) - fpsimd_restore x0, 8 - ret -SYM_FUNC_END(fpsimd_load_state) - -#ifdef CONFIG_ARM64_SVE - -/* - * Save the SVE state - * - * x0 - pointer to buffer for state - * x1 - pointer to storage for FPSR - * x2 - Save FFR if non-zero - */ -SYM_FUNC_START(sve_save_state) - sve_save 0, x1, x2, 3 - ret -SYM_FUNC_END(sve_save_state) - -/* - * Load the SVE state - * - * x0 - pointer to buffer for state - * x1 - pointer to storage for FPSR - * x2 - Restore FFR if non-zero - */ -SYM_FUNC_START(sve_load_state) - sve_load 0, x1, x2, 4 - ret -SYM_FUNC_END(sve_load_state) - -SYM_FUNC_START(sve_get_vl) - _sve_rdvl 0, 1 - ret -SYM_FUNC_END(sve_get_vl) - -SYM_FUNC_START(sve_set_vq) - sve_load_vq x0, x1, x2 - ret -SYM_FUNC_END(sve_set_vq) - -/* - * Zero all SVE registers but the first 128-bits of each vector - * - * VQ must already be configured by caller, any further updates of VQ - * will need to ensure that the register state remains valid. - * - * x0 = include FFR? 
- * x1 = VQ - 1 - */ -SYM_FUNC_START(sve_flush_live) - cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state - sve_flush_z -1: sve_flush_p - tbz x0, #0, 2f - sve_flush_ffr -2: ret -SYM_FUNC_END(sve_flush_live) - -#endif /* CONFIG_ARM64_SVE */ - -#ifdef CONFIG_ARM64_SME - -SYM_FUNC_START(sme_get_vl) - _sme_rdsvl 0, 1 - ret -SYM_FUNC_END(sme_get_vl) - -SYM_FUNC_START(sme_set_vq) - sme_load_vq x0, x1, x2 - ret -SYM_FUNC_END(sme_set_vq) - -/* - * Save the ZA and ZT state - * - * x0 - pointer to buffer for state - * x1 - number of ZT registers to save - */ -SYM_FUNC_START(sme_save_state) - _sme_rdsvl 2, 1 // x2 = VL/8 - sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA - - cbz x1, 1f - _str_zt 0 -1: - ret -SYM_FUNC_END(sme_save_state) - -/* - * Load the ZA and ZT state - * - * x0 - pointer to buffer for state - * x1 - number of ZT registers to save - */ -SYM_FUNC_START(sme_load_state) - _sme_rdsvl 2, 1 // x2 = VL/8 - sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA - - cbz x1, 1f - _ldr_zt 0 -1: - ret -SYM_FUNC_END(sme_load_state) - -#endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 60a45d600b46..25dc5afe9ba0 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -377,8 +377,10 @@ static void task_fpsimd_load(void) if (!thread_sm_enabled(&current->thread)) WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)); - if (test_thread_flag(TIF_SVE)) - sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); + if (test_thread_flag(TIF_SVE)) { + unsigned long vq = sve_vq_from_vl(task_get_sve_vl(current)); + sysreg_clear_set_s(SYS_ZCR_EL1, ZCR_ELx_LEN, vq - 1); + } restore_sve_regs = true; restore_ffr = true; @@ -403,8 +405,10 @@ static void task_fpsimd_load(void) unsigned long sme_vl = task_get_sme_vl(current); /* Ensure VL is set up for restoring data */ - if (test_thread_flag(TIF_SME)) - sme_set_vq(sve_vq_from_vl(sme_vl) - 1); + if (test_thread_flag(TIF_SME)) { + unsigned long vq =
sve_vq_from_vl(sme_vl); + sysreg_clear_set_s(SYS_SMCR_EL1, SMCR_ELx_LEN, vq - 1); + } write_sysreg_s(current->thread.svcr, SYS_SVCR); @@ -421,9 +425,8 @@ static void task_fpsimd_load(void) if (restore_sve_regs) { WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE); - sve_load_state(sve_pffr(&current->thread), - &current->thread.uw.fpsimd_state.fpsr, - restore_ffr); + sve_load_state(current->thread.sve_state, restore_ffr); + fpsimd_load_common(&current->thread.uw.fpsimd_state); } else { WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD); fpsimd_load_state(&current->thread.uw.fpsimd_state); @@ -503,9 +506,8 @@ static void fpsimd_save_user_state(void) return; } - sve_save_state((char *)last->sve_state + - sve_ffr_offset(vl), - &last->st->fpsr, save_ffr); + sve_save_state(last->sve_state, save_ffr); + fpsimd_save_common(last->st); *last->fp_type = FP_STATE_SVE; } else { fpsimd_save_state(last->st); @@ -636,7 +638,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x) #define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x) -static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst, +static void __fpsimd_to_sve(struct arm64_sve_state *sst, + struct user_fpsimd_state const *fst, unsigned int vq) { unsigned int i; @@ -663,7 +666,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst, static inline void fpsimd_to_sve(struct task_struct *task) { unsigned int vq; - void *sst = task->thread.sve_state; + struct arm64_sve_state *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; if (!system_supports_sve() && !system_supports_sme()) @@ -687,7 +690,7 @@ static inline void fpsimd_to_sve(struct task_struct *task) static inline void sve_to_fpsimd(struct task_struct *task) { unsigned int vq, vl; - void const *sst = task->thread.sve_state; + const struct arm64_sve_state *sst = task->thread.sve_state; struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; unsigned int i; __uint128_t const *p; @@ -786,7 
+789,7 @@ void fpsimd_sync_from_effective_state(struct task_struct *task) void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { unsigned int vq; - void *sst = task->thread.sve_state; + struct arm64_sve_state *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; if (task->thread.fp_type != FP_STATE_SVE) @@ -804,7 +807,8 @@ static int change_live_vector_length(struct task_struct *task, { unsigned int sve_vl = task_get_sve_vl(task); unsigned int sme_vl = task_get_sme_vl(task); - void *sve_state = NULL, *sme_state = NULL; + struct arm64_sve_state *sve_state = NULL; + struct arm64_sme_state *sme_state = NULL; if (type == ARM64_VEC_SME) sme_vl = vl; @@ -1293,31 +1297,6 @@ void sme_suspend_exit(void) #endif /* CONFIG_ARM64_SME */ -static void sve_init_regs(void) -{ - /* - * Convert the FPSIMD state to SVE, zeroing all the state that - * is not shared with FPSIMD. If (as is likely) the current - * state is live in the registers then do this there and - * update our metadata for the current task including - * disabling the trap, otherwise update our in-memory copy. - * We are guaranteed to not be in streaming mode, we can only - * take a SVE trap when not in streaming mode and we can't be - * in streaming mode when taking a SME trap. 
- */ - if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - unsigned long vq_minus_one = - sve_vq_from_vl(task_get_sve_vl(current)) - 1; - sve_set_vq(vq_minus_one); - sve_flush_live(true, vq_minus_one); - fpsimd_bind_task_to_cpu(); - } else { - fpsimd_to_sve(current); - current->thread.fp_type = FP_STATE_SVE; - fpsimd_flush_task_state(current); - } -} - /* * Trapped SVE access * @@ -1349,13 +1328,23 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs) WARN_ON(1); /* SVE access shouldn't have trapped */ /* - * Even if the task can have used streaming mode we can only - * generate SVE access traps in normal SVE mode and - * transitioning out of streaming mode may discard any - * streaming mode state. Always clear the high bits to avoid - * any potential errors tracking what is properly initialised. + * Convert the FPSIMD state to SVE. Stale SVE state can be present in + * registers or memory, so we must zero all state that is not shared + * with FPSIMD. + * + * SVE traps cannot be taken from streaming mode, so there cannot be + * any effective streaming mode SVE state. 
*/ - sve_init_regs(); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { + unsigned long vq = sve_vq_from_vl(task_get_sve_vl(current)); + sysreg_clear_set_s(SYS_ZCR_EL1, ZCR_ELx_LEN, vq - 1); + sve_flush_live(); + fpsimd_bind_task_to_cpu(); + } else { + fpsimd_to_sve(current); + current->thread.fp_type = FP_STATE_SVE; + fpsimd_flush_task_state(current); + } put_cpu_fpsimd_context(); } @@ -1479,9 +1468,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) WARN_ON(1); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - unsigned long vq_minus_one = - sve_vq_from_vl(task_get_sme_vl(current)) - 1; - sme_set_vq(vq_minus_one); + unsigned long vq = sve_vq_from_vl(task_get_sme_vl(current)); + sysreg_clear_set_s(SYS_SMCR_EL1, SMCR_ELx_LEN, vq - 1); fpsimd_bind_task_to_cpu(); } else { @@ -1656,8 +1644,8 @@ static void fpsimd_flush_thread_vl(enum vec_type type) void fpsimd_flush_thread(void) { - void *sve_state = NULL; - void *sme_state = NULL; + struct arm64_sve_state *sve_state = NULL; + struct arm64_sme_state *sme_state = NULL; if (!system_supports_fpsimd()) return; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 176cbe8baad3..d84e80b2ce54 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2495,10 +2495,10 @@ static void __init teardown_hyp_mode(void) continue; if (free_sve) { - struct cpu_sve_state *sve_state; + struct arm64_sve_state *sve_regs; - sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state; - free_pages((unsigned long) sve_state, pkvm_host_sve_state_order()); + sve_regs = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs; + free_pages((unsigned long) sve_regs, pkvm_host_sve_state_order()); } free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); @@ -2623,7 +2623,7 @@ static int init_pkvm_host_sve_state(void) if (!page) return -ENOMEM; - per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = page_address(page); + per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs = page_address(page); } /* @@ -2644,11 
+2644,11 @@ static void finalize_init_hyp_mode(void) if (system_supports_sve() && is_protected_kvm_enabled()) { for_each_possible_cpu(cpu) { - struct cpu_sve_state *sve_state; + struct arm64_sve_state *sve_regs; - sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state; - per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = - kern_hyp_va(sve_state); + sve_regs = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs; + per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs = + kern_hyp_va(sve_regs); } } } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 332c453b87cf..b01d6622b872 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -500,7 +500,7 @@ static int get_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!kvm_arm_vcpu_sve_finalized(vcpu)) return -EPERM; - if (copy_to_user(uptr, vcpu->arch.sve_state + region.koffset, + if (copy_to_user(uptr, (void *)vcpu->arch.sve_state + region.koffset, region.klen) || clear_user(uptr + region.klen, region.upad)) return -EFAULT; @@ -526,7 +526,7 @@ static int set_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (!kvm_arm_vcpu_sve_finalized(vcpu)) return -EPERM; - if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr, + if (copy_from_user((void *)vcpu->arch.sve_state + region.koffset, uptr, region.klen)) return -EFAULT; diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 11a10d8f5beb..308100ed25de 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -8,7 +8,6 @@ #include <asm/alternative.h> #include <asm/assembler.h> -#include <asm/fpsimdmacros.h> #include <asm/kvm.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S deleted file mode 100644 index e950875e31ce..000000000000 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc 
Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/linkage.h> - -#include <asm/fpsimdmacros.h> - - .text - -SYM_FUNC_START(__fpsimd_save_state) - fpsimd_save x0, 1 - ret -SYM_FUNC_END(__fpsimd_save_state) - -SYM_FUNC_START(__fpsimd_restore_state) - fpsimd_restore x0, 1 - ret -SYM_FUNC_END(__fpsimd_restore_state) - -SYM_FUNC_START(__sve_restore_state) - mov x2, #1 - sve_load 0, x1, x2, 3 - ret -SYM_FUNC_END(__sve_restore_state) - -SYM_FUNC_START(__sve_save_state) - mov x2, #1 - sve_save 0, x1, x2, 3 - ret -SYM_FUNC_END(__sve_save_state) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 98b2976837b1..1f12c4ba295a 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -467,9 +467,8 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) * vCPU. Start off with the max VL so we can load the SVE state. */ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); - __sve_restore_state(vcpu_sve_pffr(vcpu), - &vcpu->arch.ctxt.fp_regs.fpsr, - true); + sve_load_state(kern_hyp_va(vcpu->arch.sve_state), true); + fpsimd_load_common(&vcpu->arch.ctxt.fp_regs); /* * The effective VL for a VM could differ from the max VL when running a @@ -484,13 +483,13 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) static inline void __hyp_sve_save_host(void) { - struct cpu_sve_state *sve_state = *host_data_ptr(sve_state); + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + struct arm64_sve_state *sve_regs = *host_data_ptr(sve_regs); - sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR); + ctxt_sys_reg(hctxt, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2); - __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl), - &sve_state->fpsr, - true); + sve_save_state(sve_regs, true); + fpsimd_save_common(&hctxt->fp_regs); } static inline void fpsimd_lazy_switch_to_guest(struct 
kvm_vcpu *vcpu) @@ -554,6 +553,8 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + /* * Non-protected kvm relies on the host restoring its sve state. * Protected kvm restores the host's sve state as not to reveal that @@ -562,11 +563,11 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) if (system_supports_sve()) { __hyp_sve_save_host(); } else { - __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs)); + fpsimd_save_state(&hctxt->fp_regs); } if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) - *host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR); + ctxt_sys_reg(hctxt, FPMR) = read_sysreg_s(SYS_FPMR); } @@ -622,7 +623,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) if (sve_guest) __hyp_sve_restore_guest(vcpu); else - __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); + fpsimd_load_state(&vcpu->arch.ctxt.fp_regs); if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR); diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 62cdfbff7562..f57450ebcb49 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -26,7 +26,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o + ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o hyp-obj-y += ../../../kernel/smccc-call.o hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o trace.o events.o diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 
73f2e0221e70..676f756e084d 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -35,13 +35,15 @@ static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) * on the VL, so use a consistent (i.e., the maximum) guest VL. */ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); - __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true); + sve_save_state(kern_hyp_va(vcpu->arch.sve_state), true); + fpsimd_save_common(&vcpu->arch.ctxt.fp_regs); write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2); } static void __hyp_sve_restore_host(void) { - struct cpu_sve_state *sve_state = *host_data_ptr(sve_state); + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + struct arm64_sve_state *sve_regs = *host_data_ptr(sve_regs); /* * On saving/restoring host sve state, always use the maximum VL for @@ -53,10 +55,9 @@ static void __hyp_sve_restore_host(void) * need to be revisited. */ write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2); - __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl), - &sve_state->fpsr, - true); - write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR); + sve_load_state(sve_regs, true); + fpsimd_load_common(&hctxt->fp_regs); + write_sysreg_el1(ctxt_sys_reg(hctxt, ZCR_EL1), SYS_ZCR); } static void fpsimd_sve_flush(void) @@ -66,6 +67,7 @@ static void fpsimd_sve_flush(void) static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); bool has_fpmr; if (!guest_owns_fp_regs()) @@ -80,7 +82,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (vcpu_has_sve(vcpu)) __hyp_sve_save_guest(vcpu); else - __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs); + fpsimd_save_state(&vcpu->arch.ctxt.fp_regs); has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm)); if (has_fpmr) @@ -89,10 +91,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (system_supports_sve()) __hyp_sve_restore_host(); else - 
__fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs)); + fpsimd_load_state(&hctxt->fp_regs); if (has_fpmr) - write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR); + write_sysreg_s(ctxt_sys_reg(hctxt, FPMR), SYS_FPMR); *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index d8e5b563fd3d..e704199048c4 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -82,9 +82,9 @@ static int pkvm_create_host_sve_mappings(void) for (i = 0; i < hyp_nr_cpus; i++) { struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i); - struct cpu_sve_state *sve_state = host_data->sve_state; + struct arm64_sve_state *sve_regs = host_data->sve_regs; - start = kern_hyp_va(sve_state); + start = kern_hyp_va(sve_regs); end = start + PAGE_ALIGN(pkvm_host_sve_state_size()); ret = pkvm_create_mappings(start, end, PAGE_HYP); if (ret) diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 9695328bbd96..d6b3475145c0 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5-sr.o + ../hyp-entry.o ../exception.o ../vgic-v5-sr.o diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 6c3ff14e561e..8b219d656660 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -3790,6 +3790,51 @@ Field 1 ZA Field 0 SM EndSysreg +Sysreg FPCR 3 3 4 4 0 +Res0 63:27 +Field 26 AHP +Field 25 DN +Field 24 FZ +Enum 23:22 RMode + 0b00 RN + 0b01 RP + 0b10 RM + 0b11 RZ +EndEnum +Field 21:20 Stride +Field 19 FZ16 +Field 18:16 Len +Field 15 IDE +Res0 14 +Field 13 EBF +Field 12 IXE +Field 11 UFE +Field 10 OFE +Field 9 DZE +Field 8 IOE +Res0 7:3 +Field 2 NEP +Field 1 AH +Field 0 FIZ +EndSysreg + 
+Sysreg FPSR 3 3 4 4 1 +Res0 63:32 +Field 31 N +Field 30 Z +Field 29 C +Field 28 V +Field 27 QC +Res0 26:8 +Field 7 IDC +Res0 6:5 +Field 4 IXC +Field 3 UFC +Field 2 OFC +Field 1 DZC +Field 0 IOC +EndSysreg + Sysreg FPMR 3 3 4 4 2 Res0 63:38 Field 37:32 LSCALE2 |
