diff options
author | Will Deacon <will@kernel.org> | 2022-12-06 14:27:28 +0300 |
---|---|---|
committer | Will Deacon <will@kernel.org> | 2022-12-06 14:27:28 +0300 |
commit | 75bc81d08fda1b90dcc09dd068d144398ddd9a2d (patch) | |
tree | 57acc3748f81e7f97ccd2310d3fbc4c0d98b97b3 /arch/arm64 | |
parent | 595a121e8901c4f1c0a876cbd6322a0f418791e1 (diff) | |
parent | 1192b93ba3528feaf37dc4b5d8d6bdbb475791ad (diff) | |
download | linux-75bc81d08fda1b90dcc09dd068d144398ddd9a2d.tar.xz |
Merge branch 'for-next/sve-state' into for-next/core
* for-next/sve-state:
arm64/fp: Use a struct to pass data to fpsimd_bind_state_to_cpu()
arm64/sve: Leave SVE enabled on syscall if we don't context switch
arm64/fpsimd: SME no longer requires SVE register state
arm64/fpsimd: Load FP state based on recorded data type
arm64/fpsimd: Stop using TIF_SVE to manage register saving in KVM
arm64/fpsimd: Have KVM explicitly say which FP registers to save
arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE
KVM: arm64: Discard any SVE state when entering KVM guests
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/include/asm/fpsimd.h | 17 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 12 | ||||
-rw-r--r-- | arch/arm64/include/asm/processor.h | 7 | ||||
-rw-r--r-- | arch/arm64/kernel/fpsimd.c | 165 | ||||
-rw-r--r-- | arch/arm64/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/ptrace.c | 5 | ||||
-rw-r--r-- | arch/arm64/kernel/signal.c | 7 | ||||
-rw-r--r-- | arch/arm64/kernel/syscall.c | 19 | ||||
-rw-r--r-- | arch/arm64/kvm/fpsimd.c | 26 |
9 files changed, 180 insertions, 80 deletions
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 6f86b7ab6c28..e6fa1e2982c8 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -56,11 +56,20 @@ extern void fpsimd_signal_preserve_current_state(void); extern void fpsimd_preserve_current_state(void); extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); +extern void fpsimd_kvm_prepare(void); + +struct cpu_fp_state { + struct user_fpsimd_state *st; + void *sve_state; + void *za_state; + u64 *svcr; + unsigned int sve_vl; + unsigned int sme_vl; + enum fp_type *fp_type; + enum fp_type to_save; +}; -extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, - void *sve_state, unsigned int sve_vl, - void *za_state, unsigned int sme_vl, - u64 *svcr); +extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state); extern void fpsimd_flush_task_state(struct task_struct *target); extern void fpsimd_save_and_flush_cpu_state(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 45e2136322ba..fd34ab155d0b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -306,8 +306,18 @@ struct vcpu_reset_state { struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; - /* Guest floating point state */ + /* + * Guest floating point state + * + * The architecture has two main floating point extensions, + * the original FPSIMD and SVE. These have overlapping + * register views, with the FPSIMD V registers occupying the + * low 128 bits of the SVE Z registers. When the core + * floating point code saves the register state of a task it + * records which view it saved in fp_type. + */ void *sve_state; + enum fp_type fp_type; unsigned int sve_max_vl; u64 svcr; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index b15fd6c11294..0dcb1f7f485d 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -122,6 +122,12 @@ enum vec_type { ARM64_VEC_MAX, }; +enum fp_type { + FP_STATE_CURRENT, /* Save based on current task state. */ + FP_STATE_FPSIMD, + FP_STATE_SVE, +}; + struct cpu_context { unsigned long x19; unsigned long x20; @@ -152,6 +158,7 @@ struct thread_struct { struct user_fpsimd_state fpsimd_state; } uw; + enum fp_type fp_type; /* registers FPSIMD or SVE? */ unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ void *za_state; /* ZA register, if any */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1fe7d65096ac..dcc81e7200d4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -118,16 +118,8 @@ * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so * whatever is in the FPSIMD registers is not saved to memory, but discarded. */ -struct fpsimd_last_state_struct { - struct user_fpsimd_state *st; - void *sve_state; - void *za_state; - u64 *svcr; - unsigned int sve_vl; - unsigned int sme_vl; -}; -static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); +static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state); __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { #ifdef CONFIG_ARM64_SVE @@ -330,15 +322,6 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, * The task can execute SVE instructions while in userspace without * trapping to the kernel. * - * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the - * corresponding Zn), P0-P15 and FFR are encoded in - * task->thread.sve_state, formatted appropriately for vector - * length task->thread.sve_vl or, if SVCR.SM is set, - * task->thread.sme_vl. - * - * task->thread.sve_state must point to a valid buffer at least - * sve_state_size(task) bytes in size. - * * During any syscall, the kernel may optionally clear TIF_SVE and * discard the vector state except for the FPSIMD subset. * @@ -348,7 +331,15 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, * do_sve_acc() to be called, which does some preparation and then * sets TIF_SVE. * - * When stored, FPSIMD registers V0-V31 are encoded in + * During any syscall, the kernel may optionally clear TIF_SVE and + * discard the vector state except for the FPSIMD subset. + * + * The data will be stored in one of two formats: + * + * * FPSIMD only - FP_STATE_FPSIMD: + * + * When the FPSIMD only state stored task->thread.fp_type is set to + * FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in * task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are * logically zero but not stored anywhere; P0-P15 and FFR are not * stored and have unspecified values from userspace's point of @@ -356,7 +347,23 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, * but userspace is discouraged from relying on this. * * task->thread.sve_state does not need to be non-NULL, valid or any - * particular size: it must not be dereferenced. + * particular size: it must not be dereferenced and any data stored + * there should be considered stale and not referenced. + * + * * SVE state - FP_STATE_SVE: + * + * When the full SVE state is stored task->thread.fp_type is set to + * FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the + * corresponding Zn), P0-P15 and FFR are encoded in in + * task->thread.sve_state, formatted appropriately for vector + * length task->thread.sve_vl or, if SVCR.SM is set, + * task->thread.sme_vl. The storage for the vector registers in + * task->thread.uw.fpsimd_state should be ignored. + * + * task->thread.sve_state must point to a valid buffer at least + * sve_state_size(task) bytes in size. The data stored in + * task->thread.uw.fpsimd_state.vregs should be considered stale + * and not referenced. * * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state * irrespective of whether TIF_SVE is clear or set, since these are @@ -378,11 +385,37 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - /* Check if we should restore SVE first */ - if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { - sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); - restore_sve_regs = true; - restore_ffr = true; + if (system_supports_sve()) { + switch (current->thread.fp_type) { + case FP_STATE_FPSIMD: + /* Stop tracking SVE for this task until next use. */ + if (test_and_clear_thread_flag(TIF_SVE)) + sve_user_disable(); + break; + case FP_STATE_SVE: + if (!thread_sm_enabled(¤t->thread) && + !WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE))) + sve_user_enable(); + + if (test_thread_flag(TIF_SVE)) + sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); + + restore_sve_regs = true; + restore_ffr = true; + break; + default: + /* + * This indicates either a bug in + * fpsimd_save() or memory corruption, we + * should always record an explicit format + * when we save. We always at least have the + * memory allocated for FPSMID registers so + * try that and hope for the best. + */ + WARN_ON_ONCE(1); + clear_thread_flag(TIF_SVE); + break; + } } /* Restore SME, override SVE register configuration if needed */ @@ -398,18 +431,19 @@ static void task_fpsimd_load(void) if (thread_za_enabled(¤t->thread)) za_load_state(current->thread.za_state); - if (thread_sm_enabled(¤t->thread)) { - restore_sve_regs = true; + if (thread_sm_enabled(¤t->thread)) restore_ffr = system_supports_fa64(); - } } - if (restore_sve_regs) + if (restore_sve_regs) { + WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE); sve_load_state(sve_pffr(¤t->thread), ¤t->thread.uw.fpsimd_state.fpsr, restore_ffr); - else + } else { + WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD); fpsimd_load_state(¤t->thread.uw.fpsimd_state); + } } /* @@ -419,12 +453,12 @@ static void task_fpsimd_load(void) * last, if KVM is involved this may be the guest VM context rather * than the host thread for the VM pointed to by current. This means * that we must always reference the state storage via last rather - * than via current, other than the TIF_ flags which KVM will - * carefully maintain for us. + * than via current, if we are saving KVM state then it will have + * ensured that the type of registers to save is set in last->to_save. */ static void fpsimd_save(void) { - struct fpsimd_last_state_struct const *last = + struct cpu_fp_state const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ bool save_sve_regs = false; @@ -437,7 +471,14 @@ static void fpsimd_save(void) if (test_thread_flag(TIF_FOREIGN_FPSTATE)) return; - if (test_thread_flag(TIF_SVE)) { + /* + * If a task is in a syscall the ABI allows us to only + * preserve the state shared with FPSIMD so don't bother + * saving the full SVE state in that case. + */ + if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) && + !in_syscall(current_pt_regs())) || + last->to_save == FP_STATE_SVE) { save_sve_regs = true; save_ffr = true; vl = last->sve_vl; @@ -474,8 +515,10 @@ static void fpsimd_save(void) sve_save_state((char *)last->sve_state + sve_ffr_offset(vl), &last->st->fpsr, save_ffr); + *last->fp_type = FP_STATE_SVE; } else { fpsimd_save_state(last->st); + *last->fp_type = FP_STATE_FPSIMD; } } @@ -768,8 +811,7 @@ void fpsimd_sync_to_sve(struct task_struct *task) */ void sve_sync_to_fpsimd(struct task_struct *task) { - if (test_tsk_thread_flag(task, TIF_SVE) || - thread_sm_enabled(&task->thread)) + if (task->thread.fp_type == FP_STATE_SVE) sve_to_fpsimd(task); } @@ -848,8 +890,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, fpsimd_flush_task_state(task); if (test_and_clear_tsk_thread_flag(task, TIF_SVE) || - thread_sm_enabled(&task->thread)) + thread_sm_enabled(&task->thread)) { sve_to_fpsimd(task); + task->thread.fp_type = FP_STATE_FPSIMD; + } if (system_supports_sme() && type == ARM64_VEC_SME) { task->thread.svcr &= ~(SVCR_SM_MASK | @@ -1368,6 +1412,7 @@ static void sve_init_regs(void) fpsimd_bind_task_to_cpu(); } else { fpsimd_to_sve(current); + current->thread.fp_type = FP_STATE_SVE; } } @@ -1596,6 +1641,8 @@ void fpsimd_flush_thread(void) current->thread.svcr = 0; } + current->thread.fp_type = FP_STATE_FPSIMD; + put_cpu_fpsimd_context(); kfree(sve_state); kfree(za_state); @@ -1628,14 +1675,38 @@ void fpsimd_signal_preserve_current_state(void) } /* + * Called by KVM when entering the guest. + */ +void fpsimd_kvm_prepare(void) +{ + if (!system_supports_sve()) + return; + + /* + * KVM does not save host SVE state since we can only enter + * the guest from a syscall so the ABI means that only the + * non-saved SVE state needs to be saved. If we have left + * SVE enabled for performance reasons then update the task + * state to be FPSIMD only. + */ + get_cpu_fpsimd_context(); + + if (test_and_clear_thread_flag(TIF_SVE)) { + sve_to_fpsimd(current); + current->thread.fp_type = FP_STATE_FPSIMD; + } + + put_cpu_fpsimd_context(); +} + +/* * Associate current's FPSIMD context with this cpu * The caller must have ownership of the cpu FPSIMD context before calling * this function. */ static void fpsimd_bind_task_to_cpu(void) { - struct fpsimd_last_state_struct *last = - this_cpu_ptr(&fpsimd_last_state); + struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state); WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; @@ -1644,6 +1715,8 @@ static void fpsimd_bind_task_to_cpu(void) last->sve_vl = task_get_sve_vl(current); last->sme_vl = task_get_sme_vl(current); last->svcr = ¤t->thread.svcr; + last->fp_type = ¤t->thread.fp_type; + last->to_save = FP_STATE_CURRENT; current->thread.fpsimd_cpu = smp_processor_id(); /* @@ -1665,22 +1738,14 @@ static void fpsimd_bind_task_to_cpu(void) } } -void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, - unsigned int sve_vl, void *za_state, - unsigned int sme_vl, u64 *svcr) +void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state) { - struct fpsimd_last_state_struct *last = - this_cpu_ptr(&fpsimd_last_state); + struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state); WARN_ON(!system_supports_fpsimd()); WARN_ON(!in_softirq() && !irqs_disabled()); - last->st = st; - last->svcr = svcr; - last->sve_state = sve_state; - last->za_state = za_state; - last->sve_vl = sve_vl; - last->sme_vl = sme_vl; + *last = *state; } /* diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 044a7d7f1f6a..19cd05eea3f0 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -331,6 +331,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) clear_tsk_thread_flag(dst, TIF_SME); } + dst->thread.fp_type = FP_STATE_FPSIMD; + /* clear any pending asynchronous tag fault raised by the parent */ clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c2fb5755bbec..979dbdc36d52 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -907,8 +907,7 @@ static int sve_set_common(struct task_struct *target, ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, SVE_PT_FPSIMD_OFFSET); clear_tsk_thread_flag(target, TIF_SVE); - if (type == ARM64_VEC_SME) - fpsimd_force_sync_to_sve(target); + target->thread.fp_type = FP_STATE_FPSIMD; goto out; } @@ -931,6 +930,7 @@ static int sve_set_common(struct task_struct *target, if (!target->thread.sve_state) { ret = -ENOMEM; clear_tsk_thread_flag(target, TIF_SVE); + target->thread.fp_type = FP_STATE_FPSIMD; goto out; } @@ -942,6 +942,7 @@ static int sve_set_common(struct task_struct *target, */ fpsimd_sync_to_sve(target); set_tsk_thread_flag(target, TIF_SVE); + target->thread.fp_type = FP_STATE_SVE; BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); start = SVE_PT_SVE_OFFSET; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 9ad911f1647c..e0d09bf5b01b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -207,6 +207,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); clear_thread_flag(TIF_SVE); + current->thread.fp_type = FP_STATE_FPSIMD; /* load the hardware registers from the fpsimd_state structure */ if (!err) @@ -292,6 +293,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (sve.head.size <= sizeof(*user->sve)) { clear_thread_flag(TIF_SVE); current->thread.svcr &= ~SVCR_SM_MASK; + current->thread.fp_type = FP_STATE_FPSIMD; goto fpsimd_only; } @@ -327,6 +329,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) current->thread.svcr |= SVCR_SM_MASK; else set_thread_flag(TIF_SVE); + current->thread.fp_type = FP_STATE_SVE; fpsimd_only: /* copy the FP and status/control registers */ @@ -932,9 +935,11 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, * FPSIMD register state - flush the saved FPSIMD * register state in case it gets loaded. */ - if (current->thread.svcr & SVCR_SM_MASK) + if (current->thread.svcr & SVCR_SM_MASK) { memset(¤t->thread.uw.fpsimd_state, 0, sizeof(current->thread.uw.fpsimd_state)); + current->thread.fp_type = FP_STATE_FPSIMD; + } current->thread.svcr &= ~(SVCR_ZA_MASK | SVCR_SM_MASK); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index d72e8f23422d..a5de47e3df2b 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -183,21 +183,12 @@ static inline void fp_user_discard(void) if (!system_supports_sve()) return; - /* - * If SME is not active then disable SVE, the registers will - * be cleared when userspace next attempts to access them and - * we do not need to track the SVE register state until then. - */ - clear_thread_flag(TIF_SVE); + if (test_thread_flag(TIF_SVE)) { + unsigned int sve_vq_minus_one; - /* - * task_fpsimd_load() won't be called to update CPACR_EL1 in - * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only - * happens if a context switch or kernel_neon_begin() or context - * modification (sigreturn, ptrace) intervenes. - * So, ensure that CPACR_EL1 is already correct for the fast-path case. - */ - sve_user_disable(); + sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1; + sve_flush_live(true, sve_vq_minus_one); + } } void do_el0_svc(struct pt_regs *regs) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index ec8e4494873d..02dd7e9ebd39 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -75,11 +75,12 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) { BUG_ON(!current->mm); - BUG_ON(test_thread_flag(TIF_SVE)); if (!system_supports_fpsimd()) return; + fpsimd_kvm_prepare(); + vcpu->arch.fp_state = FP_STATE_HOST_OWNED; vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); @@ -129,20 +130,31 @@ void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) */ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) { + struct cpu_fp_state fp_state; + WARN_ON_ONCE(!irqs_disabled()); if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + /* * Currently we do not support SME guests so SVCR is * always 0 and we just need a variable to point to. */ - fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, - vcpu->arch.sve_state, - vcpu->arch.sve_max_vl, - NULL, 0, &vcpu->arch.svcr); + fp_state.st = &vcpu->arch.ctxt.fp_regs; + fp_state.sve_state = vcpu->arch.sve_state; + fp_state.sve_vl = vcpu->arch.sve_max_vl; + fp_state.za_state = NULL; + fp_state.svcr = &vcpu->arch.svcr; + fp_state.fp_type = &vcpu->arch.fp_type; + + if (vcpu_has_sve(vcpu)) + fp_state.to_save = FP_STATE_SVE; + else + fp_state.to_save = FP_STATE_FPSIMD; + + fpsimd_bind_state_to_cpu(&fp_state); clear_thread_flag(TIF_FOREIGN_FPSTATE); - update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); } } @@ -199,7 +211,5 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0); } - update_thread_flag(TIF_SVE, 0); - local_irq_restore(flags); } |