From df3fb96820455ef70a51630d1be336d4f2602111 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 21 May 2018 19:08:15 +0100 Subject: arm64: fpsimd: Eliminate task->mm checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the FPSIMD handling code uses the condition task->mm == NULL as a hint that task has no FPSIMD register context. The ->mm check is only there to filter out tasks that cannot possibly have FPSIMD context loaded, for optimisation purposes. Also, TIF_FOREIGN_FPSTATE must always be checked anyway before saving FPSIMD context back to memory. For these reasons, the ->mm checks are not useful, providing that TIF_FOREIGN_FPSTATE is maintained in a consistent way for all threads. The context switch logic is already deliberately optimised to defer reloads of the regs until ret_to_user (or sigreturn as a special case), and save them only if they have been previously loaded. These paths are the only places where the wrong_task and wrong_cpu conditions can be made false, by calling fpsimd_bind_task_to_cpu(). Kernel threads by definition never reach these paths. As a result, the wrong_task and wrong_cpu tests in fpsimd_thread_switch() will always yield true for kernel threads. This patch removes the redundant checks and special-case code, ensuring that TIF_FOREIGN_FPSTATE is set whenever a kernel thread is scheduled in, and ensures that this flag is set for the init task. The fpsimd_flush_task_state() call already present in copy_thread() ensures the same for any new task. With TIF_FOREIGN_FPSTATE always set for kernel threads, this patch ensures that no extra context save work is added for kernel threads, and eliminates the redundant context saving that may currently occur for kernel threads that have acquired an mm via use_mm(). 
Signed-off-by: Dave Martin Reviewed-by: Catalin Marinas Reviewed-by: Alex Bennée Reviewed-by: Christoffer Dall Cc: Catalin Marinas Cc: Will Deacon Cc: Ard Biesheuvel Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/processor.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include/asm/processor.h') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 767598932549..36d64f83cdfb 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -156,7 +156,9 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, /* Sync TPIDR_EL0 back to thread_struct for current */ void tls_preserve_current_state(void); -#define INIT_THREAD { } +#define INIT_THREAD { \ + .fpsimd_cpu = NR_CPUS, \ +} static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { -- cgit v1.2.3 From 31dc52b3c8faf47bf3ff5ced661488a20e5d1811 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 12 Apr 2018 16:47:20 +0100 Subject: arm64/sve: Move read_zcr_features() out of cpufeature.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having read_zcr_features() inline in cpufeature.h results in that header requiring #includes which make it hard to include elsewhere without triggering header inclusion cycles. This is not a hot-path function and arguably should not be in cpufeature.h in the first place, so this patch moves it to fpsimd.c, compiled conditionally if CONFIG_ARM64_SVE=y. This allows some SVE-related #includes to be dropped from cpufeature.h, which will ease future maintenance. A couple of missing #includes of <asm/fpsimd.h> are exposed by this change under arch/arm64/. This patch adds the missing #includes as necessary. No functional change. 
Signed-off-by: Dave Martin Reviewed-by: Alex Bennée Acked-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/cpufeature.h | 29 ----------------------------- arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/fpsimd.c | 28 ++++++++++++++++++++++++++++ arch/arm64/kernel/ptrace.c | 1 + 5 files changed, 32 insertions(+), 29 deletions(-) (limited to 'arch/arm64/include/asm/processor.h') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 09b0f2a80c8f..0a6b7133195e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -11,9 +11,7 @@ #include #include -#include #include -#include #include /* @@ -510,33 +508,6 @@ static inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } -/* - * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE - * vector length. - * - * Use only if SVE is present. - * This function clobbers the SVE vector length. - */ -static inline u64 read_zcr_features(void) -{ - u64 zcr; - unsigned int vq_max; - - /* - * Set the maximum possible VL, and write zeroes to all other - * bits to see if they stick. 
- */ - sve_kernel_enable(NULL); - write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); - - zcr = read_sysreg_s(SYS_ZCR_EL1); - zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */ - vq_max = sve_vq_from_vl(sve_get_vl()); - zcr |= vq_max - 1; /* set LEN field to maximum effective value */ - - return zcr; -} - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 3e00f701cb9c..fb60b22b8bbf 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -69,6 +69,8 @@ extern unsigned int sve_get_vl(void); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern u64 read_zcr_features(void); + extern int __ro_after_init sve_max_vl; #ifdef CONFIG_ARM64_SVE diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 36d64f83cdfb..9231b8762ca6 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -40,6 +40,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 794dd990da82..6c01ee2062c4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -755,6 +756,33 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) isb(); } +/* + * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE + * vector length. + * + * Use only if SVE is present. + * This function clobbers the SVE vector length. + */ +u64 read_zcr_features(void) +{ + u64 zcr; + unsigned int vq_max; + + /* + * Set the maximum possible VL, and write zeroes to all other + * bits to see if they stick. 
+ */ + sve_kernel_enable(NULL); + write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); + + zcr = read_sysreg_s(SYS_ZCR_EL1); + zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */ + vq_max = sve_vq_from_vl(sve_get_vl()); + zcr |= vq_max - 1; /* set LEN field to maximum effective value */ + + return zcr; +} + void __init sve_setup(void) { u64 zcr; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 7ff81fed46e1..78889c4546d7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 9a6e594869b29ccec4f99db83c071e4f2dbfc11f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 12 Apr 2018 17:32:35 +0100 Subject: arm64/sve: Move sve_pffr() to fpsimd.h and make inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to make sve_save_state()/sve_load_state() more easily reusable and to get rid of a potential branch on context switch critical paths, this patch makes sve_pffr() inline and moves it to fpsimd.h. <asm/processor.h> must be included in fpsimd.h in order to make this work, and this creates an #include cycle that is tricky to avoid without modifying core code, due to the way the PR_SVE_*() prctl helpers are included in the core prctl implementation. Instead of breaking the cycle, this patch defers inclusion of <asm/fpsimd.h> in <asm/processor.h> until the point where it is actually needed: i.e., immediately before the prctl definitions. No functional change. 
Signed-off-by: Dave Martin Reviewed-by: Alex Bennée Acked-by: Catalin Marinas Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/fpsimd.h | 13 +++++++++++++ arch/arm64/include/asm/processor.h | 12 +++++++++++- arch/arm64/kernel/fpsimd.c | 12 ------------ 3 files changed, 24 insertions(+), 13 deletions(-) (limited to 'arch/arm64/include/asm/processor.h') diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index fb60b22b8bbf..fa92747a49c8 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -18,6 +18,8 @@ #include #include +#include +#include #ifndef __ASSEMBLY__ @@ -61,6 +63,17 @@ extern void sve_flush_cpu_state(void); /* Maximum VL that SVE VL-agnostic software can transparently support */ #define SVE_VL_ARCH_MAX 0x100 +/* Offset of FFR in the SVE register dump */ +static inline size_t sve_ffr_offset(int vl) +{ + return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET; +} + +static inline void *sve_pffr(struct thread_struct *thread) +{ + return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl); +} + extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9231b8762ca6..c99e657fdd57 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -40,7 +40,6 @@ #include #include -#include #include #include #include @@ -247,6 +246,17 @@ void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); +/* + * Not at the top of the file due to a direct #include cycle between + * <asm/fpsimd.h> and <asm/processor.h>. 
Deferring this #include + * ensures that contents of processor.h are visible to fpsimd.h even if + * processor.h is included first. + * + * These prctl helpers are the only things in this file that require + * fpsimd.h. The core code expects them to be in this header. + */ +#include + /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 842b2ad08bec..e60c3a28380f 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -161,18 +161,6 @@ static void sve_free(struct task_struct *task) __sve_free(task); } - -/* Offset of FFR in the SVE register dump */ -static size_t sve_ffr_offset(int vl) -{ - return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET; -} - -static void *sve_pffr(struct thread_struct *thread) -{ - return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl); -} - static void change_cpacr(u64 val, u64 mask) { u64 cpacr = read_sysreg(CPACR_EL1); -- cgit v1.2.3