diff options
| author | Lisa Robinson <lisa@bytefly.space> | 2026-04-22 10:45:11 +0300 |
|---|---|---|
| committer | Huacai Chen <chenhuacai@loongson.cn> | 2026-04-22 10:45:11 +0300 |
| commit | e3f4591f7920ce169f2f78fa5a89639ada7d7058 (patch) | |
| tree | e359772df8ec1ca32c159ccb91a87aaaddcc1d5a | |
| parent | 1829419bc3b291ad9547abe70053c2620832ac41 (diff) | |
| download | linux-e3f4591f7920ce169f2f78fa5a89639ada7d7058.tar.xz | |
LoongArch: Align FPU register state to 32 bytes
Move fpr to the beginning of struct loongarch_fpu so it is naturally
aligned to FPU_ALIGN (32 bytes), improving 256-bit SIMD (LASX) context
switch performance.
Also adjust process.c and fpu.S to work well with the new loongarch_fpu
layout.
Signed-off-by: Lisa Robinson <lisa@bytefly.space>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
| -rw-r--r-- | arch/loongarch/include/asm/processor.h | 2 |
| -rw-r--r-- | arch/loongarch/kernel/fpu.S | 12 |
| -rw-r--r-- | arch/loongarch/kernel/process.c | 2 |
3 files changed, 9 insertions, 7 deletions
```diff
diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h
index c3bc44b5f5b3..ce8b953f8c79 100644
--- a/arch/loongarch/include/asm/processor.h
+++ b/arch/loongarch/include/asm/processor.h
@@ -80,10 +80,10 @@ BUILD_FPR_ACCESS(32)
 BUILD_FPR_ACCESS(64)
 
 struct loongarch_fpu {
+	union fpureg fpr[NUM_FPU_REGS];
 	uint64_t fcc; /* 8x8 */
 	uint32_t fcsr;
 	uint32_t ftop;
-	union fpureg fpr[NUM_FPU_REGS];
 };
 
 struct loongarch_lbt {
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index f225dcc5b530..bf7d6b8bf600 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -97,7 +97,7 @@
 	.endm
 
 #ifdef CONFIG_32BIT
-	.macro sc_save_fcc thread tmp0 tmp1
+	.macro sc_save_fcc base tmp0 tmp1
 	movcf2gr \tmp0, $fcc0
 	move \tmp1, \tmp0
 	movcf2gr \tmp0, $fcc1
@@ -106,7 +106,7 @@
 	bstrins.w \tmp1, \tmp0, 23, 16
 	movcf2gr \tmp0, $fcc3
 	bstrins.w \tmp1, \tmp0, 31, 24
-	EX st.w \tmp1, \thread, THREAD_FCC
+	EX st.w \tmp1, \base, 0
 	movcf2gr \tmp0, $fcc4
 	move \tmp1, \tmp0
 	movcf2gr \tmp0, $fcc5
@@ -115,11 +115,11 @@
 	bstrins.w \tmp1, \tmp0, 23, 16
 	movcf2gr \tmp0, $fcc7
 	bstrins.w \tmp1, \tmp0, 31, 24
-	EX st.w \tmp1, \thread, (THREAD_FCC + 4)
+	EX st.w \tmp1, \base, 4
 	.endm
 
-	.macro sc_restore_fcc thread tmp0 tmp1
-	EX ld.w \tmp0, \thread, THREAD_FCC
+	.macro sc_restore_fcc base tmp0 tmp1
+	EX ld.w \tmp0, \base, 0
 	bstrpick.w \tmp1, \tmp0, 7, 0
 	movgr2cf $fcc0, \tmp1
 	bstrpick.w \tmp1, \tmp0, 15, 8
@@ -128,7 +128,7 @@
 	movgr2cf $fcc2, \tmp1
 	bstrpick.w \tmp1, \tmp0, 31, 24
 	movgr2cf $fcc3, \tmp1
-	EX ld.w \tmp0, \thread, (THREAD_FCC + 4)
+	EX ld.w \tmp0, \base, 4
 	bstrpick.w \tmp1, \tmp0, 7, 0
 	movgr2cf $fcc4, \tmp1
 	bstrpick.w \tmp1, \tmp0, 15, 8
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 4ac1c3086152..17e88eedb154 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -135,6 +135,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 		return 0;
 	}
 
+	dst->thread.fpu.fcsr = src->thread.fpu.fcsr;
+
 	if (!used_math())
 		memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
 	else
```
