summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@kernel.org> 2025-04-10 00:11:23 +0300
committer: Ingo Molnar <mingo@kernel.org> 2025-04-14 09:18:29 +0300
commit: 55bc30f2e34dcc17a370d1f6c1c992be107c4502 (patch)
tree: 5c7102010a2b29b0bc2c9d83df4330abfbcf5bf6
parent: cb7ca40a3882360ce87191793449d48df0b29184 (diff)
download: linux-55bc30f2e34dcc17a370d1f6c1c992be107c4502.tar.xz
x86/fpu: Remove the thread::fpu pointer
As suggested by Oleg, remove the thread::fpu pointer, as we can
calculate it via x86_task_fpu() at compile-time.

This improves code generation a bit:

   kepler:~/tip> size vmlinux.before vmlinux.after
   text        data        bss        dec         hex        filename
   26475405    10435342    1740804    38651551    24dc69f    vmlinux.before
   26475339    10959630    1216516    38651485    24dc65d    vmlinux.after

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Chang S. Bae <chang.seok.bae@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Uros Bizjak <ubizjak@gmail.com>
Link: https://lore.kernel.org/r/20250409211127.3544993-5-mingo@kernel.org
-rw-r--r--  arch/x86/include/asm/processor.h | 5
-rw-r--r--  arch/x86/kernel/fpu/core.c       | 4
-rw-r--r--  arch/x86/kernel/fpu/init.c       | 1
-rw-r--r--  arch/x86/kernel/process.c        | 2
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S    | 4
5 files changed, 6 insertions, 10 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ea7e5d2c4de..b7f7c9c83409 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -514,12 +514,9 @@ struct thread_struct {
struct thread_shstk shstk;
#endif
-
- /* Floating point and extended processor state */
- struct fpu *fpu;
};
-#define x86_task_fpu(task) ((task)->thread.fpu)
+#define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task))))
/*
* X86 doesn't need any embedded-FPU-struct quirks:
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 853a738fdf2d..974b276ff0da 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -600,13 +600,11 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
* This is safe because task_struct size is a multiple of cacheline size.
*/
struct fpu *src_fpu = x86_task_fpu(current);
- struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+ struct fpu *dst_fpu = x86_task_fpu(dst);
BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
BUG_ON(!src_fpu);
- dst->thread.fpu = dst_fpu;
-
/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 848ea79886ba..da41a1d2c40f 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -76,7 +76,6 @@ static struct fpu x86_init_fpu __attribute__ ((aligned (64))) __read_mostly;
static void __init fpu__init_system_early_generic(void)
{
fpstate_reset(&x86_init_fpu);
- current->thread.fpu = &x86_init_fpu;
set_thread_flag(TIF_NEED_FPU_LOAD);
x86_init_fpu.last_cpu = -1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3ce4cce46f3f..88868a90459e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -102,8 +102,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
#ifdef CONFIG_VM86
dst->thread.vm86 = NULL;
#endif
- /* Drop the copied pointer to current's fpstate */
- dst->thread.fpu = NULL;
return 0;
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index ccdc45e5b759..d9ca2d1754da 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -181,6 +181,10 @@ SECTIONS
/* equivalent to task_pt_regs(&init_task) */
__top_init_kernel_stack = __end_init_stack - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE;
+ __x86_init_fpu_begin = .;
+ . = __x86_init_fpu_begin + 128*PAGE_SIZE;
+ __x86_init_fpu_end = .;
+
#ifdef CONFIG_X86_32
/* 32 bit has nosave before _edata */
NOSAVE_DATA