summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2025-05-13 11:37:29 +0300
committerIngo Molnar <mingo@kernel.org>2025-05-13 11:37:29 +0300
commitec8f353f5262acff120a0dc83b714d899ee94b7f (patch)
treea5eab4c8ced9b7ea467da00aa5166a8c59d813d0
parent2fb8414e644b133a9c3250f408232af99da256a4 (diff)
parent32d5fa804dc9bd7cf6651a1378ba616d332e7444 (diff)
downloadlinux-ec8f353f5262acff120a0dc83b714d899ee94b7f.tar.xz
Merge branch 'x86/fpu' into x86/core, to merge dependent commits
Prepare to resolve conflicts with an upstream series of fixes that conflict with pending x86 changes: 6f5bf947bab0 Merge tag 'its-for-linus-20250509' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/entry-common.h5
-rw-r--r--arch/x86/include/asm/fpu/api.h2
-rw-r--r--arch/x86/include/asm/fpu/sched.h38
-rw-r--r--arch/x86/include/asm/fpu/types.h24
-rw-r--r--arch/x86/include/asm/fpu/xstate.h3
-rw-r--r--arch/x86/include/asm/processor.h13
-rw-r--r--arch/x86/include/asm/trace/fpu.h5
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c1
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/fpu/context.h4
-rw-r--r--arch/x86/kernel/fpu/core.c93
-rw-r--r--arch/x86/kernel/fpu/init.c18
-rw-r--r--arch/x86/kernel/fpu/regset.c22
-rw-r--r--arch/x86/kernel/fpu/signal.c29
-rw-r--r--arch/x86/kernel/fpu/xstate.c166
-rw-r--r--arch/x86/kernel/fpu/xstate.h28
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--arch/x86/kernel/process_32.c5
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/signal.c6
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/math-emu/fpu_aux.c2
-rw-r--r--arch/x86/math-emu/fpu_entry.c4
-rw-r--r--arch/x86/math-emu/fpu_system.h2
-rw-r--r--arch/x86/mm/extable.c2
-rw-r--r--include/linux/sched.h15
-rw-r--r--tools/testing/selftests/x86/Makefile3
-rw-r--r--tools/testing/selftests/x86/apx.c10
-rw-r--r--tools/testing/selftests/x86/xstate.c3
-rw-r--r--tools/testing/selftests/x86/xstate.h2
31 files changed, 294 insertions, 236 deletions
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index bc81b9d1aeca..478ab362fda2 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -481,6 +481,7 @@
#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */
#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
+#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
/*
* BUG word(s)
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 77d20555e04d..d535a97c7284 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -53,7 +53,6 @@ static inline void arch_exit_work(unsigned long ti_work)
if (unlikely(ti_work & _TIF_IO_BITMAP))
tss_update_io_bitmap();
- fpregs_assert_state_consistent();
if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
switch_fpu_return();
}
@@ -61,7 +60,9 @@ static inline void arch_exit_work(unsigned long ti_work)
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
- if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
+ fpregs_assert_state_consistent();
+
+ if (unlikely(ti_work))
arch_exit_work(ti_work);
fred_update_rsp0();
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index f42de5f05e7e..8e6848f55dcd 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -136,7 +136,7 @@ static inline void fpstate_free(struct fpu *fpu) { }
#endif
/* fpstate-related functions which are exported to KVM */
-extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);
+extern void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature);
extern u64 xstate_get_guest_group_perm(void);
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index c485f1944c5f..c060549c6c94 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -10,7 +10,7 @@
#include <asm/trace/fpu.h>
extern void save_fpregs_to_fpstate(struct fpu *fpu);
-extern void fpu__drop(struct fpu *fpu);
+extern void fpu__drop(struct task_struct *tsk);
extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
unsigned long shstk_addr);
extern void fpu_flush_thread(void);
@@ -18,31 +18,25 @@ extern void fpu_flush_thread(void);
/*
* FPU state switching for scheduling.
*
- * This is a two-stage process:
+ * If TIF_NEED_FPU_LOAD is not already set, switch_fpu() saves the
+ * old state and sets TIF_NEED_FPU_LOAD. This is done within the
+ * context of the old process.
*
- * - switch_fpu_prepare() saves the old state.
- * This is done within the context of the old process.
- *
- * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
- * will get loaded on return to userspace, or when the kernel needs it.
- *
- * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
- * are saved in the current thread's FPU register state.
- *
- * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
- * hold current()'s FPU registers. It is required to load the
+ * Once TIF_NEED_FPU_LOAD is set, it is required to load the
* registers before returning to userland or using the content
* otherwise.
*
* The FPU context is only stored/restored for a user task and
* PF_KTHREAD is used to distinguish between kernel and user threads.
*/
-static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
+static inline void switch_fpu(struct task_struct *old, int cpu)
{
- if (cpu_feature_enabled(X86_FEATURE_FPU) &&
+ if (!test_tsk_thread_flag(old, TIF_NEED_FPU_LOAD) &&
+ cpu_feature_enabled(X86_FEATURE_FPU) &&
!(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
- struct fpu *old_fpu = &old->thread.fpu;
+ struct fpu *old_fpu = x86_task_fpu(old);
+ set_tsk_thread_flag(old, TIF_NEED_FPU_LOAD);
save_fpregs_to_fpstate(old_fpu);
/*
* The save operation preserved register state, so the
@@ -50,7 +44,7 @@ static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
* current CPU number in @old_fpu, so the next return
* to user space can avoid the FPU register restore
* when it returns on the same CPU and still owns the
- * context.
+ * context. See fpregs_restore_userregs().
*/
old_fpu->last_cpu = cpu;
@@ -58,14 +52,4 @@ static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
}
}
-/*
- * Delay loading of the complete FPU state until the return to userland.
- * PKRU is handled separately.
- */
-static inline void switch_fpu_finish(struct task_struct *new)
-{
- if (cpu_feature_enabled(X86_FEATURE_FPU))
- set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD);
-}
-
#endif /* _ASM_X86_FPU_SCHED_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index de16862bf230..1c94121acd3d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -125,6 +125,7 @@ enum xfeature {
XFEATURE_RSRVD_COMP_16,
XFEATURE_XTILE_CFG,
XFEATURE_XTILE_DATA,
+ XFEATURE_APX,
XFEATURE_MAX,
};
@@ -145,6 +146,7 @@ enum xfeature {
#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG)
#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA)
+#define XFEATURE_MASK_APX (1 << XFEATURE_APX)
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
@@ -304,6 +306,13 @@ struct xtile_data {
} __packed;
/*
+ * State component 19: sixteen 8-byte extended general purpose registers.
+ */
+struct apx_state {
+ u64 egpr[16];
+} __packed;
+
+/*
* State component 10 is supervisor state used for context-switching the
* PASID state.
*/
@@ -407,9 +416,11 @@ struct fpu_state_perm {
/*
* @__state_perm:
*
- * This bitmap indicates the permission for state components, which
- * are available to a thread group. The permission prctl() sets the
- * enabled state bits in thread_group_leader()->thread.fpu.
+ * This bitmap indicates the permission for state components
+ * available to a thread group, including both user and supervisor
+ * components and software-defined bits like FPU_GUEST_PERM_LOCKED.
+ * The permission prctl() sets the enabled state bits in
+ * thread_group_leader()->thread.fpu.
*
* All run time operations use the per thread information in the
* currently active fpu.fpstate which contains the xfeature masks
@@ -525,13 +536,6 @@ struct fpu_guest {
u64 xfeatures;
/*
- * @perm: xfeature bitmap of features which are
- * permitted to be enabled for the guest
- * vCPU.
- */
- u64 perm;
-
- /*
* @xfd_err: Save the guest value.
*/
u64 xfd_err;
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 7f39fe7980c5..b308a76afbb7 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -32,7 +32,8 @@
XFEATURE_MASK_PKRU | \
XFEATURE_MASK_BNDREGS | \
XFEATURE_MASK_BNDCSR | \
- XFEATURE_MASK_XTILE)
+ XFEATURE_MASK_XTILE | \
+ XFEATURE_MASK_APX)
/*
* Features which are restored when returning to user space.
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0973bed22172..50d34698036d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -514,15 +514,14 @@ struct thread_struct {
struct thread_shstk shstk;
#endif
-
- /* Floating point and extended processor state */
- struct fpu fpu;
- /*
- * WARNING: 'fpu' is dynamically-sized. It *MUST* be at
- * the end.
- */
};
+#ifdef CONFIG_X86_DEBUG_FPU
+extern struct fpu *x86_task_fpu(struct task_struct *task);
+#else
+# define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task))))
+#endif
+
extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
static inline void arch_thread_struct_whitelist(unsigned long *offset,
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 4645a6334063..0454d5e60e5d 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -74,11 +74,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_dropped,
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_copy_src,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 94c062cddfa4..46efcbd6afa4 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -28,6 +28,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
{ X86_FEATURE_MPX, X86_FEATURE_XSAVE },
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_APX, X86_FEATURE_XSAVE },
{ X86_FEATURE_CMOV, X86_FEATURE_FXSR },
{ X86_FEATURE_MMX, X86_FEATURE_FXSR },
{ X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c75c57b32b74..dbf6d71bdf18 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_APX, CPUID_EDX, 21, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h
index f6d856bd50bc..10d0a720659c 100644
--- a/arch/x86/kernel/fpu/context.h
+++ b/arch/x86/kernel/fpu/context.h
@@ -53,7 +53,7 @@ static inline void fpregs_activate(struct fpu *fpu)
/* Internal helper for switch_fpu_return() and signal frame setup */
static inline void fpregs_restore_userregs(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
int cpu = smp_processor_id();
if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER)))
@@ -67,7 +67,7 @@ static inline void fpregs_restore_userregs(void)
* If PKRU is enabled, then the PKRU value is already
* correct because it was either set in switch_to() or in
* flush_thread(). So it is excluded because it might be
- * not up to date in current->thread.fpu.xsave state.
+ * not up to date in current->thread.fpu->xsave state.
*
* XFD state is handled in restore_fpregs_from_fpstate().
*/
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 91d6341f281f..1cda5b78540b 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -51,6 +51,16 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+#ifdef CONFIG_X86_DEBUG_FPU
+struct fpu *x86_task_fpu(struct task_struct *task)
+{
+ if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
+ return NULL;
+
+ return (void *)task + sizeof(*task);
+}
+#endif
+
/*
* Can we use the FPU in kernel mode with the
* whole "kernel_fpu_begin/end()" sequence?
@@ -202,7 +212,7 @@ void fpu_reset_from_exception_fixup(void)
#if IS_ENABLED(CONFIG_KVM)
static void __fpstate_reset(struct fpstate *fpstate, u64 xfd);
-static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
+static void fpu_lock_guest_permissions(void)
{
struct fpu_state_perm *fpuperm;
u64 perm;
@@ -211,15 +221,13 @@ static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
return;
spin_lock_irq(&current->sighand->siglock);
- fpuperm = &current->group_leader->thread.fpu.guest_perm;
+ fpuperm = &x86_task_fpu(current->group_leader)->guest_perm;
perm = fpuperm->__state_perm;
/* First fpstate allocation locks down permissions. */
WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED);
spin_unlock_irq(&current->sighand->siglock);
-
- gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED;
}
bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
@@ -240,7 +248,6 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
gfpu->fpstate = fpstate;
gfpu->xfeatures = fpu_kernel_cfg.default_features;
- gfpu->perm = fpu_kernel_cfg.default_features;
/*
* KVM sets the FP+SSE bits in the XSAVE header when copying FPU state
@@ -255,7 +262,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size))
gfpu->uabi_size = fpu_user_cfg.default_size;
- fpu_init_guest_permissions(gfpu);
+ fpu_lock_guest_permissions();
return true;
}
@@ -263,16 +270,16 @@ EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
{
- struct fpstate *fps = gfpu->fpstate;
+ struct fpstate *fpstate = gfpu->fpstate;
- if (!fps)
+ if (!fpstate)
return;
- if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use))
+ if (WARN_ON_ONCE(!fpstate->is_valloc || !fpstate->is_guest || fpstate->in_use))
return;
gfpu->fpstate = NULL;
- vfree(fps);
+ vfree(fpstate);
}
EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
@@ -323,12 +330,12 @@ EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
*/
void fpu_sync_guest_vmexit_xfd_state(void)
{
- struct fpstate *fps = current->thread.fpu.fpstate;
+ struct fpstate *fpstate = x86_task_fpu(current)->fpstate;
lockdep_assert_irqs_disabled();
if (fpu_state_size_dynamic()) {
- rdmsrl(MSR_IA32_XFD, fps->xfd);
- __this_cpu_write(xfd_state, fps->xfd);
+ rdmsrl(MSR_IA32_XFD, fpstate->xfd);
+ __this_cpu_write(xfd_state, fpstate->xfd);
}
}
EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
@@ -337,7 +344,7 @@ EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
{
struct fpstate *guest_fps = guest_fpu->fpstate;
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
struct fpstate *cur_fps = fpu->fpstate;
fpregs_lock();
@@ -438,7 +445,7 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask)
if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) &&
!test_thread_flag(TIF_NEED_FPU_LOAD)) {
set_thread_flag(TIF_NEED_FPU_LOAD);
- save_fpregs_to_fpstate(&current->thread.fpu);
+ save_fpregs_to_fpstate(x86_task_fpu(current));
}
__cpu_invalidate_fpregs_state();
@@ -467,7 +474,7 @@ EXPORT_SYMBOL_GPL(kernel_fpu_end);
*/
void fpu_sync_fpstate(struct fpu *fpu)
{
- WARN_ON_FPU(fpu != &current->thread.fpu);
+ WARN_ON_FPU(fpu != x86_task_fpu(current));
fpregs_lock();
trace_x86_fpu_before_save(fpu);
@@ -552,7 +559,7 @@ void fpstate_reset(struct fpu *fpu)
static inline void fpu_inherit_perms(struct fpu *dst_fpu)
{
if (fpu_state_size_dynamic()) {
- struct fpu *src_fpu = &current->group_leader->thread.fpu;
+ struct fpu *src_fpu = x86_task_fpu(current->group_leader);
spin_lock_irq(&current->sighand->siglock);
/* Fork also inherits the permissions of the parent */
@@ -572,7 +579,7 @@ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
if (!ssp)
return 0;
- xstate = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave,
+ xstate = get_xsave_addr(&x86_task_fpu(dst)->fpstate->regs.xsave,
XFEATURE_CET_USER);
/*
@@ -593,8 +600,16 @@ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
unsigned long ssp)
{
- struct fpu *src_fpu = &current->thread.fpu;
- struct fpu *dst_fpu = &dst->thread.fpu;
+ /*
+ * We allocate the new FPU structure right after the end of the task struct.
+ * task allocation size already took this into account.
+ *
+ * This is safe because task_struct size is a multiple of cacheline size,
+ * thus x86_task_fpu() will always be cacheline aligned as well.
+ */
+ struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+
+ BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;
@@ -657,19 +672,22 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
if (update_fpu_shstk(dst, ssp))
return 1;
- trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
return 0;
}
/*
- * Whitelist the FPU register state embedded into task_struct for hardened
- * usercopy.
+ * While struct fpu is no longer part of struct thread_struct, it is still
+ * allocated after struct task_struct in the "task_struct" kmem cache. But
+ * since FPU is expected to be part of struct thread_struct, we have to
+ * adjust for it here.
*/
void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
{
- *offset = offsetof(struct thread_struct, fpu.__fpstate.regs);
+ /* The allocation follows struct task_struct. */
+ *offset = sizeof(struct task_struct) - offsetof(struct task_struct, thread);
+ *offset += offsetof(struct fpu, __fpstate.regs);
*size = fpu_kernel_cfg.default_size;
}
@@ -682,11 +700,18 @@ void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
* a state-restore is coming: either an explicit one,
* or a reschedule.
*/
-void fpu__drop(struct fpu *fpu)
+void fpu__drop(struct task_struct *tsk)
{
+ struct fpu *fpu;
+
+ if (test_tsk_thread_flag(tsk, TIF_NEED_FPU_LOAD))
+ return;
+
+ fpu = x86_task_fpu(tsk);
+
preempt_disable();
- if (fpu == &current->thread.fpu) {
+ if (fpu == x86_task_fpu(current)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
@@ -718,9 +743,9 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
/*
* Reset current->fpu memory state to the init values.
*/
-static void fpu_reset_fpregs(void)
+static void fpu_reset_fpstate_regs(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
fpregs_lock();
__fpu_invalidate_fpregs_state(fpu);
@@ -749,11 +774,11 @@ static void fpu_reset_fpregs(void)
*/
void fpu__clear_user_states(struct fpu *fpu)
{
- WARN_ON_FPU(fpu != &current->thread.fpu);
+ WARN_ON_FPU(fpu != x86_task_fpu(current));
fpregs_lock();
if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
- fpu_reset_fpregs();
+ fpu_reset_fpstate_regs();
fpregs_unlock();
return;
}
@@ -782,8 +807,8 @@ void fpu__clear_user_states(struct fpu *fpu)
void fpu_flush_thread(void)
{
- fpstate_reset(&current->thread.fpu);
- fpu_reset_fpregs();
+ fpstate_reset(x86_task_fpu(current));
+ fpu_reset_fpstate_regs();
}
/*
* Load FPU context before returning to userspace.
@@ -823,7 +848,7 @@ void fpregs_lock_and_load(void)
*/
void fpregs_assert_state_consistent(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
if (test_thread_flag(TIF_NEED_FPU_LOAD))
return;
@@ -835,7 +860,7 @@ EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
void fpregs_mark_activate(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
fpregs_activate(fpu);
fpu->last_cpu = smp_processor_id();
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 998a08f17e33..6bb3e35c40e2 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -38,7 +38,7 @@ static void fpu__init_cpu_generic(void)
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU))
- fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft);
+ ;
else
#endif
asm volatile ("fninit");
@@ -73,6 +73,8 @@ static bool __init fpu__probe_without_cpuid(void)
static void __init fpu__init_system_early_generic(void)
{
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+
if (!boot_cpu_has(X86_FEATURE_CPUID) &&
!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
if (fpu__probe_without_cpuid())
@@ -94,7 +96,6 @@ static void __init fpu__init_system_early_generic(void)
* Boot time FPU feature detection code:
*/
unsigned int mxcsr_feature_mask __ro_after_init = 0xffffffffu;
-EXPORT_SYMBOL_GPL(mxcsr_feature_mask);
static void __init fpu__init_system_mxcsr(void)
{
@@ -150,11 +151,13 @@ static void __init fpu__init_task_struct_size(void)
{
int task_size = sizeof(struct task_struct);
+ task_size += sizeof(struct fpu);
+
/*
* Subtract off the static size of the register state.
* It potentially has a bunch of padding.
*/
- task_size -= sizeof(current->thread.fpu.__fpstate.regs);
+ task_size -= sizeof(union fpregs_state);
/*
* Add back the dynamically-calculated register state
@@ -164,14 +167,9 @@ static void __init fpu__init_task_struct_size(void)
/*
* We dynamically size 'struct fpu', so we require that
- * it be at the end of 'thread_struct' and that
- * 'thread_struct' be at the end of 'task_struct'. If
- * you hit a compile error here, check the structure to
- * see if something got added to the end.
+ * '__fpstate' be at the end of it:
*/
CHECK_MEMBER_AT_END_OF(struct fpu, __fpstate);
- CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
- CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
arch_task_struct_size = task_size;
}
@@ -204,7 +202,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
fpu_kernel_cfg.default_size = size;
fpu_user_cfg.max_size = size;
fpu_user_cfg.default_size = size;
- fpstate_reset(&current->thread.fpu);
}
/*
@@ -213,7 +210,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
*/
void __init fpu__init_system(void)
{
- fpstate_reset(&current->thread.fpu);
fpu__init_system_early_generic();
/*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 887b0b8e21e3..0986c2200adc 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -45,7 +45,7 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r
*/
static void sync_fpstate(struct fpu *fpu)
{
- if (fpu == &current->thread.fpu)
+ if (fpu == x86_task_fpu(current))
fpu_sync_fpstate(fpu);
}
@@ -63,7 +63,7 @@ static void fpu_force_restore(struct fpu *fpu)
* Only stopped child tasks can be used to modify the FPU
* state in the fpstate buffer:
*/
- WARN_ON_FPU(fpu == &current->thread.fpu);
+ WARN_ON_FPU(fpu == x86_task_fpu(current));
__fpu_invalidate_fpregs_state(fpu);
}
@@ -71,7 +71,7 @@ static void fpu_force_restore(struct fpu *fpu)
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
if (!cpu_feature_enabled(X86_FEATURE_FXSR))
return -ENODEV;
@@ -91,7 +91,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct fxregs_state newstate;
int ret;
@@ -133,7 +133,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
if (!cpu_feature_enabled(X86_FEATURE_XSAVE))
return -ENODEV;
- sync_fpstate(&target->thread.fpu);
+ sync_fpstate(x86_task_fpu(target));
copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_XSAVE);
return 0;
@@ -143,7 +143,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct xregs_state *tmpbuf = NULL;
int ret;
@@ -187,7 +187,7 @@ int ssp_active(struct task_struct *target, const struct user_regset *regset)
int ssp_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct cet_user_state *cetregs;
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
@@ -214,7 +214,7 @@ int ssp_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct xregs_state *xsave = &fpu->fpstate->regs.xsave;
struct cet_user_state *cetregs;
unsigned long user_ssp;
@@ -368,7 +368,7 @@ static void __convert_from_fxsr(struct user_i387_ia32_struct *env,
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
- __convert_from_fxsr(env, tsk, &tsk->thread.fpu.fpstate->regs.fxsave);
+ __convert_from_fxsr(env, tsk, &x86_task_fpu(tsk)->fpstate->regs.fxsave);
}
void convert_to_fxsr(struct fxregs_state *fxsave,
@@ -401,7 +401,7 @@ void convert_to_fxsr(struct fxregs_state *fxsave,
int fpregs_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct user_i387_ia32_struct env;
struct fxregs_state fxsave, *fx;
@@ -433,7 +433,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct user_i387_ia32_struct env;
int ret;
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 6c69cb28b298..c3ec2512f2bb 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -43,13 +43,13 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
* fpstate layout without copying the extended state information
* in the memory layout.
*/
- if (__get_user(magic2, (__u32 __user *)(fpstate + current->thread.fpu.fpstate->user_size)))
+ if (__get_user(magic2, (__u32 __user *)(fpstate + x86_task_fpu(current)->fpstate->user_size)))
return false;
if (likely(magic2 == FP_XSTATE_MAGIC2))
return true;
setfx:
- trace_x86_fpu_xstate_check_failed(&current->thread.fpu);
+ trace_x86_fpu_xstate_check_failed(x86_task_fpu(current));
/* Set the parameters for fx only state */
fx_sw->magic1 = 0;
@@ -64,13 +64,13 @@ setfx:
static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf)
{
if (use_fxsr()) {
- struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave;
+ struct xregs_state *xsave = &x86_task_fpu(tsk)->fpstate->regs.xsave;
struct user_i387_ia32_struct env;
struct _fpstate_32 __user *fp = buf;
fpregs_lock();
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
- fxsave(&tsk->thread.fpu.fpstate->regs.fxsave);
+ fxsave(&x86_task_fpu(tsk)->fpstate->regs.fxsave);
fpregs_unlock();
convert_from_fxsr(&env, tsk);
@@ -114,7 +114,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
{
struct xregs_state __user *x = buf;
struct _fpx_sw_bytes sw_bytes = {};
- u32 xfeatures;
int err;
/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
@@ -128,12 +127,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
(__u32 __user *)(buf + fpstate->user_size));
/*
- * Read the xfeatures which we copied (directly from the cpu or
- * from the state in task struct) to the user buffers.
- */
- err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
-
- /*
* For legacy compatibility, we always set FP/SSE bits in the bit
* vector while saving the state to the user context. This will
* enable us capturing any changes(during sigreturn) to
@@ -144,9 +137,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
- xfeatures |= XFEATURE_MASK_FPSSE;
-
- err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
+ err |= set_xfeature_in_sigframe(x, XFEATURE_MASK_FPSSE);
return !err;
}
@@ -184,7 +175,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pk
bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru)
{
struct task_struct *tsk = current;
- struct fpstate *fpstate = tsk->thread.fpu.fpstate;
+ struct fpstate *fpstate = x86_task_fpu(tsk)->fpstate;
bool ia32_fxstate = (buf != buf_fx);
int ret;
@@ -272,7 +263,7 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
*/
static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
int ret;
/* Restore enabled features only. */
@@ -332,7 +323,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
bool ia32_fxstate)
{
struct task_struct *tsk = current;
- struct fpu *fpu = &tsk->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(tsk);
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
@@ -452,7 +443,7 @@ static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate)
*/
bool fpu__restore_sig(void __user *buf, int ia32_frame)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
void __user *buf_fx = buf;
bool ia32_fxstate = false;
bool success = false;
@@ -499,7 +490,7 @@ unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size)
{
- unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate);
+ unsigned long frame_size = xstate_sigframe_size(x86_task_fpu(current)->fpstate);
*buf_fx = sp = round_down(sp - frame_size, 64);
if (ia32_frame && use_fxsr()) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6a41d1610d8b..1c8410b68108 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -14,6 +14,7 @@
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/coredump.h>
+#include <linux/sort.h>
#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
@@ -62,6 +63,7 @@ static const char *xfeature_names[] =
"unknown xstate feature",
"AMX Tile config",
"AMX Tile data",
+ "APX registers",
"unknown xstate feature",
};
@@ -80,6 +82,7 @@ static unsigned short xsave_cpuid_features[] __initdata = {
[XFEATURE_CET_USER] = X86_FEATURE_SHSTK,
[XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
[XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
+ [XFEATURE_APX] = X86_FEATURE_APX,
};
static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
@@ -88,6 +91,31 @@ static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
+/*
+ * Ordering of xstate components in uncompacted format: The xfeature
+ * number does not necessarily indicate its position in the XSAVE buffer.
+ * This array defines the traversal order of xstate features.
+ */
+static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init =
+ { [ 0 ... XFEATURE_MAX - 1] = -1};
+
+/*
+ * Return the index of the first entry at or after @i in
+ * xfeature_uncompact_order[] whose xfeature bit is set in @mask. If no
+ * such entry is found, return the index of the first unused (-1) entry
+ * that stopped the scan.
+ *
+ * NOTE(review): the scan has no explicit bound against XFEATURE_MAX and
+ * depends on an unused (-1) entry existing at or after @i - confirm
+ * that the array can never be completely populated.
+ */
+static inline unsigned int next_xfeature_order(unsigned int i, u64 mask)
+{
+ for (; xfeature_uncompact_order[i] != -1; i++) {
+ if (mask & BIT_ULL(xfeature_uncompact_order[i]))
+ break;
+ }
+
+ return i;
+}
+
+/*
+ * Iterate xstate features in uncompacted order:
+ *
+ * @i is an index into xfeature_uncompact_order[], not an xfeature
+ * number; use xfeature_uncompact_order[i] to get the feature itself.
+ * Entries whose feature bit is not set in @mask are skipped, and the
+ * loop ends at the first unused (-1) entry.
+ */
+#define for_each_extended_xfeature_in_order(i, mask) \
+ for (i = 0; \
+ i = next_xfeature_order(i, mask), \
+ xfeature_uncompact_order[i] != -1; \
+ i++)
+
#define XSTATE_FLAG_SUPERVISOR BIT(0)
#define XSTATE_FLAG_ALIGNED64 BIT(1)
@@ -209,16 +237,20 @@ static bool xfeature_enabled(enum xfeature xfeature)
return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}
+/*
+ * sort() comparison callback for xfeature_uncompact_order[]: each
+ * argument points to an xfeature number, and the result is the
+ * difference between the xstate_offsets[] values of the two features.
+ *
+ * The difference of the two unsigned offsets is converted to int, so its
+ * sign is only meaningful while that difference fits in an int.
+ */
+static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2)
+{
+ return xstate_offsets[*(unsigned int *)xfeature1] -
+ xstate_offsets[*(unsigned int *)xfeature2];
+}
+
/*
* Record the offsets and sizes of various xstates contained
- * in the XSAVE state memory layout.
+ * in the XSAVE state memory layout. Also, create an ordered
+ * list of xfeatures for handling out-of-order offsets.
*/
static void __init setup_xstate_cache(void)
{
- u32 eax, ebx, ecx, edx, i;
- /* start at the beginning of the "extended state" */
- unsigned int last_good_offset = offsetof(struct xregs_state,
- extended_state_area);
+ u32 eax, ebx, ecx, edx, xfeature, i = 0;
/*
* The FP xstates and SSE xstates are legacy states. They are always
* in the fixed offsets in the xsave area in either compacted form
@@ -232,31 +264,30 @@ static void __init setup_xstate_cache(void)
xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
xmm_space);
- for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
- cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
+ for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) {
+ cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx);
- xstate_sizes[i] = eax;
- xstate_flags[i] = ecx;
+ xstate_sizes[xfeature] = eax;
+ xstate_flags[xfeature] = ecx;
/*
* If an xfeature is supervisor state, the offset in EBX is
* invalid, leave it to -1.
*/
- if (xfeature_is_supervisor(i))
+ if (xfeature_is_supervisor(xfeature))
continue;
- xstate_offsets[i] = ebx;
+ xstate_offsets[xfeature] = ebx;
- /*
- * In our xstate size checks, we assume that the highest-numbered
- * xstate feature has the highest offset in the buffer. Ensure
- * it does.
- */
- WARN_ONCE(last_good_offset > xstate_offsets[i],
- "x86/fpu: misordered xstate at %d\n", last_good_offset);
-
- last_good_offset = xstate_offsets[i];
+ /* Populate the list of xfeatures before sorting */
+ xfeature_uncompact_order[i++] = xfeature;
}
+
+ /*
+ * Sort xfeatures by their offsets to support out-of-order
+ * offsets in the uncompacted format.
+ */
+ sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL);
}
/*
@@ -340,7 +371,8 @@ static __init void os_xrstor_booting(struct xregs_state *xstate)
XFEATURE_MASK_BNDCSR | \
XFEATURE_MASK_PASID | \
XFEATURE_MASK_CET_USER | \
- XFEATURE_MASK_XTILE)
+ XFEATURE_MASK_XTILE | \
+ XFEATURE_MASK_APX)
/*
* setup the xstate image representing the init state
@@ -540,6 +572,7 @@ static bool __init check_xstate_against_struct(int nr)
case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg);
case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state);
+ case XFEATURE_APX: return XCHECK_SZ(sz, nr, struct apx_state);
case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
default:
XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
@@ -552,13 +585,20 @@ static bool __init check_xstate_against_struct(int nr)
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
unsigned int topmost = fls64(xfeatures) - 1;
- unsigned int offset = xstate_offsets[topmost];
+ unsigned int offset, i;
if (topmost <= XFEATURE_SSE)
return sizeof(struct xregs_state);
- if (compacted)
+ if (compacted) {
offset = xfeature_get_offset(xfeatures, topmost);
+ } else {
+ /* Walk through the xfeature order to pick the last */
+ for_each_extended_xfeature_in_order(i, xfeatures)
+ topmost = xfeature_uncompact_order[i];
+ offset = xstate_offsets[topmost];
+ }
+
return offset + xstate_sizes[topmost];
}
@@ -711,6 +751,8 @@ static int __init init_xstate_size(void)
*/
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
+ pr_info("x86/fpu: XSAVE disabled\n");
+
fpu_kernel_cfg.max_features = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
@@ -727,7 +769,7 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
*/
init_fpstate.xfd = 0;
- fpstate_reset(&current->thread.fpu);
+ fpstate_reset(x86_task_fpu(current));
}
/*
@@ -775,6 +817,17 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
goto out_disable;
}
+ if (fpu_kernel_cfg.max_features & XFEATURE_MASK_APX &&
+ fpu_kernel_cfg.max_features & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) {
+ /*
+ * This is a problematic CPU configuration where two
+ * conflicting state components are both enumerated.
+ */
+ pr_err("x86/fpu: Both APX/MPX present in the CPU's xstate features: 0x%llx.\n",
+ fpu_kernel_cfg.max_features);
+ goto out_disable;
+ }
+
fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
XFEATURE_MASK_INDEPENDENT;
@@ -834,9 +887,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
if (err)
goto out_disable;
- /* Reset the state for the current task */
- fpstate_reset(&current->thread.fpu);
-
/*
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
@@ -852,7 +902,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
init_fpstate.xfeatures = fpu_kernel_cfg.default_features;
if (init_fpstate.size > sizeof(init_fpstate.regs)) {
- pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
+ pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d)\n",
sizeof(init_fpstate.regs), init_fpstate.size);
goto out_disable;
}
@@ -864,7 +914,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
* xfeatures mask.
*/
if (xfeatures != fpu_kernel_cfg.max_features) {
- pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
+ pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init\n",
xfeatures, fpu_kernel_cfg.max_features);
goto out_disable;
}
@@ -909,7 +959,7 @@ void fpu__resume_cpu(void)
}
if (fpu_state_size_dynamic())
- wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
+ wrmsrl(MSR_IA32_XFD, x86_task_fpu(current)->fpstate->xfd);
}
/*
@@ -1071,10 +1121,9 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
struct xregs_state *xinit = &init_fpstate.regs.xsave;
struct xregs_state *xsave = &fpstate->regs.xsave;
+ unsigned int zerofrom, i, xfeature;
struct xstate_header header;
- unsigned int zerofrom;
u64 mask;
- int i;
memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures;
@@ -1143,15 +1192,16 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
*/
mask = header.xfeatures;
- for_each_extended_xfeature(i, mask) {
+ for_each_extended_xfeature_in_order(i, mask) {
+ xfeature = xfeature_uncompact_order[i];
/*
* If there was a feature or alignment gap, zero the space
* in the destination buffer.
*/
- if (zerofrom < xstate_offsets[i])
- membuf_zero(&to, xstate_offsets[i] - zerofrom);
+ if (zerofrom < xstate_offsets[xfeature])
+ membuf_zero(&to, xstate_offsets[xfeature] - zerofrom);
- if (i == XFEATURE_PKRU) {
+ if (xfeature == XFEATURE_PKRU) {
struct pkru_state pkru = {0};
/*
* PKRU is not necessarily up to date in the
@@ -1161,14 +1211,14 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
membuf_write(&to, &pkru, sizeof(pkru));
} else {
membuf_write(&to,
- __raw_xsave_addr(xsave, i),
- xstate_sizes[i]);
+ __raw_xsave_addr(xsave, xfeature),
+ xstate_sizes[xfeature]);
}
/*
* Keep track of the last copied state in the non-compacted
* target buffer for gap zeroing.
*/
- zerofrom = xstate_offsets[i] + xstate_sizes[i];
+ zerofrom = xstate_offsets[xfeature] + xstate_sizes[xfeature];
}
out:
@@ -1191,8 +1241,8 @@ out:
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode copy_mode)
{
- __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
- tsk->thread.fpu.fpstate->user_xfeatures,
+ __copy_xstate_to_uabi_buf(to, x86_task_fpu(tsk)->fpstate,
+ x86_task_fpu(tsk)->fpstate->user_xfeatures,
tsk->thread.pkru, copy_mode);
}
@@ -1332,7 +1382,7 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
const void __user *ubuf)
{
- return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
+ return copy_uabi_to_xstate(x86_task_fpu(tsk)->fpstate, NULL, ubuf, &tsk->thread.pkru);
}
static bool validate_independent_components(u64 mask)
@@ -1398,9 +1448,9 @@ void xrstors(struct xregs_state *xstate, u64 mask)
}
#if IS_ENABLED(CONFIG_KVM)
-void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
+void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature)
{
- void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
+ void *addr = get_xsave_addr(&fpstate->regs.xsave, xfeature);
if (addr)
memset(addr, 0, xstate_sizes[xfeature]);
@@ -1426,7 +1476,7 @@ static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
* The XFD MSR does not match fpstate->xfd. That's invalid when
* the passed in fpstate is current's fpstate.
*/
- if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
+ if (fpstate->xfd == x86_task_fpu(current)->fpstate->xfd)
return false;
/*
@@ -1503,7 +1553,7 @@ void fpstate_free(struct fpu *fpu)
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
unsigned int usize, struct fpu_guest *guest_fpu)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
struct fpstate *curfps, *newfps = NULL;
unsigned int fpsize;
bool in_use;
@@ -1596,7 +1646,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
* AVX512.
*/
bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
- struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current->group_leader);
struct fpu_state_perm *perm;
unsigned int ksize, usize;
u64 mask;
@@ -1606,16 +1656,20 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
if ((permitted & requested) == requested)
return 0;
- /* Calculate the resulting kernel state size */
+ /*
+ * Calculate the resulting kernel state size. Note, @permitted also
+ * contains supervisor xfeatures even though supervisor xfeatures are
+ * always permitted for kernel and guest FPUs, and never permitted for
+ * user FPUs.
+ */
mask = permitted | requested;
- /* Take supervisor states into account on the host */
- if (!guest)
- mask |= xfeatures_mask_supervisor();
ksize = xstate_calculate_size(mask, compacted);
- /* Calculate the resulting user state size */
- mask &= XFEATURE_MASK_USER_SUPPORTED;
- usize = xstate_calculate_size(mask, false);
+ /*
+ * Calculate the resulting user state size. Take care not to clobber
+ * the supervisor xfeatures in the new mask!
+ */
+ usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false);
if (!guest) {
ret = validate_sigaltstack(usize);
@@ -1699,7 +1753,7 @@ int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
return -EPERM;
}
- fpu = &current->group_leader->thread.fpu;
+ fpu = x86_task_fpu(current->group_leader);
perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
ksize = perm->__state_size;
usize = perm->__user_state_size;
@@ -1804,7 +1858,7 @@ long fpu_xstate_prctl(int option, unsigned long arg2)
*/
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
- unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
+ unsigned long timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp);
long delta;
if (!timestamp) {
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 0fd34f53f025..a0256ef34ecb 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -22,7 +22,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
static inline u64 xstate_get_group_perm(bool guest)
{
- struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current->group_leader);
struct fpu_state_perm *perm;
/* Pairs with WRITE_ONCE() in xstate_request_perm() */
@@ -69,21 +69,31 @@ static inline u64 xfeatures_mask_independent(void)
return fpu_kernel_cfg.independent_features;
}
+/*
+ * OR the bits in @mask into the xfeatures field of the XSAVE header in
+ * the user buffer @xbuf, using a __get_user()/__put_user()
+ * read-modify-write.
+ *
+ * Returns the bitwise OR of both accessors' return values, i.e. zero
+ * only when both report zero. The write is attempted even when the read
+ * returned an error.
+ */
+static inline int set_xfeature_in_sigframe(struct xregs_state __user *xbuf, u64 mask)
+{
+ u64 xfeatures;
+ int err;
+
+ /* Read the xfeatures value already saved in the user buffer */
+ err = __get_user(xfeatures, &xbuf->header.xfeatures);
+ xfeatures |= mask;
+ err |= __put_user(xfeatures, &xbuf->header.xfeatures);
+
+ return err;
+}
+
/*
* Update the value of PKRU register that was already pushed onto the signal frame.
*/
-static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru)
+static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru)
{
- u64 xstate_bv;
int err;
if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
return 0;
/* Mark PKRU as in-use so that it is restored correctly. */
- xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU;
-
- err = __put_user(xstate_bv, &buf->header.xfeatures);
+ err = set_xfeature_in_sigframe(buf, XFEATURE_MASK_PKRU);
if (err)
return err;
@@ -288,7 +298,7 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkr
* internally, e.g. PKRU. That's user space ABI and also required
* to allow the signal handler to modify PKRU.
*/
- struct fpstate *fpstate = current->thread.fpu.fpstate;
+ struct fpstate *fpstate = x86_task_fpu(current)->fpstate;
u64 mask = fpstate->user_xfeatures;
u32 lmask;
u32 hmask;
@@ -307,7 +317,7 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkr
clac();
if (!err)
- err = update_pkru_in_sigframe(buf, mask, pkru);
+ err = update_pkru_in_sigframe(buf, pkru);
return err;
}
@@ -322,7 +332,7 @@ static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64
u32 hmask = mask >> 32;
int err;
- xfd_validate_state(current->thread.fpu.fpstate, mask, true);
+ xfd_validate_state(x86_task_fpu(current)->fpstate, mask, true);
stac();
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 962c3ce39323..9e6180777565 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -93,17 +93,12 @@ EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
*/
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- /* init_task is not dynamically sized (incomplete FPU state) */
- if (unlikely(src == &init_task))
- memcpy_and_pad(dst, arch_task_struct_size, src, sizeof(init_task), 0);
- else
- memcpy(dst, src, arch_task_struct_size);
+ /* fpu_clone() will initialize the "dst_fpu" memory */
+ memcpy_and_pad(dst, arch_task_struct_size, src, sizeof(*dst), 0);
#ifdef CONFIG_VM86
dst->thread.vm86 = NULL;
#endif
- /* Drop the copied pointer to current's fpstate */
- dst->thread.fpu.fpstate = NULL;
return 0;
}
@@ -111,8 +106,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
#ifdef CONFIG_X86_64
void arch_release_task_struct(struct task_struct *tsk)
{
- if (fpu_state_size_dynamic())
- fpstate_free(&tsk->thread.fpu);
+ if (fpu_state_size_dynamic() && !(tsk->flags & (PF_KTHREAD | PF_USER_WORKER)))
+ fpstate_free(x86_task_fpu(tsk));
}
#endif
@@ -122,7 +117,6 @@ void arch_release_task_struct(struct task_struct *tsk)
void exit_thread(struct task_struct *tsk)
{
struct thread_struct *t = &tsk->thread;
- struct fpu *fpu = &t->fpu;
if (test_thread_flag(TIF_IO_BITMAP))
io_bitmap_exit(tsk);
@@ -130,7 +124,7 @@ void exit_thread(struct task_struct *tsk)
free_vm86(t);
shstk_free(tsk);
- fpu__drop(fpu);
+ fpu__drop(tsk);
}
static int set_new_tls(struct task_struct *p, unsigned long tls)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4636ef359973..9bd4fa694da5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -160,8 +160,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
- switch_fpu_prepare(prev_p, cpu);
+ switch_fpu(prev_p, cpu);
/*
* Save away %gs. No need to save %fs, as it was saved on the
@@ -208,8 +207,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
raw_cpu_write(current_task, next_p);
- switch_fpu_finish(next_p);
-
/* Load the Intel cache allocation PQR MSR. */
resctrl_sched_in(next_p);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7196ca7048be..d55310d3133c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -616,8 +616,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(hardirq_stack_inuse));
- if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
- switch_fpu_prepare(prev_p, cpu);
+ switch_fpu(prev_p, cpu);
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
@@ -671,8 +670,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
raw_cpu_write(current_task, next_p);
raw_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
- switch_fpu_finish(next_p);
-
/* Reload sp0. */
update_task_stack(next_p);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 5f441039b572..2404233336ab 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -255,7 +255,7 @@ static void
handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
bool stepping, failed;
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
if (v8086_mode(regs))
save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);
@@ -423,14 +423,14 @@ bool sigaltstack_size_valid(size_t ss_size)
if (!fpu_state_size_dynamic() && !strict_sigaltstack_size)
return true;
- fsize += current->group_leader->thread.fpu.perm.__user_state_size;
+ fsize += x86_task_fpu(current->group_leader)->perm.__user_state_size;
if (likely(ss_size > fsize))
return true;
if (strict_sigaltstack_size)
return ss_size > fsize;
- mask = current->group_leader->thread.fpu.perm.__state_perm;
+ mask = x86_task_fpu(current->group_leader)->perm.__state_perm;
if (mask & XFEATURE_MASK_USER_DYNAMIC)
return ss_size > fsize;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d67407c623f3..42e1d6cc48e9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1295,7 +1295,7 @@ DEFINE_IDTENTRY_RAW(exc_debug)
static void math_error(struct pt_regs *regs, int trapnr)
{
struct task_struct *task = current;
- struct fpu *fpu = &task->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(task);
int si_code;
char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
"simd exception";
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index d62662bdd460..5f253ae406b6 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -53,7 +53,7 @@ void fpstate_init_soft(struct swregs_state *soft)
void finit(void)
{
- fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft);
+ fpstate_init_soft(&x86_task_fpu(current)->fpstate->regs.soft);
}
/*
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 91c52ead1226..5034df617740 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -641,7 +641,7 @@ int fpregs_soft_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft;
+ struct swregs_state *s387 = &x86_task_fpu(target)->fpstate->regs.soft;
void *space = s387->st_space;
int ret;
int offset, other, i, tags, regnr, tag, newtop;
@@ -692,7 +692,7 @@ int fpregs_soft_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
- struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft;
+ struct swregs_state *s387 = &x86_task_fpu(target)->fpstate->regs.soft;
const void *space = s387->st_space;
int offset = (S387->ftop & 7) * 10, other = 80 - offset;
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index eec3e4805c75..5e238e930fe3 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -73,7 +73,7 @@ static inline bool seg_writable(struct desc_struct *d)
return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE;
}
-#define I387 (&current->thread.fpu.fpstate->regs)
+#define I387 (&x86_task_fpu(current)->fpstate->regs)
#define FPU_info (I387->soft.info)
#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs))
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 51986e8a9d35..bf8dab18be97 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -111,7 +111,7 @@ static bool ex_handler_sgx(const struct exception_table_entry *fixup,
/*
* Handler for when we fail to restore a task's FPU state. We should never get
- * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * here because the FPU state of a task using the FPU (struct fpu::fpstate)
* should always be valid. However, past bugs have allowed userspace to set
* reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
* These caused XRSTOR to fail when switching to the task, leaking the FPU
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..4ecc0c6b1cb0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1646,22 +1646,15 @@ struct task_struct {
struct user_event_mm *user_event_mm;
#endif
- /*
- * New fields for task_struct should be added above here, so that
- * they are included in the randomized portion of task_struct.
- */
- randomized_struct_fields_end
-
/* CPU-specific state of this task: */
struct thread_struct thread;
/*
- * WARNING: on x86, 'thread_struct' contains a variable-sized
- * structure. It *MUST* be at the end of 'task_struct'.
- *
- * Do not put anything below here!
+ * New fields for task_struct should be added above here, so that
+ * they are included in the randomized portion of task_struct.
*/
-};
+ randomized_struct_fields_end
+} __attribute__ ((aligned (64)));
#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 28422c32cc8f..f703fcfe9f7c 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
- corrupt_xstate_header amx lam test_shadow_stack avx
+ corrupt_xstate_header amx lam test_shadow_stack avx apx
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
@@ -136,3 +136,4 @@ $(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
$(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f
$(OUTPUT)/amx_64: EXTRA_FILES += xstate.c
$(OUTPUT)/avx_64: EXTRA_FILES += xstate.c
+$(OUTPUT)/apx_64: EXTRA_FILES += xstate.c
diff --git a/tools/testing/selftests/x86/apx.c b/tools/testing/selftests/x86/apx.c
new file mode 100644
index 000000000000..d9c8d41b8c5a
--- /dev/null
+++ b/tools/testing/selftests/x86/apx.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include "xstate.h"
+
+/*
+ * Entry point of the APX selftest: calls test_xstate() for XFEATURE_APX.
+ * There is no explicit return statement.
+ */
+int main(void)
+{
+ test_xstate(XFEATURE_APX);
+}
diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c
index 23c1d6c964ea..97fe4bd8bc77 100644
--- a/tools/testing/selftests/x86/xstate.c
+++ b/tools/testing/selftests/x86/xstate.c
@@ -31,7 +31,8 @@
(1 << XFEATURE_OPMASK) | \
(1 << XFEATURE_ZMM_Hi256) | \
(1 << XFEATURE_Hi16_ZMM) | \
- (1 << XFEATURE_XTILEDATA))
+ (1 << XFEATURE_XTILEDATA) | \
+ (1 << XFEATURE_APX))
static inline uint64_t xgetbv(uint32_t index)
{
diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h
index 42af36ec852f..e91e3092b5d2 100644
--- a/tools/testing/selftests/x86/xstate.h
+++ b/tools/testing/selftests/x86/xstate.h
@@ -33,6 +33,7 @@ enum xfeature {
XFEATURE_RSRVD_COMP_16,
XFEATURE_XTILECFG,
XFEATURE_XTILEDATA,
+ XFEATURE_APX,
XFEATURE_MAX,
};
@@ -59,6 +60,7 @@ static const char *xfeature_names[] =
"unknown xstate feature",
"AMX Tile config",
"AMX Tile data",
+ "APX registers",
"unknown xstate feature",
};