summaryrefslogtreecommitdiff
path: root/arch/x86/include/asm/fpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm/fpu')
-rw-r--r--arch/x86/include/asm/fpu/api.h10
-rw-r--r--arch/x86/include/asm/fpu/internal.h139
-rw-r--r--arch/x86/include/asm/fpu/types.h34
-rw-r--r--arch/x86/include/asm/fpu/xstate.h17
4 files changed, 48 insertions, 152 deletions
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1429a7c736db..0877ae018fc9 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -27,16 +27,6 @@ extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);
/*
- * Some instructions like VIA's padlock instructions generate a spurious
- * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context as well. To prevent these kernel instructions
- * in interrupt context interacting wrongly with other user/kernel fpu usage, we
- * should use them only in the context of irq_ts_save/restore()
- */
-extern int irq_ts_save(void);
-extern void irq_ts_restore(int TS_state);
-
-/*
* Query the presence of one or more xfeatures. Works on any legacy CPU as well.
*
* If 'feature_name' is set then put a human-readable description of
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2737366ea583..d4a684997497 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
/*
* FPU related CPU feature flag helper routines:
*/
-static __always_inline __pure bool use_eager_fpu(void)
-{
- return static_cpu_has(X86_FEATURE_EAGER_FPU);
-}
-
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -484,42 +479,42 @@ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size)
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
/*
- * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx,
- * on this CPU.
+ * The in-register FPU state for an FPU context on a CPU is assumed to be
+ * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
+ * matches the FPU.
*
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
+ * If the FPU register state is valid, the kernel can skip restoring the
+ * FPU state from memory.
+ *
+ * Any code that clobbers the FPU registers or updates the in-memory
+ * FPU state for a task MUST let the rest of the kernel know that the
+ * FPU registers are no longer valid for this task.
+ *
+ * Either one of these invalidation functions is enough. Invalidate
+ * a resource you control: CPU if using the CPU for something else
+ * (with preemption disabled), FPU for the current task, or a task that
+ * is prevented from running by the current task.
*/
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+static inline void __cpu_invalidate_fpregs_state(void)
{
- per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
+ __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}
-static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
-{
- return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
-}
-
-
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
+static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
- if (!use_eager_fpu())
- clts();
+ fpu->last_cpu = -1;
}
-static inline void __fpregs_deactivate_hw(void)
+static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
- if (!use_eager_fpu())
- stts();
+ return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
-static inline void __fpregs_deactivate(struct fpu *fpu)
+/*
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own:
+ */
+static inline void fpregs_deactivate(struct fpu *fpu)
{
WARN_ON_FPU(!fpu->fpregs_active);
@@ -528,8 +523,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
trace_x86_fpu_regs_deactivated(fpu);
}
-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
-static inline void __fpregs_activate(struct fpu *fpu)
+static inline void fpregs_activate(struct fpu *fpu)
{
WARN_ON_FPU(fpu->fpregs_active);
@@ -554,51 +548,19 @@ static inline int fpregs_active(void)
}
/*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
- * These generally need preemption protection to work,
- * do try to avoid using these on their own.
- */
-static inline void fpregs_activate(struct fpu *fpu)
-{
- __fpregs_activate_hw();
- __fpregs_activate(fpu);
-}
-
-static inline void fpregs_deactivate(struct fpu *fpu)
-{
- __fpregs_deactivate(fpu);
- __fpregs_deactivate_hw();
-}
-
-/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
*
- * - switch_fpu_prepare() saves the old state and
- * sets the new state of the CR0.TS bit. This is
- * done within the context of the old process.
+ * - switch_fpu_prepare() saves the old state.
+ * This is done within the context of the old process.
*
* - switch_fpu_finish() restores the new state as
* necessary.
*/
-typedef struct { int preload; } fpu_switch_t;
-
-static inline fpu_switch_t
-switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
+static inline void
+switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- fpu_switch_t fpu;
-
- /*
- * If the task has used the math, pre-load the FPU on xsave processors
- * or if the past 5 consecutive context-switches used math.
- */
- fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->fpstate_active &&
- (use_eager_fpu() || new_fpu->counter > 5);
-
if (old_fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
@@ -608,29 +570,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
/* But leave fpu_fpregs_owner_ctx! */
old_fpu->fpregs_active = 0;
trace_x86_fpu_regs_deactivated(old_fpu);
-
- /* Don't change CR0.TS if we just switch! */
- if (fpu.preload) {
- new_fpu->counter++;
- __fpregs_activate(new_fpu);
- trace_x86_fpu_regs_activated(new_fpu);
- prefetch(&new_fpu->state);
- } else {
- __fpregs_deactivate_hw();
- }
- } else {
- old_fpu->counter = 0;
+ } else
old_fpu->last_cpu = -1;
- if (fpu.preload) {
- new_fpu->counter++;
- if (fpu_want_lazy_restore(new_fpu, cpu))
- fpu.preload = 0;
- else
- prefetch(&new_fpu->state);
- fpregs_activate(new_fpu);
- }
- }
- return fpu;
}
/*
@@ -638,15 +579,19 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
*/
/*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
+ * Set up the userspace FPU context for the new task, if the task
+ * has used the FPU.
*/
-static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
+static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
- if (fpu_switch.preload)
- copy_kernel_to_fpregs(&new_fpu->state);
+ bool preload = static_cpu_has(X86_FEATURE_FPU) &&
+ new_fpu->fpstate_active;
+
+ if (preload) {
+ if (!fpregs_state_valid(new_fpu, cpu))
+ copy_kernel_to_fpregs(&new_fpu->state);
+ fpregs_activate(new_fpu);
+ }
}
/*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 48df486b02f9..3c80f5b9c09d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -322,17 +322,6 @@ struct fpu {
unsigned char fpregs_active;
/*
- * @counter:
- *
- * This counter contains the number of consecutive context switches
- * during which the FPU stays used. If this is over a threshold, the
- * lazy FPU restore logic becomes eager, to save the trap overhead.
- * This is an unsigned char so that after 256 iterations the counter
- * wraps and the context switch behavior turns lazy again; this is to
- * deal with bursty apps that only use the FPU for a short time:
- */
- unsigned char counter;
- /*
* @state:
*
* In-memory copy of all FPU registers that we save/restore
@@ -340,29 +329,6 @@ struct fpu {
* the registers in the FPU are more recent than this state
* copy. If the task context-switches away then they get
* saved here and represent the FPU state.
- *
- * After context switches there may be a (short) time period
- * during which the in-FPU hardware registers are unchanged
- * and still perfectly match this state, if the tasks
- * scheduled afterwards are not using the FPU.
- *
- * This is the 'lazy restore' window of optimization, which
- * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
- *
- * We detect whether a subsequent task uses the FPU via setting
- * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
- *
- * During this window, if the task gets scheduled again, we
- * might be able to skip having to do a restore from this
- * memory buffer to the hardware registers - at the cost of
- * incurring the overhead of #NM fault traps.
- *
- * Note that on modern CPUs that support the XSAVEOPT (or other
- * optimized XSAVE instructions), we don't use #NM traps anymore,
- * as the hardware can track whether FPU registers need saving
- * or not. On such CPUs we activate the non-lazy ('eagerfpu')
- * logic, which unconditionally saves/restores all FPU state
- * across context switches. (if FPU state exists.)
*/
union fpregs_state state;
/*
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 430bacf73074..1b2799e0699a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,21 +21,16 @@
/* Supervisor features */
#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)
-/* Supported features which support lazy state saving */
-#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
+/* All currently supported features */
+#define XCNTXT_MASK (XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
- XFEATURE_MASK_Hi16_ZMM)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR | \
- XFEATURE_MASK_PKRU)
-
-/* All currently supported features */
-#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
+ XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_PKRU | \
+ XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "