diff options
Diffstat (limited to 'arch/x86/kernel/fpu')
-rw-r--r-- | arch/x86/kernel/fpu/core.c | 119 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/init.c | 35 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/regset.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 188 |
4 files changed, 292 insertions, 52 deletions
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d25097c3fc1d..8e37cc8a539a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -114,6 +114,10 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); if (fpu->fpregs_active) { + /* + * Ignore return value -- we don't care if reg state + * is clobbered. + */ copy_fpregs_to_fpstate(fpu); } else { this_cpu_write(fpu_fpregs_owner_ctx, NULL); @@ -189,8 +193,12 @@ void fpu__save(struct fpu *fpu) preempt_disable(); if (fpu->fpregs_active) { - if (!copy_fpregs_to_fpstate(fpu)) - fpregs_deactivate(fpu); + if (!copy_fpregs_to_fpstate(fpu)) { + if (use_eager_fpu()) + copy_kernel_to_fpregs(&fpu->state); + else + fpregs_deactivate(fpu); + } } preempt_enable(); } @@ -223,14 +231,15 @@ void fpstate_init(union fpregs_state *state) } EXPORT_SYMBOL_GPL(fpstate_init); -/* - * Copy the current task's FPU state to a new task's FPU context. - * - * In both the 'eager' and the 'lazy' case we save hardware registers - * directly to the destination buffer. - */ -static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) +int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { + dst_fpu->counter = 0; + dst_fpu->fpregs_active = 0; + dst_fpu->last_cpu = -1; + + if (!src_fpu->fpstate_active || !cpu_has_fpu) + return 0; + WARN_ON_FPU(src_fpu != ¤t->thread.fpu); /* @@ -243,10 +252,9 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) /* * Save current FPU registers directly into the child * FPU context, without any memory-to-memory copying. - * - * If the FPU context got destroyed in the process (FNSAVE - * done on old CPUs) then copy it back into the source - * context and mark the current task for lazy restore. + * In lazy mode, if the FPU context isn't loaded into + * fpregs, CR0.TS will be set and do_device_not_available + * will load the FPU context. * * We have to do all this with preemption disabled, * mostly because of the FNSAVE case, because in that @@ -259,19 +267,13 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); - fpregs_deactivate(src_fpu); + + if (use_eager_fpu()) + copy_kernel_to_fpregs(&src_fpu->state); + else + fpregs_deactivate(src_fpu); } preempt_enable(); -} - -int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) -{ - dst_fpu->counter = 0; - dst_fpu->fpregs_active = 0; - dst_fpu->last_cpu = -1; - - if (src_fpu->fpstate_active && cpu_has_fpu) - fpu_copy(dst_fpu, src_fpu); return 0; } @@ -352,6 +354,69 @@ void fpu__activate_fpstate_write(struct fpu *fpu) } /* + * This function must be called before we write the current + * task's fpstate. + * + * This call gets the current FPU register state and moves + * it in to the 'fpstate'. Preemption is disabled so that + * no writes to the 'fpstate' can occur from context + * swiches. + * + * Must be followed by a fpu__current_fpstate_write_end(). + */ +void fpu__current_fpstate_write_begin(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + /* + * Ensure that the context-switching code does not write + * over the fpstate while we are doing our update. + */ + preempt_disable(); + + /* + * Move the fpregs in to the fpu's 'fpstate'. + */ + fpu__activate_fpstate_read(fpu); + + /* + * The caller is about to write to 'fpu'. Ensure that no + * CPU thinks that its fpregs match the fpstate. This + * ensures we will not be lazy and skip a XRSTOR in the + * future. + */ + fpu->last_cpu = -1; +} + +/* + * This function must be paired with fpu__current_fpstate_write_begin() + * + * This will ensure that the modified fpstate gets placed back in + * the fpregs if necessary. + * + * Note: This function may be called whether or not an _actual_ + * write to the fpstate occurred. + */ +void fpu__current_fpstate_write_end(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + /* + * 'fpu' now has an updated copy of the state, but the + * registers may still be out of date. Update them with + * an XRSTOR if they are active. + */ + if (fpregs_active()) + copy_kernel_to_fpregs(&fpu->state); + + /* + * Our update is done and the fpregs/fpstate are in sync + * if necessary. Context switches can happen again. + */ + preempt_enable(); +} + +/* * 'fpu__restore()' is called to copy FPU registers from * the FPU fpstate to the live hw registers and to activate * access to the hardware registers, so that FPU instructions @@ -409,8 +474,10 @@ static inline void copy_init_fpstate_to_fpregs(void) { if (use_xsave()) copy_kernel_to_xregs(&init_fpstate.xsave, -1); - else + else if (static_cpu_has(X86_FEATURE_FXSR)) copy_kernel_to_fxregs(&init_fpstate.fxsave); + else + copy_kernel_to_fregs(&init_fpstate.fsave); } /* @@ -423,7 +490,7 @@ void fpu__clear(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ - if (!use_eager_fpu()) { + if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) { /* FPU state will be reallocated lazily at the first use. */ fpu__drop(fpu); } else { diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6d9f0a7ef4c8..54c86fffbf9f 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -78,13 +78,15 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) cr0 &= ~(X86_CR0_TS | X86_CR0_EM); write_cr0(cr0); - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); + if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); - else - clear_cpu_cap(c, X86_FEATURE_FPU); + if (fsw == 0 && (fcw & 0x103f) == 0x003f) + set_cpu_cap(c, X86_FEATURE_FPU); + else + clear_cpu_cap(c, X86_FEATURE_FPU); + } #ifndef CONFIG_MATH_EMULATION if (!cpu_has_fpu) { @@ -132,7 +134,7 @@ static void __init fpu__init_system_generic(void) * Set up the legacy init FPU context. (xstate init might overwrite this * with a more modern format, if the CPU supports it.) */ - fpstate_init_fxstate(&init_fpstate.fxsave); + fpstate_init(&init_fpstate); fpu__init_system_mxcsr(); } @@ -260,7 +262,10 @@ static void __init fpu__init_system_xstate_size_legacy(void) * not only saved the restores along the way, but we also have the * FPU ready to be used for the original task. * - * 'eager' switching is used on modern CPUs, there we switch the FPU + * 'lazy' is deprecated because it's almost never a performance win + * and it's much more complicated than 'eager'. + * + * 'eager' switching is by default on all CPUs, there we switch the FPU * state during every context switch, regardless of whether the task * has used FPU instructions in that time slice or not. This is done * because modern FPU context saving instructions are able to optimize @@ -271,7 +276,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) * to use 'eager' restores, if we detect that a task is using the FPU * frequently. See the fpu->counter logic in fpu/internal.h for that. ] */ -static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; +static enum { ENABLE, DISABLE } eagerfpu = ENABLE; /* * Find supported xfeatures based on cpu features and command-line input. @@ -300,12 +305,6 @@ u64 __init fpu__get_supported_xfeatures_mask(void) static void __init fpu__clear_eager_fpu_features(void) { setup_clear_cpu_cap(X86_FEATURE_MPX); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - setup_clear_cpu_cap(X86_FEATURE_AVX512F); - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); } /* @@ -348,15 +347,9 @@ static void __init fpu__init_system_ctx_switch(void) */ static void __init fpu__init_parse_early_param(void) { - /* - * No need to check "eagerfpu=auto" again, since it is the - * initial default. - */ if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { eagerfpu = DISABLE; fpu__clear_eager_fpu_features(); - } else if (cmdline_find_option_bool(boot_command_line, "eagerfpu=on")) { - eagerfpu = ENABLE; } if (cmdline_find_option_bool(boot_command_line, "no387")) diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 0bc3490420c5..8bd1c003942a 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -8,7 +8,7 @@ /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, * as the "regset->n" for the xstate regset will be updated based on the feature - * capabilites supported by the xsave. + * capabilities supported by the xsave. */ int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d425cda5ae6d..b48ef35b28d4 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -5,6 +5,7 @@ */ #include <linux/compat.h> #include <linux/cpu.h> +#include <linux/pkeys.h> #include <asm/fpu/api.h> #include <asm/fpu/internal.h> @@ -13,6 +14,11 @@ #include <asm/tlbflush.h> +/* + * Although we spell it out in here, the Processor Trace + * xfeature is completely unused. We use other mechanisms + * to save/restore PT state in Linux. + */ static const char *xfeature_names[] = { "x87 floating point registers" , @@ -23,6 +29,8 @@ static const char *xfeature_names[] = "AVX-512 opmask" , "AVX-512 Hi256" , "AVX-512 ZMM_Hi256" , + "Processor Trace (unused)" , + "Protection Keys User registers", "unknown xstate feature" , }; @@ -51,8 +59,12 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_AVX512PF); setup_clear_cpu_cap(X86_FEATURE_AVX512ER); setup_clear_cpu_cap(X86_FEATURE_AVX512CD); + setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); + setup_clear_cpu_cap(X86_FEATURE_AVX512BW); + setup_clear_cpu_cap(X86_FEATURE_AVX512VL); setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_XGETBV1); + setup_clear_cpu_cap(X86_FEATURE_PKU); } /* @@ -231,7 +243,7 @@ static void __init print_xstate_feature(u64 xstate_mask) const char *feature_name; if (cpu_has_xfeatures(xstate_mask, &feature_name)) - pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name); + pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name); } /* @@ -247,6 +259,7 @@ static void __init print_xstate_features(void) print_xstate_feature(XFEATURE_MASK_OPMASK); print_xstate_feature(XFEATURE_MASK_ZMM_Hi256); print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); + print_xstate_feature(XFEATURE_MASK_PKRU); } /* @@ -463,6 +476,7 @@ static void check_xstate_against_struct(int nr) XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state); XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state); XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state); + XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state); /* * Make *SURE* to add any feature numbers in below if @@ -470,7 +484,8 @@ static void check_xstate_against_struct(int nr) * numbers. */ if ((nr < XFEATURE_YMM) || - (nr >= XFEATURE_MAX)) { + (nr >= XFEATURE_MAX) || + (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) { WARN_ONCE(1, "no structure for xstate: %d\n", nr); XSTATE_WARN_ON(1); } @@ -668,6 +683,19 @@ void fpu__resume_cpu(void) } /* + * Given an xstate feature mask, calculate where in the xsave + * buffer the state is. Callers should ensure that the buffer + * is valid. + * + * Note: does not work for compacted buffers. + */ +void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) +{ + int feature_nr = fls64(xstate_feature_mask) - 1; + + return (void *)xsave + xstate_comp_offsets[feature_nr]; +} +/* * Given the xsave area and a state inside, this function returns the * address of the state. * @@ -687,7 +715,6 @@ void fpu__resume_cpu(void) */ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) { - int feature_nr = fls64(xstate_feature) - 1; /* * Do we even *have* xsave state? */ @@ -715,7 +742,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) if (!(xsave->header.xfeatures & xstate_feature)) return NULL; - return (void *)xsave + xstate_comp_offsets[feature_nr]; + return __raw_xsave_addr(xsave, xstate_feature); } EXPORT_SYMBOL_GPL(get_xsave_addr); @@ -750,3 +777,156 @@ const void *get_xsave_field_ptr(int xsave_state) return get_xsave_addr(&fpu->state.xsave, xsave_state); } + + +/* + * Set xfeatures (aka XSTATE_BV) bit for a feature that we want + * to take out of its "init state". This will ensure that an + * XRSTOR actually restores the state. + */ +static void fpu__xfeature_set_non_init(struct xregs_state *xsave, + int xstate_feature_mask) +{ + xsave->header.xfeatures |= xstate_feature_mask; +} + +/* + * This function is safe to call whether the FPU is in use or not. + * + * Note that this only works on the current task. + * + * Inputs: + * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, + * XFEATURE_MASK_SSE, etc...) + * @xsave_state_ptr: a pointer to a copy of the state that you would + * like written in to the current task's FPU xsave state. This pointer + * must not be located in the current tasks's xsave area. + * Output: + * address of the state in the xsave area or NULL if the state + * is not present or is in its 'init state'. + */ +static void fpu__xfeature_set_state(int xstate_feature_mask, + void *xstate_feature_src, size_t len) +{ + struct xregs_state *xsave = ¤t->thread.fpu.state.xsave; + struct fpu *fpu = ¤t->thread.fpu; + void *dst; + + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { + WARN_ONCE(1, "%s() attempted with no xsave support", __func__); + return; + } + + /* + * Tell the FPU code that we need the FPU state to be in + * 'fpu' (not in the registers), and that we need it to + * be stable while we write to it. + */ + fpu__current_fpstate_write_begin(); + + /* + * This method *WILL* *NOT* work for compact-format + * buffers. If the 'xstate_feature_mask' is unset in + * xcomp_bv then we may need to move other feature state + * "up" in the buffer. + */ + if (xsave->header.xcomp_bv & xstate_feature_mask) { + WARN_ON_ONCE(1); + goto out; + } + + /* find the location in the xsave buffer of the desired state */ + dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask); + + /* + * Make sure that the pointer being passed in did not + * come from the xsave buffer itself. + */ + WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself"); + + /* put the caller-provided data in the location */ + memcpy(dst, xstate_feature_src, len); + + /* + * Mark the xfeature so that the CPU knows there is state + * in the buffer now. + */ + fpu__xfeature_set_non_init(xsave, xstate_feature_mask); +out: + /* + * We are done writing to the 'fpu'. Reenable preeption + * and (possibly) move the fpstate back in to the fpregs. + */ + fpu__current_fpstate_write_end(); +} + +#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2) +#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1) + +/* + * This will go out and modify the XSAVE buffer so that PKRU is + * set to a particular state for access to 'pkey'. + * + * PKRU state does affect kernel access to user memory. We do + * not modfiy PKRU *itself* here, only the XSAVE state that will + * be restored in to PKRU when we return back to userspace. + */ +int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct pkru_state *old_pkru_state; + struct pkru_state new_pkru_state; + int pkey_shift = (pkey * PKRU_BITS_PER_PKEY); + u32 new_pkru_bits = 0; + + /* + * This check implies XSAVE support. OSPKE only gets + * set if we enable XSAVE and we enable PKU in XCR0. + */ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return -EINVAL; + + /* Set the bits we need in PKRU */ + if (init_val & PKEY_DISABLE_ACCESS) + new_pkru_bits |= PKRU_AD_BIT; + if (init_val & PKEY_DISABLE_WRITE) + new_pkru_bits |= PKRU_WD_BIT; + + /* Shift the bits in to the correct place in PKRU for pkey. */ + new_pkru_bits <<= pkey_shift; + + /* Locate old copy of the state in the xsave buffer */ + old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); + + /* + * When state is not in the buffer, it is in the init + * state, set it manually. Otherwise, copy out the old + * state. + */ + if (!old_pkru_state) + new_pkru_state.pkru = 0; + else + new_pkru_state.pkru = old_pkru_state->pkru; + + /* mask off any old bits in place */ + new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); + /* Set the newly-requested bits */ + new_pkru_state.pkru |= new_pkru_bits; + + /* + * We could theoretically live without zeroing pkru.pad. + * The current XSAVE feature state definition says that + * only bytes 0->3 are used. But we do not want to + * chance leaking kernel stack out to userspace in case a + * memcpy() of the whole xsave buffer was done. + * + * They're in the same cacheline anyway. + */ + new_pkru_state.pad = 0; + + fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, + sizeof(new_pkru_state)); + + return 0; +} |