diff options
Diffstat (limited to 'arch/x86/include/asm')
58 files changed, 784 insertions, 893 deletions
diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h index 1b07fb102c4e..07949102a08d 100644 --- a/arch/x86/include/asm/GEN-for-each-reg.h +++ b/arch/x86/include/asm/GEN-for-each-reg.h @@ -1,11 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * These are in machine order; things rely on that. + */ #ifdef CONFIG_64BIT GEN(rax) -GEN(rbx) GEN(rcx) GEN(rdx) +GEN(rbx) +GEN(rsp) +GEN(rbp) GEN(rsi) GEN(rdi) -GEN(rbp) GEN(r8) GEN(r9) GEN(r10) @@ -16,10 +21,11 @@ GEN(r14) GEN(r15) #else GEN(eax) -GEN(ebx) GEN(ecx) GEN(edx) +GEN(ebx) +GEN(esp) +GEN(ebp) GEN(esi) GEN(edi) -GEN(ebp) #endif diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index a3c2315aca12..58eee6402832 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -75,6 +75,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern void apply_retpolines(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 4cb726c71ed8..8f80de627c60 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -17,21 +17,3 @@ extern void cmpxchg8b_emu(void); #endif -#ifdef CONFIG_RETPOLINE - -#undef GEN -#define GEN(reg) \ - extern asmlinkage void __x86_indirect_thunk_ ## reg (void); -#include <asm/GEN-for-each-reg.h> - -#undef GEN -#define GEN(reg) \ - extern asmlinkage void __x86_indirect_alt_call_ ## reg (void); -#include <asm/GEN-for-each-reg.h> - -#undef GEN -#define GEN(reg) \ - extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void); -#include <asm/GEN-for-each-reg.h> - -#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 3ad3da9a7d97..3a168483bc8e 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -6,11 +6,13 @@ # define __ASM_FORM(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ # define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, +# define __ASM_REGPFX % #else #include <linux/stringify.h> # define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " " # define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__) # define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) "," +# define __ASM_REGPFX %% #endif #define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;) @@ -49,6 +51,9 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +/* Adds a (%rip) suffix on 64 bits only; for immediate memory references */ +#define _ASM_RIP(x) __ASM_SEL_RAW(x, x (__ASM_REGPFX rip)) + #ifndef __x86_64__ /* 32 bit */ @@ -122,28 +127,19 @@ #ifdef __KERNEL__ +# include <asm/extable_fixup_types.h> + /* Exception table entry */ #ifdef __ASSEMBLY__ -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ + +# define _ASM_EXTABLE_TYPE(from, to, type) \ .pushsection "__ex_table","a" ; \ .balign 4 ; \ .long (from) - . ; \ .long (to) - . ; \ - .long (handler) - . ; \ + .long type ; \ .popsection -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) - -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) - -# define _ASM_EXTABLE_CPY(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) - -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) - # ifdef CONFIG_KPROBES # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ @@ -155,27 +151,15 @@ # endif #else /* ! __ASSEMBLY__ */ -# define _EXPAND_EXTABLE_HANDLE(x) #x -# define _ASM_EXTABLE_HANDLE(from, to, handler) \ + +# define _ASM_EXTABLE_TYPE(from, to, type) \ " .pushsection \"__ex_table\",\"a\"\n" \ " .balign 4\n" \ " .long (" #from ") - .\n" \ " .long (" #to ") - .\n" \ - " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ + " .long " __stringify(type) " \n" \ " .popsection\n" -# define _ASM_EXTABLE(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) - -# define _ASM_EXTABLE_UA(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) - -# define _ASM_EXTABLE_CPY(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) - -# define _ASM_EXTABLE_FAULT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) - /* For C file, we already have NOKPROBE_SYMBOL macro */ /* @@ -188,6 +172,17 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ +#define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT) +#define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS) + +#define _ASM_EXTABLE_CPY(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY) + +#define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT) + +#endif /* __KERNEL__ */ #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 3d52b094850a..dd5ea1bdf04c 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -10,6 +10,12 @@ #ifdef CONFIG_X86_64 +#ifdef CONFIG_AMD_MEM_ENCRYPT +#define VC_EXCEPTION_STKSZ EXCEPTION_STKSZ +#else +#define VC_EXCEPTION_STKSZ 0 +#endif + /* Macro to enforce the same ordering and stack sizes */ #define ESTACKS_MEMBERS(guardsize, optional_stack_size) \ char DF_stack_guard[guardsize]; \ @@ -28,7 +34,7 @@ /* The exception stacks' physical storage. No guard pages required */ struct exception_stacks { - ESTACKS_MEMBERS(0, 0) + ESTACKS_MEMBERS(0, VC_EXCEPTION_STKSZ) }; /* The effective cpu entry area mapping with guard pages. */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 16a51e7288d5..1261842d006c 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -173,20 +173,25 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); * means that the boot_cpu_has() variant is already fast enough for the * majority of cases and you should stick to using it as it is generally * only two instructions: a RIP-relative MOV and a TEST. + * + * Do not use an "m" constraint for [cap_byte] here: gcc doesn't know + * that this is only used on a fallback path and will sometimes cause + * it to manifest the address of boot_cpu_data in a register, fouling + * the mainline (post-initialization) code. */ static __always_inline bool _static_cpu_has(u16 bit) { asm_volatile_goto( ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") - ".section .altinstr_aux,\"ax\"\n" + ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" - " testb %[bitnum],%[cap_byte]\n" + " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n" " jnz %l[t_yes]\n" " jmp %l[t_no]\n" - ".previous\n" + ".popsection\n" : : [feature] "i" (bit), [bitnum] "i" (1 << (bit & 7)), - [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + [cap_byte] "i" (&((const char *)boot_cpu_data.x86_capability)[bit >> 3]) : : t_yes, t_no); t_yes: return true; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d0ce5cfd3ac1..d5b5f2ab87a0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -277,6 +277,7 @@ #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -298,6 +299,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 14ebd2196569..43184640b579 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -25,7 +25,7 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs) * For !SMAP hardware we patch out CLAC on entry. */ if (boot_cpu_has(X86_FEATURE_SMAP) || - (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV))) + (IS_ENABLED(CONFIG_64BIT) && boot_cpu_has(X86_FEATURE_XENPV))) mask |= X86_EFLAGS_AC; WARN_ON_ONCE(flags & mask); diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index 1f0cbc52937c..93f400eb728f 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -1,12 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_EXTABLE_H #define _ASM_X86_EXTABLE_H + +#include <asm/extable_fixup_types.h> + /* - * The exception table consists of triples of addresses relative to the - * exception table entry itself. The first address is of an instruction - * that is allowed to fault, the second is the target at which the program - * should continue. The third is a handler function to deal with the fault - * caused by the instruction in the first field. + * The exception table consists of two addresses relative to the + * exception table entry itself and a type selector field. + * + * The first address is of an instruction that is allowed to fault, the + * second is the target at which the program should continue. + * + * The type entry is used by fixup_exception() to select the handler to + * deal with the fault caused by the instruction in the first field. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, @@ -15,7 +21,7 @@ */ struct exception_table_entry { - int insn, fixup, handler; + int insn, fixup, type; }; struct pt_regs; @@ -25,21 +31,27 @@ struct pt_regs; do { \ (a)->fixup = (b)->fixup + (delta); \ (b)->fixup = (tmp).fixup - (delta); \ - (a)->handler = (b)->handler + (delta); \ - (b)->handler = (tmp).handler - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ } while (0) -enum handler_type { - EX_HANDLER_NONE, - EX_HANDLER_FAULT, - EX_HANDLER_UACCESS, - EX_HANDLER_OTHER -}; - extern int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr); extern int fixup_bug(struct pt_regs *regs, int trapnr); -extern enum handler_type ex_get_fault_handler_type(unsigned long ip); +extern int ex_get_fixup_type(unsigned long ip); extern void early_fixup_exception(struct pt_regs *regs, int trapnr); +#ifdef CONFIG_X86_MCE +extern void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr); +#else +static inline void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { } +#endif + +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs); +#else +static inline bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs) { return false; } +#endif + #endif diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h new file mode 100644 index 000000000000..409524d5d2eb --- /dev/null +++ b/arch/x86/include/asm/extable_fixup_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H +#define _ASM_X86_EXTABLE_FIXUP_TYPES_H + +#define EX_TYPE_NONE 0 +#define EX_TYPE_DEFAULT 1 +#define EX_TYPE_FAULT 2 +#define EX_TYPE_UACCESS 3 +#define EX_TYPE_COPY 4 +#define EX_TYPE_CLEAR_FS 5 +#define EX_TYPE_FPU_RESTORE 6 +#define EX_TYPE_WRMSR 7 +#define EX_TYPE_RDMSR 8 +#define EX_TYPE_BPF 9 + +#define EX_TYPE_WRMSR_IN_MCE 10 +#define EX_TYPE_RDMSR_IN_MCE 11 + +#define EX_TYPE_DEFAULT_MCE_SAFE 12 +#define EX_TYPE_FAULT_MCE_SAFE 13 + +#endif diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 23bef08a8388..6053674f9132 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -12,6 +12,8 @@ #define _ASM_X86_FPU_API_H #include <linux/bottom_half.h> +#include <asm/fpu/types.h> + /* * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It * disables preemption so be careful if you intend to use it for long periods @@ -48,9 +50,9 @@ static inline void kernel_fpu_begin(void) } /* - * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. + * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate. * A context switch will (and softirq might) save CPU's FPU registers to - * fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in + * fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in * a random state. * * local_bh_disable() protects against both preemption and soft interrupts @@ -106,6 +108,56 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); */ #define PASID_DISABLED 0 -static inline void update_pasid(void) { } +/* Trap handling */ +extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern void fpu_sync_fpstate(struct fpu *fpu); +extern void fpu_reset_from_exception_fixup(void); + +/* Boot, hotplug and resume */ +extern void fpu__init_cpu(void); +extern void fpu__init_system(struct cpuinfo_x86 *c); +extern void fpu__init_check_bugs(void); +extern void fpu__resume_cpu(void); + +#ifdef CONFIG_MATH_EMULATION +extern void fpstate_init_soft(struct swregs_state *soft); +#else +static inline void fpstate_init_soft(struct swregs_state *soft) {} +#endif + +/* State tracking */ +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +/* Process cleanup */ +#ifdef CONFIG_X86_64 +extern void fpstate_free(struct fpu *fpu); +#else +static inline void fpstate_free(struct fpu *fpu) { } +#endif + +/* fpstate-related functions which are exported to KVM */ +extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature); + +/* KVM specific functions */ +extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); +extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); +extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); + +extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); +extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); + +static inline void fpstate_set_confidential(struct fpu_guest *gfpu) +{ + gfpu->fpstate->is_confidential = true; +} + +static inline bool fpstate_is_confidential(struct fpu_guest *gfpu) +{ + return gfpu->fpstate->is_confidential; +} + +/* prctl */ +struct task_struct; +extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2); #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5a18694a89b2..e69de29bb2d1 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -1,540 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _ASM_X86_FPU_INTERNAL_H -#define _ASM_X86_FPU_INTERNAL_H - -#include <linux/compat.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/mm.h> - -#include <asm/user.h> -#include <asm/fpu/api.h> -#include <asm/fpu/xstate.h> -#include <asm/fpu/xcr.h> -#include <asm/cpufeature.h> -#include <asm/trace/fpu.h> - -/* - * High level FPU state handling functions: - */ -extern int fpu__restore_sig(void __user *buf, int ia32_frame); -extern void fpu__drop(struct fpu *fpu); -extern void fpu__clear_user_states(struct fpu *fpu); -extern int fpu__exception_code(struct fpu *fpu, int trap_nr); - -extern void fpu_sync_fpstate(struct fpu *fpu); - -/* Clone and exit operations */ -extern int fpu_clone(struct task_struct *dst); -extern void fpu_flush_thread(void); - -/* - * Boot time FPU initialization functions: - */ -extern void fpu__init_cpu(void); -extern void fpu__init_system_xstate(void); -extern void fpu__init_cpu_xstate(void); -extern void fpu__init_system(struct cpuinfo_x86 *c); -extern void fpu__init_check_bugs(void); -extern void fpu__resume_cpu(void); - -/* - * Debugging facility: - */ -#ifdef CONFIG_X86_DEBUG_FPU -# define WARN_ON_FPU(x) WARN_ON_ONCE(x) -#else -# define WARN_ON_FPU(x) ({ (void)(x); 0; }) -#endif - -/* - * FPU related CPU feature flag helper routines: - */ -static __always_inline __pure bool use_xsaveopt(void) -{ - return static_cpu_has(X86_FEATURE_XSAVEOPT); -} - -static __always_inline __pure bool use_xsave(void) -{ - return static_cpu_has(X86_FEATURE_XSAVE); -} - -static __always_inline __pure bool use_fxsr(void) -{ - return static_cpu_has(X86_FEATURE_FXSR); -} - -/* - * fpstate handling functions: - */ - -extern union fpregs_state init_fpstate; - -extern void fpstate_init(union fpregs_state *state); -#ifdef CONFIG_MATH_EMULATION -extern void fpstate_init_soft(struct swregs_state *soft); -#else -static inline void fpstate_init_soft(struct swregs_state *soft) {} -#endif -extern void save_fpregs_to_fpstate(struct fpu *fpu); - -/* Returns 0 or the negated trap number, which results in -EFAULT for #PF */ -#define user_insn(insn, output, input...) \ -({ \ - int err; \ - \ - might_fault(); \ - \ - asm volatile(ASM_STAC "\n" \ - "1: " #insn "\n" \ - "2: " ASM_CLAC "\n" \ - ".section .fixup,\"ax\"\n" \ - "3: negl %%eax\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_FAULT(1b, 3b) \ - : [err] "=a" (err), output \ - : "0"(0), input); \ - err; \ -}) - -#define kernel_insn_err(insn, output, input...) \ -({ \ - int err; \ - asm volatile("1:" #insn "\n\t" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -#define kernel_insn(insn, output, input...) \ - asm volatile("1:" #insn "\n\t" \ - "2:\n" \ - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ - : output : input) - -static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) -{ - return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); -} - -static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else - return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - -} - -static inline void fxrstor(struct fxregs_state *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else - kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int fxrstor_safe(struct fxregs_state *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else - return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else - return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline void frstor(struct fregs_state *fx) -{ - kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int frstor_safe(struct fregs_state *fx) -{ - return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int frstor_from_user_sigframe(struct fregs_state __user *fx) -{ - return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline void fxsave(struct fxregs_state *fx) -{ - if (IS_ENABLED(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); - else - asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); -} - -/* These macros all use (%edi)/(%rdi) as the single memory argument. */ -#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" -#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" -#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" - -/* - * After this @err contains 0 on success or the negated trap number when - * the operation raises an exception. For faults this results in -EFAULT. - */ -#define XSTATE_OP(op, st, lmask, hmask, err) \ - asm volatile("1:" op "\n\t" \ - "xor %[err], %[err]\n" \ - "2:\n\t" \ - ".pushsection .fixup,\"ax\"\n\t" \ - "3: negl %%eax\n\t" \ - "jmp 2b\n\t" \ - ".popsection\n\t" \ - _ASM_EXTABLE_FAULT(1b, 3b) \ - : [err] "=a" (err) \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact - * format and supervisor states in addition to modified optimization in - * XSAVEOPT. - * - * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT - * supports modified optimization which is not supported by XSAVE. - * - * We use XSAVE as a fallback. - * - * The 661 label is defined in the ALTERNATIVE* macros as the address of the - * original instruction which gets replaced. We need to use it here as the - * address of the instruction where we might get an exception at. - */ -#define XSTATE_XSAVE(st, lmask, hmask, err) \ - asm volatile(ALTERNATIVE_2(XSAVE, \ - XSAVEOPT, X86_FEATURE_XSAVEOPT, \ - XSAVES, X86_FEATURE_XSAVES) \ - "\n" \ - "xor %[err], %[err]\n" \ - "3:\n" \ - ".pushsection .fixup,\"ax\"\n" \ - "4: movl $-2, %[err]\n" \ - "jmp 3b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(661b, 4b) \ - : [err] "=r" (err) \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact - * XSAVE area format. - */ -#define XSTATE_XRESTORE(st, lmask, hmask) \ - asm volatile(ALTERNATIVE(XRSTOR, \ - XRSTORS, X86_FEATURE_XSAVES) \ - "\n" \ - "3:\n" \ - _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\ - : \ - : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ - : "memory") - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline void os_xrstor_booting(struct xregs_state *xstate) -{ - u64 mask = xfeatures_mask_fpstate(); - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); - else - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - - /* - * We should never fault when copying from a kernel buffer, and the FPU - * state we set at boot time should be valid. - */ - WARN_ON_FPU(err); -} - -/* - * Save processor xstate to xsave area. - * - * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features - * and command line options. The choice is permanent until the next reboot. - */ -static inline void os_xsave(struct xregs_state *xstate) -{ - u64 mask = xfeatures_mask_all; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON_FPU(!alternatives_patched); - - XSTATE_XSAVE(xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * Restore processor xstate from xsave area. - * - * Uses XRSTORS when XSAVES is used, XRSTOR otherwise. - */ -static inline void os_xrstor(struct xregs_state *xstate, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - - XSTATE_XRESTORE(xstate, lmask, hmask); -} - -/* - * Save xstate to user space xsave area. - * - * We don't use modified optimization because xrstor/xrstors might track - * a different application. - * - * We don't use compacted format xsave area for - * backward compatibility for old applications which don't understand - * compacted format of xsave area. - */ -static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) -{ - /* - * Include the features which are not xsaved/rstored by the kernel - * internally, e.g. PKRU. That's user space ABI and also required - * to allow the signal handler to modify PKRU. - */ - u64 mask = xfeatures_mask_uabi(); - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - /* - * Clear the xsave header first, so that reserved fields are - * initialized to zero. - */ - err = __clear_user(&buf->header, sizeof(buf->header)); - if (unlikely(err)) - return -EFAULT; - - stac(); - XSTATE_OP(XSAVE, buf, lmask, hmask, err); - clac(); - - return err; -} - -/* - * Restore xstate from user space xsave area. - */ -static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask) -{ - struct xregs_state *xstate = ((__force struct xregs_state *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - stac(); - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - clac(); - - return err; -} - -/* - * Restore xstate from kernel space xsave area, return an error code instead of - * an exception. - */ -static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - if (cpu_feature_enabled(X86_FEATURE_XSAVES)) - XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); - else - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - - return err; -} - -extern void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); - -static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate) -{ - __restore_fpregs_from_fpstate(fpstate, xfeatures_mask_fpstate()); -} - -extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); - -/* - * FPU context switch related helper methods: - */ - -DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); - -/* - * The in-register FPU state for an FPU context on a CPU is assumed to be - * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx - * matches the FPU. - * - * If the FPU register state is valid, the kernel can skip restoring the - * FPU state from memory. - * - * Any code that clobbers the FPU registers or updates the in-memory - * FPU state for a task MUST let the rest of the kernel know that the - * FPU registers are no longer valid for this task. - * - * Either one of these invalidation functions is enough. Invalidate - * a resource you control: CPU if using the CPU for something else - * (with preemption disabled), FPU for the current task, or a task that - * is prevented from running by the current task. - */ -static inline void __cpu_invalidate_fpregs_state(void) -{ - __this_cpu_write(fpu_fpregs_owner_ctx, NULL); -} - -static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) -{ - fpu->last_cpu = -1; -} - -static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) -{ - return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; -} - -/* - * These generally need preemption protection to work, - * do try to avoid using these on their own: - */ -static inline void fpregs_deactivate(struct fpu *fpu) -{ - this_cpu_write(fpu_fpregs_owner_ctx, NULL); - trace_x86_fpu_regs_deactivated(fpu); -} - -static inline void fpregs_activate(struct fpu *fpu) -{ - this_cpu_write(fpu_fpregs_owner_ctx, fpu); - trace_x86_fpu_regs_activated(fpu); -} - -/* Internal helper for switch_fpu_return() and signal frame setup */ -static inline void fpregs_restore_userregs(void) -{ - struct fpu *fpu = ¤t->thread.fpu; - int cpu = smp_processor_id(); - - if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) - return; - - if (!fpregs_state_valid(fpu, cpu)) { - u64 mask; - - /* - * This restores _all_ xstate which has not been - * established yet. - * - * If PKRU is enabled, then the PKRU value is already - * correct because it was either set in switch_to() or in - * flush_thread(). So it is excluded because it might be - * not up to date in current->thread.fpu.xsave state. - */ - mask = xfeatures_mask_restore_user() | - xfeatures_mask_supervisor(); - __restore_fpregs_from_fpstate(&fpu->state, mask); - - fpregs_activate(fpu); - fpu->last_cpu = cpu; - } - clear_thread_flag(TIF_NEED_FPU_LOAD); -} - -/* - * FPU state switching for scheduling. - * - * This is a two-stage process: - * - * - switch_fpu_prepare() saves the old state. - * This is done within the context of the old process. - * - * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state - * will get loaded on return to userspace, or when the kernel needs it. - * - * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers - * are saved in the current thread's FPU register state. - * - * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not - * hold current()'s FPU registers. It is required to load the - * registers before returning to userland or using the content - * otherwise. - * - * The FPU context is only stored/restored for a user task and - * PF_KTHREAD is used to distinguish between kernel and user threads. - */ -static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) -{ - if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { - save_fpregs_to_fpstate(old_fpu); - /* - * The save operation preserved register state, so the - * fpu_fpregs_owner_ctx is still @old_fpu. Store the - * current CPU number in @old_fpu, so the next return - * to user space can avoid the FPU register restore - * when is returns on the same CPU and still owns the - * context. - */ - old_fpu->last_cpu = cpu; - - trace_x86_fpu_regs_deactivated(old_fpu); - } -} - -/* - * Misc helper functions: - */ - -/* - * Delay loading of the complete FPU state until the return to userland. - * PKRU is handled separately. - */ -static inline void switch_fpu_finish(struct fpu *new_fpu) -{ - if (cpu_feature_enabled(X86_FEATURE_FPU)) - set_thread_flag(TIF_NEED_FPU_LOAD); -} - -#endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h new file mode 100644 index 000000000000..99a8820e8cc4 --- /dev/null +++ b/arch/x86/include/asm/fpu/sched.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_FPU_SCHED_H +#define _ASM_X86_FPU_SCHED_H + +#include <linux/sched.h> + +#include <asm/cpufeature.h> +#include <asm/fpu/types.h> + +#include <asm/trace/fpu.h> + +extern void save_fpregs_to_fpstate(struct fpu *fpu); +extern void fpu__drop(struct fpu *fpu); +extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags); +extern void fpu_flush_thread(void); + +/* + * FPU state switching for scheduling. + * + * This is a two-stage process: + * + * - switch_fpu_prepare() saves the old state. + * This is done within the context of the old process. + * + * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state + * will get loaded on return to userspace, or when the kernel needs it. + * + * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers + * are saved in the current thread's FPU register state. + * + * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not + * hold current()'s FPU registers. It is required to load the + * registers before returning to userland or using the content + * otherwise. + * + * The FPU context is only stored/restored for a user task and + * PF_KTHREAD is used to distinguish between kernel and user threads. + */ +static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) +{ + if (cpu_feature_enabled(X86_FEATURE_FPU) && + !(current->flags & PF_KTHREAD)) { + save_fpregs_to_fpstate(old_fpu); + /* + * The save operation preserved register state, so the + * fpu_fpregs_owner_ctx is still @old_fpu. Store the + * current CPU number in @old_fpu, so the next return + * to user space can avoid the FPU register restore + * when is returns on the same CPU and still owns the + * context. + */ + old_fpu->last_cpu = cpu; + + trace_x86_fpu_regs_deactivated(old_fpu); + } +} + +/* + * Delay loading of the complete FPU state until the return to userland. + * PKRU is handled separately. + */ +static inline void switch_fpu_finish(void) +{ + if (cpu_feature_enabled(X86_FEATURE_FPU)) + set_thread_flag(TIF_NEED_FPU_LOAD); +} + +#endif /* _ASM_X86_FPU_SCHED_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 8b6631dffefd..22b0273a8bf1 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -5,6 +5,11 @@ #ifndef _ASM_X86_FPU_SIGNAL_H #define _ASM_X86_FPU_SIGNAL_H +#include <linux/compat.h> +#include <linux/user.h> + +#include <asm/fpu/types.h> + #ifdef CONFIG_X86_64 # include <uapi/asm/sigcontext.h> # include <asm/user32.h> @@ -31,6 +36,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long fpu__get_fpstate_size(void); -extern void fpu__init_prepare_fx_sw_frame(void); +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); +extern void fpu__clear_user_states(struct fpu *fpu); +extern bool fpu__restore_sig(void __user *buf, int ia32_frame); + +extern void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask); + +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index f5a38a5f3ae1..3c06c82ab355 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -120,6 +120,9 @@ enum xfeature { XFEATURE_RSRVD_COMP_13, XFEATURE_RSRVD_COMP_14, XFEATURE_LBR, + XFEATURE_RSRVD_COMP_16, + XFEATURE_XTILE_CFG, + XFEATURE_XTILE_DATA, XFEATURE_MAX, }; @@ -136,12 +139,21 @@ enum xfeature { #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) #define XFEATURE_MASK_PASID (1 << XFEATURE_PASID) #define XFEATURE_MASK_LBR (1 << XFEATURE_LBR) +#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG) +#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA) #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ | XFEATURE_MASK_ZMM_Hi256 \ | XFEATURE_MASK_Hi16_ZMM) +#ifdef CONFIG_X86_64 +# define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA \ + | XFEATURE_MASK_XTILE_CFG) +#else +# define XFEATURE_MASK_XTILE (0) +#endif + #define FIRST_EXTENDED_XFEATURE XFEATURE_YMM struct reg_128_bit { @@ -153,6 +165,9 @@ struct reg_256_bit { struct reg_512_bit { u8 regbytes[512/8]; }; +struct reg_1024_byte { + u8 regbytes[1024]; +}; /* * State component 2: @@ -255,6 +270,23 @@ struct arch_lbr_state { u64 ler_to; u64 ler_info; struct lbr_entry entries[]; +}; + +/* + * State component 17: 64-byte tile configuration register. + */ +struct xtile_cfg { + u64 tcfg[8]; +} __packed; + +/* + * State component 18: 1KB tile data register. + * Each register represents 16 64-byte rows of the matrix + * data. But the number of registers depends on the actual + * implementation. + */ +struct xtile_data { + struct reg_1024_byte tmm; } __packed; /* @@ -309,6 +341,91 @@ union fpregs_state { u8 __padding[PAGE_SIZE]; }; +struct fpstate { + /* @kernel_size: The size of the kernel register image */ + unsigned int size; + + /* @user_size: The size in non-compacted UABI format */ + unsigned int user_size; + + /* @xfeatures: xfeatures for which the storage is sized */ + u64 xfeatures; + + /* @user_xfeatures: xfeatures valid in UABI buffers */ + u64 user_xfeatures; + + /* @xfd: xfeatures disabled to trap userspace use. */ + u64 xfd; + + /* @is_valloc: Indicator for dynamically allocated state */ + unsigned int is_valloc : 1; + + /* @is_guest: Indicator for guest state (KVM) */ + unsigned int is_guest : 1; + + /* + * @is_confidential: Indicator for KVM confidential mode. + * The FPU registers are restored by the + * vmentry firmware from encrypted guest + * memory. On vmexit the FPU registers are + * saved by firmware to encrypted guest memory + * and the registers are scrubbed before + * returning to the host. So there is no + * content which is worth saving and restoring. + * The fpstate has to be there so that + * preemption and softirq FPU usage works + * without special casing. + */ + unsigned int is_confidential : 1; + + /* @in_use: State is in use */ + unsigned int in_use : 1; + + /* @regs: The register state union for all supported formats */ + union fpregs_state regs; + + /* @regs is dynamically sized! Don't add anything after @regs! */ +} __aligned(64); + +struct fpu_state_perm { + /* + * @__state_perm: + * + * This bitmap indicates the permission for state components, which + * are available to a thread group. The permission prctl() sets the + * enabled state bits in thread_group_leader()->thread.fpu. + * + * All run time operations use the per thread information in the + * currently active fpu.fpstate which contains the xfeature masks + * and sizes for kernel and user space. + * + * This master permission field is only to be used when + * task.fpu.fpstate based checks fail to validate whether the task + * is allowed to expand it's xfeatures set which requires to + * allocate a larger sized fpstate buffer. + * + * Do not access this field directly. Use the provided helper + * function. Unlocked access is possible for quick checks. + */ + u64 __state_perm; + + /* + * @__state_size: + * + * The size required for @__state_perm. Only valid to access + * with sighand locked. + */ + unsigned int __state_size; + + /* + * @__user_state_size: + * + * The size required for @__state_perm user part. Only valid to + * access with sighand locked. + */ + unsigned int __user_state_size; +}; + /* * Highest level per task FPU state data structure that * contains the FPU register state plus various FPU @@ -337,19 +454,100 @@ struct fpu { unsigned long avx512_timestamp; /* - * @state: + * @fpstate: + * + * Pointer to the active struct fpstate. Initialized to + * point at @__fpstate below. + */ + struct fpstate *fpstate; + + /* + * @__task_fpstate: + * + * Pointer to an inactive struct fpstate. Initialized to NULL. Is + * used only for KVM support to swap out the regular task fpstate. + */ + struct fpstate *__task_fpstate; + + /* + * @perm: + * + * Permission related information + */ + struct fpu_state_perm perm; + + /* + * @__fpstate: * - * In-memory copy of all FPU registers that we save/restore - * over context switches. If the task is using the FPU then - * the registers in the FPU are more recent than this state - * copy. If the task context-switches away then they get - * saved here and represent the FPU state. + * Initial in-memory storage for FPU registers which are saved in + * context switch and when the kernel uses the FPU. The registers + * are restored from this storage on return to user space if they + * are not longer containing the tasks FPU register state. */ - union fpregs_state state; + struct fpstate __fpstate; /* - * WARNING: 'state' is dynamically-sized. Do not put + * WARNING: '__fpstate' is dynamically-sized. Do not put * anything after it here. */ }; +/* + * Guest pseudo FPU container + */ +struct fpu_guest { + /* + * @fpstate: Pointer to the allocated guest fpstate + */ + struct fpstate *fpstate; +}; + +/* + * FPU state configuration data. Initialized at boot time. Read only after init. + */ +struct fpu_state_config { + /* + * @max_size: + * + * The maximum size of the register state buffer. Includes all + * supported features except independent managed features. + */ + unsigned int max_size; + + /* + * @default_size: + * + * The default size of the register state buffer. Includes all + * supported features except independent managed features and + * features which have to be requested by user space before usage. + */ + unsigned int default_size; + + /* + * @max_features: + * + * The maximum supported features bitmap. Does not include + * independent managed features. + */ + u64 max_features; + + /* + * @default_features: + * + * The default supported features bitmap. Does not include + * independent managed features and features which have to + * be requested by user space before usage. + */ + u64 default_features; + /* + * @legacy_features: + * + * Features which can be reported back to user space + * even without XSAVE support, i.e. legacy features FP + SSE + */ + u64 legacy_features; +}; + +/* FPU state configuration information */ +extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg; + #endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h index 1c7ab8d95da5..79f95d3787e2 100644 --- a/arch/x86/include/asm/fpu/xcr.h +++ b/arch/x86/include/asm/fpu/xcr.h @@ -2,17 +2,6 @@ #ifndef _ASM_X86_FPU_XCR_H #define _ASM_X86_FPU_XCR_H -/* - * MXCSR and XCR definitions: - */ - -static inline void ldmxcsr(u32 mxcsr) -{ - asm volatile("ldmxcsr %0" :: "m" (mxcsr)); -} - -extern unsigned int mxcsr_feature_mask; - #define XCR_XFEATURE_ENABLED_MASK 0x00000000 static inline u64 xgetbv(u32 index) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 109dfcc75299..0f8b90ab18c9 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -14,6 +14,8 @@ #define XSTATE_CPUID 0x0000000d +#define TILE_CPUID 0x0000001d + #define FXSAVE_SIZE 512 #define XSAVE_HDR_SIZE 64 @@ -33,7 +35,8 @@ XFEATURE_MASK_Hi16_ZMM | \ XFEATURE_MASK_PKRU | \ XFEATURE_MASK_BNDREGS | \ - XFEATURE_MASK_BNDCSR) + XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_XTILE) /* * Features which are restored when returning to user space. @@ -43,6 +46,9 @@ #define XFEATURE_MASK_USER_RESTORE \ (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) +/* Features which are dynamically enabled for a process on request */ +#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA + /* All currently supported supervisor features */ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID) @@ -78,78 +84,42 @@ XFEATURE_MASK_INDEPENDENT | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif - -extern u64 xfeatures_mask_all; - -static inline u64 xfeatures_mask_supervisor(void) -{ - return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED; -} - /* - * The xfeatures which are enabled in XCR0 and expected to be in ptrace - * buffers and signal frames. + * The feature mask required to restore FPU state: + * - All user states which are not eagerly switched in switch_to()/exec() + * - The suporvisor states */ -static inline u64 xfeatures_mask_uabi(void) -{ - return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; -} - -/* - * The xfeatures which are restored by the kernel when returning to user - * mode. This is not necessarily the same as xfeatures_mask_uabi() as the - * kernel does not manage all XCR0 enabled features via xsave/xrstor as - * some of them have to be switched eagerly on context switch and exec(). - */ -static inline u64 xfeatures_mask_restore_user(void) -{ - return xfeatures_mask_all & XFEATURE_MASK_USER_RESTORE; -} - -/* - * Like xfeatures_mask_restore_user() but additionally restors the - * supported supervisor states. - */ -static inline u64 xfeatures_mask_fpstate(void) -{ - return xfeatures_mask_all & \ - (XFEATURE_MASK_USER_RESTORE | XFEATURE_MASK_SUPERVISOR_SUPPORTED); -} - -static inline u64 xfeatures_mask_independent(void) -{ - if (!boot_cpu_has(X86_FEATURE_ARCH_LBR)) - return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; - - return XFEATURE_MASK_INDEPENDENT; -} +#define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \ + XFEATURE_MASK_SUPERVISOR_SUPPORTED) extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); -void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); int xfeature_size(int xfeature_nr); -int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); -int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); void xsaves(struct xregs_state *xsave, u64 mask); void xrstors(struct xregs_state *xsave, u64 mask); -enum xstate_copy_mode { - XSTATE_COPY_FP, - XSTATE_COPY_FX, - XSTATE_COPY_XSAVE, -}; +int xfd_enable_feature(u64 xfd_err); + +#ifdef CONFIG_X86_64 +DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); +#endif + +#ifdef CONFIG_X86_64 +DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic); -struct membuf; -void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, - enum xstate_copy_mode mode); +static __always_inline __pure bool fpu_state_size_dynamic(void) +{ + return static_branch_unlikely(&__fpu_state_size_dynamic); +} +#else +static __always_inline __pure bool fpu_state_size_dynamic(void) +{ + return false; +} +#endif #endif diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9f3130f40807..024d9797646e 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -57,6 +57,13 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) #define ftrace_instruction_pointer_set(fregs, _ip) \ do { (fregs)->regs.ip = (_ip); } while (0) + +struct ftrace_ops; +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#else +#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR #endif #ifdef CONFIG_DYNAMIC_FTRACE @@ -65,8 +72,6 @@ struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; -#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR - #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 2322d6bd5883..381e88122a5f 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -276,6 +276,23 @@ enum hv_isolation_type { #define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT #define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC +/* Hyper-V memory host visibility */ +enum hv_mem_host_visibility { + VMBUS_PAGE_NOT_VISIBLE = 0, + VMBUS_PAGE_VISIBLE_READ_ONLY = 1, + VMBUS_PAGE_VISIBLE_READ_WRITE = 3 +}; + +/* HvCallModifySparseGpaPageHostVisibility hypercall */ +#define HV_MAX_MODIFY_GPA_REP_COUNT ((PAGE_SIZE / sizeof(u64)) - 2) +struct hv_gpa_range_for_visibility { + u64 partition_id; + u32 host_visibility:2; + u32 reserved0:30; + u32 reserved1; + u64 gpa_page_list[HV_MAX_MODIFY_GPA_REP_COUNT]; +} __packed; + /* * Declare the MSR used to setup pages used to communicate with the hypervisor. */ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 2c5f7861d373..fada857f0a1e 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -68,6 +68,6 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm); #endif -#endif /* !CONFIG_IA32_SUPPORT */ +#endif /* CONFIG_IA32_EMULATION */ #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 91d7182ad2d6..4ec3613551e3 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -21,6 +21,7 @@ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs); unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); int insn_get_code_seg_params(struct pt_regs *regs); +int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip); int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]); int insn_fetch_from_user_inatomic(struct pt_regs *regs, diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 841a5d104afa..5c6a4af0b911 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -391,6 +391,7 @@ extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc #endif +#ifdef CONFIG_AMD_MEM_ENCRYPT extern bool arch_memremap_can_ram_remap(resource_size_t offset, unsigned long size, unsigned long flags); @@ -398,6 +399,13 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, extern bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size); +#else +static inline bool phys_mem_access_encrypted(unsigned long phys_addr, + unsigned long size) +{ + return true; +} +#endif /** * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 562854c60808..ae9d40f6c706 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -58,7 +58,7 @@ * the output constraints to make the compiler aware that R11 cannot be * reused after the asm() statement. * - * For builds with CONFIG_UNWIND_FRAME_POINTER ASM_CALL_CONSTRAINT is + * For builds with CONFIG_UNWINDER_FRAME_POINTER, ASM_CALL_CONSTRAINT is * required as well as this prevents certain creative GCC variants from * misplacing the ASM code. * @@ -77,11 +77,11 @@ * Function calls can clobber anything except the callee-saved * registers. Tell the compiler. */ -#define call_on_irqstack(func, asm_call, argconstr...) \ +#define call_on_stack(stack, func, asm_call, argconstr...) \ { \ register void *tos asm("r11"); \ \ - tos = ((void *)__this_cpu_read(hardirq_stack_ptr)); \ + tos = ((void *)(stack)); \ \ asm_inline volatile( \ "movq %%rsp, (%[tos]) \n" \ @@ -98,6 +98,25 @@ ); \ } +#define ASM_CALL_ARG0 \ + "call %P[__func] \n" + +#define ASM_CALL_ARG1 \ + "movq %[arg1], %%rdi \n" \ + ASM_CALL_ARG0 + +#define ASM_CALL_ARG2 \ + "movq %[arg2], %%rsi \n" \ + ASM_CALL_ARG1 + +#define ASM_CALL_ARG3 \ + "movq %[arg3], %%rdx \n" \ + ASM_CALL_ARG2 + +#define call_on_irqstack(func, asm_call, argconstr...) \ + call_on_stack(__this_cpu_read(hardirq_stack_ptr), \ + func, asm_call, argconstr) + /* Macros to assert type correctness for run_*_on_irqstack macros */ #define assert_function_type(func, proto) \ static_assert(__builtin_types_compatible_p(typeof(&func), proto)) @@ -147,8 +166,7 @@ */ #define ASM_CALL_SYSVEC \ "call irq_enter_rcu \n" \ - "movq %[arg1], %%rdi \n" \ - "call %P[__func] \n" \ + ASM_CALL_ARG1 \ "call irq_exit_rcu \n" #define SYSVEC_CONSTRAINTS , [arg1] "r" (regs) @@ -168,12 +186,10 @@ */ #define ASM_CALL_IRQ \ "call irq_enter_rcu \n" \ - "movq %[arg1], %%rdi \n" \ - "movl %[arg2], %%esi \n" \ - "call %P[__func] \n" \ + ASM_CALL_ARG2 \ "call irq_exit_rcu \n" -#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" (vector) +#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" ((unsigned long)vector) #define run_irq_on_irqstack_cond(func, regs, vector) \ { \ @@ -185,9 +201,7 @@ IRQ_CONSTRAINTS, regs, vector); \ } -#define ASM_CALL_SOFTIRQ \ - "call %P[__func] \n" - +#ifndef CONFIG_PREEMPT_RT /* * Macro to invoke __do_softirq on the irq stack. This is only called from * task context when bottom halves are about to be reenabled and soft @@ -197,10 +211,12 @@ #define do_softirq_own_stack() \ { \ __this_cpu_write(hardirq_stack_inuse, true); \ - call_on_irqstack(__do_softirq, ASM_CALL_SOFTIRQ); \ + call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \ __this_cpu_write(hardirq_stack_inuse, false); \ } +#endif + #else /* CONFIG_X86_64 */ /* System vector handlers always run on the stack they interrupted. */ #define run_sysvec_on_irqstack_cond(func, regs) \ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 0a6e34b07017..11b7c06e2828 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -129,7 +129,7 @@ relocate_kernel(unsigned long indirection_page, unsigned long page_list, unsigned long start_address, unsigned int preserve_context, - unsigned int sme_active); + unsigned int host_mem_enc_active); #endif #define ARCH_HAS_KIMAGE_ARCH diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index bd7f5886a789..71ea2eab43d5 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -49,7 +49,6 @@ extern __visible kprobe_opcode_t optprobe_template_end[]; extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); -asmlinkage void kretprobe_trampoline(void); extern void arch_kprobe_override_function(struct pt_regs *regs); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8f48a7ec577..2acf37cc1991 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -50,7 +50,7 @@ * so ratio of 4 should be enough. */ #define KVM_VCPU_ID_RATIO 4 -#define KVM_MAX_VCPU_ID (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) +#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) /* memory slots that are not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 3 @@ -407,6 +407,7 @@ struct kvm_mmu_root_info { #define KVM_HAVE_MMU_RWLOCK struct kvm_mmu_page; +struct kvm_page_fault; /* * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, @@ -416,8 +417,7 @@ struct kvm_mmu_page; struct kvm_mmu { unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); - int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, - bool prefault); + int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, @@ -499,7 +499,6 @@ struct kvm_pmu { u64 fixed_ctr_ctrl; u64 global_ctrl; u64 global_status; - u64 global_ovf_ctrl; u64 counter_bitmask[2]; u64 global_ctrl_mask; u64 global_ovf_ctrl_mask; @@ -581,7 +580,6 @@ struct kvm_vcpu_hv { struct kvm_hyperv_exit exit; struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); - cpumask_t tlb_flush; bool enforce_cpuid; struct { u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */ @@ -691,18 +689,18 @@ struct kvm_vcpu_arch { * * Note that while the PKRU state lives inside the fpu registers, * it is switched out separately at VMENTER and VMEXIT time. The - * "guest_fpu" state here contains the guest FPU context, with the + * "guest_fpstate" state here contains the guest FPU context, with the * host PRKU bits. */ - struct fpu *user_fpu; - struct fpu *guest_fpu; + struct fpu_guest guest_fpu; u64 xcr0; u64 guest_supported_xcr0; struct kvm_pio_request pio; void *pio_data; - void *guest_ins_data; + void *sev_pio_data; + unsigned sev_pio_count; u8 event_exit_inst_len; @@ -1073,7 +1071,7 @@ struct kvm_arch { atomic_t apic_map_dirty; /* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */ - struct mutex apicv_update_lock; + struct rw_semaphore apicv_update_lock; bool apic_access_memslot_enabled; unsigned long apicv_inhibit_reasons; @@ -1087,17 +1085,23 @@ struct kvm_arch { unsigned long irq_sources_bitmap; s64 kvmclock_offset; + + /* + * This also protects nr_vcpus_matched_tsc which is read from a + * preemption-disabled region, so it must be a raw spinlock. + */ raw_spinlock_t tsc_write_lock; u64 last_tsc_nsec; u64 last_tsc_write; u32 last_tsc_khz; + u64 last_tsc_offset; u64 cur_tsc_nsec; u64 cur_tsc_write; u64 cur_tsc_offset; u64 cur_tsc_generation; int nr_vcpus_matched_tsc; - spinlock_t pvclock_gtod_sync_lock; + seqcount_raw_spinlock_t pvclock_sc; bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; @@ -1207,10 +1211,11 @@ struct kvm_arch { #endif /* CONFIG_X86_64 */ /* - * If set, rmaps have been allocated for all memslots and should be - * allocated for any newly created or modified memslots. + * If set, at least one shadow root has been allocated. This flag + * is used as one input when determining whether certain memslot + * related allocations are necessary. */ - bool memslots_have_rmaps; + bool shadow_root_allocated; #if IS_ENABLED(CONFIG_HYPERV) hpa_t hv_root_tdp; @@ -1296,6 +1301,8 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) } struct kvm_x86_ops { + const char *name; + int (*hardware_enable)(void); void (*hardware_disable)(void); void (*hardware_unsetup)(void); @@ -1405,10 +1412,11 @@ struct kvm_x86_ops { void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier); /* - * Retrieve somewhat arbitrary exit information. Intended to be used - * only from within tracepoints to avoid VMREADs when tracing is off. + * Retrieve somewhat arbitrary exit information. Intended to + * be used only from within tracepoints or error paths. */ - void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, + void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, u32 *exit_int_info, u32 *exit_int_info_err_code); int (*check_intercept)(struct kvm_vcpu *vcpu, @@ -1541,6 +1549,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void) { return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); } + +#define __KVM_HAVE_ARCH_VM_FREE void kvm_arch_free_vm(struct kvm *kvm); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB @@ -1657,6 +1667,9 @@ extern u64 kvm_mce_cap_supported; int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, + u64 *data, u8 ndata); +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); @@ -1685,8 +1698,6 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); -void kvm_free_guest_fpu(struct kvm_vcpu *vcpu); - void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); @@ -1715,9 +1726,6 @@ void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); -int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gfn_t gfn, void *data, int offset, int len, - u32 access); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); @@ -1866,7 +1874,6 @@ u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier); unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); -void kvm_make_mclock_inprogress_request(struct kvm *kvm); void kvm_make_scan_ioapic_request(struct kvm *kvm); void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, unsigned long *vcpu_bitmap); @@ -1935,6 +1942,9 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) int kvm_cpu_dirty_log_size(void); -int alloc_all_memslots_rmaps(struct kvm *kvm); +int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); + +#define KVM_CLOCK_VALID_FLAGS \ + (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 87bd6025d91d..9d4a3b1b25b9 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -46,11 +46,15 @@ struct kvm_page_track_notifier_node { struct kvm_page_track_notifier_node *node); }; -void kvm_page_track_init(struct kvm *kvm); +int kvm_page_track_init(struct kvm *kvm); void kvm_page_track_cleanup(struct kvm *kvm); +bool kvm_page_track_write_tracking_enabled(struct kvm *kvm); +int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot); + void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); -int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, +int kvm_page_track_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, unsigned long npages); void kvm_slot_page_track_add_page(struct kvm *kvm, @@ -59,8 +63,9 @@ void kvm_slot_page_track_add_page(struct kvm *kvm, void kvm_slot_page_track_remove_page(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, enum kvm_page_track_mode mode); -bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, - enum kvm_page_track_mode mode); +bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu, + struct kvm_memory_slot *slot, gfn_t gfn, + enum kvm_page_track_mode mode); void kvm_page_track_register_notifier(struct kvm *kvm, diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h index eceea9299097..6c5765192102 100644 --- a/arch/x86/include/asm/kvmclock.h +++ b/arch/x86/include/asm/kvmclock.h @@ -2,6 +2,20 @@ #ifndef _ASM_X86_KVM_CLOCK_H #define _ASM_X86_KVM_CLOCK_H +#include <linux/percpu.h> + extern struct clocksource kvm_clock; +DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); + +static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +{ + return &this_cpu_read(hv_clock_per_cpu)->pvti; +} + +static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) +{ + return this_cpu_read(hv_clock_per_cpu); +} + #endif /* _ASM_X86_KVM_CLOCK_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index da9321548f6f..8f6395d9e209 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -205,28 +205,16 @@ struct cper_ia_proc_ctx; int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); void mcheck_cpu_clear(struct cpuinfo_x86 *c); -void mcheck_vendor_init_severity(void); int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id); #else static inline int mcheck_init(void) { return 0; } static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} -static inline void mcheck_vendor_init_severity(void) {} static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id) { return -EINVAL; } #endif -#ifdef CONFIG_X86_ANCIENT_MCE -void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void winchip_mcheck_init(struct cpuinfo_x86 *c); -static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } -#else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void enable_p5_mce(void) {} -#endif - void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct device *, mce_device); @@ -358,7 +346,7 @@ extern int mce_threshold_remove_device(unsigned int cpu); void mce_amd_feature_init(struct cpuinfo_x86 *c); int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); - +enum smca_bank_types smca_get_bank_type(unsigned int bank); #else static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 9c80c68d75b5..2d4f5c17d79c 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -13,6 +13,7 @@ #ifndef __ASSEMBLY__ #include <linux/init.h> +#include <linux/cc_platform.h> #include <asm/bootparam.h> @@ -50,9 +51,6 @@ void __init mem_encrypt_free_decrypted_mem(void); void __init mem_encrypt_init(void); void __init sev_es_init_vc_handling(void); -bool sme_active(void); -bool sev_active(void); -bool sev_es_active(void); #define __bss_decrypted __section(".bss..decrypted") @@ -75,9 +73,6 @@ static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } static inline void __init sme_enable(struct boot_params *bp) { } static inline void sev_es_init_vc_handling(void) { } -static inline bool sme_active(void) { return false; } -static inline bool sev_active(void) { return false; } -static inline bool sev_es_active(void) { return false; } static inline int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; } @@ -101,11 +96,6 @@ static inline void mem_encrypt_free_decrypted_mem(void) { } extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; -static inline bool mem_encrypt_active(void) -{ - return sme_me_mask; -} - static inline u64 sme_get_me_mask(void) { return sme_me_mask; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index ab45a220fac4..d6bfdfb0f0af 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -130,14 +130,11 @@ static inline unsigned int x86_cpuid_family(void) extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); void reload_early_microcode(void); -extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); extern bool initrd_gone; #else static inline void __init load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } static inline void reload_early_microcode(void) { } -static inline bool -get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } #endif #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index adccbc209169..da3972fe5a7a 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -11,23 +11,14 @@ #include <asm/paravirt.h> #include <asm/mshyperv.h> +union hv_ghcb; + +DECLARE_STATIC_KEY_FALSE(isolation_type_snp); + typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, void *data); -static inline void hv_set_register(unsigned int reg, u64 value) -{ - wrmsrl(reg, value); -} - -static inline u64 hv_get_register(unsigned int reg) -{ - u64 value; - - rdmsrl(reg, value); - return value; -} - #define hv_get_raw_timer() rdtsc_ordered() void hyperv_vector_handler(struct pt_regs *regs); @@ -39,6 +30,8 @@ extern void *hv_hypercall_pg; extern u64 hv_current_partition_id; +extern union hv_ghcb __percpu **hv_ghcb_pg; + int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); @@ -188,6 +181,50 @@ struct irq_domain *hv_create_pci_msi_domain(void); int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, struct hv_interrupt_entry *entry); int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); +int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible); + +#ifdef CONFIG_AMD_MEM_ENCRYPT +void hv_ghcb_msr_write(u64 msr, u64 value); +void hv_ghcb_msr_read(u64 msr, u64 *value); +#else +static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} +static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} +#endif + +extern bool hv_isolation_type_snp(void); + +static inline bool hv_is_synic_reg(unsigned int reg) +{ + if ((reg >= HV_REGISTER_SCONTROL) && + (reg <= HV_REGISTER_SINT15)) + return true; + return false; +} + +static inline u64 hv_get_register(unsigned int reg) +{ + u64 value; + + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) + hv_ghcb_msr_read(reg, &value); + else + rdmsrl(reg, value); + return value; +} + +static inline void hv_set_register(unsigned int reg, u64 value) +{ + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { + hv_ghcb_msr_write(reg, value); + + /* Write proxy bit via wrmsl instruction */ + if (reg >= HV_REGISTER_SINT0 && + reg <= HV_REGISTER_SINT15) + wrmsrl(reg, value | 1 << 20); + } else { + wrmsrl(reg, value); + } +} #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} @@ -205,6 +242,13 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, { return -1; } +static inline void hv_set_register(unsigned int reg, u64 value) { } +static inline u64 hv_get_register(unsigned int reg) { return 0; } +static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages, + bool visible) +{ + return -1; +} #endif /* CONFIG_HYPERV */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a7c413432b33..01e2650b9585 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -625,6 +625,8 @@ #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 #define MSR_IA32_XSS 0x00000da0 #define MSR_IA32_APICBASE 0x0000001b diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index a3f87f1015d3..6b52182e178a 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -92,7 +92,7 @@ static __always_inline unsigned long long __rdmsr(unsigned int msr) asm volatile("1: rdmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR) : EAX_EDX_RET(val, low, high) : "c" (msr)); return EAX_EDX_VAL(val, low, high); @@ -102,7 +102,7 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) { asm volatile("1: wrmsr\n" "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR) : : "c" (msr), "a"(low), "d" (high) : "memory"); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ec2d5c8c6694..cc74dc584836 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -5,12 +5,15 @@ #include <linux/static_key.h> #include <linux/objtool.h> +#include <linux/linkage.h> #include <asm/alternative.h> #include <asm/cpufeatures.h> #include <asm/msr-index.h> #include <asm/unwind_hints.h> +#define RETPOLINE_THUNK_SIZE 32 + /* * Fill the CPU return stack buffer. * @@ -118,6 +121,16 @@ ".popsection\n\t" #ifdef CONFIG_RETPOLINE + +typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; + +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; +#include <asm/GEN-for-each-reg.h> +#undef GEN + +extern retpoline_thunk_t __x86_indirect_thunk_array[]; + #ifdef CONFIG_X86_64 /* @@ -303,63 +316,4 @@ static inline void mds_idle_clear_cpu_buffers(void) #endif /* __ASSEMBLY__ */ -/* - * Below is used in the eBPF JIT compiler and emits the byte sequence - * for the following assembly: - * - * With retpolines configured: - * - * callq do_rop - * spec_trap: - * pause - * lfence - * jmp spec_trap - * do_rop: - * mov %rcx,(%rsp) for x86_64 - * mov %edx,(%esp) for x86_32 - * retq - * - * Without retpolines configured: - * - * jmp *%rcx for x86_64 - * jmp *%edx for x86_32 - */ -#ifdef CONFIG_RETPOLINE -# ifdef CONFIG_X86_64 -# define RETPOLINE_RCX_BPF_JIT_SIZE 17 -# define RETPOLINE_RCX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* callq do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \ - EMIT1(0xC3); /* retq */ \ -} while (0) -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ -do { \ - EMIT1_off32(0xE8, 7); /* call do_rop */ \ - /* spec_trap: */ \ - EMIT2(0xF3, 0x90); /* pause */ \ - EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ - EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ - /* do_rop: */ \ - EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \ - EMIT1(0xC3); /* ret */ \ -} while (0) -# endif -#else /* !CONFIG_RETPOLINE */ -# ifdef CONFIG_X86_64 -# define RETPOLINE_RCX_BPF_JIT_SIZE 2 -# define RETPOLINE_RCX_BPF_JIT() \ - EMIT2(0xFF, 0xE1); /* jmp *%rcx */ -# else /* !CONFIG_X86_64 */ -# define RETPOLINE_EDX_BPF_JIT() \ - EMIT2(0xFF, 0xE2) /* jmp *%edx */ -# endif -#endif - #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 94dbd51df58f..b13f8488ac85 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -43,7 +43,7 @@ static inline void copy_page(void *to, void *from) { memcpy(to, from, PAGE_SIZE); } -#endif /* CONFIG_X86_3DNOW */ +#endif /* CONFIG_X86_USE_3DNOW */ #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PAGE_32_H */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index a8d4ad856568..e9e2c3ba5923 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -15,7 +15,7 @@ #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) +#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index da3a1ac82be5..cebec95a7124 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -52,11 +52,11 @@ void __init paravirt_set_cap(void); /* The paravirtualized I/O functions */ static inline void slow_down_io(void) { - pv_ops.cpu.io_delay(); + PVOP_VCALL0(cpu.io_delay); #ifdef REALLY_SLOW_IO - pv_ops.cpu.io_delay(); - pv_ops.cpu.io_delay(); - pv_ops.cpu.io_delay(); + PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(cpu.io_delay); + PVOP_VCALL0(cpu.io_delay); #endif } @@ -113,12 +113,12 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, /* * These special macros can be used to get or set a debugging register */ -static inline unsigned long paravirt_get_debugreg(int reg) +static __always_inline unsigned long paravirt_get_debugreg(int reg) { return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); } #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) -static inline void set_debugreg(unsigned long val, int reg) +static __always_inline void set_debugreg(unsigned long val, int reg) { PVOP_VCALL2(cpu.set_debugreg, reg, val); } @@ -133,14 +133,14 @@ static inline void write_cr0(unsigned long x) PVOP_VCALL1(cpu.write_cr0, x); } -static inline unsigned long read_cr2(void) +static __always_inline unsigned long read_cr2(void) { return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2, "mov %%cr2, %%rax;", ALT_NOT(X86_FEATURE_XENPV)); } -static inline void write_cr2(unsigned long x) +static __always_inline void write_cr2(unsigned long x) { PVOP_VCALL1(mmu.write_cr2, x); } @@ -653,10 +653,10 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); * functions. */ #define PV_THUNK_NAME(func) "__raw_callee_save_" #func -#define PV_CALLEE_SAVE_REGS_THUNK(func) \ +#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \ extern typeof(func) __raw_callee_save_##func; \ \ - asm(".pushsection .text;" \ + asm(".pushsection " section ", \"ax\";" \ ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ PV_THUNK_NAME(func) ":" \ @@ -669,6 +669,9 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ ".popsection") +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + __PV_CALLEE_SAVE_REGS_THUNK(func, ".text") + /* Get a reference to a callee-save function */ #define PV_CALLEE_SAVE(func) \ ((struct paravirt_callee_save) { __raw_callee_save_##func }) @@ -678,23 +681,23 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); ((struct paravirt_callee_save) { func }) #ifdef CONFIG_PARAVIRT_XXL -static inline notrace unsigned long arch_local_save_flags(void) +static __always_inline unsigned long arch_local_save_flags(void) { return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;", ALT_NOT(X86_FEATURE_XENPV)); } -static inline notrace void arch_local_irq_disable(void) +static __always_inline void arch_local_irq_disable(void) { PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV)); } -static inline notrace void arch_local_irq_enable(void) +static __always_inline void arch_local_irq_enable(void) { PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV)); } -static inline notrace unsigned long arch_local_irq_save(void) +static __always_inline unsigned long arch_local_irq_save(void) { unsigned long f; diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 5c7bcaa79623..1d5f14aff5f6 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,8 +2,6 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H -#define ARCH_DEFAULT_PKEY 0 - /* * If more than 16 keys are ever supported, a thorough audit * will be necessary to ensure that the types that store key diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h index ccc539faa5bb..4cd49afa0ca4 100644 --- a/arch/x86/include/asm/pkru.h +++ b/arch/x86/include/asm/pkru.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PKRU_H #define _ASM_X86_PKRU_H -#include <asm/fpu/xstate.h> +#include <asm/cpufeature.h> #define PKRU_AD_BIT 0x1 #define PKRU_WD_BIT 0x2 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9ad2acaaae9b..191878a65c61 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -164,7 +164,8 @@ enum cpuid_regs_idx { #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 #define X86_VENDOR_ZHAOXIN 10 -#define X86_VENDOR_NUM 11 +#define X86_VENDOR_VORTEX 11 +#define X86_VENDOR_NUM 12 #define X86_VENDOR_UNKNOWN 0xff @@ -461,9 +462,6 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* !X86_64 */ -extern unsigned int fpu_kernel_xstate_size; -extern unsigned int fpu_user_xstate_size; - struct perf_event; struct thread_struct { @@ -518,6 +516,7 @@ struct thread_struct { */ unsigned long iopl_emul; + unsigned int iopl_warn:1; unsigned int sig_on_uaccess_err:1; /* @@ -537,12 +536,12 @@ struct thread_struct { */ }; -/* Whitelist the FPU state from the task_struct for hardened usercopy. */ +extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size); + static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { - *offset = offsetof(struct thread_struct, fpu.state); - *size = fpu_kernel_xstate_size; + fpu_thread_struct_whitelist(offset, size); } static inline void @@ -589,7 +588,7 @@ static inline void load_sp0(unsigned long sp0) /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -unsigned long get_wchan(struct task_struct *p); +unsigned long __get_wchan(struct task_struct *p); /* * Generic CPUID function diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 8c5d1910a848..feed36d44d04 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -40,6 +40,6 @@ void x86_report_nx(void); extern int reboot_force; long do_arch_prctl_common(struct task_struct *task, int option, - unsigned long cpuid_enabled); + unsigned long arg2); #endif /* _ASM_X86_PROTO_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index b94f615600d5..703663175a5a 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -181,7 +181,7 @@ static inline bool any_64bit_mode(struct pt_regs *regs) #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp -static inline bool ip_within_syscall_gap(struct pt_regs *regs) +static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs) { bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 72044026eb3c..8dd8e8ec9fa5 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -339,7 +339,7 @@ static inline void __loadsegment_fs(unsigned short value) "1: movw %0, %%fs \n" "2: \n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs) + _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS) : : "rm" (value) : "memory"); } diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index fa5cd05d3b5b..ec060c433589 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -53,6 +53,7 @@ static inline u64 lower_bits(u64 val, unsigned int bits) struct real_mode_header; enum stack_type; +struct ghcb; /* Early IDT entry points for #VC handler */ extern void vc_no_ghcb(void); @@ -81,6 +82,11 @@ static __always_inline void sev_es_nmi_complete(void) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, + bool set_ghcb_msr, + struct es_em_ctxt *ctxt, + u64 exit_code, u64 exit_info_1, + u64 exit_info_2); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 630ff08532be..08b0e90623ad 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -16,7 +16,9 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); +DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id); DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); static inline struct cpumask *cpu_llc_shared_mask(int cpu) @@ -24,6 +26,11 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) return per_cpu(cpu_llc_shared_map, cpu); } +static inline struct cpumask *cpu_l2c_shared_mask(int cpu) +{ + return per_cpu(cpu_l2c_shared_map, cpu); +} + DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index f3fbb84ff8a7..68c257a3de0d 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -275,7 +275,7 @@ static inline int enqcmds(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; struct { char _[64]; } __iomem *__dst = dst; - int zf; + bool zf; /* * ENQCMDS %(rdx), rax diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f248eb2ac2d4..3881b5333eb8 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -38,6 +38,16 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task, struct stack_info *info); +static __always_inline +bool get_stack_guard_info(unsigned long *stack, struct stack_info *info) +{ + /* make sure it's not in the stack proper */ + if (get_stack_info_noinstr(stack, current, info)) + return false; + /* but if it is in the page below it, we hit a guard */ + return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info); +} + const char *stack_type_name(enum stack_type type); static inline bool on_stack(struct stack_info *info, void *addr, size_t len) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index cf132663c219..ebec69c35e95 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -57,6 +57,9 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long syscall_work; /* SYSCALL_WORK_ flags */ u32 status; /* thread synchronous flags */ +#ifdef CONFIG_SMP + u32 cpu; /* current CPU */ +#endif }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 9239399e5491..cc164777e661 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -103,6 +103,7 @@ static inline void setup_node_to_cpumask_map(void) { } #include <asm-generic/topology.h> extern const struct cpumask *cpu_coregroup_mask(int cpu); +extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) @@ -113,7 +114,9 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_die_per_package; #ifdef CONFIG_SMP +#define topology_cluster_id(cpu) (per_cpu(cpu_l2c_id, cpu)) #define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) +#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 879b77792f94..4645a6334063 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -22,8 +22,8 @@ DECLARE_EVENT_CLASS(x86_fpu, __entry->fpu = fpu; __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { - __entry->xfeatures = fpu->state.xsave.header.xfeatures; - __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; + __entry->xfeatures = fpu->fpstate->regs.xsave.header.xfeatures; + __entry->xcomp_bv = fpu->fpstate->regs.xsave.header.xcomp_bv; } ), TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 7f7200021bd1..6221be7cafc3 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -40,9 +40,9 @@ void math_emulate(struct math_emu_info *); bool fault_in_kernel_space(unsigned long address); #ifdef CONFIG_VMAP_STACK -void __noreturn handle_stack_overflow(const char *message, - struct pt_regs *regs, - unsigned long fault_address); +void __noreturn handle_stack_overflow(struct pt_regs *regs, + unsigned long fault_address, + struct stack_info *info); #endif #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 5c95d242f38d..33a68407def3 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -411,7 +411,7 @@ do { \ : [umem] "m" (__m(addr)), \ [efault] "i" (-EFAULT), "0" (err)) -#endif // CONFIG_CC_ASM_GOTO_OUTPUT +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 70fc159ebe69..2a1f8734416d 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/ftrace.h> +#include <linux/kprobes.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> @@ -15,6 +16,9 @@ struct unwind_state { unsigned long stack_mask; struct task_struct *task; int graph_idx; +#ifdef CONFIG_KRETPROBES + struct llist_node *kr_cur; +#endif bool error; #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; @@ -99,6 +103,31 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif +static inline +unsigned long unwind_recover_kretprobe(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ +#ifdef CONFIG_KRETPROBES + return is_kretprobe_trampoline(addr) ? + kretprobe_find_ret_addr(state->task, addr_p, &state->kr_cur) : + addr; +#else + return addr; +#endif +} + +/* Recover the return address modified by kretprobe and ftrace_graph. */ +static inline +unsigned long unwind_recover_ret_addr(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ + unsigned long ret; + + ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, addr_p); + return unwind_recover_kretprobe(state, ret, addr_p); +} + /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 8e574c0afef8..8b33674288ea 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -52,6 +52,11 @@ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm +#else + +#define UNWIND_HINT_FUNC \ + UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 454b20815f35..4a7ff8b0db20 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -308,13 +308,13 @@ HYPERVISOR_platform_op(struct xen_platform_op *op) return _hypercall1(int, platform_op, op); } -static inline int +static __always_inline int HYPERVISOR_set_debugreg(int reg, unsigned long value) { return _hypercall2(int, set_debugreg, reg, value); } -static inline unsigned long +static __always_inline unsigned long HYPERVISOR_get_debugreg(int reg) { return _hypercall1(unsigned long, get_debugreg, reg); @@ -358,7 +358,7 @@ HYPERVISOR_event_channel_op(int cmd, void *arg) return _hypercall2(int, event_channel_op, cmd, arg); } -static inline int +static __always_inline int HYPERVISOR_xen_version(int cmd, void *arg) { return _hypercall2(int, xen_version, cmd, arg); diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 3506d8c598c1..4557f7cb0fa6 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -14,16 +14,19 @@ static inline int pci_xen_hvm_init(void) return -1; } #endif -#if defined(CONFIG_XEN_DOM0) +#ifdef CONFIG_XEN_PV_DOM0 int __init pci_xen_initial_domain(void); -int xen_find_device_domain_owner(struct pci_dev *dev); -int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); -int xen_unregister_device_domain_owner(struct pci_dev *dev); #else static inline int __init pci_xen_initial_domain(void) { return -1; } +#endif +#ifdef CONFIG_XEN_DOM0 +int xen_find_device_domain_owner(struct pci_dev *dev); +int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); +int xen_unregister_device_domain_owner(struct pci_dev *dev); +#else static inline int xen_find_device_domain_owner(struct pci_dev *dev) { return -1; diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h index 6b56d0d45d15..66b4ddde7743 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -3,14 +3,10 @@ #define _ASM_X86_SWIOTLB_XEN_H #ifdef CONFIG_SWIOTLB_XEN -extern int xen_swiotlb; extern int __init pci_xen_swiotlb_detect(void); -extern void __init pci_xen_swiotlb_init(void); extern int pci_xen_swiotlb_init_late(void); #else -#define xen_swiotlb (0) -static inline int __init pci_xen_swiotlb_detect(void) { return 0; } -static inline void __init pci_xen_swiotlb_init(void) { } +#define pci_xen_swiotlb_detect NULL static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } #endif |