diff options
Diffstat (limited to 'arch/x86/include')
50 files changed, 579 insertions, 469 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index b19ec8282d50..1e51650b79d7 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -3,6 +3,7 @@ generated-y += syscalls_32.h generated-y += syscalls_64.h +generated-y += syscalls_x32.h generated-y += unistd_32_ia32.h generated-y += unistd_64_x32.h generated-y += xen-hypercalls.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 412b51e059c8..48067af94678 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void) extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_update_legacy_vectors(void); extern void lapic_online(void); extern void lapic_offline(void); extern bool apic_needs_pit(void); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 0603c7423aca..3ad3da9a7d97 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -3,25 +3,26 @@ #define _ASM_X86_ASM_H #ifdef __ASSEMBLY__ -# define __ASM_FORM(x) x -# define __ASM_FORM_RAW(x) x -# define __ASM_FORM_COMMA(x) x, +# define __ASM_FORM(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, #else #include <linux/stringify.h> - -# define __ASM_FORM(x) " " __stringify(x) " " -# define __ASM_FORM_RAW(x) __stringify(x) -# define __ASM_FORM_COMMA(x) " " __stringify(x) "," +# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " " +# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__) +# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) "," #endif +#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;) + #ifndef __x86_64__ /* 32 bit */ -# define __ASM_SEL(a,b) __ASM_FORM(a) -# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) +# define __ASM_SEL(a,b) __ASM_FORM(a) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else /* 64 bit */ -# define __ASM_SEL(a,b) __ASM_FORM(b) -# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) +# define __ASM_SEL(a,b) __ASM_FORM(b) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif #define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ @@ -119,6 +120,8 @@ # define CC_OUT(c) [_cc_ ## c] "=qm" #endif +#ifdef __KERNEL__ + /* Exception table entry */ #ifdef __ASSEMBLY__ # define _ASM_EXTABLE_HANDLE(from, to, handler) \ @@ -185,4 +188,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index f732741ad7c7..5e754e895767 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -269,6 +269,4 @@ static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) # include <asm/atomic64_64.h> #endif -#define ARCH_ATOMIC - #endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 4819d5e5a335..3ba772a69cc8 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -54,11 +54,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define dma_rmb() barrier() #define dma_wmb() barrier() -#ifdef CONFIG_X86_32 -#define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc") -#else -#define __smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc") -#endif +#define __smp_mb() asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc") + #define __smp_rmb() dma_rmb() #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ac37830ae941..d0ce5cfd3ac1 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -108,7 +108,7 @@ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -/* free ( 3*32+29) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ @@ -378,6 +378,7 @@ #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ #define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ #define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index f58de66091e5..8b6bd63530dc 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -9,10 +9,4 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params); void crash_smp_send_stop(void); -#ifdef CONFIG_KEXEC_CORE -void __init crash_reserve_low_1M(void); -#else -static inline void __init crash_reserve_low_1M(void) { } -#endif - #endif /* _ASM_X86_CRASH_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 476082a83d1c..ab97b22ac04a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -9,6 +9,7 @@ #include <asm/irq_vectors.h> #include <asm/cpu_entry_area.h> +#include <linux/debug_locks.h> #include <linux/smp.h> #include <linux/percpu.h> @@ -224,6 +225,26 @@ static inline void store_idt(struct desc_ptr *dtr) asm volatile("sidt %0":"=m" (*dtr)); } +static inline void native_gdt_invalidate(void) +{ + const struct desc_ptr invalid_gdt = { + .address = 0, + .size = 0 + }; + + native_load_gdt(&invalid_gdt); +} + +static inline void native_idt_invalidate(void) +{ + const struct desc_ptr invalid_idt = { + .address = 0, + .size = 0 + }; + + native_load_idt(&invalid_idt); +} + /* * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is * a read-only remapping. To prevent a page fault, the GDT is switched to the @@ -421,12 +442,10 @@ extern bool idt_is_f00f_address(unsigned long address); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); -extern void idt_setup_ist_traps(void); #else static inline void idt_setup_early_pf(void) { } -static inline void idt_setup_ist_traps(void) { } #endif -extern void idt_invalidate(void *addr); +extern void idt_invalidate(void); #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index b7dd944dc867..8f28fafa98b3 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,11 +56,8 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -#ifdef CONFIG_IOMMU_SUPPORT -# define DISABLE_ENQCMD 0 -#else -# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) -#endif +/* Force disable because it's broken beyond repair */ +#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 7d7500806af8..29fea180a665 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -312,6 +312,7 @@ do { \ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ } \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* @@ -328,6 +329,7 @@ extern unsigned long task_size_32bit(void); extern unsigned long task_size_64bit(int full_addr_space); extern unsigned long get_mmap_base(int is_legacy); extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); +extern unsigned long get_sigframe_size(void); #ifdef CONFIG_X86_32 @@ -349,6 +351,7 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */ @@ -357,6 +360,7 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (unsigned long __force)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) #define AT_SYSINFO 32 diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index ed33a14188f6..23bef08a8388 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -106,10 +106,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); */ #define PASID_DISABLED 0 -#ifdef CONFIG_IOMMU_SUPPORT -/* Update current's PASID MSR/state by mm's PASID. */ -void update_pasid(void); -#else static inline void update_pasid(void) { } -#endif + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 8d33ad80704f..5a18694a89b2 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -26,16 +26,17 @@ /* * High level FPU state handling functions: */ -extern void fpu__prepare_read(struct fpu *fpu); -extern void fpu__prepare_write(struct fpu *fpu); -extern void fpu__save(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); -extern int fpu__copy(struct task_struct *dst, struct task_struct *src); extern void fpu__clear_user_states(struct fpu *fpu); -extern void fpu__clear_all(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern void fpu_sync_fpstate(struct fpu *fpu); + +/* Clone and exit operations */ +extern int fpu_clone(struct task_struct *dst); +extern void fpu_flush_thread(void); + /* * Boot time FPU initialization functions: */ @@ -45,7 +46,6 @@ extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); -extern u64 fpu__get_supported_xfeatures_mask(void); /* * Debugging facility: @@ -86,23 +86,9 @@ extern void fpstate_init_soft(struct swregs_state *soft); #else static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif +extern void save_fpregs_to_fpstate(struct fpu *fpu); -static inline void fpstate_init_xstate(struct xregs_state *xsave) -{ - /* - * XRSTORS requires these bits set in xcomp_bv, or it will - * trigger #GP: - */ - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all; -} - -static inline void fpstate_init_fxstate(struct fxregs_state *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -} -extern void fpstate_sanitize_xstate(struct fpu *fpu); - +/* Returns 0 or the negated trap number, which results in -EFAULT for #PF */ #define user_insn(insn, output, input...) \ ({ \ int err; \ @@ -110,14 +96,14 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); might_fault(); \ \ asm volatile(ASM_STAC "\n" \ - "1:" #insn "\n\t" \ + "1: " #insn "\n" \ "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ + "3: negl %%eax\n" \ " jmp 2b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ + _ASM_EXTABLE_FAULT(1b, 3b) \ + : [err] "=a" (err), output \ : "0"(0), input); \ err; \ }) @@ -143,12 +129,12 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ : output : input) -static inline int copy_fregs_to_user(struct fregs_state __user *fx) +static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx) { return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } -static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) +static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); @@ -157,7 +143,7 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) } -static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) +static inline void fxrstor(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -165,7 +151,7 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx) +static inline int fxrstor_safe(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -173,7 +159,7 @@ static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx) return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) +static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -181,27 +167,27 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline void copy_kernel_to_fregs(struct fregs_state *fx) +static inline void frstor(struct fregs_state *fx) { kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_kernel_to_fregs_err(struct fregs_state *fx) +static inline int frstor_safe(struct fregs_state *fx) { return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_user_to_fregs(struct fregs_state __user *fx) +static inline int frstor_from_user_sigframe(struct fregs_state __user *fx) { return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline void copy_fxregs_to_kernel(struct fpu *fpu) +static inline void fxsave(struct fxregs_state *fx) { if (IS_ENABLED(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); else - asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); } /* These macros all use (%edi)/(%rdi) as the single memory argument. */ @@ -211,16 +197,20 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" +/* + * After this @err contains 0 on success or the negated trap number when + * the operation raises an exception. For faults this results in -EFAULT. + */ #define XSTATE_OP(op, st, lmask, hmask, err) \ asm volatile("1:" op "\n\t" \ "xor %[err], %[err]\n" \ "2:\n\t" \ ".pushsection .fixup,\"ax\"\n\t" \ - "3: movl $-2,%[err]\n\t" \ + "3: negl %%eax\n\t" \ "jmp 2b\n\t" \ ".popsection\n\t" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) \ + _ASM_EXTABLE_FAULT(1b, 3b) \ + : [err] "=a" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") @@ -272,31 +262,9 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) +static inline void os_xrstor_booting(struct xregs_state *xstate) { - u64 mask = xfeatures_mask_all; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) -{ - u64 mask = -1; + u64 mask = xfeatures_mask_fpstate(); u32 lmask = mask; u32 hmask = mask >> 32; int err; @@ -317,8 +285,11 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) /* * Save processor xstate to xsave area. + * + * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features + * and command line options. The choice is permanent until the next reboot. */ -static inline void copy_xregs_to_kernel(struct xregs_state *xstate) +static inline void os_xsave(struct xregs_state *xstate) { u64 mask = xfeatures_mask_all; u32 lmask = mask; @@ -335,8 +306,10 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate) /* * Restore processor xstate from xsave area. + * + * Uses XRSTORS when XSAVES is used, XRSTOR otherwise. */ -static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) +static inline void os_xrstor(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; @@ -354,9 +327,14 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) * backward compatibility for old applications which don't understand * compacted format of xsave area. */ -static inline int copy_xregs_to_user(struct xregs_state __user *buf) +static inline int xsave_to_user_sigframe(struct xregs_state __user *buf) { - u64 mask = xfeatures_mask_user(); + /* + * Include the features which are not xsaved/rstored by the kernel + * internally, e.g. PKRU. That's user space ABI and also required + * to allow the signal handler to modify PKRU. + */ + u64 mask = xfeatures_mask_uabi(); u32 lmask = mask; u32 hmask = mask >> 32; int err; @@ -379,7 +357,7 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf) /* * Restore xstate from user space xsave area. */ -static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) +static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask) { struct xregs_state *xstate = ((__force struct xregs_state *)buf); u32 lmask = mask; @@ -397,13 +375,13 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) * Restore xstate from kernel space xsave area, return an error code instead of * an exception. */ -static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) +static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; int err; - if (static_cpu_has(X86_FEATURE_XSAVES)) + if (cpu_feature_enabled(X86_FEATURE_XSAVES)) XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); @@ -411,36 +389,11 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) return err; } -extern int copy_fpregs_to_fpstate(struct fpu *fpu); +extern void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask); -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) +static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate) { - if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, mask); - } else { - if (use_fxsr()) - copy_kernel_to_fxregs(&fpstate->fxsave); - else - copy_kernel_to_fregs(&fpstate->fsave); - } -} - -static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) -{ - /* - * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is - * pending. Clear the x87 state here by setting it to fixed values. - * "m" is a random variable that should be in L1. - */ - if (unlikely(static_cpu_has_bug(X86_BUG_FXSAVE_LEAK))) { - asm volatile( - "fnclex\n\t" - "emms\n\t" - "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (fpstate)); - } - - __copy_kernel_to_fpregs(fpstate, -1); + __restore_fpregs_from_fpstate(fpstate, xfeatures_mask_fpstate()); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); @@ -499,10 +452,8 @@ static inline void fpregs_activate(struct fpu *fpu) trace_x86_fpu_regs_activated(fpu); } -/* - * Internal helper, do not use directly. Use switch_fpu_return() instead. - */ -static inline void __fpregs_load_activate(void) +/* Internal helper for switch_fpu_return() and signal frame setup */ +static inline void fpregs_restore_userregs(void) { struct fpu *fpu = ¤t->thread.fpu; int cpu = smp_processor_id(); @@ -511,7 +462,21 @@ static inline void __fpregs_load_activate(void) return; if (!fpregs_state_valid(fpu, cpu)) { - copy_kernel_to_fpregs(&fpu->state); + u64 mask; + + /* + * This restores _all_ xstate which has not been + * established yet. + * + * If PKRU is enabled, then the PKRU value is already + * correct because it was either set in switch_to() or in + * flush_thread(). So it is excluded because it might be + * not up to date in current->thread.fpu.xsave state. + */ + mask = xfeatures_mask_restore_user() | + xfeatures_mask_supervisor(); + __restore_fpregs_from_fpstate(&fpu->state, mask); + fpregs_activate(fpu); fpu->last_cpu = cpu; } @@ -543,12 +508,17 @@ static inline void __fpregs_load_activate(void) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { - if (!copy_fpregs_to_fpstate(old_fpu)) - old_fpu->last_cpu = -1; - else - old_fpu->last_cpu = cpu; + save_fpregs_to_fpstate(old_fpu); + /* + * The save operation preserved register state, so the + * fpu_fpregs_owner_ctx is still @old_fpu. Store the + * current CPU number in @old_fpu, so the next return + * to user space can avoid the FPU register restore + * when is returns on the same CPU and still owns the + * context. + */ + old_fpu->last_cpu = cpu; - /* But leave fpu_fpregs_owner_ctx! */ trace_x86_fpu_regs_deactivated(old_fpu); } } @@ -558,39 +528,13 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) */ /* - * Load PKRU from the FPU context if available. Delay loading of the - * complete FPU state until the return to userland. + * Delay loading of the complete FPU state until the return to userland. + * PKRU is handled separately. */ static inline void switch_fpu_finish(struct fpu *new_fpu) { - u32 pkru_val = init_pkru_value; - struct pkru_state *pk; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return; - - set_thread_flag(TIF_NEED_FPU_LOAD); - - if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) - return; - - /* - * PKRU state is switched eagerly because it needs to be valid before we - * return to userland e.g. for a copy_to_user() operation. - */ - if (current->mm) { - pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); - if (pk) - pkru_val = pk->pkru; - } - __write_pkru(pkru_val); - - /* - * Expensive PASID MSR write will be avoided in update_pasid() because - * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated - * unless it's different from mm->pasid to reduce overhead. - */ - update_pasid(); + if (cpu_feature_enabled(X86_FEATURE_FPU)) + set_thread_flag(TIF_NEED_FPU_LOAD); } #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 7fb516b6893a..8b6631dffefd 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -29,6 +29,8 @@ unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size); +unsigned long fpu__get_fpstate_size(void); + extern void fpu__init_prepare_fx_sw_frame(void); #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 47a92232d595..109dfcc75299 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <asm/processor.h> +#include <asm/fpu/api.h> #include <asm/user.h> /* Bit 63 of XCR0 is reserved for future expansion */ @@ -34,6 +35,14 @@ XFEATURE_MASK_BNDREGS | \ XFEATURE_MASK_BNDCSR) +/* + * Features which are restored when returning to user space. + * PKRU is not restored on return to user space because PKRU + * is switched eagerly in switch_to() and flush_thread() + */ +#define XFEATURE_MASK_USER_RESTORE \ + (XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU) + /* All currently supported supervisor features */ #define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID) @@ -42,21 +51,21 @@ * and its size may be huge. Saving/restoring such supervisor state components * at each context switch can cause high CPU and space overhead, which should * be avoided. Such supervisor state components should only be saved/restored - * on demand. The on-demand dynamic supervisor features are set in this mask. + * on demand. The on-demand supervisor features are set in this mask. * - * Unlike the existing supported supervisor features, a dynamic supervisor + * Unlike the existing supported supervisor features, an independent supervisor * feature does not allocate a buffer in task->fpu, and the corresponding * supervisor state component cannot be saved/restored at each context switch. * - * To support a dynamic supervisor feature, a developer should follow the + * To support an independent supervisor feature, a developer should follow the * dos and don'ts as below: * - Do dynamically allocate a buffer for the supervisor state component. * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the * state component to/from the buffer. - * - Don't set the bit corresponding to the dynamic supervisor feature in + * - Don't set the bit corresponding to the independent supervisor feature in * IA32_XSS at run time, since it has been set at boot time. */ -#define XFEATURE_MASK_DYNAMIC (XFEATURE_MASK_LBR) +#define XFEATURE_MASK_INDEPENDENT (XFEATURE_MASK_LBR) /* * Unsupported supervisor features. When a supervisor feature in this mask is @@ -66,7 +75,7 @@ /* All supervisor states including supported and unsupported states. */ #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ - XFEATURE_MASK_DYNAMIC | \ + XFEATURE_MASK_INDEPENDENT | \ XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) #ifdef CONFIG_X86_64 @@ -82,17 +91,42 @@ static inline u64 xfeatures_mask_supervisor(void) return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED; } -static inline u64 xfeatures_mask_user(void) +/* + * The xfeatures which are enabled in XCR0 and expected to be in ptrace + * buffers and signal frames. + */ +static inline u64 xfeatures_mask_uabi(void) { return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; } -static inline u64 xfeatures_mask_dynamic(void) +/* + * The xfeatures which are restored by the kernel when returning to user + * mode. This is not necessarily the same as xfeatures_mask_uabi() as the + * kernel does not manage all XCR0 enabled features via xsave/xrstor as + * some of them have to be switched eagerly on context switch and exec(). + */ +static inline u64 xfeatures_mask_restore_user(void) +{ + return xfeatures_mask_all & XFEATURE_MASK_USER_RESTORE; +} + +/* + * Like xfeatures_mask_restore_user() but additionally restors the + * supported supervisor states. + */ +static inline u64 xfeatures_mask_fpstate(void) +{ + return xfeatures_mask_all & \ + (XFEATURE_MASK_USER_RESTORE | XFEATURE_MASK_SUPERVISOR_SUPPORTED); +} + +static inline u64 xfeatures_mask_independent(void) { if (!boot_cpu_has(X86_FEATURE_ARCH_LBR)) - return XFEATURE_MASK_DYNAMIC & ~XFEATURE_MASK_LBR; + return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; - return XFEATURE_MASK_DYNAMIC; + return XFEATURE_MASK_INDEPENDENT; } extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; @@ -101,19 +135,21 @@ extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); -const void *get_xsave_field_ptr(int xfeature_nr); -int using_compacted_format(void); int xfeature_size(int xfeature_nr); -struct membuf; -void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave); -int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); -int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); -void copy_supervisor_to_kernel(struct xregs_state *xsave); -void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask); -void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask); +int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); +int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); +void xsaves(struct xregs_state *xsave, u64 mask); +void xrstors(struct xregs_state *xsave, u64 mask); -/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ -int validate_user_xstate_header(const struct xstate_header *hdr); +enum xstate_copy_mode { + XSTATE_COPY_FP, + XSTATE_COPY_FX, + XSTATE_COPY_XSAVE, +}; + +struct membuf; +void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, + enum xstate_copy_mode mode); #endif diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 606f5cc579b2..f1366ce609e3 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -52,7 +52,7 @@ * Support for passing hypercall input parameter block via XMM * registers is available */ -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) /* Support for a virtual guest idle state is available */ #define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) /* Frequency MSRs available */ @@ -61,6 +61,11 @@ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) /* Support for debug MSRs available */ #define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) +/* + * Support for returning hypercall output block via XMM + * registers is available + */ +#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE BIT(15) /* stimer Direct Mode is available */ #define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) @@ -133,6 +138,15 @@ #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) #define HV_X64_NESTED_MSR_BITMAP BIT(19) +/* + * This is specific to AMD and specifies that enlightened TLB flush is + * supported. If guest opts in to this feature, ASID invalidations only + * flushes gva -> hpa mapping entries. To flush the TLB entries derived + * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace + * or HvFlushGuestPhysicalAddressList). + */ +#define HV_X64_NESTED_ENLIGHTENED_TLB BIT(22) + /* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */ #define HV_PARAVISOR_PRESENT BIT(0) @@ -314,6 +328,9 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 +/* Number of XMM registers used in hypercall input/output */ +#define HV_HYPERCALL_MAX_XMM_REGISTERS 6 + struct hv_nested_enlightenments_control { struct { __u32 directhypercall:1; diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 73d45b0dfff2..1345088e9902 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs) */ #define DECLARE_IDTENTRY_VC(vector, func) \ DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \ - __visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code); \ - __visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code) + __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \ + __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code) /** * DEFINE_IDTENTRY_IST - Emit code for IST entry points @@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs) DEFINE_IDTENTRY_RAW_ERRORCODE(func) /** - * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler - which runs on a safe stack. + * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler + when raised from kernel mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE */ -#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \ - DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func) +#define DEFINE_IDTENTRY_VC_KERNEL(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func) /** - * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler - which runs on the VC fall-back stack + * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler + when raised from user mode * @func: Function name of the entry point * * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE */ -#define DEFINE_IDTENTRY_VC_IST(func) \ - DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func) - -/** - * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler - * @func: Function name of the entry point - * - * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE - */ -#define DEFINE_IDTENTRY_VC(func) \ - DEFINE_IDTENTRY_RAW_ERRORCODE(func) +#define DEFINE_IDTENTRY_VC_USER(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func) #else /* CONFIG_X86_64 */ @@ -504,7 +495,7 @@ __visible noinstr void func(struct pt_regs *regs, \ .align 8 SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR - .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) + .rept NR_EXTERNAL_VECTORS UNWIND_HINT_IRET_REGS 0 : .byte 0x6a, vector @@ -520,7 +511,7 @@ SYM_CODE_END(irq_entries_start) .align 8 SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR - .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) + .rept NR_SYSTEM_VECTORS UNWIND_HINT_IRET_REGS 0 : .byte 0x6a, vector diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 955b06d6325a..27158436f322 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -102,7 +102,8 @@ #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ #define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */ -#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Willow Cove */ + +#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 889f8b1b5b7f..43dcb9284208 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -26,8 +26,8 @@ * This file enumerates the exact layout of them: */ +/* This is used as an interrupt vector when programming the APIC. */ #define NMI_VECTOR 0x02 -#define MCE_VECTOR 0x12 /* * IDT vectors usable for external interrupt sources start at 0x20. @@ -84,7 +84,7 @@ */ #define IRQ_WORK_VECTOR 0xf6 -#define UV_BAU_MESSAGE 0xf5 +/* 0xf5 - unused, was UV_BAU_MESSAGE */ #define DEFERRED_ERROR_VECTOR 0xf4 /* Vector on which hypervisor callbacks will be delivered */ @@ -114,6 +114,9 @@ #define FIRST_SYSTEM_VECTOR NR_VECTORS #endif +#define NR_EXTERNAL_VECTORS (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) +#define NR_SYSTEM_VECTORS (NR_VECTORS - FIRST_SYSTEM_VECTOR) + /* * Size the maximum number of interrupts. * diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 610a05374c02..0449b125d27f 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -4,8 +4,6 @@ #define HAVE_JUMP_LABEL_BATCH -#define JUMP_LABEL_NOP_SIZE 5 - #include <asm/asm.h> #include <asm/nops.h> @@ -14,15 +12,35 @@ #include <linux/stringify.h> #include <linux/types.h> +#define JUMP_TABLE_ENTRY \ + ".pushsection __jump_table, \"aw\" \n\t" \ + _ASM_ALIGN "\n\t" \ + ".long 1b - . \n\t" \ + ".long %l[l_yes] - . \n\t" \ + _ASM_PTR "%c0 + %c1 - .\n\t" \ + ".popsection \n\t" + +#ifdef CONFIG_STACK_VALIDATION + +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + "jmp %l[l_yes] # objtool NOPs this \n\t" + JUMP_TABLE_ENTRY + : : "i" (key), "i" (2 | branch) : : l_yes); + + return false; +l_yes: + return true; +} + +#else + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; @@ -30,16 +48,13 @@ l_yes: return true; } +#endif /* STACK_VALIDATION */ + static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" - ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" - "2:\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + "jmp %l[l_yes]\n\t" + JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; @@ -47,41 +62,7 @@ l_yes: return true; } -#else /* __ASSEMBLY__ */ - -.macro STATIC_JUMP_IF_TRUE target, key, def -.Lstatic_jump_\@: - .if \def - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .else - .byte BYTES_NOP5 - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key - . - .popsection -.endm - -.macro STATIC_JUMP_IF_FALSE target, key, def -.Lstatic_jump_\@: - .if \def - .byte BYTES_NOP5 - .else - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key + 1 - . - .popsection -.endm +extern int arch_jump_entry_size(struct jump_entry *entry); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 323641097f63..a12a4987154e 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -87,7 +87,10 @@ KVM_X86_OP(set_identity_map_addr) KVM_X86_OP(get_mt_mask) KVM_X86_OP(load_mmu_pgd) KVM_X86_OP_NULL(has_wbinvd_exit) -KVM_X86_OP(write_l1_tsc_offset) +KVM_X86_OP(get_l2_tsc_offset) +KVM_X86_OP(get_l2_tsc_multiplier) +KVM_X86_OP(write_tsc_offset) +KVM_X86_OP(write_tsc_multiplier) KVM_X86_OP(get_exit_info) KVM_X86_OP(check_intercept) KVM_X86_OP(handle_exit_irqoff) @@ -99,14 +102,15 @@ KVM_X86_OP_NULL(post_block) KVM_X86_OP_NULL(vcpu_blocking) KVM_X86_OP_NULL(vcpu_unblocking) KVM_X86_OP_NULL(update_pi_irte) +KVM_X86_OP_NULL(start_assignment) KVM_X86_OP_NULL(apicv_post_state_restore) KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt) KVM_X86_OP_NULL(set_hv_timer) KVM_X86_OP_NULL(cancel_hv_timer) KVM_X86_OP(setup_mce) KVM_X86_OP(smi_allowed) -KVM_X86_OP(pre_enter_smm) -KVM_X86_OP(pre_leave_smm) +KVM_X86_OP(enter_smm) +KVM_X86_OP(leave_smm) KVM_X86_OP(enable_smi_window) KVM_X86_OP_NULL(mem_enc_op) KVM_X86_OP_NULL(mem_enc_reg_region) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 55efbacfc244..974cbfb1eefe 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -85,7 +85,7 @@ #define KVM_REQ_APICV_UPDATE \ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) -#define KVM_REQ_HV_TLB_FLUSH \ +#define KVM_REQ_TLB_FLUSH_GUEST \ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) @@ -269,12 +269,36 @@ enum x86_intercept_stage; struct kvm_kernel_irq_routing_entry; /* - * the pages used as guest page table on soft mmu are tracked by - * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used - * by indirect shadow page can not be more than 15 bits. + * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page + * also includes TDP pages) to determine whether or not a page can be used in + * the given MMU context. This is a subset of the overall kvm_mmu_role to + * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating + * 2 bytes per gfn instead of 4 bytes per gfn. * - * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access, - * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. + * Indirect upper-level shadow pages are tracked for write-protection via + * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create + * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise + * gfn_track will overflow and explosions will ensure. + * + * A unique shadow page (SP) for a gfn is created if and only if an existing SP + * cannot be reused. The ability to reuse a SP is tracked by its role, which + * incorporates various mode bits and properties of the SP. Roughly speaking, + * the number of unique SPs that can theoretically be created is 2^n, where n + * is the number of bits that are used to compute the role. + * + * But, even though there are 18 bits in the mask below, not all combinations + * of modes and flags are possible. The maximum number of possible upper-level + * shadow pages for a single gfn is in the neighborhood of 2^13. + * + * - invalid shadow pages are not accounted. + * - level is effectively limited to four combinations, not 16 as the number + * bits would imply, as 4k SPs are not tracked (allowed to go unsync). + * - level is effectively unused for non-PAE paging because there is exactly + * one upper level (see 4k SP exception above). + * - quadrant is used only for non-PAE paging and is exclusive with + * gpte_is_8_bytes. + * - execonly and ad_disabled are used only for nested EPT, which makes it + * exclusive with quadrant. */ union kvm_mmu_page_role { u32 word; @@ -285,7 +309,7 @@ union kvm_mmu_page_role { unsigned direct:1; unsigned access:3; unsigned invalid:1; - unsigned nxe:1; + unsigned efer_nx:1; unsigned cr0_wp:1; unsigned smep_andnot_wp:1; unsigned smap_andnot_wp:1; @@ -303,13 +327,26 @@ union kvm_mmu_page_role { }; }; -union kvm_mmu_extended_role { /* - * This structure complements kvm_mmu_page_role caching everything needed for - * MMU configuration. If nothing in both these structures changed, MMU - * re-configuration can be skipped. @valid bit is set on first usage so we don't - * treat all-zero structure as valid data. + * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties + * relevant to the current MMU configuration. When loading CR0, CR4, or EFER, + * including on nested transitions, if nothing in the full role changes then + * MMU re-configuration can be skipped. @valid bit is set on first usage so we + * don't treat all-zero structure as valid data. + * + * The properties that are tracked in the extended role but not the page role + * are for things that either (a) do not affect the validity of the shadow page + * or (b) are indirectly reflected in the shadow page's role. For example, + * CR4.PKE only affects permission checks for software walks of the guest page + * tables (because KVM doesn't support Protection Keys with shadow paging), and + * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level. + * + * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role. + * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and + * SMAP, but the MMU's permission checks for software walks need to be SMEP and + * SMAP aware regardless of CR0.WP. */ +union kvm_mmu_extended_role { u32 word; struct { unsigned int valid:1; @@ -320,7 +357,7 @@ union kvm_mmu_extended_role { unsigned int cr4_pke:1; unsigned int cr4_smap:1; unsigned int cr4_smep:1; - unsigned int maxphyaddr:6; + unsigned int cr4_la57:1; }; }; @@ -420,11 +457,6 @@ struct kvm_mmu { struct rsvd_bits_validate guest_rsvd_check; - /* Can have large pages at levels 2..last_nonleaf_level-1. */ - u8 last_nonleaf_level; - - bool nx; - u64 pdptrs[4]; /* pae */ }; @@ -543,6 +575,15 @@ struct kvm_vcpu_hv { struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); cpumask_t tlb_flush; + bool enforce_cpuid; + struct { + u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */ + u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */ + u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */ + u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ + u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */ + u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ + } cpuid_cache; }; /* Xen HVM per vcpu emulation context */ @@ -707,7 +748,7 @@ struct kvm_vcpu_arch { } st; u64 l1_tsc_offset; - u64 tsc_offset; + u64 tsc_offset; /* current tsc offset */ u64 last_guest_tsc; u64 last_host_tsc; u64 tsc_offset_adjustment; @@ -721,7 +762,8 @@ struct kvm_vcpu_arch { u32 virtual_tsc_khz; s64 ia32_tsc_adjust_msr; u64 msr_ia32_power_ctl; - u64 tsc_scaling_ratio; + u64 l1_tsc_scaling_ratio; + u64 tsc_scaling_ratio; /* current scaling ratio */ atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ unsigned nmi_pending; /* NMI queued after currently running handler */ @@ -829,7 +871,7 @@ struct kvm_vcpu_arch { bool l1tf_flush_l1d; /* Host CPU on which VM-entry was most recently attempted */ - unsigned int last_vmentry_cpu; + int last_vmentry_cpu; /* AMD MSRC001_0015 Hardware Configuration */ u64 msr_hwcr; @@ -851,6 +893,16 @@ struct kvm_vcpu_arch { /* Protected Guests */ bool guest_state_protected; + + /* + * Set when PDPTS were loaded directly by the userspace without + * reading the guest memory + */ + bool pdptrs_from_userspace; + +#if IS_ENABLED(CONFIG_HYPERV) + hpa_t hv_root_tdp; +#endif }; struct kvm_lpage_info { @@ -1002,7 +1054,7 @@ struct kvm_arch { struct kvm_apic_map __rcu *apic_map; atomic_t apic_map_dirty; - bool apic_access_page_done; + bool apic_access_memslot_enabled; unsigned long apicv_inhibit_reasons; gpa_t wall_clock; @@ -1062,11 +1114,19 @@ struct kvm_arch { bool exception_payload_enabled; bool bus_lock_detection_enabled; + /* + * If exit_on_emulation_error is set, and the in-kernel instruction + * emulator fails to emulate an instruction, allow userspace + * the opportunity to look at it. + */ + bool exit_on_emulation_error; /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ u32 user_space_msr_mask; struct kvm_x86_msr_filter __rcu *msr_filter; + u32 hypercall_exit_enabled; + /* Guest can access the SGX PROVISIONKEY. */ bool sgx_provisioning_allowed; @@ -1124,23 +1184,35 @@ struct kvm_arch { */ spinlock_t tdp_mmu_pages_lock; #endif /* CONFIG_X86_64 */ + + /* + * If set, rmaps have been allocated for all memslots and should be + * allocated for any newly created or modified memslots. + */ + bool memslots_have_rmaps; + +#if IS_ENABLED(CONFIG_HYPERV) + hpa_t hv_root_tdp; + spinlock_t hv_root_tdp_lock; +#endif }; struct kvm_vm_stat { - ulong mmu_shadow_zapped; - ulong mmu_pte_write; - ulong mmu_pde_zapped; - ulong mmu_flooded; - ulong mmu_recycled; - ulong mmu_cache_miss; - ulong mmu_unsync; - ulong remote_tlb_flush; - ulong lpages; - ulong nx_lpage_splits; - ulong max_mmu_page_hash_collisions; + struct kvm_vm_stat_generic generic; + u64 mmu_shadow_zapped; + u64 mmu_pte_write; + u64 mmu_pde_zapped; + u64 mmu_flooded; + u64 mmu_recycled; + u64 mmu_cache_miss; + u64 mmu_unsync; + u64 lpages; + u64 nx_lpage_splits; + u64 max_mmu_page_hash_collisions; }; struct kvm_vcpu_stat { + struct kvm_vcpu_stat_generic generic; u64 pf_fixed; u64 pf_guest; u64 tlb_flush; @@ -1154,10 +1226,6 @@ struct kvm_vcpu_stat { u64 nmi_window_exits; u64 l1d_flush; u64 halt_exits; - u64 halt_successful_poll; - u64 halt_attempted_poll; - u64 halt_poll_invalid; - u64 halt_wakeup; u64 request_irq_exits; u64 irq_exits; u64 host_state_reload; @@ -1168,11 +1236,10 @@ struct kvm_vcpu_stat { u64 irq_injections; u64 nmi_injections; u64 req_event; - u64 halt_poll_success_ns; - u64 halt_poll_fail_ns; u64 nested_run; u64 directed_yield_attempted; u64 directed_yield_successful; + u64 guest_mode; }; struct x86_instruction_info; @@ -1304,8 +1371,10 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); - /* Returns actual tsc_offset set in active VMCS */ - u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); + u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu); + void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier); /* * Retrieve somewhat arbitrary exit information. Intended to be used @@ -1352,6 +1421,7 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); + void (*start_assignment)(struct kvm *kvm); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); @@ -1362,8 +1432,8 @@ struct kvm_x86_ops { void (*setup_mce)(struct kvm_vcpu *vcpu); int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); - int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); - int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); + int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate); + int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); void (*enable_smi_window)(struct kvm_vcpu *vcpu); int (*mem_enc_op)(struct kvm *kvm, void __user *argp); @@ -1422,6 +1492,7 @@ struct kvm_arch_async_pf { extern u32 __read_mostly kvm_nr_uret_msrs; extern u64 __read_mostly host_efer; extern bool __read_mostly allow_smaller_maxphyaddr; +extern bool __read_mostly enable_apicv; extern struct kvm_x86_ops kvm_x86_ops; #define KVM_X86_OP(func) \ @@ -1462,6 +1533,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu); void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); +void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -1476,7 +1548,6 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); -bool pdptrs_changed(struct kvm_vcpu *vcpu); int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); @@ -1649,6 +1720,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ulong roots_to_free); +void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu); gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, @@ -1661,7 +1733,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); bool kvm_apicv_activated(struct kvm *kvm); -void kvm_apicv_init(struct kvm *kvm, bool enable); void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); void kvm_request_apicv_update(struct kvm *kvm, bool activate, unsigned long bit); @@ -1674,8 +1745,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gva_t gva, hpa_t root_hpa); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); -void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush, - bool skip_mmu_sync); +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level, int tdp_huge_page_level); @@ -1787,8 +1857,10 @@ static inline bool kvm_is_supported_user_return_msr(u32 msr) return kvm_find_user_return_msr(msr) >= 0; } -u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); +u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); +u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier); +u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier); unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); @@ -1862,4 +1934,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) int kvm_cpu_dirty_log_size(void); +int alloc_all_memslots_rmaps(struct kvm *kvm); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ddfb3cad8dff..0607ec4f5091 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -305,7 +305,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ - SMCA_LS_V2, /* Load Store */ + SMCA_LS_V2, SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ @@ -314,17 +314,22 @@ enum smca_bank_types { SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ SMCA_CS, /* Coherent Slave */ - SMCA_CS_V2, /* Coherent Slave */ + SMCA_CS_V2, SMCA_PIE, /* Power, Interrupts, etc. */ SMCA_UMC, /* Unified Memory Controller */ + SMCA_UMC_V2, SMCA_PB, /* Parameter Block */ SMCA_PSP, /* Platform Security Processor */ - SMCA_PSP_V2, /* Platform Security Processor */ + SMCA_PSP_V2, SMCA_SMU, /* System Management Unit */ - SMCA_SMU_V2, /* System Management Unit */ + SMCA_SMU_V2, SMCA_MP5, /* Microprocessor 5 Unit */ SMCA_NBIO, /* Northbridge IO Unit */ SMCA_PCIE, /* PCI Express Unit */ + SMCA_PCIE_V2, + SMCA_XGMI_PCS, /* xGMI PCS Unit */ + SMCA_XGMI_PHY, /* xGMI PHY Unit */ + SMCA_WAFL_PHY, /* WAFL PHY Unit */ N_SMCA_BANK_TYPES }; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 211ba3375ee9..a7c413432b33 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -772,6 +772,10 @@ #define MSR_TFA_RTM_FORCE_ABORT_BIT 0 #define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT) +#define MSR_TFA_TSX_CPUID_CLEAR_BIT 1 +#define MSR_TFA_TSX_CPUID_CLEAR BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT) +#define MSR_TFA_SDV_ENABLE_RTM_BIT 2 +#define MSR_TFA_SDV_ENABLE_RTM BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT) /* P4/Xeon+ specific */ #define MSR_IA32_MCG_EAX 0x00000180 diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index c1e5e818ba16..c5573eaa5bb9 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_NOPS_H #define _ASM_X86_NOPS_H +#include <asm/asm.h> + /* * Define nops for use with alternative() and for tracing. */ @@ -57,20 +59,14 @@ #endif /* CONFIG_64BIT */ -#ifdef __ASSEMBLY__ -#define _ASM_MK_NOP(x) .byte x -#else -#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" -#endif - -#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8) +#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1) +#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2) +#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3) +#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4) +#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5) +#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6) +#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) +#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) #define ASM_NOP_MAX 8 diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 7555b48803a8..4d5810c8fab7 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -34,9 +34,9 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, copy_page(to, from); } -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE +#define alloc_zeroed_user_highpage_movable(vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE #ifndef __pa #define __pa(x) __phys_addr((unsigned long)(x)) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index ca840fec7776..4bde0dc66100 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -75,7 +75,7 @@ void copy_page(void *to, void *from); * * With page table isolation enabled, we map the LDT in ... [stay tuned] */ -static inline unsigned long task_size_max(void) +static __always_inline unsigned long task_size_max(void) { unsigned long ret; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 544f41a179fb..8fc1b5003713 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -478,6 +478,7 @@ struct x86_pmu_lbr { extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); extern void perf_check_microcode(void); +extern void perf_clear_dirty_counters(void); extern int x86_perf_rdpmc_index(struct perf_event *event); #else static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 62ad61d6fefc..c7ec5bb88334 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -84,8 +84,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); } -#define pmd_pgtable(pmd) pmd_page(pmd) - #if CONFIG_PGTABLE_LEVELS > 2 extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index b1099f2d9800..448cd01eb3ec 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -23,7 +23,7 @@ #ifndef __ASSEMBLY__ #include <asm/x86_init.h> -#include <asm/fpu/xstate.h> +#include <asm/pkru.h> #include <asm/fpu/api.h> #include <asm-generic/pgtable_uffd.h> @@ -126,35 +126,6 @@ static inline int pte_dirty(pte_t pte) return pte_flags(pte) & _PAGE_DIRTY; } - -static inline u32 read_pkru(void) -{ - if (boot_cpu_has(X86_FEATURE_OSPKE)) - return rdpkru(); - return 0; -} - -static inline void write_pkru(u32 pkru) -{ - struct pkru_state *pk; - - if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return; - - pk = get_xsave_addr(¤t->thread.fpu.state.xsave, XFEATURE_PKRU); - - /* - * The PKRU value in xstate needs to be in sync with the value that is - * written to the CPU. The FPU restore on return to userland would - * otherwise load the previous value again. - */ - fpregs_lock(); - if (pk) - pk->pkru = pkru; - __write_pkru(pkru); - fpregs_unlock(); -} - static inline int pte_young(pte_t pte) { return pte_flags(pte) & _PAGE_ACCESSED; @@ -865,9 +836,9 @@ static inline int pud_present(pud_t pud) return pud_flags(pud) & _PAGE_PRESENT; } -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { - return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud)); + return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* @@ -906,9 +877,9 @@ static inline int p4d_present(p4d_t p4d) return p4d_flags(p4d) & _PAGE_PRESENT; } -static inline unsigned long p4d_page_vaddr(p4d_t p4d) +static inline pud_t *p4d_pgtable(p4d_t p4d) { - return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); + return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); } /* @@ -1360,32 +1331,6 @@ static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 -#define PKRU_BITS_PER_PKEY 2 - -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -extern u32 init_pkru_value; -#else -#define init_pkru_value 0 -#endif - -static inline bool __pkru_allows_read(u32 pkru, u16 pkey) -{ - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); -} - -static inline bool __pkru_allows_write(u32 pkru, u16 pkey) -{ - int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; - /* - * Access-disable disables writes too so we need to check - * both bits here. - */ - return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); -} - static inline u16 pte_flags_pkey(unsigned long pte_flags) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f24d7ef8fffa..40497a9020c6 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -7,8 +7,6 @@ #include <asm/page_types.h> -#define FIRST_USER_ADDRESS 0UL - #define _PAGE_BIT_PRESENT 0 /* is present */ #define _PAGE_BIT_RW 1 /* writeable */ #define _PAGE_BIT_USER 2 /* userspace addressable */ diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 2ff9b98812b7..5c7bcaa79623 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -9,14 +9,14 @@ * will be necessary to ensure that the types that store key * numbers and masks have sufficient capacity. */ -#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) +#define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); static inline bool arch_pkeys_enabled(void) { - return boot_cpu_has(X86_FEATURE_OSPKE); + return cpu_feature_enabled(X86_FEATURE_OSPKE); } /* @@ -26,7 +26,7 @@ static inline bool arch_pkeys_enabled(void) extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); @@ -37,7 +37,7 @@ extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { - if (!boot_cpu_has(X86_FEATURE_OSPKE)) + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return 0; return __arch_override_mprotect_pkey(vma, prot, pkey); @@ -124,7 +124,6 @@ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val); -extern void copy_init_pkru_to_fpregs(void); static inline int vma_pkey(struct vm_area_struct *vma) { diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h new file mode 100644 index 000000000000..ccc539faa5bb --- /dev/null +++ b/arch/x86/include/asm/pkru.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_PKRU_H +#define _ASM_X86_PKRU_H + +#include <asm/fpu/xstate.h> + +#define PKRU_AD_BIT 0x1 +#define PKRU_WD_BIT 0x2 +#define PKRU_BITS_PER_PKEY 2 + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +extern u32 init_pkru_value; +#define pkru_get_init_value() READ_ONCE(init_pkru_value) +#else +#define init_pkru_value 0 +#define pkru_get_init_value() 0 +#endif + +static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); +} + +static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; + /* + * Access-disable disables writes too so we need to check + * both bits here. + */ + return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); +} + +static inline u32 read_pkru(void) +{ + if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + return rdpkru(); + return 0; +} + +static inline void write_pkru(u32 pkru) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + /* + * WRPKRU is relatively expensive compared to RDPKRU. + * Avoid WRPKRU when it would not change the value. + */ + if (pkru != rdpkru()) + wrpkru(pkru); +} + +static inline void pkru_write_default(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + wrpkru(pkru_get_init_value()); +} + +#endif diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index f8cb8af4de5c..fe5efbcba824 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 556b2b17c3e2..f3020c54e2cb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -518,6 +518,15 @@ struct thread_struct { unsigned int sig_on_uaccess_err:1; + /* + * Protection Keys Register for Userspace. Loaded immediately on + * context switch. Store it in thread_struct to avoid a lookup in + * the tasks's FPU xstate buffer. This value is only valid when a + * task is scheduled out. For 'current' the authoritative source of + * PKRU is the hardware itself. + */ + u32 pkru; + /* Floating point and extended processor state */ struct fpu fpu; /* @@ -663,6 +672,7 @@ extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void load_percpu_segment(int); extern void cpu_init(void); +extern void cpu_init_secondary(void); extern void cpu_init_exception_handling(void); extern void cr4_init(void); diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 629c3df243f0..2cef6c5a52c2 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -9,8 +9,13 @@ #define __ASM_X86_SEV_COMMON_H #define GHCB_MSR_INFO_POS 0 -#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) +#define GHCB_DATA_LOW 12 +#define GHCB_MSR_INFO_MASK (BIT_ULL(GHCB_DATA_LOW) - 1) +#define GHCB_DATA(v) \ + (((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW) + +/* SEV Information Request/Response */ #define GHCB_MSR_SEV_INFO_RESP 0x001 #define GHCB_MSR_SEV_INFO_REQ 0x002 #define GHCB_MSR_VER_MAX_POS 48 @@ -28,6 +33,7 @@ #define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK) #define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK) +/* CPUID Request/Response */ #define GHCB_MSR_CPUID_REQ 0x004 #define GHCB_MSR_CPUID_RESP 0x005 #define GHCB_MSR_CPUID_FUNC_POS 32 @@ -45,6 +51,14 @@ (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) +/* AP Reset Hold */ +#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 +#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 + +/* GHCB Hypervisor Feature Request/Response */ +#define GHCB_MSR_HV_FT_REQ 0x080 +#define GHCB_MSR_HV_FT_RESP 0x081 + #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 #define GHCB_MSR_TERM_REASON_SET_MASK 0xf diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 9c31e0ebc55b..05f3e21f01a7 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -13,7 +13,7 @@ /* * This file contains both data structures defined by SGX architecture and Linux * defined software data structures and functions. The two should not be mixed - * together for better readibility. The architectural definitions come first. + * together for better readability. The architectural definitions come first. */ /* The SGX specific CPUID function. */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 84eab2724875..5b1ed650b124 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -85,4 +85,6 @@ struct rt_sigframe_x32 { #endif /* CONFIG_X86_64 */ +void __init init_sigframe_size(void); + #endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 2acd6cb62328..f3fbb84ff8a7 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -104,25 +104,13 @@ static inline void wrpkru(u32 pkru) : : "a" (pkru), "c"(ecx), "d"(edx)); } -static inline void __write_pkru(u32 pkru) -{ - /* - * WRPKRU is relatively expensive compared to RDPKRU. - * Avoid WRPKRU when it would not change the value. - */ - if (pkru == rdpkru()) - return; - - wrpkru(pkru); -} - #else static inline u32 rdpkru(void) { return 0; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { } #endif diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index b6ffe58c70fa..24a8d6c4fb18 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -11,7 +11,7 @@ * The same segment is shared by percpu area and stack canary. On * x86_64, percpu symbols are zero based and %gs (64-bit) points to the * base of percpu area. The first occupant of the percpu area is always - * fixed_percpu_data which contains stack_canary at the approproate + * fixed_percpu_data which contains stack_canary at the appropriate * offset. On x86_32, the stack canary is just a regular percpu * variable. * diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 772e60efe243..e322676039f4 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -156,6 +156,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u64 avic_physical_id; /* Offset 0xf8 */ u8 reserved_7[8]; u64 vmsa_pa; /* Used for an SEV-ES guest */ + u8 reserved_8[720]; + /* + * Offset 0x3e0, 32 bytes reserved + * for use by hypervisor/software. + */ + u8 reserved_sw[32]; }; @@ -314,7 +320,7 @@ struct ghcb { #define EXPECTED_VMCB_SAVE_AREA_SIZE 1032 -#define EXPECTED_VMCB_CONTROL_AREA_SIZE 272 +#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 #define EXPECTED_GHCB_SIZE PAGE_SIZE static inline void __unused_size_checks(void) @@ -326,7 +332,6 @@ static inline void __unused_size_checks(void) struct vmcb { struct vmcb_control_area control; - u8 reserved_control[1024 - sizeof(struct vmcb_control_area)]; struct vmcb_save_area save; } __packed; diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 7cbf733d11af..f7e2d82d24fb 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -21,13 +21,12 @@ extern const sys_call_ptr_t sys_call_table[]; #if defined(CONFIG_X86_32) #define ia32_sys_call_table sys_call_table -#endif - -#if defined(CONFIG_IA32_EMULATION) +#else +/* + * These may not exist, but still put the prototypes in so we + * can use IS_ENABLED(). + */ extern const sys_call_ptr_t ia32_sys_call_table[]; -#endif - -#ifdef CONFIG_X86_X32_ABI extern const sys_call_ptr_t x32_sys_call_table[]; #endif @@ -160,7 +159,7 @@ static inline int syscall_get_arch(struct task_struct *task) ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } -void do_syscall_64(unsigned long nr, struct pt_regs *regs); +void do_syscall_64(struct pt_regs *regs, int nr); void do_int80_syscall_32(struct pt_regs *regs); long do_fast_syscall_32(struct pt_regs *regs); diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 80c08c7d5e72..6a2827d0681f 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -17,7 +17,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * __x64_sys_*() - 64-bit native syscall * __ia32_sys_*() - 32-bit native syscall or common compat syscall * __ia32_compat_sys_*() - 32-bit compat syscall - * __x32_compat_sys_*() - 64-bit X32 compat syscall + * __x64_compat_sys_*() - 64-bit X32 compat syscall * * The registers are decoded according to the ABI: * 64-bit: RDI, RSI, RDX, R10, R8, R9 @@ -166,17 +166,17 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * with x86_64 obviously do not need such care. */ #define __X32_COMPAT_SYS_STUB0(name) \ - __SYS_STUB0(x32, compat_sys_##name) + __SYS_STUB0(x64, compat_sys_##name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) \ - __SYS_STUBx(x32, compat_sys##name, \ + __SYS_STUBx(x64, compat_sys##name, \ SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) #define __X32_COMPAT_COND_SYSCALL(name) \ - __COND_SYSCALL(x32, compat_sys_##name) + __COND_SYSCALL(x64, compat_sys_##name) #define __X32_COMPAT_SYS_NI(name) \ - __SYS_NI(x32, compat_sys_##name) + __SYS_NI(x64, compat_sys_##name) #else /* CONFIG_X86_X32 */ #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h index ddbdefd5b94f..91a7b6687c3b 100644 --- a/arch/x86/include/asm/thermal.h +++ b/arch/x86/include/asm/thermal.h @@ -3,11 +3,13 @@ #define _ASM_X86_THERMAL_H #ifdef CONFIG_X86_THERMAL_VECTOR +void therm_lvt_init(void); void intel_init_thermal(struct cpuinfo_x86 *c); bool x86_thermal_enabled(void); void intel_thermal_interrupt(void); #else -static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } +static inline void therm_lvt_init(void) { } +static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } #endif #endif /* _ASM_X86_THERMAL_H */ diff --git a/arch/x86/include/asm/unaligned.h b/arch/x86/include/asm/unaligned.h deleted file mode 100644 index 9c754a7447aa..000000000000 --- a/arch/x86/include/asm/unaligned.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_UNALIGNED_H -#define _ASM_X86_UNALIGNED_H - -/* - * The x86 can do unaligned accesses itself. - */ - -#include <linux/unaligned/access_ok.h> -#include <linux/unaligned/generic.h> - -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le - -#endif /* _ASM_X86_UNALIGNED_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index c1c3d31b15c0..80e9d5206a71 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -13,7 +13,7 @@ # define __ARCH_WANT_SYS_OLD_MMAP # define __ARCH_WANT_SYS_OLD_SELECT -# define __NR_ia32_syscall_max __NR_syscall_max +# define IA32_NR_syscalls (__NR_syscalls) # else @@ -26,12 +26,12 @@ # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 # define __ARCH_WANT_COMPAT_SYS_PREADV64V2 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +# define X32_NR_syscalls (__NR_x32_syscalls) +# define IA32_NR_syscalls (__NR_ia32_syscalls) # endif -# define NR_syscalls (__NR_syscall_max + 1) -# define X32_NR_syscalls (__NR_x32_syscall_max + 1) -# define IA32_NR_syscalls (__NR_ia32_syscall_max + 1) +# define NR_syscalls (__NR_syscalls) # define __ARCH_WANT_NEW_STAT # define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/uapi/asm/auxvec.h b/arch/x86/include/uapi/asm/auxvec.h index 580e3c567046..6beb55bbefa4 100644 --- a/arch/x86/include/uapi/asm/auxvec.h +++ b/arch/x86/include/uapi/asm/auxvec.h @@ -12,9 +12,9 @@ /* entries in ARCH_DLINFO: */ #if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) -# define AT_VECTOR_SIZE_ARCH 2 +# define AT_VECTOR_SIZE_ARCH 3 #else /* else it's non-compat x86-64 */ -# define AT_VECTOR_SIZE_ARCH 1 +# define AT_VECTOR_SIZE_ARCH 2 #endif #endif /* _ASM_X86_AUXVEC_H */ diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h index 5fdfcb47000f..054604aba9f0 100644 --- a/arch/x86/include/uapi/asm/hwcap2.h +++ b/arch/x86/include/uapi/asm/hwcap2.h @@ -2,10 +2,12 @@ #ifndef _ASM_X86_HWCAP2_H #define _ASM_X86_HWCAP2_H +#include <linux/const.h> + /* MONITOR/MWAIT enabled in Ring 3 */ -#define HWCAP2_RING3MWAIT (1 << 0) +#define HWCAP2_RING3MWAIT _BITUL(0) /* Kernel allows FSGSBASE instructions available in Ring 3 */ -#define HWCAP2_FSGSBASE BIT(1) +#define HWCAP2_FSGSBASE _BITUL(1) #endif diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 0662f644aad9..a6c327f8ad9e 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -159,6 +159,19 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +struct kvm_sregs2 { + /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 flags; + __u64 pdptrs[4]; +}; +#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1 + /* for KVM_GET_FPU and KVM_SET_FPU */ struct kvm_fpu { __u8 fpr[8][16]; diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 950afebfba88..5146bbab84d4 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -33,6 +33,8 @@ #define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_FEATURE_ASYNC_PF_INT 14 #define KVM_FEATURE_MSI_EXT_DEST_ID 15 +#define KVM_FEATURE_HC_MAP_GPA_RANGE 16 +#define KVM_FEATURE_MIGRATION_CONTROL 17 #define KVM_HINTS_REALTIME 0 @@ -54,6 +56,7 @@ #define MSR_KVM_POLL_CONTROL 0x4b564d05 #define MSR_KVM_ASYNC_PF_INT 0x4b564d06 #define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 +#define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 struct kvm_steal_time { __u64 steal; @@ -90,6 +93,16 @@ struct kvm_clock_pairing { /* MSR_KVM_ASYNC_PF_INT */ #define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) +/* MSR_KVM_MIGRATION_CONTROL */ +#define KVM_MIGRATION_READY (1 << 0) + +/* KVM_HC_MAP_GPA_RANGE */ +#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K 0 +#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M (1 << 0) +#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G (1 << 1) +#define KVM_MAP_GPA_RANGE_ENC_STAT(n) (n << 4) +#define KVM_MAP_GPA_RANGE_ENCRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(1) +#define KVM_MAP_GPA_RANGE_DECRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(0) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 554f75fe013c..efa969325ede 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -110,6 +110,9 @@ #define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 #define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff +/* Exit code reserved for hypervisor/software use */ +#define SVM_EXIT_SW 0xf0000000 + #define SVM_EXIT_ERR -1 #define SVM_EXIT_REASONS \ |