Diffstat (limited to 'arch/x86/include')
32 files changed, 337 insertions, 198 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index b19ec8282d50..1e51650b79d7 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -3,6 +3,7 @@
 generated-y += syscalls_32.h
 generated-y += syscalls_64.h
+generated-y += syscalls_x32.h
 generated-y += unistd_32_ia32.h
 generated-y += unistd_64_x32.h
 generated-y += xen-hypercalls.h
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 0603c7423aca..3ad3da9a7d97 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,25 +3,26 @@
 #define _ASM_X86_ASM_H
 
 #ifdef __ASSEMBLY__
-# define __ASM_FORM(x)	x
-# define __ASM_FORM_RAW(x)	x
-# define __ASM_FORM_COMMA(x)	x,
+# define __ASM_FORM(x, ...)		x,## __VA_ARGS__
+# define __ASM_FORM_RAW(x, ...)		x,## __VA_ARGS__
+# define __ASM_FORM_COMMA(x, ...)	x,## __VA_ARGS__,
 #else
 #include <linux/stringify.h>
-
-# define __ASM_FORM(x)	" " __stringify(x) " "
-# define __ASM_FORM_RAW(x)	__stringify(x)
-# define __ASM_FORM_COMMA(x)	" " __stringify(x) ","
+# define __ASM_FORM(x, ...)		" " __stringify(x,##__VA_ARGS__) " "
+# define __ASM_FORM_RAW(x, ...)		__stringify(x,##__VA_ARGS__)
+# define __ASM_FORM_COMMA(x, ...)	" " __stringify(x,##__VA_ARGS__) ","
 #endif
 
+#define _ASM_BYTES(x, ...)	__ASM_FORM(.byte x,##__VA_ARGS__ ;)
+
 #ifndef __x86_64__
 /* 32 bit */
-# define __ASM_SEL(a,b) __ASM_FORM(a)
-# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
+# define __ASM_SEL(a,b)		__ASM_FORM(a)
+# define __ASM_SEL_RAW(a,b)	__ASM_FORM_RAW(a)
 #else
 /* 64 bit */
-# define __ASM_SEL(a,b) __ASM_FORM(b)
-# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
+# define __ASM_SEL(a,b)		__ASM_FORM(b)
+# define __ASM_SEL_RAW(a,b)	__ASM_FORM_RAW(b)
 #endif
 
 #define __ASM_SIZE(inst, ...)	__ASM_SEL(inst##l##__VA_ARGS__, \
@@ -119,6 +120,8 @@
 # define CC_OUT(c) [_cc_ ## c] "=qm"
 #endif
 
+#ifdef __KERNEL__
+
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE_HANDLE(from, to, handler)			\
@@ -185,4 +188,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif /* __ASSEMBLY__ */
 
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_X86_ASM_H */
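Note on the __ASM_FORM() rework above: making the stringifying macros variadic lets a comma-separated byte list travel through one macro argument slot, which is what the new _ASM_BYTES() relies on. A minimal user-space model of the C-side expansion (a sketch mirroring linux/stringify.h, not the kernel header itself):

    /* GNU C: the ,## __VA_ARGS__ paste drops the comma when no extra
     * arguments are passed, so single-byte uses still expand cleanly. */
    #define __stringify_1(x...) #x
    #define __stringify(x...)   __stringify_1(x)

    #define __ASM_FORM(x, ...)  " " __stringify(x,##__VA_ARGS__) " "
    #define _ASM_BYTES(x, ...)  __ASM_FORM(.byte x,##__VA_ARGS__ ;)

    /* _ASM_BYTES(0x66, 0x90) expands to " .byte 0x66,0x90 ; ", the
     * two-byte NOP, directly usable inside an asm() template: */
    static inline void two_byte_nop(void)
    {
        asm volatile(_ASM_BYTES(0x66, 0x90));
    }

The nops.h hunk later in this diff uses exactly this to retire its private _ASM_MK_NOP() helper.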
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index f732741ad7c7..5e754e895767 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -269,6 +269,4 @@ static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v)
 # include <asm/atomic64_64.h>
 #endif
 
-#define ARCH_ATOMIC
-
 #endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 4819d5e5a335..3ba772a69cc8 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -54,11 +54,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #define dma_rmb()	barrier()
 #define dma_wmb()	barrier()
 
-#ifdef CONFIG_X86_32
-#define __smp_mb()	asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc")
-#else
-#define __smp_mb()	asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc")
-#endif
+#define __smp_mb()	asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
+
 #define __smp_rmb()	dma_rmb()
 #define __smp_wmb()	barrier()
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
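The barrier.h change folds two definitions that differed only in the stack-pointer register name into one: _ASM_SP from <asm/asm.h> expands to "esp" or "rsp" at preprocessing time, and the string pieces concatenate into the same instruction as before. A user-space model, assuming a 64-bit build so _ASM_SP is "rsp":

    #define _ASM_SP "rsp"   /* what <asm/asm.h> selects on x86-64 */

    static inline void full_barrier(void)
    {
        /*
         * Locked RMW of the word just below the stack pointer: it adds 0,
         * so memory is unchanged, but the LOCK prefix orders all prior
         * loads and stores, giving a full barrier at lower cost than
         * MFENCE on many CPUs.
         */
        asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc");
    }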
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ac37830ae941..d0ce5cfd3ac1 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -108,7 +108,7 @@
 #define X86_FEATURE_EXTD_APICID		( 3*32+26) /* Extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM		( 3*32+27) /* AMD multi-node processor */
 #define X86_FEATURE_APERFMPERF		( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-/* free					( 3*32+29) */
+#define X86_FEATURE_RAPL		( 3*32+29) /* AMD/Hygon RAPL interface */
 #define X86_FEATURE_NONSTOP_TSC_S3	( 3*32+30) /* TSC doesn't stop in S3 state */
 #define X86_FEATURE_TSC_KNOWN_FREQ	( 3*32+31) /* TSC has known frequency */
 
@@ -378,6 +378,7 @@
 #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
 #define X86_FEATURE_SRBDS_CTRL		(18*32+ 9) /* "" SRBDS mitigation MSR available */
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
+#define X86_FEATURE_RTM_ALWAYS_ABORT	(18*32+11) /* "" RTM transaction always aborts */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
 #define X86_FEATURE_HYBRID_CPU		(18*32+15) /* "" This part has CPUs of more than one type */
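X86_FEATURE_RTM_ALWAYS_ABORT (CPUID.7.0:EDX[11]) reports parts on which RTM transactions always abort. Kernel code consumes such flags through the usual cpufeature accessors; a hedged sketch (the real consumer is the TSX handling under arch/x86/kernel/cpu/, which is more involved):

    #include <asm/cpufeature.h>

    /*
     * Sketch only: when the CPU says RTM can never succeed, the kernel
     * can hide the RTM/HLE CPUID bits from applications by setting
     * MSR_TFA_TSX_CPUID_CLEAR, added in the msr-index.h hunk below.
     */
    static bool rtm_always_aborts(void)
    {
        return boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT);
    }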
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
index f58de66091e5..8b6bd63530dc 100644
--- a/arch/x86/include/asm/crash.h
+++ b/arch/x86/include/asm/crash.h
@@ -9,10 +9,4 @@ int crash_setup_memmap_entries(struct kimage *image,
 		struct boot_params *params);
 void crash_smp_send_stop(void);
 
-#ifdef CONFIG_KEXEC_CORE
-void __init crash_reserve_low_1M(void);
-#else
-static inline void __init crash_reserve_low_1M(void) { }
-#endif
-
 #endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index ceb12683b6d1..ab97b22ac04a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -225,6 +225,26 @@ static inline void store_idt(struct desc_ptr *dtr)
 	asm volatile("sidt %0":"=m" (*dtr));
 }
 
+static inline void native_gdt_invalidate(void)
+{
+	const struct desc_ptr invalid_gdt = {
+		.address = 0,
+		.size = 0
+	};
+
+	native_load_gdt(&invalid_gdt);
+}
+
+static inline void native_idt_invalidate(void)
+{
+	const struct desc_ptr invalid_idt = {
+		.address = 0,
+		.size = 0
+	};
+
+	native_load_idt(&invalid_idt);
+}
+
 /*
  * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
  * a read-only remapping. To prevent a page fault, the GDT is switched to the
@@ -422,12 +442,10 @@ extern bool idt_is_f00f_address(unsigned long address);
 
 #ifdef CONFIG_X86_64
 extern void idt_setup_early_pf(void);
-extern void idt_setup_ist_traps(void);
 #else
 static inline void idt_setup_early_pf(void) { }
-static inline void idt_setup_ist_traps(void) { }
 #endif
 
-extern void idt_invalidate(void *addr);
+extern void idt_invalidate(void);
 
 #endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 606f5cc579b2..f1366ce609e3 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -52,7 +52,7 @@
  * Support for passing hypercall input parameter block via XMM
  * registers is available
  */
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE		BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE		BIT(4)
 /* Support for a virtual guest idle state is available */
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE		BIT(5)
 /* Frequency MSRs available */
@@ -61,6 +61,11 @@
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE		BIT(10)
 /* Support for debug MSRs available */
 #define HV_FEATURE_DEBUG_MSRS_AVAILABLE			BIT(11)
+/*
+ * Support for returning hypercall output block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE		BIT(15)
 /* stimer Direct Mode is available */
 #define HV_STIMER_DIRECT_MODE_AVAILABLE			BIT(19)
 
@@ -133,6 +138,15 @@
 #define HV_X64_NESTED_GUEST_MAPPING_FLUSH		BIT(18)
 #define HV_X64_NESTED_MSR_BITMAP			BIT(19)
 
+/*
+ * This is specific to AMD and specifies that enlightened TLB flush is
+ * supported. If the guest opts in to this feature, ASID invalidations
+ * only flush gva -> hpa mapping entries. To flush the TLB entries derived
+ * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace
+ * or HvFlushGuestPhysicalAddressList).
+ */
+#define HV_X64_NESTED_ENLIGHTENED_TLB			BIT(22)
+
 /* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */
 #define HV_PARAVISOR_PRESENT				BIT(0)
 
@@ -314,6 +328,9 @@ struct hv_tsc_emulation_status {
 #define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
 
+/* Number of XMM registers used in hypercall input/output */
+#define HV_HYPERCALL_MAX_XMM_REGISTERS		6
+
 struct hv_nested_enlightenments_control {
 	struct {
 		__u32 directhypercall:1;
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 73d45b0dfff2..1345088e9902 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs)
  */
 #define DECLARE_IDTENTRY_VC(vector, func)				\
 	DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func);			\
-	__visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code);	\
-	__visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
+	__visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code);	\
+	__visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)
 
 /**
  * DEFINE_IDTENTRY_IST - Emit code for IST entry points
@@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs)
 	DEFINE_IDTENTRY_RAW_ERRORCODE(func)
 
 /**
- * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
-				   which runs on a safe stack.
+ * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
+			       when raised from kernel mode
  * @func:	Function name of the entry point
  *
  * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
  */
-#define DEFINE_IDTENTRY_VC_SAFE_STACK(func)				\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
+#define DEFINE_IDTENTRY_VC_KERNEL(func)					\
+	DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)
 
 /**
- * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
-			    which runs on the VC fall-back stack
+ * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
+			     when raised from user mode
  * @func:	Function name of the entry point
  *
  * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
  */
-#define DEFINE_IDTENTRY_VC_IST(func)					\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)
-
-/**
- * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
- * @func:	Function name of the entry point
- *
- * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
- */
-#define DEFINE_IDTENTRY_VC(func)					\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(func)
+#define DEFINE_IDTENTRY_VC_USER(func)					\
+	DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)
 
 #else /* CONFIG_X86_64 */
 
@@ -504,7 +495,7 @@ __visible noinstr void func(struct pt_regs *regs,			\
 	.align 8
 SYM_CODE_START(irq_entries_start)
     vector=FIRST_EXTERNAL_VECTOR
-    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+    .rept NR_EXTERNAL_VECTORS
 	UNWIND_HINT_IRET_REGS
 0 :
 	.byte	0x6a, vector
@@ -520,7 +511,7 @@ SYM_CODE_END(irq_entries_start)
 	.align 8
 SYM_CODE_START(spurious_entries_start)
     vector=FIRST_SYSTEM_VECTOR
-    .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+    .rept NR_SYSTEM_VECTORS
 	UNWIND_HINT_IRET_REGS
 0 :
 	.byte	0x6a, vector
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 955b06d6325a..27158436f322 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -102,7 +102,8 @@
 #define INTEL_FAM6_TIGERLAKE_L		0x8C	/* Willow Cove */
 #define INTEL_FAM6_TIGERLAKE		0x8D	/* Willow Cove */
 
-#define INTEL_FAM6_SAPPHIRERAPIDS_X	0x8F	/* Willow Cove */
+
+#define INTEL_FAM6_SAPPHIRERAPIDS_X	0x8F	/* Golden Cove */
 
 #define INTEL_FAM6_ALDERLAKE		0x97	/* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_L		0x9A	/* Golden Cove / Gracemont */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 889f8b1b5b7f..43dcb9284208 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -26,8 +26,8 @@
  * This file enumerates the exact layout of them:
  */
 
+/* This is used as an interrupt vector when programming the APIC. */
 #define NMI_VECTOR			0x02
-#define MCE_VECTOR			0x12
 
 /*
  * IDT vectors usable for external interrupt sources start at 0x20.
@@ -84,7 +84,7 @@
  */
 #define IRQ_WORK_VECTOR			0xf6
 
-#define UV_BAU_MESSAGE			0xf5
+/* 0xf5 - unused, was UV_BAU_MESSAGE */
 #define DEFERRED_ERROR_VECTOR		0xf4
 
 /* Vector on which hypervisor callbacks will be delivered */
@@ -114,6 +114,9 @@
 #define FIRST_SYSTEM_VECTOR		NR_VECTORS
 #endif
 
+#define NR_EXTERNAL_VECTORS		(FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+#define NR_SYSTEM_VECTORS		(NR_VECTORS - FIRST_SYSTEM_VECTOR)
+
 /*
  * Size the maximum number of interrupts.
  *
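NR_EXTERNAL_VECTORS and NR_SYSTEM_VECTORS simply name the arithmetic that the idtentry.h stubs above previously spelled out in their .rept counts. A worked check, assuming the non-APIC fallback shown in the hunk (FIRST_SYSTEM_VECTOR == NR_VECTORS; with CONFIG_X86_LOCAL_APIC it is LOCAL_TIMER_VECTOR instead and both counts change):

    #define FIRST_EXTERNAL_VECTOR 0x20
    #define NR_VECTORS            256
    #define FIRST_SYSTEM_VECTOR   NR_VECTORS

    #define NR_EXTERNAL_VECTORS (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
    #define NR_SYSTEM_VECTORS   (NR_VECTORS - FIRST_SYSTEM_VECTOR)

    _Static_assert(NR_EXTERNAL_VECTORS == 224, "stubs for vectors 0x20..0xff");
    _Static_assert(NR_SYSTEM_VECTORS == 0, "no system vectors without local APIC");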
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 610a05374c02..0449b125d27f 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -4,8 +4,6 @@
 
 #define HAVE_JUMP_LABEL_BATCH
 
-#define JUMP_LABEL_NOP_SIZE 5
-
 #include <asm/asm.h>
 #include <asm/nops.h>
 
@@ -14,15 +12,35 @@
 #include <linux/stringify.h>
 #include <linux/types.h>
 
+#define JUMP_TABLE_ENTRY				\
+	".pushsection __jump_table,  \"aw\" \n\t"	\
+	_ASM_ALIGN "\n\t"				\
+	".long 1b - . \n\t"				\
+	".long %l[l_yes] - . \n\t"			\
+	_ASM_PTR "%c0 + %c1 - .\n\t"			\
+	".popsection \n\t"
+
+#ifdef CONFIG_STACK_VALIDATION
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:"
+		"jmp %l[l_yes] # objtool NOPs this \n\t"
+		JUMP_TABLE_ENTRY
+		: : "i" (key), "i" (2 | branch) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+#else
+
 static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
 		".byte " __stringify(BYTES_NOP5) "\n\t"
-		".pushsection __jump_table,  \"aw\" \n\t"
-		_ASM_ALIGN "\n\t"
-		".long 1b - ., %l[l_yes] - . \n\t"
-		_ASM_PTR "%c0 + %c1 - .\n\t"
-		".popsection \n\t"
+		JUMP_TABLE_ENTRY
 		: : "i" (key), "i" (branch) : : l_yes);
 
 	return false;
@@ -30,16 +48,13 @@ l_yes:
 	return true;
 }
 
+#endif /* STACK_VALIDATION */
+
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
-		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
-		"2:\n\t"
-		".pushsection __jump_table,  \"aw\" \n\t"
-		_ASM_ALIGN "\n\t"
-		".long 1b - ., %l[l_yes] - . \n\t"
-		_ASM_PTR "%c0 + %c1 - .\n\t"
-		".popsection \n\t"
+		"jmp %l[l_yes]\n\t"
+		JUMP_TABLE_ENTRY
 		: : "i" (key), "i" (branch) : : l_yes);
 
 	return false;
@@ -47,41 +62,7 @@ l_yes:
 	return true;
 }
 
-#else	/* __ASSEMBLY__ */
-
-.macro STATIC_JUMP_IF_TRUE target, key, def
-.Lstatic_jump_\@:
-	.if \def
-	/* Equivalent to "jmp.d32 \target" */
-	.byte		0xe9
-	.long		\target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-	.else
-	.byte		BYTES_NOP5
-	.endif
-	.pushsection __jump_table, "aw"
-	_ASM_ALIGN
-	.long		.Lstatic_jump_\@ - ., \target - .
-	_ASM_PTR	\key - .
-	.popsection
-.endm
-
-.macro STATIC_JUMP_IF_FALSE target, key, def
-.Lstatic_jump_\@:
-	.if \def
-	.byte		BYTES_NOP5
-	.else
-	/* Equivalent to "jmp.d32 \target" */
-	.byte		0xe9
-	.long		\target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-	.endif
-	.pushsection __jump_table, "aw"
-	_ASM_ALIGN
-	.long		.Lstatic_jump_\@ - ., \target - .
-	_ASM_PTR	\key + 1 - .
-	.popsection
-.endm
+extern int arch_jump_entry_size(struct jump_entry *entry);
 
 #endif	/* __ASSEMBLY__ */
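Static-key consumers are unaffected by the jump_label.h rework: a key still compiles to a NOP or a jump that is patched at runtime. What changes is who emits the NOP: with CONFIG_STACK_VALIDATION the compiler emits a real jmp and objtool later rewrites it to a NOP of the right size, which is why the key operand gains the "2 | branch" size encoding. Typical usage, unchanged by this patch:

    #include <linux/jump_label.h>

    extern void do_slow_path(void);	/* hypothetical slow-path helper */

    static DEFINE_STATIC_KEY_FALSE(my_feature_key);

    void my_hot_path(void)
    {
        /* Compiles to a NOP until the key is flipped on. */
        if (static_branch_unlikely(&my_feature_key))
            do_slow_path();
    }

    void my_feature_enable(void)
    {
        static_branch_enable(&my_feature_key);	/* patches all sites */
    }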
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index e7bef91cee04..a12a4987154e 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -87,7 +87,10 @@ KVM_X86_OP(set_identity_map_addr)
 KVM_X86_OP(get_mt_mask)
 KVM_X86_OP(load_mmu_pgd)
 KVM_X86_OP_NULL(has_wbinvd_exit)
-KVM_X86_OP(write_l1_tsc_offset)
+KVM_X86_OP(get_l2_tsc_offset)
+KVM_X86_OP(get_l2_tsc_multiplier)
+KVM_X86_OP(write_tsc_offset)
+KVM_X86_OP(write_tsc_multiplier)
 KVM_X86_OP(get_exit_info)
 KVM_X86_OP(check_intercept)
 KVM_X86_OP(handle_exit_irqoff)
@@ -106,8 +109,8 @@ KVM_X86_OP_NULL(set_hv_timer)
 KVM_X86_OP_NULL(cancel_hv_timer)
 KVM_X86_OP(setup_mce)
 KVM_X86_OP(smi_allowed)
-KVM_X86_OP(pre_enter_smm)
-KVM_X86_OP(pre_leave_smm)
+KVM_X86_OP(enter_smm)
+KVM_X86_OP(leave_smm)
 KVM_X86_OP(enable_smi_window)
 KVM_X86_OP_NULL(mem_enc_op)
 KVM_X86_OP_NULL(mem_enc_reg_region)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9c7ced0e3171..974cbfb1eefe 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -85,7 +85,7 @@
 #define KVM_REQ_APICV_UPDATE \
 	KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_TLB_FLUSH_CURRENT	KVM_ARCH_REQ(26)
-#define KVM_REQ_HV_TLB_FLUSH \
+#define KVM_REQ_TLB_FLUSH_GUEST \
 	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
 #define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
@@ -269,12 +269,36 @@ enum x86_intercept_stage;
 struct kvm_kernel_irq_routing_entry;
 
 /*
- * the pages used as guest page table on soft mmu are tracked by
- * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
- * by indirect shadow page can not be more than 15 bits.
+ * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
+ * also includes TDP pages) to determine whether or not a page can be used in
+ * the given MMU context.  This is a subset of the overall kvm_mmu_role to
+ * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
+ * 2 bytes per gfn instead of 4 bytes per gfn.
  *
- * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
- * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
+ * Indirect upper-level shadow pages are tracked for write-protection via
+ * gfn_track.  As above, gfn_track is a 16 bit counter, so KVM must not create
+ * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
+ * gfn_track will overflow and explosions will ensue.
+ *
+ * A unique shadow page (SP) for a gfn is created if and only if an existing SP
+ * cannot be reused.  The ability to reuse a SP is tracked by its role, which
+ * incorporates various mode bits and properties of the SP.  Roughly speaking,
+ * the number of unique SPs that can theoretically be created is 2^n, where n
+ * is the number of bits that are used to compute the role.
+ *
+ * But, even though there are 18 bits in the mask below, not all combinations
+ * of modes and flags are possible.  The maximum number of possible upper-level
+ * shadow pages for a single gfn is in the neighborhood of 2^13.
+ *
+ *  - invalid shadow pages are not accounted.
+ *  - level is effectively limited to four combinations, not 16 as the number
+ *    of bits would imply, as 4k SPs are not tracked (allowed to go unsync).
+ *  - level is effectively unused for non-PAE paging because there is exactly
+ *    one upper level (see 4k SP exception above).
+ *  - quadrant is used only for non-PAE paging and is exclusive with
+ *    gpte_is_8_bytes.
+ *  - execonly and ad_disabled are used only for nested EPT, which makes it
+ *    exclusive with quadrant.
  */
 union kvm_mmu_page_role {
 	u32 word;
@@ -285,7 +309,7 @@ union kvm_mmu_page_role {
 		unsigned direct:1;
 		unsigned access:3;
 		unsigned invalid:1;
-		unsigned nxe:1;
+		unsigned efer_nx:1;
 		unsigned cr0_wp:1;
 		unsigned smep_andnot_wp:1;
 		unsigned smap_andnot_wp:1;
@@ -303,13 +327,26 @@ union kvm_mmu_page_role {
 	};
 };
 
-union kvm_mmu_extended_role {
 /*
- * This structure complements kvm_mmu_page_role caching everything needed for
- * MMU configuration. If nothing in both these structures changed, MMU
- * re-configuration can be skipped. @valid bit is set on first usage so we don't
- * treat all-zero structure as valid data.
+ * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties
+ * relevant to the current MMU configuration.  When loading CR0, CR4, or EFER,
+ * including on nested transitions, if nothing in the full role changes then
+ * MMU re-configuration can be skipped.  @valid bit is set on first usage so we
+ * don't treat all-zero structure as valid data.
+ *
+ * The properties that are tracked in the extended role but not the page role
+ * are for things that either (a) do not affect the validity of the shadow page
+ * or (b) are indirectly reflected in the shadow page's role.  For example,
+ * CR4.PKE only affects permission checks for software walks of the guest page
+ * tables (because KVM doesn't support Protection Keys with shadow paging), and
+ * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level.
+ *
+ * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role.
+ * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and
+ * SMAP, but the MMU's permission checks for software walks need to be SMEP and
+ * SMAP aware regardless of CR0.WP.
  */
+union kvm_mmu_extended_role {
 	u32 word;
 	struct {
 		unsigned int valid:1;
@@ -320,7 +357,7 @@ union kvm_mmu_extended_role {
 		unsigned int cr4_pke:1;
 		unsigned int cr4_smap:1;
 		unsigned int cr4_smep:1;
-		unsigned int maxphyaddr:6;
+		unsigned int cr4_la57:1;
 	};
 };
 
@@ -420,11 +457,6 @@ struct kvm_mmu {
 
 	struct rsvd_bits_validate guest_rsvd_check;
 
-	/* Can have large pages at levels 2..last_nonleaf_level-1. */
-	u8 last_nonleaf_level;
-
-	bool nx;
-
 	u64 pdptrs[4]; /* pae */
 };
 
@@ -543,6 +575,15 @@ struct kvm_vcpu_hv {
 	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
 	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
 	cpumask_t tlb_flush;
+	bool enforce_cpuid;
+	struct {
+		u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
+		u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */
+		u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */
+		u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+		u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
+		u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+	} cpuid_cache;
 };
 
 /* Xen HVM per vcpu emulation context */
@@ -707,7 +748,7 @@ struct kvm_vcpu_arch {
 	} st;
 
 	u64 l1_tsc_offset;
-	u64 tsc_offset;
+	u64 tsc_offset; /* current tsc offset */
 	u64 last_guest_tsc;
 	u64 last_host_tsc;
 	u64 tsc_offset_adjustment;
@@ -721,7 +762,8 @@ struct kvm_vcpu_arch {
 	u32 virtual_tsc_khz;
 	s64 ia32_tsc_adjust_msr;
 	u64 msr_ia32_power_ctl;
-	u64 tsc_scaling_ratio;
+	u64 l1_tsc_scaling_ratio;
+	u64 tsc_scaling_ratio; /* current scaling ratio */
 
 	atomic_t nmi_queued;	/* unprocessed asynchronous NMIs */
 	unsigned nmi_pending;	/* NMI queued after currently running handler */
@@ -829,7 +871,7 @@ struct kvm_vcpu_arch {
 	bool l1tf_flush_l1d;
 
 	/* Host CPU on which VM-entry was most recently attempted */
-	unsigned int last_vmentry_cpu;
+	int last_vmentry_cpu;
 
 	/* AMD MSRC001_0015 Hardware Configuration */
 	u64 msr_hwcr;
@@ -851,6 +893,16 @@ struct kvm_vcpu_arch {
 
 	/* Protected Guests */
 	bool guest_state_protected;
+
+	/*
+	 * Set when PDPTS were loaded directly by the userspace without
+	 * reading the guest memory
+	 */
+	bool pdptrs_from_userspace;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+	hpa_t hv_root_tdp;
+#endif
 };
 
 struct kvm_lpage_info {
@@ -1002,7 +1054,7 @@ struct kvm_arch {
 	struct kvm_apic_map __rcu *apic_map;
 	atomic_t apic_map_dirty;
 
-	bool apic_access_page_done;
+	bool apic_access_memslot_enabled;
 	unsigned long apicv_inhibit_reasons;
 
 	gpa_t wall_clock;
@@ -1062,11 +1114,19 @@ struct kvm_arch {
 	bool exception_payload_enabled;
 
 	bool bus_lock_detection_enabled;
+	/*
+	 * If exit_on_emulation_error is set, and the in-kernel instruction
+	 * emulator fails to emulate an instruction, allow userspace
+	 * the opportunity to look at it.
+	 */
+	bool exit_on_emulation_error;
 
 	/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
 	u32 user_space_msr_mask;
 	struct kvm_x86_msr_filter __rcu *msr_filter;
 
+	u32 hypercall_exit_enabled;
+
 	/* Guest can access the SGX PROVISIONKEY. */
 	bool sgx_provisioning_allowed;
 
@@ -1124,23 +1184,35 @@ struct kvm_arch {
 	 */
 	spinlock_t tdp_mmu_pages_lock;
 #endif /* CONFIG_X86_64 */
+
+	/*
+	 * If set, rmaps have been allocated for all memslots and should be
+	 * allocated for any newly created or modified memslots.
+	 */
+	bool memslots_have_rmaps;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+	hpa_t hv_root_tdp;
+	spinlock_t hv_root_tdp_lock;
+#endif
 };
 
 struct kvm_vm_stat {
-	ulong mmu_shadow_zapped;
-	ulong mmu_pte_write;
-	ulong mmu_pde_zapped;
-	ulong mmu_flooded;
-	ulong mmu_recycled;
-	ulong mmu_cache_miss;
-	ulong mmu_unsync;
-	ulong remote_tlb_flush;
-	ulong lpages;
-	ulong nx_lpage_splits;
-	ulong max_mmu_page_hash_collisions;
+	struct kvm_vm_stat_generic generic;
+	u64 mmu_shadow_zapped;
+	u64 mmu_pte_write;
+	u64 mmu_pde_zapped;
+	u64 mmu_flooded;
+	u64 mmu_recycled;
+	u64 mmu_cache_miss;
+	u64 mmu_unsync;
+	u64 lpages;
+	u64 nx_lpage_splits;
+	u64 max_mmu_page_hash_collisions;
 };
 
 struct kvm_vcpu_stat {
+	struct kvm_vcpu_stat_generic generic;
 	u64 pf_fixed;
 	u64 pf_guest;
 	u64 tlb_flush;
@@ -1154,10 +1226,6 @@ struct kvm_vcpu_stat {
 	u64 nmi_window_exits;
 	u64 l1d_flush;
 	u64 halt_exits;
-	u64 halt_successful_poll;
-	u64 halt_attempted_poll;
-	u64 halt_poll_invalid;
-	u64 halt_wakeup;
 	u64 request_irq_exits;
 	u64 irq_exits;
 	u64 host_state_reload;
@@ -1168,11 +1236,10 @@ struct kvm_vcpu_stat {
 	u64 irq_injections;
 	u64 nmi_injections;
 	u64 req_event;
-	u64 halt_poll_success_ns;
-	u64 halt_poll_fail_ns;
 	u64 nested_run;
 	u64 directed_yield_attempted;
 	u64 directed_yield_successful;
+	u64 guest_mode;
 };
 
 struct x86_instruction_info;
@@ -1304,8 +1371,10 @@ struct kvm_x86_ops {
 
 	bool (*has_wbinvd_exit)(void);
 
-	/* Returns actual tsc_offset set in active VMCS */
-	u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
+	u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
+	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+	void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
 
 	/*
 	 * Retrieve somewhat arbitrary exit information.  Intended to be used
@@ -1363,8 +1432,8 @@ struct kvm_x86_ops {
 	void (*setup_mce)(struct kvm_vcpu *vcpu);
 
 	int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
-	int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
-	int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+	int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
+	int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
 	void (*enable_smi_window)(struct kvm_vcpu *vcpu);
 
 	int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
@@ -1423,6 +1492,7 @@ struct kvm_arch_async_pf {
 extern u32 __read_mostly kvm_nr_uret_msrs;
 extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
+extern bool __read_mostly enable_apicv;
 extern struct kvm_x86_ops kvm_x86_ops;
 
 #define KVM_X86_OP(func) \
@@ -1463,6 +1533,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 
+void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 				      struct kvm_memory_slot *memslot,
@@ -1477,7 +1548,6 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
-bool pdptrs_changed(struct kvm_vcpu *vcpu);
 
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			const void *val, int bytes);
@@ -1650,6 +1720,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			ulong roots_to_free);
+void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 			   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1662,7 +1733,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
 			       struct x86_exception *exception);
 
 bool kvm_apicv_activated(struct kvm *kvm);
-void kvm_apicv_init(struct kvm *kvm, bool enable);
 void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
 void kvm_request_apicv_update(struct kvm *kvm, bool activate,
 			      unsigned long bit);
@@ -1675,8 +1745,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			    gva_t gva, hpa_t root_hpa);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
-void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
-		     bool skip_mmu_sync);
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
 
 void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
 		       int tdp_huge_page_level);
@@ -1788,8 +1857,10 @@ static inline bool kvm_is_supported_user_return_msr(u32 msr)
 	return kvm_find_user_return_msr(msr) >= 0;
 }
 
-u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
+u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
+u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier);
+u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
@@ -1863,4 +1934,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 
 int kvm_cpu_dirty_log_size(void);
 
+int alloc_all_memslots_rmaps(struct kvm *kvm);
+
 #endif /* _ASM_X86_KVM_HOST_H */
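The TSC callback split above exists so nested virtualization can compose L1 and L2 values: hardware computes guest_tsc = host_tsc * multiplier (fixed point) + offset, so stacking two levels yields a combined multiplier of l1*l2 and a combined offset of l1_offset*l2_multiplier + l2_offset. A hedged sketch of the math behind the new kvm_calc_nested_tsc_*() helpers, assuming SVM's 8.32 fixed-point ratio (32 fractional bits; VMX uses 48, and the real code in arch/x86/kvm/x86.c also special-cases the default 1.0 ratio and multiplies the offset as a signed value):

    #include <stdint.h>

    #define TSC_RATIO_FRAC_BITS 32	/* SVM; an assumption for this sketch */

    /* Same semantics as the kernel's mul_u64_u64_shr() in linux/math64.h. */
    static uint64_t mul_u64_u64_shr(uint64_t a, uint64_t b, unsigned int shift)
    {
        return (uint64_t)(((unsigned __int128)a * b) >> shift);
    }

    static uint64_t nested_tsc_multiplier(uint64_t l1_mult, uint64_t l2_mult)
    {
        return mul_u64_u64_shr(l1_mult, l2_mult, TSC_RATIO_FRAC_BITS);
    }

    static uint64_t nested_tsc_offset(uint64_t l1_offset, uint64_t l2_offset,
                                      uint64_t l2_mult)
    {
        /* L2 sees (host_tsc * l1_mult + l1_offset) * l2_mult + l2_offset. */
        return mul_u64_u64_shr(l1_offset, l2_mult, TSC_RATIO_FRAC_BITS) +
               l2_offset;
    }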
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ddfb3cad8dff..0607ec4f5091 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -305,7 +305,7 @@ extern void apei_mce_report_mem_error(int corrected,
 /* These may be used by multiple smca_hwid_mcatypes */
 enum smca_bank_types {
 	SMCA_LS = 0,	/* Load Store */
-	SMCA_LS_V2,	/* Load Store */
+	SMCA_LS_V2,
 	SMCA_IF,	/* Instruction Fetch */
 	SMCA_L2_CACHE,	/* L2 Cache */
 	SMCA_DE,	/* Decoder Unit */
@@ -314,17 +314,22 @@ enum smca_bank_types {
 	SMCA_FP,	/* Floating Point */
 	SMCA_L3_CACHE,	/* L3 Cache */
 	SMCA_CS,	/* Coherent Slave */
-	SMCA_CS_V2,	/* Coherent Slave */
+	SMCA_CS_V2,
 	SMCA_PIE,	/* Power, Interrupts, etc. */
 	SMCA_UMC,	/* Unified Memory Controller */
+	SMCA_UMC_V2,
 	SMCA_PB,	/* Parameter Block */
 	SMCA_PSP,	/* Platform Security Processor */
-	SMCA_PSP_V2,	/* Platform Security Processor */
+	SMCA_PSP_V2,
 	SMCA_SMU,	/* System Management Unit */
-	SMCA_SMU_V2,	/* System Management Unit */
+	SMCA_SMU_V2,
 	SMCA_MP5,	/* Microprocessor 5 Unit */
 	SMCA_NBIO,	/* Northbridge IO Unit */
 	SMCA_PCIE,	/* PCI Express Unit */
+	SMCA_PCIE_V2,
+	SMCA_XGMI_PCS,	/* xGMI PCS Unit */
+	SMCA_XGMI_PHY,	/* xGMI PHY Unit */
+	SMCA_WAFL_PHY,	/* WAFL PHY Unit */
 	N_SMCA_BANK_TYPES
 };
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 211ba3375ee9..a7c413432b33 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -772,6 +772,10 @@
 
 #define MSR_TFA_RTM_FORCE_ABORT_BIT	0
 #define MSR_TFA_RTM_FORCE_ABORT	BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+#define MSR_TFA_TSX_CPUID_CLEAR_BIT	1
+#define MSR_TFA_TSX_CPUID_CLEAR	BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT)
+#define MSR_TFA_SDV_ENABLE_RTM_BIT	2
+#define MSR_TFA_SDV_ENABLE_RTM	BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT)
 
 /* P4/Xeon+ specific */
 #define MSR_IA32_MCG_EAX		0x00000180
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index c1e5e818ba16..c5573eaa5bb9 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_NOPS_H
 #define _ASM_X86_NOPS_H
 
+#include <asm/asm.h>
+
 /*
  * Define nops for use with alternative() and for tracing.
  */
@@ -57,20 +59,14 @@
 
 #endif /* CONFIG_64BIT */
 
-#ifdef __ASSEMBLY__
-#define _ASM_MK_NOP(x) .byte x
-#else
-#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
-#endif
-
-#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
+#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1)
+#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2)
+#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3)
+#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4)
+#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5)
+#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6)
+#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
+#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
 
 #define ASM_NOP_MAX 8
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48803a8..4d5810c8fab7 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -34,9 +34,9 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
 	copy_page(to, from);
 }
 
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
+	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
 
 #ifndef __pa
 #define __pa(x)		__phys_addr((unsigned long)(x))
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 544f41a179fb..8fc1b5003713 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -478,6 +478,7 @@ struct x86_pmu_lbr {
 
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 extern void perf_check_microcode(void);
+extern void perf_clear_dirty_counters(void);
 extern int x86_perf_rdpmc_index(struct perf_event *event);
 #else
 static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index f8cb8af4de5c..fe5efbcba824 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
 #define init_task_preempt_count(p) do { } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
-	per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
+	per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
 } while (0)
 
 /*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 556b2b17c3e2..364d0e42e280 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -663,6 +663,7 @@
 extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
+extern void cpu_init_secondary(void);
 extern void cpu_init_exception_handling(void);
 extern void cr4_init(void);
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 629c3df243f0..2cef6c5a52c2 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -9,8 +9,13 @@
 #define __ASM_X86_SEV_COMMON_H
 
 #define GHCB_MSR_INFO_POS		0
-#define GHCB_MSR_INFO_MASK		(BIT_ULL(12) - 1)
+#define GHCB_DATA_LOW			12
+#define GHCB_MSR_INFO_MASK		(BIT_ULL(GHCB_DATA_LOW) - 1)
 
+#define GHCB_DATA(v)			\
+	(((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW)
+
+/* SEV Information Request/Response */
 #define GHCB_MSR_SEV_INFO_RESP		0x001
 #define GHCB_MSR_SEV_INFO_REQ		0x002
 #define GHCB_MSR_VER_MAX_POS		48
@@ -28,6 +33,7 @@
 #define GHCB_MSR_PROTO_MAX(v)		(((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
 #define GHCB_MSR_PROTO_MIN(v)		(((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)
 
+/* CPUID Request/Response */
 #define GHCB_MSR_CPUID_REQ		0x004
 #define GHCB_MSR_CPUID_RESP		0x005
 #define GHCB_MSR_CPUID_FUNC_POS		32
@@ -45,6 +51,14 @@
 	 (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
 	 (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))
 
+/* AP Reset Hold */
+#define GHCB_MSR_AP_RESET_HOLD_REQ	0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP	0x007
+
+/* GHCB Hypervisor Feature Request/Response */
+#define GHCB_MSR_HV_FT_REQ		0x080
+#define GHCB_MSR_HV_FT_RESP		0x081
+
 #define GHCB_MSR_TERM_REQ		0x100
 #define GHCB_MSR_TERM_REASON_SET_POS	12
 #define GHCB_MSR_TERM_REASON_SET_MASK	0xf
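The GHCB MSR protocol packs a 12-bit operation code into the low bits of the GHCB MSR and payload data above bit 12, which is exactly what the new GHCB_DATA() extracts. A guest-side sketch of the new hypervisor-features exchange, modeled on the #VC support code in arch/x86/kernel/sev*.c (sev_es_wr_ghcb_msr()/sev_es_rd_ghcb_msr() and VMGEXIT() are the helpers used there; error handling is elided):

    static u64 get_hv_features(void)
    {
        u64 val;

        sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);	/* op code in bits 11:0 */
        VMGEXIT();				/* rep; vmmcall: exit to the hypervisor */
        val = sev_es_rd_ghcb_msr();

        if ((val & GHCB_MSR_INFO_MASK) != GHCB_MSR_HV_FT_RESP)
            return 0;			/* real code would terminate the guest */

        return GHCB_DATA(val);		/* feature bitmap lives above bit 12 */
    }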
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 9c31e0ebc55b..05f3e21f01a7 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -13,7 +13,7 @@
 /*
  * This file contains both data structures defined by SGX architecture and Linux
  * defined software data structures and functions.  The two should not be mixed
- * together for better readibility.  The architectural definitions come first.
+ * together for better readability.  The architectural definitions come first.
  */
 
 /* The SGX specific CPUID function. */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index b6ffe58c70fa..24a8d6c4fb18 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -11,7 +11,7 @@
  * The same segment is shared by percpu area and stack canary.  On
  * x86_64, percpu symbols are zero based and %gs (64-bit) points to the
  * base of percpu area.  The first occupant of the percpu area is always
- * fixed_percpu_data which contains stack_canary at the approproate
+ * fixed_percpu_data which contains stack_canary at the appropriate
  * offset.  On x86_32, the stack canary is just a regular percpu
  * variable.
 *
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 772e60efe243..e322676039f4 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -156,6 +156,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	u64 avic_physical_id;	/* Offset 0xf8 */
 	u8 reserved_7[8];
 	u64 vmsa_pa;		/* Used for an SEV-ES guest */
+	u8 reserved_8[720];
+	/*
+	 * Offset 0x3e0, 32 bytes reserved
+	 * for use by hypervisor/software.
+	 */
+	u8 reserved_sw[32];
 };
 
 
@@ -314,7 +320,7 @@ struct ghcb {
 
 
 #define EXPECTED_VMCB_SAVE_AREA_SIZE		1032
-#define EXPECTED_VMCB_CONTROL_AREA_SIZE		272
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE		1024
 #define EXPECTED_GHCB_SIZE			PAGE_SIZE
 
 static inline void __unused_size_checks(void)
@@ -326,7 +332,6 @@ static inline void __unused_size_checks(void)
 
 struct vmcb {
 	struct vmcb_control_area control;
-	u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
 	struct vmcb_save_area save;
 } __packed;
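The svm.h numbers above are easy to sanity-check by hand: the old 272-byte control area ended right after vmsa_pa, and the two new pads extend it to a full kilobyte, which is why struct vmcb no longer needs its own reserved_control spacer. A standalone check of that arithmetic:

    #include <assert.h>

    int main(void)
    {
        /* avic_physical_id at 0xf8 (8 bytes) + reserved_7[8] + vmsa_pa (8) */
        assert(0xf8 + 8 + 8 + 8 == 0x110);	/* 272: the old control-area size */
        assert(0x110 + 720 == 0x3e0);		/* reserved_8 pads to the SW area */
        assert(0x3e0 + 32 == 1024);		/* reserved_sw ends the 1 KiB area */
        return 0;
    }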
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 7cbf733d11af..f7e2d82d24fb 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -21,13 +21,12 @@ extern const sys_call_ptr_t sys_call_table[];
 
 #if defined(CONFIG_X86_32)
 #define ia32_sys_call_table sys_call_table
-#endif
-
-#if defined(CONFIG_IA32_EMULATION)
+#else
+/*
+ * These may not exist, but still put the prototypes in so we
+ * can use IS_ENABLED().
+ */
 extern const sys_call_ptr_t ia32_sys_call_table[];
-#endif
-
-#ifdef CONFIG_X86_X32_ABI
 extern const sys_call_ptr_t x32_sys_call_table[];
 #endif
 
@@ -160,7 +159,7 @@ static inline int syscall_get_arch(struct task_struct *task)
 		? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 
-void do_syscall_64(unsigned long nr, struct pt_regs *regs);
+void do_syscall_64(struct pt_regs *regs, int nr);
 void do_int80_syscall_32(struct pt_regs *regs);
 long do_fast_syscall_32(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 80c08c7d5e72..6a2827d0681f 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -17,7 +17,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
  * __x64_sys_*()         - 64-bit native syscall
  * __ia32_sys_*()        - 32-bit native syscall or common compat syscall
  * __ia32_compat_sys_*() - 32-bit compat syscall
- * __x32_compat_sys_*()  - 64-bit X32 compat syscall
+ * __x64_compat_sys_*()  - 64-bit X32 compat syscall
 *
  * The registers are decoded according to the ABI:
  * 64-bit: RDI, RSI, RDX, R10, R8, R9
@@ -166,17 +166,17 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
  * with x86_64 obviously do not need such care.
  */
 #define __X32_COMPAT_SYS_STUB0(name)					\
-	__SYS_STUB0(x32, compat_sys_##name)
+	__SYS_STUB0(x64, compat_sys_##name)
 
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)				\
-	__SYS_STUBx(x32, compat_sys##name,				\
+	__SYS_STUBx(x64, compat_sys##name,				\
 		    SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
 
 #define __X32_COMPAT_COND_SYSCALL(name)					\
-	__COND_SYSCALL(x32, compat_sys_##name)
+	__COND_SYSCALL(x64, compat_sys_##name)
 
 #define __X32_COMPAT_SYS_NI(name)					\
-	__SYS_NI(x32, compat_sys_##name)
+	__SYS_NI(x64, compat_sys_##name)
 #else /* CONFIG_X86_X32 */
 #define __X32_COMPAT_SYS_STUB0(name)
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)
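Declaring ia32_sys_call_table[]/x32_sys_call_table[] unconditionally is what lets callers switch from #ifdef to IS_ENABLED(): the dead branch is still compiled and type-checked but then discarded, so the undefined arrays are never referenced at link time. A hedged sketch of the pattern (the real dispatcher lives in arch/x86/entry/common.c and adds range checks and array_index_nospec()):

    static long dispatch(struct pt_regs *regs, unsigned int nr)
    {
        if (IS_ENABLED(CONFIG_X86_X32_ABI) && (nr & __X32_SYSCALL_BIT))
            return x32_sys_call_table[nr & ~__X32_SYSCALL_BIT](regs);

        return sys_call_table[nr](regs);	/* bounds checks omitted here */
    }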
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index c1c3d31b15c0..80e9d5206a71 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -13,7 +13,7 @@
 #  define __ARCH_WANT_SYS_OLD_MMAP
 #  define __ARCH_WANT_SYS_OLD_SELECT
 
-#  define __NR_ia32_syscall_max __NR_syscall_max
+#  define IA32_NR_syscalls (__NR_syscalls)
 
 # else
 
@@ -26,12 +26,12 @@
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64V2
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
+#  define X32_NR_syscalls (__NR_x32_syscalls)
+#  define IA32_NR_syscalls (__NR_ia32_syscalls)
 
 # endif
 
-# define NR_syscalls (__NR_syscall_max + 1)
-# define X32_NR_syscalls (__NR_x32_syscall_max + 1)
-# define IA32_NR_syscalls (__NR_ia32_syscall_max + 1)
+# define NR_syscalls (__NR_syscalls)
 
 # define __ARCH_WANT_NEW_STAT
 # define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h
index 5fdfcb47000f..054604aba9f0 100644
--- a/arch/x86/include/uapi/asm/hwcap2.h
+++ b/arch/x86/include/uapi/asm/hwcap2.h
@@ -2,10 +2,12 @@
 #ifndef _ASM_X86_HWCAP2_H
 #define _ASM_X86_HWCAP2_H
 
+#include <linux/const.h>
+
 /* MONITOR/MWAIT enabled in Ring 3 */
-#define HWCAP2_RING3MWAIT		(1 << 0)
+#define HWCAP2_RING3MWAIT		_BITUL(0)
 
 /* Kernel allows FSGSBASE instructions available in Ring 3 */
-#define HWCAP2_FSGSBASE			BIT(1)
+#define HWCAP2_FSGSBASE			_BITUL(1)
 
 #endif
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 0662f644aad9..a6c327f8ad9e 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -159,6 +159,19 @@ struct kvm_sregs {
 	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
 };
 
+struct kvm_sregs2 {
+	/* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+	struct kvm_segment cs, ds, es, fs, gs, ss;
+	struct kvm_segment tr, ldt;
+	struct kvm_dtable gdt, idt;
+	__u64 cr0, cr2, cr3, cr4, cr8;
+	__u64 efer;
+	__u64 apic_base;
+	__u64 flags;
+	__u64 pdptrs[4];
+};
+#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1
+
 /* for KVM_GET_FPU and KVM_SET_FPU */
 struct kvm_fpu {
 	__u8  fpr[8][16];
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 950afebfba88..5146bbab84d4 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -33,6 +33,8 @@
 #define KVM_FEATURE_PV_SCHED_YIELD	13
 #define KVM_FEATURE_ASYNC_PF_INT	14
 #define KVM_FEATURE_MSI_EXT_DEST_ID	15
+#define KVM_FEATURE_HC_MAP_GPA_RANGE	16
+#define KVM_FEATURE_MIGRATION_CONTROL	17
 
 #define KVM_HINTS_REALTIME      0
 
@@ -54,6 +56,7 @@
 #define MSR_KVM_POLL_CONTROL	0x4b564d05
 #define MSR_KVM_ASYNC_PF_INT	0x4b564d06
 #define MSR_KVM_ASYNC_PF_ACK	0x4b564d07
+#define MSR_KVM_MIGRATION_CONTROL	0x4b564d08
 
 struct kvm_steal_time {
 	__u64 steal;
@@ -90,6 +93,16 @@ struct kvm_clock_pairing {
 /* MSR_KVM_ASYNC_PF_INT */
 #define KVM_ASYNC_PF_VEC_MASK			GENMASK(7, 0)
 
+/* MSR_KVM_MIGRATION_CONTROL */
+#define KVM_MIGRATION_READY		(1 << 0)
+
+/* KVM_HC_MAP_GPA_RANGE */
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K	0
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M	(1 << 0)
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G	(1 << 1)
+#define KVM_MAP_GPA_RANGE_ENC_STAT(n)	(n << 4)
+#define KVM_MAP_GPA_RANGE_ENCRYPTED	KVM_MAP_GPA_RANGE_ENC_STAT(1)
+#define KVM_MAP_GPA_RANGE_DECRYPTED	KVM_MAP_GPA_RANGE_ENC_STAT(0)
 
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE            1
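KVM_HC_MAP_GPA_RANGE is the guest-to-host notification behind these new flags: an encrypted guest tells the hypervisor (and, via a userspace exit, the VMM) that a GPA range changed encryption status or page size. A guest-side sketch, assuming kvm_hypercall3() from asm/kvm_para.h and the hypercall number from the generic uapi/linux/kvm_para.h (a real guest would first check KVM_FEATURE_HC_MAP_GPA_RANGE):

    static long notify_range_decrypted(u64 gpa, u64 npages)
    {
        return kvm_hypercall3(KVM_HC_MAP_GPA_RANGE, gpa, npages,
                              KVM_MAP_GPA_RANGE_PAGE_SZ_4K |
                              KVM_MAP_GPA_RANGE_DECRYPTED);
    }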
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 554f75fe013c..efa969325ede 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -110,6 +110,9 @@
 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE		1
 #define SVM_VMGEXIT_UNSUPPORTED_EVENT		0x8000ffff
 
+/* Exit code reserved for hypervisor/software use */
+#define SVM_EXIT_SW				0xf0000000
+
 #define SVM_EXIT_ERR           -1
 
 #define SVM_EXIT_REASONS \