summaryrefslogtreecommitdiff
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/asm.h27
-rw-r--r--arch/x86/include/asm/atomic.h2
-rw-r--r--arch/x86/include/asm/barrier.h7
-rw-r--r--arch/x86/include/asm/cpufeatures.h3
-rw-r--r--arch/x86/include/asm/crash.h6
-rw-r--r--arch/x86/include/asm/desc.h24
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h19
-rw-r--r--arch/x86/include/asm/idtentry.h33
-rw-r--r--arch/x86/include/asm/intel-family.h3
-rw-r--r--arch/x86/include/asm/irq_vectors.h7
-rw-r--r--arch/x86/include/asm/jump_label.h79
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h9
-rw-r--r--arch/x86/include/asm/kvm_host.h169
-rw-r--r--arch/x86/include/asm/mce.h13
-rw-r--r--arch/x86/include/asm/msr-index.h4
-rw-r--r--arch/x86/include/asm/nops.h24
-rw-r--r--arch/x86/include/asm/page.h6
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/preempt.h2
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/sev-common.h16
-rw-r--r--arch/x86/include/asm/sgx.h2
-rw-r--r--arch/x86/include/asm/stackprotector.h2
-rw-r--r--arch/x86/include/asm/svm.h9
-rw-r--r--arch/x86/include/asm/syscall.h13
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h10
-rw-r--r--arch/x86/include/asm/unistd.h8
-rw-r--r--arch/x86/include/uapi/asm/hwcap2.h6
-rw-r--r--arch/x86/include/uapi/asm/kvm.h13
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h13
-rw-r--r--arch/x86/include/uapi/asm/svm.h3
32 files changed, 337 insertions, 198 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index b19ec8282d50..1e51650b79d7 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -3,6 +3,7 @@
generated-y += syscalls_32.h
generated-y += syscalls_64.h
+generated-y += syscalls_x32.h
generated-y += unistd_32_ia32.h
generated-y += unistd_64_x32.h
generated-y += xen-hypercalls.h
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 0603c7423aca..3ad3da9a7d97 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,25 +3,26 @@
#define _ASM_X86_ASM_H
#ifdef __ASSEMBLY__
-# define __ASM_FORM(x) x
-# define __ASM_FORM_RAW(x) x
-# define __ASM_FORM_COMMA(x) x,
+# define __ASM_FORM(x, ...) x,## __VA_ARGS__
+# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
+# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
#else
#include <linux/stringify.h>
-
-# define __ASM_FORM(x) " " __stringify(x) " "
-# define __ASM_FORM_RAW(x) __stringify(x)
-# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
+# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " "
+# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__)
+# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) ","
#endif
+#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;)
+
#ifndef __x86_64__
/* 32 bit */
-# define __ASM_SEL(a,b) __ASM_FORM(a)
-# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
+# define __ASM_SEL(a,b) __ASM_FORM(a)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
#else
/* 64 bit */
-# define __ASM_SEL(a,b) __ASM_FORM(b)
-# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
+# define __ASM_SEL(a,b) __ASM_FORM(b)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
#endif
#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \
@@ -119,6 +120,8 @@
# define CC_OUT(c) [_cc_ ## c] "=qm"
#endif
+#ifdef __KERNEL__
+
/* Exception table entry */
#ifdef __ASSEMBLY__
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
@@ -185,4 +188,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index f732741ad7c7..5e754e895767 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -269,6 +269,4 @@ static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v)
# include <asm/atomic64_64.h>
#endif
-#define ARCH_ATOMIC
-
#endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 4819d5e5a335..3ba772a69cc8 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -54,11 +54,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
#define dma_rmb() barrier()
#define dma_wmb() barrier()
-#ifdef CONFIG_X86_32
-#define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc")
-#else
-#define __smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc")
-#endif
+#define __smp_mb() asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
+
#define __smp_rmb() dma_rmb()
#define __smp_wmb() barrier()
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ac37830ae941..d0ce5cfd3ac1 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -108,7 +108,7 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-/* free ( 3*32+29) */
+#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
@@ -378,6 +378,7 @@
#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
+#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
index f58de66091e5..8b6bd63530dc 100644
--- a/arch/x86/include/asm/crash.h
+++ b/arch/x86/include/asm/crash.h
@@ -9,10 +9,4 @@ int crash_setup_memmap_entries(struct kimage *image,
struct boot_params *params);
void crash_smp_send_stop(void);
-#ifdef CONFIG_KEXEC_CORE
-void __init crash_reserve_low_1M(void);
-#else
-static inline void __init crash_reserve_low_1M(void) { }
-#endif
-
#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index ceb12683b6d1..ab97b22ac04a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -225,6 +225,26 @@ static inline void store_idt(struct desc_ptr *dtr)
asm volatile("sidt %0":"=m" (*dtr));
}
+static inline void native_gdt_invalidate(void)
+{
+ const struct desc_ptr invalid_gdt = {
+ .address = 0,
+ .size = 0
+ };
+
+ native_load_gdt(&invalid_gdt);
+}
+
+static inline void native_idt_invalidate(void)
+{
+ const struct desc_ptr invalid_idt = {
+ .address = 0,
+ .size = 0
+ };
+
+ native_load_idt(&invalid_idt);
+}
+
/*
* The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
* a read-only remapping. To prevent a page fault, the GDT is switched to the
@@ -422,12 +442,10 @@ extern bool idt_is_f00f_address(unsigned long address);
#ifdef CONFIG_X86_64
extern void idt_setup_early_pf(void);
-extern void idt_setup_ist_traps(void);
#else
static inline void idt_setup_early_pf(void) { }
-static inline void idt_setup_ist_traps(void) { }
#endif
-extern void idt_invalidate(void *addr);
+extern void idt_invalidate(void);
#endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 606f5cc579b2..f1366ce609e3 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -52,7 +52,7 @@
* Support for passing hypercall input parameter block via XMM
* registers is available
*/
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4)
/* Support for a virtual guest idle state is available */
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
/* Frequency MSRs available */
@@ -61,6 +61,11 @@
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
/* Support for debug MSRs available */
#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11)
+/*
+ * Support for returning hypercall output block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE BIT(15)
/* stimer Direct Mode is available */
#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
@@ -133,6 +138,15 @@
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
+/*
+ * This is specific to AMD and specifies that enlightened TLB flush is
+ * supported. If guest opts in to this feature, ASID invalidations only
+ * flushes gva -> hpa mapping entries. To flush the TLB entries derived
+ * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace
+ * or HvFlushGuestPhysicalAddressList).
+ */
+#define HV_X64_NESTED_ENLIGHTENED_TLB BIT(22)
+
/* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */
#define HV_PARAVISOR_PRESENT BIT(0)
@@ -314,6 +328,9 @@ struct hv_tsc_emulation_status {
#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
+/* Number of XMM registers used in hypercall input/output */
+#define HV_HYPERCALL_MAX_XMM_REGISTERS 6
+
struct hv_nested_enlightenments_control {
struct {
__u32 directhypercall:1;
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 73d45b0dfff2..1345088e9902 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs)
*/
#define DECLARE_IDTENTRY_VC(vector, func) \
DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
- __visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code); \
- __visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
+ __visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \
+ __visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)
/**
* DEFINE_IDTENTRY_IST - Emit code for IST entry points
@@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs)
DEFINE_IDTENTRY_RAW_ERRORCODE(func)
/**
- * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
- which runs on a safe stack.
+ * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
+ when raised from kernel mode
* @func: Function name of the entry point
*
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
*/
-#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \
- DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
+#define DEFINE_IDTENTRY_VC_KERNEL(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)
/**
- * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
- which runs on the VC fall-back stack
+ * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
+ when raised from user mode
* @func: Function name of the entry point
*
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
*/
-#define DEFINE_IDTENTRY_VC_IST(func) \
- DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)
-
-/**
- * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
- * @func: Function name of the entry point
- *
- * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
- */
-#define DEFINE_IDTENTRY_VC(func) \
- DEFINE_IDTENTRY_RAW_ERRORCODE(func)
+#define DEFINE_IDTENTRY_VC_USER(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)
#else /* CONFIG_X86_64 */
@@ -504,7 +495,7 @@ __visible noinstr void func(struct pt_regs *regs, \
.align 8
SYM_CODE_START(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
- .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+ .rept NR_EXTERNAL_VECTORS
UNWIND_HINT_IRET_REGS
0 :
.byte 0x6a, vector
@@ -520,7 +511,7 @@ SYM_CODE_END(irq_entries_start)
.align 8
SYM_CODE_START(spurious_entries_start)
vector=FIRST_SYSTEM_VECTOR
- .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ .rept NR_SYSTEM_VECTORS
UNWIND_HINT_IRET_REGS
0 :
.byte 0x6a, vector
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 955b06d6325a..27158436f322 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -102,7 +102,8 @@
#define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */
#define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */
-#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Willow Cove */
+
+#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */
#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 889f8b1b5b7f..43dcb9284208 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -26,8 +26,8 @@
* This file enumerates the exact layout of them:
*/
+/* This is used as an interrupt vector when programming the APIC. */
#define NMI_VECTOR 0x02
-#define MCE_VECTOR 0x12
/*
* IDT vectors usable for external interrupt sources start at 0x20.
@@ -84,7 +84,7 @@
*/
#define IRQ_WORK_VECTOR 0xf6
-#define UV_BAU_MESSAGE 0xf5
+/* 0xf5 - unused, was UV_BAU_MESSAGE */
#define DEFERRED_ERROR_VECTOR 0xf4
/* Vector on which hypervisor callbacks will be delivered */
@@ -114,6 +114,9 @@
#define FIRST_SYSTEM_VECTOR NR_VECTORS
#endif
+#define NR_EXTERNAL_VECTORS (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+#define NR_SYSTEM_VECTORS (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+
/*
* Size the maximum number of interrupts.
*
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 610a05374c02..0449b125d27f 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -4,8 +4,6 @@
#define HAVE_JUMP_LABEL_BATCH
-#define JUMP_LABEL_NOP_SIZE 5
-
#include <asm/asm.h>
#include <asm/nops.h>
@@ -14,15 +12,35 @@
#include <linux/stringify.h>
#include <linux/types.h>
+#define JUMP_TABLE_ENTRY \
+ ".pushsection __jump_table, \"aw\" \n\t" \
+ _ASM_ALIGN "\n\t" \
+ ".long 1b - . \n\t" \
+ ".long %l[l_yes] - . \n\t" \
+ _ASM_PTR "%c0 + %c1 - .\n\t" \
+ ".popsection \n\t"
+
+#ifdef CONFIG_STACK_VALIDATION
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+ asm_volatile_goto("1:"
+ "jmp %l[l_yes] # objtool NOPs this \n\t"
+ JUMP_TABLE_ENTRY
+ : : "i" (key), "i" (2 | branch) : : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+#else
+
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
asm_volatile_goto("1:"
".byte " __stringify(BYTES_NOP5) "\n\t"
- ".pushsection __jump_table, \"aw\" \n\t"
- _ASM_ALIGN "\n\t"
- ".long 1b - ., %l[l_yes] - . \n\t"
- _ASM_PTR "%c0 + %c1 - .\n\t"
- ".popsection \n\t"
+ JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
return false;
@@ -30,16 +48,13 @@ l_yes:
return true;
}
+#endif /* STACK_VALIDATION */
+
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
asm_volatile_goto("1:"
- ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
- "2:\n\t"
- ".pushsection __jump_table, \"aw\" \n\t"
- _ASM_ALIGN "\n\t"
- ".long 1b - ., %l[l_yes] - . \n\t"
- _ASM_PTR "%c0 + %c1 - .\n\t"
- ".popsection \n\t"
+ "jmp %l[l_yes]\n\t"
+ JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
return false;
@@ -47,41 +62,7 @@ l_yes:
return true;
}
-#else /* __ASSEMBLY__ */
-
-.macro STATIC_JUMP_IF_TRUE target, key, def
-.Lstatic_jump_\@:
- .if \def
- /* Equivalent to "jmp.d32 \target" */
- .byte 0xe9
- .long \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
- .else
- .byte BYTES_NOP5
- .endif
- .pushsection __jump_table, "aw"
- _ASM_ALIGN
- .long .Lstatic_jump_\@ - ., \target - .
- _ASM_PTR \key - .
- .popsection
-.endm
-
-.macro STATIC_JUMP_IF_FALSE target, key, def
-.Lstatic_jump_\@:
- .if \def
- .byte BYTES_NOP5
- .else
- /* Equivalent to "jmp.d32 \target" */
- .byte 0xe9
- .long \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
- .endif
- .pushsection __jump_table, "aw"
- _ASM_ALIGN
- .long .Lstatic_jump_\@ - ., \target - .
- _ASM_PTR \key + 1 - .
- .popsection
-.endm
+extern int arch_jump_entry_size(struct jump_entry *entry);
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index e7bef91cee04..a12a4987154e 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -87,7 +87,10 @@ KVM_X86_OP(set_identity_map_addr)
KVM_X86_OP(get_mt_mask)
KVM_X86_OP(load_mmu_pgd)
KVM_X86_OP_NULL(has_wbinvd_exit)
-KVM_X86_OP(write_l1_tsc_offset)
+KVM_X86_OP(get_l2_tsc_offset)
+KVM_X86_OP(get_l2_tsc_multiplier)
+KVM_X86_OP(write_tsc_offset)
+KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
@@ -106,8 +109,8 @@ KVM_X86_OP_NULL(set_hv_timer)
KVM_X86_OP_NULL(cancel_hv_timer)
KVM_X86_OP(setup_mce)
KVM_X86_OP(smi_allowed)
-KVM_X86_OP(pre_enter_smm)
-KVM_X86_OP(pre_leave_smm)
+KVM_X86_OP(enter_smm)
+KVM_X86_OP(leave_smm)
KVM_X86_OP(enable_smi_window)
KVM_X86_OP_NULL(mem_enc_op)
KVM_X86_OP_NULL(mem_enc_reg_region)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9c7ced0e3171..974cbfb1eefe 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -85,7 +85,7 @@
#define KVM_REQ_APICV_UPDATE \
KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
-#define KVM_REQ_HV_TLB_FLUSH \
+#define KVM_REQ_TLB_FLUSH_GUEST \
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
@@ -269,12 +269,36 @@ enum x86_intercept_stage;
struct kvm_kernel_irq_routing_entry;
/*
- * the pages used as guest page table on soft mmu are tracked by
- * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
- * by indirect shadow page can not be more than 15 bits.
+ * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
+ * also includes TDP pages) to determine whether or not a page can be used in
+ * the given MMU context. This is a subset of the overall kvm_mmu_role to
+ * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
+ * 2 bytes per gfn instead of 4 bytes per gfn.
*
- * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
- * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
+ * Indirect upper-level shadow pages are tracked for write-protection via
+ * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create
+ * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
+ * gfn_track will overflow and explosions will ensure.
+ *
+ * A unique shadow page (SP) for a gfn is created if and only if an existing SP
+ * cannot be reused. The ability to reuse a SP is tracked by its role, which
+ * incorporates various mode bits and properties of the SP. Roughly speaking,
+ * the number of unique SPs that can theoretically be created is 2^n, where n
+ * is the number of bits that are used to compute the role.
+ *
+ * But, even though there are 18 bits in the mask below, not all combinations
+ * of modes and flags are possible. The maximum number of possible upper-level
+ * shadow pages for a single gfn is in the neighborhood of 2^13.
+ *
+ * - invalid shadow pages are not accounted.
+ * - level is effectively limited to four combinations, not 16 as the number
+ * bits would imply, as 4k SPs are not tracked (allowed to go unsync).
+ * - level is effectively unused for non-PAE paging because there is exactly
+ * one upper level (see 4k SP exception above).
+ * - quadrant is used only for non-PAE paging and is exclusive with
+ * gpte_is_8_bytes.
+ * - execonly and ad_disabled are used only for nested EPT, which makes it
+ * exclusive with quadrant.
*/
union kvm_mmu_page_role {
u32 word;
@@ -285,7 +309,7 @@ union kvm_mmu_page_role {
unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
- unsigned nxe:1;
+ unsigned efer_nx:1;
unsigned cr0_wp:1;
unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
@@ -303,13 +327,26 @@ union kvm_mmu_page_role {
};
};
-union kvm_mmu_extended_role {
/*
- * This structure complements kvm_mmu_page_role caching everything needed for
- * MMU configuration. If nothing in both these structures changed, MMU
- * re-configuration can be skipped. @valid bit is set on first usage so we don't
- * treat all-zero structure as valid data.
+ * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties
+ * relevant to the current MMU configuration. When loading CR0, CR4, or EFER,
+ * including on nested transitions, if nothing in the full role changes then
+ * MMU re-configuration can be skipped. @valid bit is set on first usage so we
+ * don't treat all-zero structure as valid data.
+ *
+ * The properties that are tracked in the extended role but not the page role
+ * are for things that either (a) do not affect the validity of the shadow page
+ * or (b) are indirectly reflected in the shadow page's role. For example,
+ * CR4.PKE only affects permission checks for software walks of the guest page
+ * tables (because KVM doesn't support Protection Keys with shadow paging), and
+ * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level.
+ *
+ * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role.
+ * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and
+ * SMAP, but the MMU's permission checks for software walks need to be SMEP and
+ * SMAP aware regardless of CR0.WP.
*/
+union kvm_mmu_extended_role {
u32 word;
struct {
unsigned int valid:1;
@@ -320,7 +357,7 @@ union kvm_mmu_extended_role {
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
- unsigned int maxphyaddr:6;
+ unsigned int cr4_la57:1;
};
};
@@ -420,11 +457,6 @@ struct kvm_mmu {
struct rsvd_bits_validate guest_rsvd_check;
- /* Can have large pages at levels 2..last_nonleaf_level-1. */
- u8 last_nonleaf_level;
-
- bool nx;
-
u64 pdptrs[4]; /* pae */
};
@@ -543,6 +575,15 @@ struct kvm_vcpu_hv {
struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
cpumask_t tlb_flush;
+ bool enforce_cpuid;
+ struct {
+ u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
+ u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */
+ u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */
+ u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+ u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
+ u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+ } cpuid_cache;
};
/* Xen HVM per vcpu emulation context */
@@ -707,7 +748,7 @@ struct kvm_vcpu_arch {
} st;
u64 l1_tsc_offset;
- u64 tsc_offset;
+ u64 tsc_offset; /* current tsc offset */
u64 last_guest_tsc;
u64 last_host_tsc;
u64 tsc_offset_adjustment;
@@ -721,7 +762,8 @@ struct kvm_vcpu_arch {
u32 virtual_tsc_khz;
s64 ia32_tsc_adjust_msr;
u64 msr_ia32_power_ctl;
- u64 tsc_scaling_ratio;
+ u64 l1_tsc_scaling_ratio;
+ u64 tsc_scaling_ratio; /* current scaling ratio */
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */
@@ -829,7 +871,7 @@ struct kvm_vcpu_arch {
bool l1tf_flush_l1d;
/* Host CPU on which VM-entry was most recently attempted */
- unsigned int last_vmentry_cpu;
+ int last_vmentry_cpu;
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
@@ -851,6 +893,16 @@ struct kvm_vcpu_arch {
/* Protected Guests */
bool guest_state_protected;
+
+ /*
+ * Set when PDPTS were loaded directly by the userspace without
+ * reading the guest memory
+ */
+ bool pdptrs_from_userspace;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ hpa_t hv_root_tdp;
+#endif
};
struct kvm_lpage_info {
@@ -1002,7 +1054,7 @@ struct kvm_arch {
struct kvm_apic_map __rcu *apic_map;
atomic_t apic_map_dirty;
- bool apic_access_page_done;
+ bool apic_access_memslot_enabled;
unsigned long apicv_inhibit_reasons;
gpa_t wall_clock;
@@ -1062,11 +1114,19 @@ struct kvm_arch {
bool exception_payload_enabled;
bool bus_lock_detection_enabled;
+ /*
+ * If exit_on_emulation_error is set, and the in-kernel instruction
+ * emulator fails to emulate an instruction, allow userspace
+ * the opportunity to look at it.
+ */
+ bool exit_on_emulation_error;
/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
u32 user_space_msr_mask;
struct kvm_x86_msr_filter __rcu *msr_filter;
+ u32 hypercall_exit_enabled;
+
/* Guest can access the SGX PROVISIONKEY. */
bool sgx_provisioning_allowed;
@@ -1124,23 +1184,35 @@ struct kvm_arch {
*/
spinlock_t tdp_mmu_pages_lock;
#endif /* CONFIG_X86_64 */
+
+ /*
+ * If set, rmaps have been allocated for all memslots and should be
+ * allocated for any newly created or modified memslots.
+ */
+ bool memslots_have_rmaps;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ hpa_t hv_root_tdp;
+ spinlock_t hv_root_tdp_lock;
+#endif
};
struct kvm_vm_stat {
- ulong mmu_shadow_zapped;
- ulong mmu_pte_write;
- ulong mmu_pde_zapped;
- ulong mmu_flooded;
- ulong mmu_recycled;
- ulong mmu_cache_miss;
- ulong mmu_unsync;
- ulong remote_tlb_flush;
- ulong lpages;
- ulong nx_lpage_splits;
- ulong max_mmu_page_hash_collisions;
+ struct kvm_vm_stat_generic generic;
+ u64 mmu_shadow_zapped;
+ u64 mmu_pte_write;
+ u64 mmu_pde_zapped;
+ u64 mmu_flooded;
+ u64 mmu_recycled;
+ u64 mmu_cache_miss;
+ u64 mmu_unsync;
+ u64 lpages;
+ u64 nx_lpage_splits;
+ u64 max_mmu_page_hash_collisions;
};
struct kvm_vcpu_stat {
+ struct kvm_vcpu_stat_generic generic;
u64 pf_fixed;
u64 pf_guest;
u64 tlb_flush;
@@ -1154,10 +1226,6 @@ struct kvm_vcpu_stat {
u64 nmi_window_exits;
u64 l1d_flush;
u64 halt_exits;
- u64 halt_successful_poll;
- u64 halt_attempted_poll;
- u64 halt_poll_invalid;
- u64 halt_wakeup;
u64 request_irq_exits;
u64 irq_exits;
u64 host_state_reload;
@@ -1168,11 +1236,10 @@ struct kvm_vcpu_stat {
u64 irq_injections;
u64 nmi_injections;
u64 req_event;
- u64 halt_poll_success_ns;
- u64 halt_poll_fail_ns;
u64 nested_run;
u64 directed_yield_attempted;
u64 directed_yield_successful;
+ u64 guest_mode;
};
struct x86_instruction_info;
@@ -1304,8 +1371,10 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
- /* Returns actual tsc_offset set in active VMCS */
- u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+ u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
+ u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
+ void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+ void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
/*
* Retrieve somewhat arbitrary exit information. Intended to be used
@@ -1363,8 +1432,8 @@ struct kvm_x86_ops {
void (*setup_mce)(struct kvm_vcpu *vcpu);
int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
- int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+ int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
+ int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
@@ -1423,6 +1492,7 @@ struct kvm_arch_async_pf {
extern u32 __read_mostly kvm_nr_uret_msrs;
extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
+extern bool __read_mostly enable_apicv;
extern struct kvm_x86_ops kvm_x86_ops;
#define KVM_X86_OP(func) \
@@ -1463,6 +1533,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
+void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -1477,7 +1548,6 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
-bool pdptrs_changed(struct kvm_vcpu *vcpu);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
@@ -1650,6 +1720,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
ulong roots_to_free);
+void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1662,7 +1733,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
bool kvm_apicv_activated(struct kvm *kvm);
-void kvm_apicv_init(struct kvm *kvm, bool enable);
void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
void kvm_request_apicv_update(struct kvm *kvm, bool activate,
unsigned long bit);
@@ -1675,8 +1745,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gva_t gva, hpa_t root_hpa);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
-void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
- bool skip_mmu_sync);
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
int tdp_huge_page_level);
@@ -1788,8 +1857,10 @@ static inline bool kvm_is_supported_user_return_msr(u32 msr)
return kvm_find_user_return_msr(msr) >= 0;
}
-u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
+u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
+u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier);
+u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
@@ -1863,4 +1934,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
int kvm_cpu_dirty_log_size(void);
+int alloc_all_memslots_rmaps(struct kvm *kvm);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ddfb3cad8dff..0607ec4f5091 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -305,7 +305,7 @@ extern void apei_mce_report_mem_error(int corrected,
/* These may be used by multiple smca_hwid_mcatypes */
enum smca_bank_types {
SMCA_LS = 0, /* Load Store */
- SMCA_LS_V2, /* Load Store */
+ SMCA_LS_V2,
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
@@ -314,17 +314,22 @@ enum smca_bank_types {
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
SMCA_CS, /* Coherent Slave */
- SMCA_CS_V2, /* Coherent Slave */
+ SMCA_CS_V2,
SMCA_PIE, /* Power, Interrupts, etc. */
SMCA_UMC, /* Unified Memory Controller */
+ SMCA_UMC_V2,
SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */
- SMCA_PSP_V2, /* Platform Security Processor */
+ SMCA_PSP_V2,
SMCA_SMU, /* System Management Unit */
- SMCA_SMU_V2, /* System Management Unit */
+ SMCA_SMU_V2,
SMCA_MP5, /* Microprocessor 5 Unit */
SMCA_NBIO, /* Northbridge IO Unit */
SMCA_PCIE, /* PCI Express Unit */
+ SMCA_PCIE_V2,
+ SMCA_XGMI_PCS, /* xGMI PCS Unit */
+ SMCA_XGMI_PHY, /* xGMI PHY Unit */
+ SMCA_WAFL_PHY, /* WAFL PHY Unit */
N_SMCA_BANK_TYPES
};
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 211ba3375ee9..a7c413432b33 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -772,6 +772,10 @@
#define MSR_TFA_RTM_FORCE_ABORT_BIT 0
#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+#define MSR_TFA_TSX_CPUID_CLEAR_BIT 1
+#define MSR_TFA_TSX_CPUID_CLEAR BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT)
+#define MSR_TFA_SDV_ENABLE_RTM_BIT 2
+#define MSR_TFA_SDV_ENABLE_RTM BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT)
/* P4/Xeon+ specific */
#define MSR_IA32_MCG_EAX 0x00000180
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index c1e5e818ba16..c5573eaa5bb9 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_NOPS_H
#define _ASM_X86_NOPS_H
+#include <asm/asm.h>
+
/*
* Define nops for use with alternative() and for tracing.
*/
@@ -57,20 +59,14 @@
#endif /* CONFIG_64BIT */
-#ifdef __ASSEMBLY__
-#define _ASM_MK_NOP(x) .byte x
-#else
-#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
-#endif
-
-#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
+#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1)
+#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2)
+#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3)
+#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4)
+#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5)
+#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6)
+#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
+#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
#define ASM_NOP_MAX 8
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48803a8..4d5810c8fab7 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -34,9 +34,9 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
copy_page(to, from);
}
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
+ alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 544f41a179fb..8fc1b5003713 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -478,6 +478,7 @@ struct x86_pmu_lbr {
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
+extern void perf_clear_dirty_counters(void);
extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index f8cb8af4de5c..fe5efbcba824 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
#define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \
- per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
+ per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
} while (0)
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 556b2b17c3e2..364d0e42e280 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -663,6 +663,7 @@ extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
+extern void cpu_init_secondary(void);
extern void cpu_init_exception_handling(void);
extern void cr4_init(void);
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 629c3df243f0..2cef6c5a52c2 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -9,8 +9,13 @@
#define __ASM_X86_SEV_COMMON_H
#define GHCB_MSR_INFO_POS 0
-#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1)
+#define GHCB_DATA_LOW 12
+#define GHCB_MSR_INFO_MASK (BIT_ULL(GHCB_DATA_LOW) - 1)
+#define GHCB_DATA(v) \
+ (((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW)
+
+/* SEV Information Request/Response */
#define GHCB_MSR_SEV_INFO_RESP 0x001
#define GHCB_MSR_SEV_INFO_REQ 0x002
#define GHCB_MSR_VER_MAX_POS 48
@@ -28,6 +33,7 @@
#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)
+/* CPUID Request/Response */
#define GHCB_MSR_CPUID_REQ 0x004
#define GHCB_MSR_CPUID_RESP 0x005
#define GHCB_MSR_CPUID_FUNC_POS 32
@@ -45,6 +51,14 @@
(((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
(((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))
+/* AP Reset Hold */
+#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+
+/* GHCB Hypervisor Feature Request/Response */
+#define GHCB_MSR_HV_FT_REQ 0x080
+#define GHCB_MSR_HV_FT_RESP 0x081
+
#define GHCB_MSR_TERM_REQ 0x100
#define GHCB_MSR_TERM_REASON_SET_POS 12
#define GHCB_MSR_TERM_REASON_SET_MASK 0xf
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 9c31e0ebc55b..05f3e21f01a7 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -13,7 +13,7 @@
/*
* This file contains both data structures defined by SGX architecture and Linux
* defined software data structures and functions. The two should not be mixed
- * together for better readibility. The architectural definitions come first.
+ * together for better readability. The architectural definitions come first.
*/
/* The SGX specific CPUID function. */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index b6ffe58c70fa..24a8d6c4fb18 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -11,7 +11,7 @@
* The same segment is shared by percpu area and stack canary. On
* x86_64, percpu symbols are zero based and %gs (64-bit) points to the
* base of percpu area. The first occupant of the percpu area is always
- * fixed_percpu_data which contains stack_canary at the approproate
+ * fixed_percpu_data which contains stack_canary at the appropriate
* offset. On x86_32, the stack canary is just a regular percpu
* variable.
*
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 772e60efe243..e322676039f4 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -156,6 +156,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u64 avic_physical_id; /* Offset 0xf8 */
u8 reserved_7[8];
u64 vmsa_pa; /* Used for an SEV-ES guest */
+ u8 reserved_8[720];
+ /*
+ * Offset 0x3e0, 32 bytes reserved
+ * for use by hypervisor/software.
+ */
+ u8 reserved_sw[32];
};
@@ -314,7 +320,7 @@ struct ghcb {
#define EXPECTED_VMCB_SAVE_AREA_SIZE 1032
-#define EXPECTED_VMCB_CONTROL_AREA_SIZE 272
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024
#define EXPECTED_GHCB_SIZE PAGE_SIZE
static inline void __unused_size_checks(void)
@@ -326,7 +332,6 @@ static inline void __unused_size_checks(void)
struct vmcb {
struct vmcb_control_area control;
- u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
struct vmcb_save_area save;
} __packed;
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 7cbf733d11af..f7e2d82d24fb 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -21,13 +21,12 @@ extern const sys_call_ptr_t sys_call_table[];
#if defined(CONFIG_X86_32)
#define ia32_sys_call_table sys_call_table
-#endif
-
-#if defined(CONFIG_IA32_EMULATION)
+#else
+/*
+ * These may not exist, but still put the prototypes in so we
+ * can use IS_ENABLED().
+ */
extern const sys_call_ptr_t ia32_sys_call_table[];
-#endif
-
-#ifdef CONFIG_X86_X32_ABI
extern const sys_call_ptr_t x32_sys_call_table[];
#endif
@@ -160,7 +159,7 @@ static inline int syscall_get_arch(struct task_struct *task)
? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
-void do_syscall_64(unsigned long nr, struct pt_regs *regs);
+void do_syscall_64(struct pt_regs *regs, int nr);
void do_int80_syscall_32(struct pt_regs *regs);
long do_fast_syscall_32(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 80c08c7d5e72..6a2827d0681f 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -17,7 +17,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* __x64_sys_*() - 64-bit native syscall
* __ia32_sys_*() - 32-bit native syscall or common compat syscall
* __ia32_compat_sys_*() - 32-bit compat syscall
- * __x32_compat_sys_*() - 64-bit X32 compat syscall
+ * __x64_compat_sys_*() - 64-bit X32 compat syscall
*
* The registers are decoded according to the ABI:
* 64-bit: RDI, RSI, RDX, R10, R8, R9
@@ -166,17 +166,17 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* with x86_64 obviously do not need such care.
*/
#define __X32_COMPAT_SYS_STUB0(name) \
- __SYS_STUB0(x32, compat_sys_##name)
+ __SYS_STUB0(x64, compat_sys_##name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...) \
- __SYS_STUBx(x32, compat_sys##name, \
+ __SYS_STUBx(x64, compat_sys##name, \
SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
#define __X32_COMPAT_COND_SYSCALL(name) \
- __COND_SYSCALL(x32, compat_sys_##name)
+ __COND_SYSCALL(x64, compat_sys_##name)
#define __X32_COMPAT_SYS_NI(name) \
- __SYS_NI(x32, compat_sys_##name)
+ __SYS_NI(x64, compat_sys_##name)
#else /* CONFIG_X86_X32 */
#define __X32_COMPAT_SYS_STUB0(name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...)
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index c1c3d31b15c0..80e9d5206a71 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -13,7 +13,7 @@
# define __ARCH_WANT_SYS_OLD_MMAP
# define __ARCH_WANT_SYS_OLD_SELECT
-# define __NR_ia32_syscall_max __NR_syscall_max
+# define IA32_NR_syscalls (__NR_syscalls)
# else
@@ -26,12 +26,12 @@
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64
# define __ARCH_WANT_COMPAT_SYS_PREADV64V2
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
+# define X32_NR_syscalls (__NR_x32_syscalls)
+# define IA32_NR_syscalls (__NR_ia32_syscalls)
# endif
-# define NR_syscalls (__NR_syscall_max + 1)
-# define X32_NR_syscalls (__NR_x32_syscall_max + 1)
-# define IA32_NR_syscalls (__NR_ia32_syscall_max + 1)
+# define NR_syscalls (__NR_syscalls)
# define __ARCH_WANT_NEW_STAT
# define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h
index 5fdfcb47000f..054604aba9f0 100644
--- a/arch/x86/include/uapi/asm/hwcap2.h
+++ b/arch/x86/include/uapi/asm/hwcap2.h
@@ -2,10 +2,12 @@
#ifndef _ASM_X86_HWCAP2_H
#define _ASM_X86_HWCAP2_H
+#include <linux/const.h>
+
/* MONITOR/MWAIT enabled in Ring 3 */
-#define HWCAP2_RING3MWAIT (1 << 0)
+#define HWCAP2_RING3MWAIT _BITUL(0)
/* Kernel allows FSGSBASE instructions available in Ring 3 */
-#define HWCAP2_FSGSBASE BIT(1)
+#define HWCAP2_FSGSBASE _BITUL(1)
#endif
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 0662f644aad9..a6c327f8ad9e 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -159,6 +159,19 @@ struct kvm_sregs {
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
+struct kvm_sregs2 {
+ /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+ struct kvm_segment cs, ds, es, fs, gs, ss;
+ struct kvm_segment tr, ldt;
+ struct kvm_dtable gdt, idt;
+ __u64 cr0, cr2, cr3, cr4, cr8;
+ __u64 efer;
+ __u64 apic_base;
+ __u64 flags;
+ __u64 pdptrs[4];
+};
+#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1
+
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 950afebfba88..5146bbab84d4 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -33,6 +33,8 @@
#define KVM_FEATURE_PV_SCHED_YIELD 13
#define KVM_FEATURE_ASYNC_PF_INT 14
#define KVM_FEATURE_MSI_EXT_DEST_ID 15
+#define KVM_FEATURE_HC_MAP_GPA_RANGE 16
+#define KVM_FEATURE_MIGRATION_CONTROL 17
#define KVM_HINTS_REALTIME 0
@@ -54,6 +56,7 @@
#define MSR_KVM_POLL_CONTROL 0x4b564d05
#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
+#define MSR_KVM_MIGRATION_CONTROL 0x4b564d08
struct kvm_steal_time {
__u64 steal;
@@ -90,6 +93,16 @@ struct kvm_clock_pairing {
/* MSR_KVM_ASYNC_PF_INT */
#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
+/* MSR_KVM_MIGRATION_CONTROL */
+#define KVM_MIGRATION_READY (1 << 0)
+
+/* KVM_HC_MAP_GPA_RANGE */
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K 0
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M (1 << 0)
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G (1 << 1)
+#define KVM_MAP_GPA_RANGE_ENC_STAT(n) (n << 4)
+#define KVM_MAP_GPA_RANGE_ENCRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(1)
+#define KVM_MAP_GPA_RANGE_DECRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(0)
/* Operations for KVM_HC_MMU_OP */
#define KVM_MMU_OP_WRITE_PTE 1
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 554f75fe013c..efa969325ede 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -110,6 +110,9 @@
#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
+/* Exit code reserved for hypervisor/software use */
+#define SVM_EXIT_SW 0xf0000000
+
#define SVM_EXIT_ERR -1
#define SVM_EXIT_REASONS \