summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-04-12 00:20:04 +0300
committerJakub Kicinski <kuba@kernel.org>2024-04-12 00:23:47 +0300
commit94426ed2137a948d212302df571445d4245772b9 (patch)
tree2bf20010e453ba302a818ed70271e073078bc4ba /arch/x86
parenta55b39e858901986408391c574f414ef889f1c53 (diff)
parent2ae9a8972ce04046957f8af214509cebfd3bfb9c (diff)
downloadlinux-94426ed2137a948d212302df571445d4245772b9.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR. Conflicts: net/unix/garbage.c 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") 4090fa373f0e ("af_unix: Replace garbage collection algorithm.") Adjacent changes: drivers/net/ethernet/broadcom/bnxt/bnxt.c faa12ca24558 ("bnxt_en: Reset PTP tx_avail after possible firmware reset") b3d0083caf9a ("bnxt_en: Support RSS contexts in ethtool .{get|set}_rxfh()") drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c 7ac10c7d728d ("bnxt_en: Fix possible memory leak in bnxt_rdma_aux_device_init()") 194fad5b2781 ("bnxt_en: Refactor bnxt_rdma_aux_device_init/uninit functions") drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c 958f56e48385 ("net/mlx5e: Un-expose functions in en.h") 49e6c9387051 ("net/mlx5e: RSS, Block XOR hash with over 128 channels") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig26
-rw-r--r--arch/x86/coco/core.c93
-rw-r--r--arch/x86/entry/common.c10
-rw-r--r--arch/x86/entry/entry_64.S61
-rw-r--r--arch/x86/entry/entry_64_compat.S16
-rw-r--r--arch/x86/entry/syscall_32.c21
-rw-r--r--arch/x86/entry/syscall_64.c19
-rw-r--r--arch/x86/entry/syscall_x32.c10
-rw-r--r--arch/x86/events/intel/ds.c8
-rw-r--r--arch/x86/include/asm/coco.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h7
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/nospec-branch.h17
-rw-r--r--arch/x86/include/asm/sev.h4
-rw-r--r--arch/x86/include/asm/syscall.h11
-rw-r--r--arch/x86/kernel/cpu/amd.c38
-rw-r--r--arch/x86/kernel/cpu/bugs.c121
-rw-r--r--arch/x86/kernel/cpu/common.c24
-rw-r--r--arch/x86/kernel/cpu/mce/core.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h3
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/sev.c10
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/reverse_cpuid.h5
-rw-r--r--arch/x86/kvm/svm/sev.c2
-rw-r--r--arch/x86/kvm/vmx/vmenter.S2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/lib/retpoline.S1
-rw-r--r--arch/x86/mm/numa_32.c1
-rw-r--r--arch/x86/mm/pat/memtype.c49
-rw-r--r--arch/x86/virt/svm/sev.c26
34 files changed, 510 insertions, 100 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4fff6ed46e90..10a6251f58f3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2633,6 +2633,32 @@ config MITIGATION_RFDS
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
+choice
+ prompt "Clear branch history"
+ depends on CPU_SUP_INTEL
+ default SPECTRE_BHI_ON
+ help
+ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+ where the branch history buffer is poisoned to speculatively steer
+ indirect branches.
+ See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
+config SPECTRE_BHI_ON
+ bool "on"
+ help
+ Equivalent to setting spectre_bhi=on command line parameter.
+config SPECTRE_BHI_OFF
+ bool "off"
+ help
+ Equivalent to setting spectre_bhi=off command line parameter.
+config SPECTRE_BHI_AUTO
+ bool "auto"
+ depends on BROKEN
+ help
+ Equivalent to setting spectre_bhi=auto command line parameter.
+
+endchoice
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index d07be9d05cd0..b31ef2424d19 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -3,19 +3,28 @@
* Confidential Computing Platform Capability checks
*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
+ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
#include <linux/export.h>
#include <linux/cc_platform.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <asm/archrandom.h>
#include <asm/coco.h>
#include <asm/processor.h>
enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE;
u64 cc_mask __ro_after_init;
+static struct cc_attr_flags {
+ __u64 host_sev_snp : 1,
+ __resv : 63;
+} cc_flags;
+
static bool noinstr intel_cc_platform_has(enum cc_attr attr)
{
switch (attr) {
@@ -89,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr)
case CC_ATTR_GUEST_SEV_SNP:
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+ case CC_ATTR_HOST_SEV_SNP:
+ return cc_flags.host_sev_snp;
+
default:
return false;
}
@@ -148,3 +160,84 @@ u64 cc_mkdec(u64 val)
}
}
EXPORT_SYMBOL_GPL(cc_mkdec);
+
+static void amd_cc_platform_clear(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 0;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_clear(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_clear(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+static void amd_cc_platform_set(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 1;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_set(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_set(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+__init void cc_random_init(void)
+{
+ /*
+ * The seed is 32 bytes (in units of longs), which is 256 bits, which
+ * is the security level that the RNG is targeting.
+ */
+ unsigned long rng_seed[32 / sizeof(long)];
+ size_t i, longs;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return;
+
+ /*
+ * Since the CoCo threat model includes the host, the only reliable
+ * source of entropy that can be neither observed nor manipulated is
+ * RDRAND. Usually, RDRAND failure is considered tolerable, but since
+ * CoCo guests have no other unobservable source of entropy, it's
+ * important to at least ensure the RNG gets some initial random seeds.
+ */
+ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) {
+ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i);
+
+ /*
+ * A zero return value means that the guest doesn't have RDRAND
+ * or the CPU is physically broken, and in both cases that
+ * means most crypto inside of the CoCo instance will be
+ * broken, defeating the purpose of CoCo in the first place. So
+ * just panic here because it's absolutely unsafe to continue
+ * executing.
+ */
+ if (longs == 0)
+ panic("RDRAND is defective.");
+ }
+ add_device_randomness(rng_seed, sizeof(rng_seed));
+ memzero_explicit(rng_seed, sizeof(rng_seed));
+}
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6356060caaf3..6de50b80702e 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
if (likely(unr < NR_syscalls)) {
unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = sys_call_table[unr](regs);
+ regs->ax = x64_sys_call(regs, unr);
return true;
}
return false;
@@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call_table[xnr](regs);
+ regs->ax = x32_sys_call(regs, xnr);
return true;
}
return false;
@@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
if (likely(unr < IA32_NR_syscalls)) {
unr = array_index_nospec(unr, IA32_NR_syscalls);
- regs->ax = ia32_sys_call_table[unr](regs);
+ regs->ax = ia32_sys_call(regs, unr);
} else if (nr != -1) {
regs->ax = __ia32_sys_ni_syscall(regs);
}
@@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
}
/**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
@@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
* eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
*/
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
{
int nr;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8af2a26b24f6..1b5be07f8669 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
/* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
call do_syscall_64 /* returns with IRQs disabled */
@@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
call make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone. Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+ push %rbp
+ mov %rsp, %rbp
+ movl $5, %ecx
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+ jmp 5f
+ .align 64, 0xcc
+ ANNOTATE_INTRA_FUNCTION_CALL
+1: call 2f
+ RET
+ .align 64, 0xcc
+2: movl $5, %eax
+3: jmp 4f
+ nop
+4: sub $1, %eax
+ jnz 3b
+ sub $1, %ecx
+ jnz 1b
+ RET
+5: lfence
+ pop %rbp
+ RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index eabf48c4d4b4..c779046cc3fe 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
movq %rsp, %rdi
call do_fast_syscall_32
@@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ CLEAR_BRANCH_HISTORY
+ jmp do_int80_emulation
+SYM_CODE_END(int80_emulation)
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8cfc9bc73e7f..c2235bae17ef 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -18,8 +18,25 @@
#include <asm/syscalls_32.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
#define __SYSCALL(nr, sym) __ia32_##sym,
-
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_32.h>
};
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_32.h>
+ default: return __ia32_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index be120eec1fc9..33b3f09e6f15 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -11,8 +11,23 @@
#include <asm/syscalls_64.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
#define __SYSCALL(nr, sym) __x64_##sym,
-
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_64.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index bdd0e03a1265..03de4a932131 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -11,8 +11,12 @@
#include <asm/syscalls_x32.h>
#undef __SYSCALL
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_x32.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
};
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2641ba620f12..e010bfed8417 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct pmu *pmu = event->pmu;
/*
- * Make sure we get updated with the first PEBS
- * event. It will trigger also during removal, but
- * that does not hurt:
+ * Make sure we get updated with the first PEBS event.
+ * During removal, ->pebs_data_cfg is still valid for
+ * the last PEBS event. Don't clear it.
*/
- if (cpuc->n_pebs == 1)
+ if ((cpuc->n_pebs == 1) && add)
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index fb7388bbc212..c086699b0d0c 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask)
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
+void cc_random_init(void);
#else
#define cc_vendor (CC_VENDOR_NONE)
@@ -34,6 +35,7 @@ static inline u64 cc_mkdec(u64 val)
{
return val;
}
+static inline void cc_random_init(void) { }
#endif
#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 42157ddcc09d..686e92d2663e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -33,6 +33,8 @@ enum cpuid_leafs
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
+ CPUID_LNX_5,
+ NR_CPUID_WORDS,
};
#define X86_CAP_FMT_NUM "%d:%d"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a38f8f9ba657..3c7434329661 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -461,11 +461,15 @@
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
- * CPUID levels like 0x80000022, etc.
+ * CPUID levels like 0x80000022, etc and Linux defined features.
*
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
/*
* BUG word(s)
@@ -515,4 +519,5 @@
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 05956bd8bacf..e72c2b872957 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -61,10 +61,13 @@
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
- | SPEC_CTRL_RRSBA_DIS_S)
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -163,6 +166,10 @@
* are restricted to targets in
* kernel.
*/
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 170c89ed22fc..ff5f1ecc7d1e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -326,6 +326,19 @@
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -368,6 +381,10 @@ extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
extern void (*x86_return_thunk)(void);
extern void __warn_thunk(void);
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 07e125f32528..7f57382afee4 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -228,7 +228,6 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
-void kdump_sev_callback(void);
void sev_show_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
@@ -258,7 +257,6 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
-static inline void kdump_sev_callback(void) { }
static inline void sev_show_status(void) { }
#endif
@@ -270,6 +268,7 @@ int psmash(u64 pfn);
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
+void kdump_sev_callback(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +281,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
}
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
+static inline void kdump_sev_callback(void) { }
#endif
#endif
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index f44e2f9ab65d..2fc7bc3863ff 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,19 +16,17 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
+/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
/*
* These may not exist, but still put the prototypes in so we
* can use IS_ENABLED().
*/
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
}
bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 6d8677e80ddb..9bf17c9c29da 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
+static void bsp_determine_snp(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+ cc_vendor = CC_VENDOR_AMD;
+
+ if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
+ /*
+ * RMP table entry format is not architectural and is defined by the
+ * per-processor PPR. Restrict SNP support on the known CPU models
+ * for which the RMP table entry format is currently defined for.
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ c->x86 >= 0x19 && snp_probe_rmptable_info()) {
+ cc_platform_set(CC_ATTR_HOST_SEV_SNP);
+ } else {
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+ }
+ }
+#endif
+}
+
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
break;
}
- if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
- /*
- * RMP table entry format is not architectural and it can vary by processor
- * and is defined by the per-processor PPR. Restrict SNP support on the
- * known CPU model and family for which the RMP table entry format is
- * currently defined for.
- */
- if (!boot_cpu_has(X86_FEATURE_ZEN3) &&
- !boot_cpu_has(X86_FEATURE_ZEN4) &&
- !boot_cpu_has(X86_FEATURE_ZEN5))
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- else if (!snp_probe_rmptable_info())
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- }
-
+ bsp_determine_snp(c);
return;
warn:
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e7ba936d798b..295463707e68 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1607,6 +1607,79 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
dump_stack();
}
+/*
+ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
+ * branch history in userspace. Not needed if BHI_NO is set.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+ return true;
+}
+
+enum bhi_mitigations {
+ BHI_MITIGATION_OFF,
+ BHI_MITIGATION_ON,
+ BHI_MITIGATION_AUTO,
+};
+
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON :
+ IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF :
+ BHI_MITIGATION_AUTO;
+
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ bhi_mitigation = BHI_MITIGATION_OFF;
+ else if (!strcmp(str, "on"))
+ bhi_mitigation = BHI_MITIGATION_ON;
+ else if (!strcmp(str, "auto"))
+ bhi_mitigation = BHI_MITIGATION_AUTO;
+ else
+ pr_err("Ignoring unknown spectre_bhi option (%s)", str);
+
+ return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+static void __init bhi_select_mitigation(void)
+{
+ if (bhi_mitigation == BHI_MITIGATION_OFF)
+ return;
+
+ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
+ !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
+ return;
+
+ if (spec_ctrl_bhi_dis())
+ return;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /* Mitigate KVM by default */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+ if (bhi_mitigation == BHI_MITIGATION_AUTO)
+ return;
+
+ /* Mitigate syscalls when the mitigation is forced =on */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1718,6 +1791,9 @@ static void __init spectre_v2_select_mitigation(void)
mode == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();
+ if (boot_cpu_has(X86_BUG_BHI))
+ bhi_select_mitigation();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -2695,15 +2771,15 @@ static char *stibp_state(void)
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
- return ", STIBP: disabled";
+ return "; STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
- return ", STIBP: forced";
+ return "; STIBP: forced";
case SPECTRE_V2_USER_STRICT_PREFERRED:
- return ", STIBP: always-on";
+ return "; STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
- return ", STIBP: conditional";
+ return "; STIBP: conditional";
}
return "";
}
@@ -2712,10 +2788,10 @@ static char *ibpb_state(void)
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
- return ", IBPB: always-on";
+ return "; IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
- return ", IBPB: conditional";
- return ", IBPB: disabled";
+ return "; IBPB: conditional";
+ return "; IBPB: disabled";
}
return "";
}
@@ -2725,14 +2801,31 @@ static char *pbrsb_eibrs_state(void)
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
- return ", PBRSB-eIBRS: SW sequence";
+ return "; PBRSB-eIBRS: SW sequence";
else
- return ", PBRSB-eIBRS: Vulnerable";
+ return "; PBRSB-eIBRS: Vulnerable";
} else {
- return ", PBRSB-eIBRS: Not affected";
+ return "; PBRSB-eIBRS: Not affected";
}
}
+static const char * const spectre_bhi_state(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_BHI))
+ return "; BHI: Not affected";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+ return "; BHI: BHI_DIS_S";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+ return "; BHI: SW loop, KVM: SW loop";
+ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+ !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA))
+ return "; BHI: Retpoline";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+ return "; BHI: Syscall hardening, KVM: SW loop";
+
+ return "; BHI: Vulnerable (Syscall hardening enabled)";
+}
+
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2745,13 +2838,15 @@ static ssize_t spectre_v2_show_state(char *buf)
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
pbrsb_eibrs_state(),
+ spectre_bhi_state(),
+ /* this should always be at the end */
spectre_v2_module_string());
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5c1e6d6be267..754d91857d63 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_SPECTRE_V2 BIT(8)
#define NO_MMIO BIT(9)
#define NO_EIBRS_PBRSB BIT(10)
+#define NO_BHI BIT(11)
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
/* Zhaoxin Family 7 */
- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
{}
};
@@ -1435,6 +1436,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (vulnerable_to_rfds(ia32_cap))
setup_force_cpu_bug(X86_BUG_RFDS);
+ /* When virtualized, eIBRS could be hidden, assume vulnerable */
+ if (!(ia32_cap & ARCH_CAP_BHI_NO) &&
+ !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+ setup_force_cpu_bug(X86_BUG_BHI);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b5cc557cfc37..84d41be6d06b 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
return -EINVAL;
b = &per_cpu(mce_banks_array, s->id)[bank];
-
if (!b->init)
return -ENODEV;
b->ctl = new;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
return size;
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 422a4ddc2ab7..7b29ebda024f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void)
(boot_cpu_data.x86 >= 0x0f)))
return;
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return;
rdmsr(MSR_AMD64_SYSCFG, lo, hi);
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index c99f26ebe7a6..1a8687f8073a 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
else
cpu = cpumask_any_but(mask, exclude_cpu);
- if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
+ /* Only continue if tick_nohz_full_mask has been initialized. */
+ if (!tick_nohz_full_enabled())
return cpu;
/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index a515328d9d7d..af5aa2c754c2 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0109e6c510e0..e125e059e2c4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -35,6 +35,7 @@
#include <asm/bios_ebda.h>
#include <asm/bugs.h>
#include <asm/cacheinfo.h>
+#include <asm/coco.h>
#include <asm/cpu.h>
#include <asm/efi.h>
#include <asm/gart.h>
@@ -991,6 +992,7 @@ void __init setup_arch(char **cmdline_p)
* memory size.
*/
mem_encrypt_setup_arch();
+ cc_random_init();
efi_fake_memmap();
efi_find_mirror();
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 7e1e63cc48e6..38ad066179d8 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2284,16 +2284,6 @@ static int __init snp_init_platform_device(void)
}
device_initcall(snp_init_platform_device);
-void kdump_sev_callback(void)
-{
- /*
- * Do wbinvd() on remote CPUs when SNP is enabled in order to
- * safely do SNP_SHUTDOWN on the local CPU.
- */
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
- wbinvd();
-}
-
void sev_show_status(void)
{
int i;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3aaf7e86a859..0ebdd088f28b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -122,6 +122,7 @@ config KVM_AMD_SEV
default y
depends on KVM_AMD && X86_64
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
+ select ARCH_HAS_CC_PLATFORM
help
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index aadefcaa9561..2f4e155080ba 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
/* CPUID level 0x80000007 (EDX). */
@@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = {
*/
static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
+ BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_5);
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
}
@@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e5a4d9b0e79f..61a7531d41b0 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3184,7 +3184,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
unsigned long pfn;
struct page *p;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
/*
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 2bfbf758d061..f6986dee6f8c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
call vmx_spec_ctrl_restore_host
+ CLEAR_BRANCH_HISTORY_VMEXIT
+
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 47d9f03b7778..984ea2089efc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
static u64 kvm_get_arch_capabilities(void)
{
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 0795b3464058..e674ccf720b9 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -229,6 +229,7 @@ SYM_CODE_END(srso_return_thunk)
/* Dummy for the alternative in CALL_UNTRAIN_RET. */
SYM_CODE_START(srso_alias_untrain_ret)
ANNOTATE_UNRET_SAFE
+ ANNOTATE_NOENDBR
ret
int3
SYM_FUNC_END(srso_alias_untrain_ret)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 104544359d69..025fd7ea5d69 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -24,6 +24,7 @@
#include <linux/memblock.h>
#include <linux/init.h>
+#include <asm/pgtable_areas.h>
#include "numa_internal.h"
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 0d72183b5dd0..36b603d0cdde 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}
+static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
+ pgprot_t *pgprot)
+{
+ unsigned long prot;
+
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
+
+ /*
+ * We need the starting PFN and cachemode used for track_pfn_remap()
+ * that covered the whole VMA. For most mappings, we can obtain that
+ * information from the page tables. For COW mappings, we might now
+ * suddenly have anon folios mapped and follow_phys() will fail.
+ *
+ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
+ * detect the PFN. If we need the cachemode as well, we're out of luck
+ * for now and have to fail fork().
+ */
+ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
+ if (pgprot)
+ *pgprot = __pgprot(prot);
+ return 0;
+ }
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (pgprot)
+ return -EINVAL;
+ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
@@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
- unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (vma->vm_flags & VM_PAT) {
- /*
- * reserve the whole chunk covered by vma. We need the
- * starting address and protection from pte.
- */
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, &pgprot))
return -EINVAL;
- }
- pgprot = __pgprot(prot);
+ /* reserve the whole chunk covered by vma. */
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
@@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size, bool mm_wr_locked)
{
resource_size_t paddr;
- unsigned long prot;
if (vma && !(vma->vm_flags & VM_PAT))
return;
@@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, NULL))
return;
- }
-
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index cffe1157a90a..ab0e8448bb6e 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -77,7 +77,7 @@ static int __mfd_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
@@ -98,7 +98,7 @@ static int __snp_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
@@ -174,11 +174,11 @@ static int __init snp_rmptable_init(void)
u64 rmptable_size;
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
if (!amd_iommu_snp_en)
- return 0;
+ goto nosnp;
if (!probed_rmp_size)
goto nosnp;
@@ -225,7 +225,7 @@ skip_enable:
return 0;
nosnp:
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
return -ENOSYS;
}
@@ -246,7 +246,7 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
{
struct rmpentry *large_entry, *entry;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return ERR_PTR(-ENODEV);
entry = get_rmpentry(pfn);
@@ -363,7 +363,7 @@ int psmash(u64 pfn)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
if (!pfn_valid(pfn))
@@ -472,7 +472,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret, level;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
level = RMP_TO_PG_LEVEL(state->pagesize);
@@ -558,3 +558,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages)
spin_unlock(&snp_leaked_pages_list_lock);
}
EXPORT_SYMBOL_GPL(snp_leak_pages);
+
+void kdump_sev_callback(void)
+{
+ /*
+ * Do wbinvd() on remote CPUs when SNP is enabled in order to
+ * safely do SNP_SHUTDOWN on the local CPU.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ wbinvd();
+}