diff options
author | Jakub Kicinski <kuba@kernel.org> | 2024-04-12 00:20:04 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-04-12 00:23:47 +0300 |
commit | 94426ed2137a948d212302df571445d4245772b9 (patch) | |
tree | 2bf20010e453ba302a818ed70271e073078bc4ba /arch/x86 | |
parent | a55b39e858901986408391c574f414ef889f1c53 (diff) | |
parent | 2ae9a8972ce04046957f8af214509cebfd3bfb9c (diff) | |
download | linux-94426ed2137a948d212302df571445d4245772b9.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:
net/unix/garbage.c
47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()")
4090fa373f0e ("af_unix: Replace garbage collection algorithm.")
Adjacent changes:
drivers/net/ethernet/broadcom/bnxt/bnxt.c
faa12ca24558 ("bnxt_en: Reset PTP tx_avail after possible firmware reset")
b3d0083caf9a ("bnxt_en: Support RSS contexts in ethtool .{get|set}_rxfh()")
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
7ac10c7d728d ("bnxt_en: Fix possible memory leak in bnxt_rdma_aux_device_init()")
194fad5b2781 ("bnxt_en: Refactor bnxt_rdma_aux_device_init/uninit functions")
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
958f56e48385 ("net/mlx5e: Un-expose functions in en.h")
49e6c9387051 ("net/mlx5e: RSS, Block XOR hash with over 128 channels")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch/x86')
34 files changed, 510 insertions, 100 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4fff6ed46e90..10a6251f58f3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2633,6 +2633,32 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst> +choice + prompt "Clear branch history" + depends on CPU_SUP_INTEL + default SPECTRE_BHI_ON + help + Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks + where the branch history buffer is poisoned to speculatively steer + indirect branches. + See <file:Documentation/admin-guide/hw-vuln/spectre.rst> + +config SPECTRE_BHI_ON + bool "on" + help + Equivalent to setting spectre_bhi=on command line parameter. +config SPECTRE_BHI_OFF + bool "off" + help + Equivalent to setting spectre_bhi=off command line parameter. +config SPECTRE_BHI_AUTO + bool "auto" + depends on BROKEN + help + Equivalent to setting spectre_bhi=auto command line parameter. + +endchoice + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index d07be9d05cd0..b31ef2424d19 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -3,19 +3,28 @@ * Confidential Computing Platform Capability checks * * Copyright (C) 2021 Advanced Micro Devices, Inc. + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * Author: Tom Lendacky <thomas.lendacky@amd.com> */ #include <linux/export.h> #include <linux/cc_platform.h> +#include <linux/string.h> +#include <linux/random.h> +#include <asm/archrandom.h> #include <asm/coco.h> #include <asm/processor.h> enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; u64 cc_mask __ro_after_init; +static struct cc_attr_flags { + __u64 host_sev_snp : 1, + __resv : 63; +} cc_flags; + static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { @@ -89,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_GUEST_SEV_SNP: return sev_status & MSR_AMD64_SEV_SNP_ENABLED; + case CC_ATTR_HOST_SEV_SNP: + return cc_flags.host_sev_snp; + default: return false; } @@ -148,3 +160,84 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); + +static void amd_cc_platform_clear(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 0; + break; + default: + break; + } +} + +void cc_platform_clear(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_clear(attr); + break; + default: + break; + } +} + +static void amd_cc_platform_set(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 1; + break; + default: + break; + } +} + +void cc_platform_set(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_set(attr); + break; + default: + break; + } +} + +__init void cc_random_init(void) +{ + /* + * The seed is 32 bytes (in units of longs), which is 256 bits, which + * is the security level that the RNG is targeting. + */ + unsigned long rng_seed[32 / sizeof(long)]; + size_t i, longs; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + /* + * Since the CoCo threat model includes the host, the only reliable + * source of entropy that can be neither observed nor manipulated is + * RDRAND. Usually, RDRAND failure is considered tolerable, but since + * CoCo guests have no other unobservable source of entropy, it's + * important to at least ensure the RNG gets some initial random seeds. + */ + for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { + longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); + + /* + * A zero return value means that the guest doesn't have RDRAND + * or the CPU is physically broken, and in both cases that + * means most crypto inside of the CoCo instance will be + * broken, defeating the purpose of CoCo in the first place. So + * just panic here because it's absolutely unsafe to continue + * executing. + */ + if (longs == 0) + panic("RDRAND is defective."); + } + add_device_randomness(rng_seed, sizeof(rng_seed)); + memzero_explicit(rng_seed, sizeof(rng_seed)); +} diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6356060caaf3..6de50b80702e 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) if (likely(unr < NR_syscalls)) { unr = array_index_nospec(unr, NR_syscalls); - regs->ax = sys_call_table[unr](regs); + regs->ax = x64_sys_call(regs, unr); return true; } return false; @@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { xnr = array_index_nospec(xnr, X32_NR_syscalls); - regs->ax = x32_sys_call_table[xnr](regs); + regs->ax = x32_sys_call(regs, xnr); return true; } return false; @@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) if (likely(unr < IA32_NR_syscalls)) { unr = array_index_nospec(unr, IA32_NR_syscalls); - regs->ax = ia32_sys_call_table[unr](regs); + regs->ax = ia32_sys_call(regs, unr); } else if (nr != -1) { regs->ax = __ia32_sys_ni_syscall(regs); } @@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void) } /** - * int80_emulation - 32-bit legacy syscall entry + * do_int80_emulation - 32-bit legacy syscall C entry from asm * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. Instances of INT $0x80 can be found inline in @@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void) * eax: system call number * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 */ -DEFINE_IDTENTRY_RAW(int80_emulation) +__visible noinstr void do_int80_emulation(struct pt_regs *regs) { int nr; diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8af2a26b24f6..1b5be07f8669 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) /* clobbers %rax, make sure it is after saving the syscall nr */ IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY call do_syscall_64 /* returns with IRQs disabled */ @@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) call make_task_dead SYM_CODE_END(rewind_stack_and_make_dead) .popsection + +/* + * This sequence executes branches in order to remove user branch information + * from the branch history tracker in the Branch Predictor, therefore removing + * user influence on subsequent BTB lookups. + * + * It should be used on parts prior to Alder Lake. Newer parts should use the + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being + * virtualized on newer hardware the VMM should protect against BHI attacks by + * setting BHI_DIS_S for the guests. + * + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging + * and not clearing the branch history. The call tree looks like: + * + * call 1 + * call 2 + * call 2 + * call 2 + * call 2 + * call 2 + * ret + * ret + * ret + * ret + * ret + * ret + * + * This means that the stack is non-constant and ORC can't unwind it with %rsp + * alone. Therefore we unconditionally set up the frame pointer, which allows + * ORC to unwind properly. + * + * The alignment is for performance and not for safety, and may be safely + * refactored in the future if needed. + */ +SYM_FUNC_START(clear_bhb_loop) + push %rbp + mov %rsp, %rbp + movl $5, %ecx + ANNOTATE_INTRA_FUNCTION_CALL + call 1f + jmp 5f + .align 64, 0xcc + ANNOTATE_INTRA_FUNCTION_CALL +1: call 2f + RET + .align 64, 0xcc +2: movl $5, %eax +3: jmp 4f + nop +4: sub $1, %eax + jnz 3b + sub $1, %ecx + jnz 1b + RET +5: lfence + pop %rbp + RET +SYM_FUNC_END(clear_bhb_loop) +EXPORT_SYMBOL_GPL(clear_bhb_loop) +STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index eabf48c4d4b4..c779046cc3fe 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY /* * SYSENTER doesn't filter flags, so we need to clear NT and AC @@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY movq %rsp, %rdi call do_fast_syscall_32 @@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) + +/* + * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries + * point to C routines, however since this is a system call interface the branch + * history needs to be scrubbed to protect against BHI attacks, and that + * scrubbing needs to take place in assembly code prior to entering any C + * routines. + */ +SYM_CODE_START(int80_emulation) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + CLEAR_BRANCH_HISTORY + jmp do_int80_emulation +SYM_CODE_END(int80_emulation) diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8cfc9bc73e7f..c2235bae17ef 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -18,8 +18,25 @@ #include <asm/syscalls_32.h> #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ +#ifdef CONFIG_X86_32 #define __SYSCALL(nr, sym) __ia32_##sym, - -__visible const sys_call_ptr_t ia32_sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include <asm/syscalls_32.h> }; +#undef __SYSCALL +#endif + +#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_32.h> + default: return __ia32_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index be120eec1fc9..33b3f09e6f15 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -11,8 +11,23 @@ #include <asm/syscalls_64.h> #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ #define __SYSCALL(nr, sym) __x64_##sym, - -asmlinkage const sys_call_ptr_t sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include <asm/syscalls_64.h> }; +#undef __SYSCALL + +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_64.h> + default: return __x64_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index bdd0e03a1265..03de4a932131 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -11,8 +11,12 @@ #include <asm/syscalls_x32.h> #undef __SYSCALL -#define __SYSCALL(nr, sym) __x64_##sym, +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); -asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { -#include <asm/syscalls_x32.h> +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_x32.h> + default: return __x64_sys_ni_syscall(regs); + } }; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 2641ba620f12..e010bfed8417 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu = event->pmu; /* - * Make sure we get updated with the first PEBS - * event. It will trigger also during removal, but - * that does not hurt: + * Make sure we get updated with the first PEBS event. + * During removal, ->pebs_data_cfg is still valid for + * the last PEBS event. Don't clear it. */ - if (cpuc->n_pebs == 1) + if ((cpuc->n_pebs == 1) && add) cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; if (needed_cb != pebs_needs_sched_cb(cpuc)) { diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index fb7388bbc212..c086699b0d0c 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask) u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); +void cc_random_init(void); #else #define cc_vendor (CC_VENDOR_NONE) @@ -34,6 +35,7 @@ static inline u64 cc_mkdec(u64 val) { return val; } +static inline void cc_random_init(void) { } #endif #endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 42157ddcc09d..686e92d2663e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -33,6 +33,8 @@ enum cpuid_leafs CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a38f8f9ba657..3c7434329661 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -461,11 +461,15 @@ /* * Extended auxiliary flags: Linux defined - for features scattered in various - * CPUID levels like 0x80000022, etc. + * CPUID levels like 0x80000022, etc and Linux defined features. * * Reuse free bits when adding new feature flags! */ #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ /* * BUG word(s) @@ -515,4 +519,5 @@ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 05956bd8bacf..e72c2b872957 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -61,10 +61,13 @@ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) /* A mask for bits which the kernel toggles when controlling mitigations */ #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ - | SPEC_CTRL_RRSBA_DIS_S) + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -163,6 +166,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ #define ARCH_CAP_PBRSB_NO BIT(24) /* * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 170c89ed22fc..ff5f1ecc7d1e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -326,6 +326,19 @@ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF .endm +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT +.endm +#else +#define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT +#endif + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -368,6 +381,10 @@ extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + extern void (*x86_return_thunk)(void); extern void __warn_thunk(void); diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 07e125f32528..7f57382afee4 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -228,7 +228,6 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); -void kdump_sev_callback(void); void sev_show_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } @@ -258,7 +257,6 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } -static inline void kdump_sev_callback(void) { } static inline void sev_show_status(void) { } #endif @@ -270,6 +268,7 @@ int psmash(u64 pfn); int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable); int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); +void kdump_sev_callback(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } @@ -282,6 +281,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as } static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} +static inline void kdump_sev_callback(void) { } #endif #endif diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index f44e2f9ab65d..2fc7bc3863ff 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,19 +16,17 @@ #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> +/* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; -#if defined(CONFIG_X86_32) -#define ia32_sys_call_table sys_call_table -#else /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). */ -extern const sys_call_ptr_t ia32_sys_call_table[]; -extern const sys_call_ptr_t x32_sys_call_table[]; -#endif +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. @@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task) } bool do_syscall_64(struct pt_regs *regs, int nr); +void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6d8677e80ddb..9bf17c9c29da 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } +static void bsp_determine_snp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + cc_vendor = CC_VENDOR_AMD; + + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { + /* + * RMP table entry format is not architectural and is defined by the + * per-processor PPR. Restrict SNP support on the known CPU models + * for which the RMP table entry format is currently defined for. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + c->x86 >= 0x19 && snp_probe_rmptable_info()) { + cc_platform_set(CC_ATTR_HOST_SEV_SNP); + } else { + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + } + } +#endif +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } - if (cpu_has(c, X86_FEATURE_SEV_SNP)) { - /* - * RMP table entry format is not architectural and it can vary by processor - * and is defined by the per-processor PPR. Restrict SNP support on the - * known CPU model and family for which the RMP table entry format is - * currently defined for. - */ - if (!boot_cpu_has(X86_FEATURE_ZEN3) && - !boot_cpu_has(X86_FEATURE_ZEN4) && - !boot_cpu_has(X86_FEATURE_ZEN5)) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - else if (!snp_probe_rmptable_info()) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - } - + bsp_determine_snp(c); return; warn: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e7ba936d798b..295463707e68 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1607,6 +1607,79 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ dump_stack(); } +/* + * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by + * branch history in userspace. Not needed if BHI_NO is set. + */ +static bool __init spec_ctrl_bhi_dis(void) +{ + if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); + + return true; +} + +enum bhi_mitigations { + BHI_MITIGATION_OFF, + BHI_MITIGATION_ON, + BHI_MITIGATION_AUTO, +}; + +static enum bhi_mitigations bhi_mitigation __ro_after_init = + IS_ENABLED(CONFIG_SPECTRE_BHI_ON) ? BHI_MITIGATION_ON : + IS_ENABLED(CONFIG_SPECTRE_BHI_OFF) ? BHI_MITIGATION_OFF : + BHI_MITIGATION_AUTO; + +static int __init spectre_bhi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + bhi_mitigation = BHI_MITIGATION_OFF; + else if (!strcmp(str, "on")) + bhi_mitigation = BHI_MITIGATION_ON; + else if (!strcmp(str, "auto")) + bhi_mitigation = BHI_MITIGATION_AUTO; + else + pr_err("Ignoring unknown spectre_bhi option (%s)", str); + + return 0; +} +early_param("spectre_bhi", spectre_bhi_parse_cmdline); + +static void __init bhi_select_mitigation(void) +{ + if (bhi_mitigation == BHI_MITIGATION_OFF) + return; + + /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && + !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + return; + + if (spec_ctrl_bhi_dis()) + return; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* Mitigate KVM by default */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + + if (bhi_mitigation == BHI_MITIGATION_AUTO) + return; + + /* Mitigate syscalls when the mitigation is forced =on */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1718,6 +1791,9 @@ static void __init spectre_v2_select_mitigation(void) mode == SPECTRE_V2_RETPOLINE) spec_ctrl_disable_kernel_rrsba(); + if (boot_cpu_has(X86_BUG_BHI)) + bhi_select_mitigation(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -2695,15 +2771,15 @@ static char *stibp_state(void) switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: - return ", STIBP: disabled"; + return "; STIBP: disabled"; case SPECTRE_V2_USER_STRICT: - return ", STIBP: forced"; + return "; STIBP: forced"; case SPECTRE_V2_USER_STRICT_PREFERRED: - return ", STIBP: always-on"; + return "; STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) - return ", STIBP: conditional"; + return "; STIBP: conditional"; } return ""; } @@ -2712,10 +2788,10 @@ static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { if (static_key_enabled(&switch_mm_always_ibpb)) - return ", IBPB: always-on"; + return "; IBPB: always-on"; if (static_key_enabled(&switch_mm_cond_ibpb)) - return ", IBPB: conditional"; - return ", IBPB: disabled"; + return "; IBPB: conditional"; + return "; IBPB: disabled"; } return ""; } @@ -2725,14 +2801,31 @@ static char *pbrsb_eibrs_state(void) if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) - return ", PBRSB-eIBRS: SW sequence"; + return "; PBRSB-eIBRS: SW sequence"; else - return ", PBRSB-eIBRS: Vulnerable"; + return "; PBRSB-eIBRS: Vulnerable"; } else { - return ", PBRSB-eIBRS: Not affected"; + return "; PBRSB-eIBRS: Not affected"; } } +static const char * const spectre_bhi_state(void) +{ + if (!boot_cpu_has_bug(X86_BUG_BHI)) + return "; BHI: Not affected"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + return "; BHI: BHI_DIS_S"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + return "; BHI: SW loop, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !(x86_read_arch_cap_msr() & ARCH_CAP_RRSBA)) + return "; BHI: Retpoline"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Syscall hardening, KVM: SW loop"; + + return "; BHI: Vulnerable (Syscall hardening enabled)"; +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -2745,13 +2838,15 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), + spectre_bhi_state(), + /* this should always be at the end */ spectre_v2_module_string()); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5c1e6d6be267..754d91857d63 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SPECTRE_V2 BIT(8) #define NO_MMIO BIT(9) #define NO_EIBRS_PBRSB BIT(10) +#define NO_BHI BIT(11) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), {} }; @@ -1435,6 +1436,13 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (vulnerable_to_rfds(ia32_cap)) setup_force_cpu_bug(X86_BUG_RFDS); + /* When virtualized, eIBRS could be hidden, assume vulnerable */ + if (!(ia32_cap & ARCH_CAP_BHI_NO) && + !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b5cc557cfc37..84d41be6d06b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 422a4ddc2ab7..7b29ebda024f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; rdmsr(MSR_AMD64_SYSCFG, lo, hi); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index c99f26ebe7a6..1a8687f8073a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) else cpu = cpumask_any_but(mask, exclude_cpu); - if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) + /* Only continue if tick_nohz_full_mask has been initialized. */ + if (!tick_nohz_full_enabled()) return cpu; /* If the CPU picked isn't marked nohz_full nothing more needs doing. */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index a515328d9d7d..af5aa2c754c2 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0109e6c510e0..e125e059e2c4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -35,6 +35,7 @@ #include <asm/bios_ebda.h> #include <asm/bugs.h> #include <asm/cacheinfo.h> +#include <asm/coco.h> #include <asm/cpu.h> #include <asm/efi.h> #include <asm/gart.h> @@ -991,6 +992,7 @@ void __init setup_arch(char **cmdline_p) * memory size. */ mem_encrypt_setup_arch(); + cc_random_init(); efi_fake_memmap(); efi_find_mirror(); diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 7e1e63cc48e6..38ad066179d8 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2284,16 +2284,6 @@ static int __init snp_init_platform_device(void) } device_initcall(snp_init_platform_device); -void kdump_sev_callback(void) -{ - /* - * Do wbinvd() on remote CPUs when SNP is enabled in order to - * safely do SNP_SHUTDOWN on the local CPU. - */ - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - wbinvd(); -} - void sev_show_status(void) { int i; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 3aaf7e86a859..0ebdd088f28b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -122,6 +122,7 @@ config KVM_AMD_SEV default y depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) + select ARCH_HAS_CC_PLATFORM help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index aadefcaa9561..2f4e155080ba 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) -#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) /* CPUID level 0x80000007 (EDX). */ @@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = { */ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) { + BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); } @@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC); KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index e5a4d9b0e79f..61a7531d41b0 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3184,7 +3184,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) unsigned long pfn; struct page *p; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); /* diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 2bfbf758d061..f6986dee6f8c 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host + CLEAR_BRANCH_HISTORY_VMEXIT + /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 47d9f03b7778..984ea2089efc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) static u64 kvm_get_arch_capabilities(void) { diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 0795b3464058..e674ccf720b9 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -229,6 +229,7 @@ SYM_CODE_END(srso_return_thunk) /* Dummy for the alternative in CALL_UNTRAIN_RET. */ SYM_CODE_START(srso_alias_untrain_ret) ANNOTATE_UNRET_SAFE + ANNOTATE_NOENDBR ret int3 SYM_FUNC_END(srso_alias_untrain_ret) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 104544359d69..025fd7ea5d69 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -24,6 +24,7 @@ #include <linux/memblock.h> #include <linux/init.h> +#include <asm/pgtable_areas.h> #include "numa_internal.h" diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 0d72183b5dd0..36b603d0cdde 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. + * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). + */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). @@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. */ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index cffe1157a90a..ab0e8448bb6e 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -77,7 +77,7 @@ static int __mfd_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -98,7 +98,7 @@ static int __snp_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -174,11 +174,11 @@ static int __init snp_rmptable_init(void) u64 rmptable_size; u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; if (!amd_iommu_snp_en) - return 0; + goto nosnp; if (!probed_rmp_size) goto nosnp; @@ -225,7 +225,7 @@ skip_enable: return 0; nosnp: - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); return -ENOSYS; } @@ -246,7 +246,7 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level) { struct rmpentry *large_entry, *entry; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return ERR_PTR(-ENODEV); entry = get_rmpentry(pfn); @@ -363,7 +363,7 @@ int psmash(u64 pfn) unsigned long paddr = pfn << PAGE_SHIFT; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; if (!pfn_valid(pfn)) @@ -472,7 +472,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state) unsigned long paddr = pfn << PAGE_SHIFT; int ret, level; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; level = RMP_TO_PG_LEVEL(state->pagesize); @@ -558,3 +558,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages) spin_unlock(&snp_leaked_pages_list_lock); } EXPORT_SYMBOL_GPL(snp_leak_pages); + +void kdump_sev_callback(void) +{ + /* + * Do wbinvd() on remote CPUs when SNP is enabled in order to + * safely do SNP_SHUTDOWN on the local CPU. + */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + wbinvd(); +} |