summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig22
-rw-r--r--arch/x86/boot/compressed/mem.c5
-rw-r--r--arch/x86/boot/compressed/sev.c40
-rw-r--r--arch/x86/boot/compressed/sev.h2
-rw-r--r--arch/x86/boot/genimage.sh5
-rw-r--r--arch/x86/crypto/Kconfig11
-rw-r--r--arch/x86/entry/entry.S12
-rw-r--r--arch/x86/entry/entry_64.S20
-rw-r--r--arch/x86/events/amd/ibs.c20
-rw-r--r--arch/x86/events/amd/uncore.c36
-rw-r--r--arch/x86/events/core.c4
-rw-r--r--arch/x86/events/intel/core.c2
-rw-r--r--arch/x86/events/intel/ds.c9
-rw-r--r--arch/x86/events/perf_event.h9
-rw-r--r--arch/x86/include/asm/alternative.h32
-rw-r--r--arch/x86/include/asm/bug.h5
-rw-r--r--arch/x86/include/asm/cfi.h11
-rw-r--r--arch/x86/include/asm/cpu.h12
-rw-r--r--arch/x86/include/asm/cpufeatures.h10
-rw-r--r--arch/x86/include/asm/ibt.h4
-rw-r--r--arch/x86/include/asm/intel-family.h3
-rw-r--r--arch/x86/include/asm/irqflags.h4
-rw-r--r--arch/x86/include/asm/microcode.h2
-rw-r--r--arch/x86/include/asm/msr-index.h8
-rw-r--r--arch/x86/include/asm/mwait.h37
-rw-r--r--arch/x86/include/asm/nmi.h2
-rw-r--r--arch/x86/include/asm/nospec-branch.h75
-rw-r--r--arch/x86/include/asm/percpu.h28
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/sev-common.h2
-rw-r--r--arch/x86/include/asm/sighandling.h22
-rw-r--r--arch/x86/include/asm/tdx.h2
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h4
-rw-r--r--arch/x86/include/uapi/asm/debugreg.h21
-rw-r--r--arch/x86/include/uapi/asm/e820.h4
-rw-r--r--arch/x86/include/uapi/asm/ldt.h4
-rw-r--r--arch/x86/include/uapi/asm/msr.h4
-rw-r--r--arch/x86/include/uapi/asm/ptrace-abi.h6
-rw-r--r--arch/x86/include/uapi/asm/ptrace.h4
-rw-r--r--arch/x86/include/uapi/asm/setup_data.h4
-rw-r--r--arch/x86/include/uapi/asm/signal.h8
-rw-r--r--arch/x86/kernel/alternative.c238
-rw-r--r--arch/x86/kernel/cfi.c22
-rw-r--r--arch/x86/kernel/cpu/amd.c60
-rw-r--r--arch/x86/kernel/cpu/bugs.c349
-rw-r--r--arch/x86/kernel/cpu/common.c127
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c18
-rw-r--r--arch/x86/kernel/cpu/microcode/amd_shas.c112
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c60
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/internal.h1
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c2
-rw-r--r--arch/x86/kernel/fpu/signal.c11
-rw-r--r--arch/x86/kernel/fpu/xstate.h22
-rw-r--r--arch/x86/kernel/ftrace.c2
-rw-r--r--arch/x86/kernel/head32.c4
-rw-r--r--arch/x86/kernel/i8253.c3
-rw-r--r--arch/x86/kernel/ioport.c13
-rw-r--r--arch/x86/kernel/irq.c2
-rw-r--r--arch/x86/kernel/module.c6
-rw-r--r--arch/x86/kernel/nmi.c42
-rw-r--r--arch/x86/kernel/process.c31
-rw-r--r--arch/x86/kernel/reboot.c10
-rw-r--r--arch/x86/kernel/signal_32.c4
-rw-r--r--arch/x86/kernel/signal_64.c4
-rw-r--r--arch/x86/kernel/smpboot.c6
-rw-r--r--arch/x86/kernel/static_call.c4
-rw-r--r--arch/x86/kernel/traps.c116
-rw-r--r--arch/x86/kernel/vmlinux.lds.S10
-rw-r--r--arch/x86/kvm/cpuid.c8
-rw-r--r--arch/x86/kvm/mmu/mmu.c75
-rw-r--r--arch/x86/kvm/reverse_cpuid.h8
-rw-r--r--arch/x86/kvm/smm.c1
-rw-r--r--arch/x86/kvm/svm/avic.c66
-rw-r--r--arch/x86/kvm/svm/svm.c6
-rw-r--r--arch/x86/kvm/svm/vmenter.S6
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c28
-rw-r--r--arch/x86/kvm/vmx/vmx.c7
-rw-r--r--arch/x86/kvm/x86.c24
-rw-r--r--arch/x86/lib/retpoline.S39
-rw-r--r--arch/x86/lib/x86-opcode-map.txt54
-rw-r--r--arch/x86/mm/init.c9
-rw-r--r--arch/x86/mm/init_64.c15
-rw-r--r--arch/x86/mm/kaslr.c10
-rw-r--r--arch/x86/mm/tlb.c29
-rw-r--r--arch/x86/net/bpf_jit_comp.c58
-rw-r--r--arch/x86/pci/xen.c8
-rw-r--r--arch/x86/power/cpu.c14
-rw-r--r--arch/x86/tools/insn_decoder_test.c5
-rw-r--r--arch/x86/um/asm/checksum.h3
-rw-r--r--arch/x86/um/os-Linux/mcontext.c3
-rw-r--r--arch/x86/virt/vmx/tdx/tdx.c5
-rw-r--r--arch/x86/xen/enlighten_pvh.c19
95 files changed, 1855 insertions, 448 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e54da3b4d334..dfa334e3d1a0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2420,6 +2420,7 @@ config STRICT_SIGALTSTACK_SIZE
config CFI_AUTO_DEFAULT
bool "Attempt to use FineIBT by default at boot time"
depends on FINEIBT
+ depends on !RUST || RUSTC_VERSION >= 108800
default y
help
Attempt to use FineIBT by default at boot time. If enabled,
@@ -2747,6 +2748,27 @@ config MITIGATION_SSB
of speculative execution in a similar way to the Meltdown and Spectre
security vulnerabilities.
+config MITIGATION_ITS
+ bool "Enable Indirect Target Selection mitigation"
+ depends on CPU_SUP_INTEL && X86_64
+ depends on MITIGATION_RETPOLINE && MITIGATION_RETHUNK
+ select EXECMEM
+ default y
+ help
+ Enable Indirect Target Selection (ITS) mitigation. ITS is a bug in
+ BPU on some Intel CPUs that may allow Spectre V2 style attacks. If
+ disabled, mitigation cannot be enabled via cmdline.
+ See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst>
+
+config MITIGATION_TSA
+ bool "Mitigate Transient Scheduler Attacks"
+ depends on CPU_SUP_AMD
+ default y
+ help
+ Enable mitigation for Transient Scheduler Attacks. TSA is a hardware
+ security vulnerability on AMD CPUs which can lead to forwarding of
+ invalid info to subsequent instructions and thus can affect their
+ timing and thereby cause a leakage.
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
index f676156d9f3d..0e9f84ab4bdc 100644
--- a/arch/x86/boot/compressed/mem.c
+++ b/arch/x86/boot/compressed/mem.c
@@ -34,14 +34,11 @@ static bool early_is_tdx_guest(void)
void arch_accept_memory(phys_addr_t start, phys_addr_t end)
{
- static bool sevsnp;
-
/* Platform-specific memory-acceptance call goes here */
if (early_is_tdx_guest()) {
if (!tdx_accept_memory(start, end))
panic("TDX: Failed to accept memory\n");
- } else if (sevsnp || (sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) {
- sevsnp = true;
+ } else if (early_is_sevsnp_guest()) {
snp_accept_memory(start, end);
} else {
error("Cannot accept memory: unknown platform\n");
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index f49f7eef1dba..a93e36338866 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -644,3 +644,43 @@ void sev_prep_identity_maps(unsigned long top_level_pgt)
sev_verify_cbit(top_level_pgt);
}
+
+bool early_is_sevsnp_guest(void)
+{
+ static bool sevsnp;
+
+ if (sevsnp)
+ return true;
+
+ if (!(sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED))
+ return false;
+
+ sevsnp = true;
+
+ if (!snp_vmpl) {
+ unsigned int eax, ebx, ecx, edx;
+
+ /*
+ * CPUID Fn8000_001F_EAX[28] - SVSM support
+ */
+ eax = 0x8000001f;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (eax & BIT(28)) {
+ struct msr m;
+
+ /* Obtain the address of the calling area to use */
+ boot_rdmsr(MSR_SVSM_CAA, &m);
+ boot_svsm_caa = (void *)m.q;
+ boot_svsm_caa_pa = m.q;
+
+ /*
+ * The real VMPL level cannot be discovered, but the
+ * memory acceptance routines make no use of that so
+ * any non-zero value suffices here.
+ */
+ snp_vmpl = U8_MAX;
+ }
+ }
+ return true;
+}
diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h
index 4e463f33186d..d3900384b8ab 100644
--- a/arch/x86/boot/compressed/sev.h
+++ b/arch/x86/boot/compressed/sev.h
@@ -13,12 +13,14 @@
bool sev_snp_enabled(void);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 sev_get_status(void);
+bool early_is_sevsnp_guest(void);
#else
static inline bool sev_snp_enabled(void) { return false; }
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 sev_get_status(void) { return 0; }
+static inline bool early_is_sevsnp_guest(void) { return false; }
#endif
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index c9299aeb7333..3882ead513f7 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -22,6 +22,7 @@
# This script requires:
# bash
# syslinux
+# genisoimage
# mtools (for fdimage* and hdimage)
# edk2/OVMF (for hdimage)
#
@@ -251,7 +252,9 @@ geniso() {
cp "$isolinux" "$ldlinux" "$tmp_dir"
cp "$FBZIMAGE" "$tmp_dir"/linux
echo default linux "$KCMDLINE" > "$tmp_dir"/isolinux.cfg
- cp "${FDINITRDS[@]}" "$tmp_dir"/
+ if [ ${#FDINITRDS[@]} -gt 0 ]; then
+ cp "${FDINITRDS[@]}" "$tmp_dir"/
+ fi
genisoimage -J -r -appid 'LINUX_BOOT' -input-charset=utf-8 \
-quiet -o "$FIMAGE" -b isolinux.bin \
-c boot.cat -no-emul-boot -boot-load-size 4 \
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 7b1bebed879d..46b53ab06165 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -3,10 +3,12 @@
menu "Accelerated Cryptographic Algorithms for CPU (x86)"
config CRYPTO_CURVE25519_X86
- tristate "Public key crypto: Curve25519 (ADX)"
+ tristate
depends on X86 && 64BIT
+ select CRYPTO_KPP
select CRYPTO_LIB_CURVE25519_GENERIC
select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+ default CRYPTO_LIB_CURVE25519_INTERNAL
help
Curve25519 algorithm
@@ -348,11 +350,12 @@ config CRYPTO_ARIA_GFNI_AVX512_X86_64
Processes 64 blocks in parallel.
config CRYPTO_CHACHA20_X86_64
- tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (SSSE3/AVX2/AVX-512VL)"
+ tristate
depends on X86 && 64BIT
select CRYPTO_SKCIPHER
select CRYPTO_LIB_CHACHA_GENERIC
select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ default CRYPTO_LIB_CHACHA_INTERNAL
help
Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
stream cipher algorithms
@@ -417,10 +420,12 @@ config CRYPTO_POLYVAL_CLMUL_NI
- CLMUL-NI (carry-less multiplication new instructions)
config CRYPTO_POLY1305_X86_64
- tristate "Hash functions: Poly1305 (SSE2/AVX2)"
+ tristate
depends on X86 && 64BIT
+ select CRYPTO_HASH
select CRYPTO_LIB_POLY1305_GENERIC
select CRYPTO_ARCH_HAVE_LIB_POLY1305
+ default CRYPTO_LIB_POLY1305_INTERNAL
help
Poly1305 authenticator algorithm (RFC7539)
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index b7ea3e8e9ecc..b0d5ab951231 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -18,7 +18,7 @@
SYM_FUNC_START(entry_ibpb)
movl $MSR_IA32_PRED_CMD, %ecx
- movl $PRED_CMD_IBPB, %eax
+ movl _ASM_RIP(x86_pred_cmd), %eax
xorl %edx, %edx
wrmsr
@@ -33,20 +33,20 @@ EXPORT_SYMBOL_GPL(entry_ibpb);
/*
* Define the VERW operand that is disguised as entry code so that
- * it can be referenced with KPTI enabled. This ensure VERW can be
+ * it can be referenced with KPTI enabled. This ensures VERW can be
* used late in exit-to-user path after page tables are switched.
*/
.pushsection .entry.text, "ax"
.align L1_CACHE_BYTES, 0xcc
-SYM_CODE_START_NOALIGN(mds_verw_sel)
+SYM_CODE_START_NOALIGN(x86_verw_sel)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
.word __KERNEL_DS
.align L1_CACHE_BYTES, 0xcc
-SYM_CODE_END(mds_verw_sel);
+SYM_CODE_END(x86_verw_sel);
/* For KVM */
-EXPORT_SYMBOL_GPL(mds_verw_sel);
+EXPORT_SYMBOL_GPL(x86_verw_sel);
.popsection
@@ -63,7 +63,7 @@ THUNK warn_thunk_thunk, __warn_thunk
* entirely in the C code, and use an alias emitted by the linker script
* instead.
*/
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && defined(CONFIG_SMP)
EXPORT_SYMBOL(__ref_stack_chk_guard);
#endif
#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1b5be07f8669..9c6a110a52d4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1524,7 +1524,9 @@ SYM_CODE_END(rewind_stack_and_make_dead)
* ORC to unwind properly.
*
* The alignment is for performance and not for safety, and may be safely
- * refactored in the future if needed.
+ * refactored in the future if needed. The .skips are for safety, to ensure
+ * that all RETs are in the second half of a cacheline to mitigate Indirect
+ * Target Selection, rather than taking the slowpath via its_return_thunk.
*/
SYM_FUNC_START(clear_bhb_loop)
push %rbp
@@ -1534,10 +1536,22 @@ SYM_FUNC_START(clear_bhb_loop)
call 1f
jmp 5f
.align 64, 0xcc
+ /*
+ * Shift instructions so that the RET is in the upper half of the
+ * cacheline and don't take the slowpath to its_return_thunk.
+ */
+ .skip 32 - (.Lret1 - 1f), 0xcc
ANNOTATE_INTRA_FUNCTION_CALL
1: call 2f
- RET
+.Lret1: RET
.align 64, 0xcc
+ /*
+ * As above shift instructions for RET at .Lret2 as well.
+ *
+ * This should be ideally be: .skip 32 - (.Lret2 - 2f), 0xcc
+ * but some Clang versions (e.g. 18) don't like this.
+ */
+ .skip 32 - 18, 0xcc
2: movl $5, %eax
3: jmp 4f
nop
@@ -1545,7 +1559,7 @@ SYM_FUNC_START(clear_bhb_loop)
jnz 3b
sub $1, %ecx
jnz 1b
- RET
+.Lret2: RET
5: lfence
pop %rbp
RET
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index c3a2f6f57770..d34ee6f04f18 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -272,7 +272,7 @@ static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs;
- u64 max_cnt, config;
+ u64 config;
int ret;
perf_ibs = get_ibs_pmu(event->attr.type);
@@ -309,10 +309,19 @@ static int perf_ibs_init(struct perf_event *event)
if (!hwc->sample_period)
hwc->sample_period = 0x10;
} else {
- max_cnt = config & perf_ibs->cnt_mask;
+ u64 period = 0;
+
+ if (perf_ibs == &perf_ibs_op) {
+ period = (config & IBS_OP_MAX_CNT) << 4;
+ if (ibs_caps & IBS_CAPS_OPCNTEXT)
+ period |= config & IBS_OP_MAX_CNT_EXT_MASK;
+ } else {
+ period = (config & IBS_FETCH_MAX_CNT) << 4;
+ }
+
config &= ~perf_ibs->cnt_mask;
- event->attr.sample_period = max_cnt << 4;
- hwc->sample_period = event->attr.sample_period;
+ event->attr.sample_period = period;
+ hwc->sample_period = period;
}
if (!hwc->sample_period)
@@ -1222,7 +1231,8 @@ static __init int perf_ibs_op_init(void)
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
- perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.cnt_mask |= (IBS_OP_MAX_CNT_EXT_MASK |
+ IBS_OP_CUR_CNT_EXT_MASK);
}
if (ibs_caps & IBS_CAPS_ZEN4)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 0bfde2ea5cb8..cdf7bf029836 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -38,7 +38,6 @@ struct amd_uncore_ctx {
int refcnt;
int cpu;
struct perf_event **events;
- struct hlist_node node;
};
struct amd_uncore_pmu {
@@ -890,6 +889,39 @@ static void amd_uncore_umc_start(struct perf_event *event, int flags)
perf_event_update_userpage(event);
}
+static void amd_uncore_umc_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev, new, shift;
+ s64 delta;
+
+ shift = COUNTER_SHIFT + 1;
+ prev = local64_read(&hwc->prev_count);
+
+ /*
+ * UMC counters do not have RDPMC assignments. Read counts directly
+ * from the corresponding PERF_CTR.
+ */
+ rdmsrl(hwc->event_base, new);
+
+ /*
+ * Unlike the other uncore counters, UMC counters saturate and set the
+ * Overflow bit (bit 48) on overflow. Since they do not roll over,
+ * proactively reset the corresponding PERF_CTR when bit 47 is set so
+ * that the counter never gets a chance to saturate.
+ */
+ if (new & BIT_ULL(63 - COUNTER_SHIFT)) {
+ wrmsrl(hwc->event_base, 0);
+ local64_set(&hwc->prev_count, 0);
+ } else {
+ local64_set(&hwc->prev_count, new);
+ }
+
+ delta = (new << shift) - (prev << shift);
+ delta >>= shift;
+ local64_add(delta, &event->count);
+}
+
static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
@@ -967,7 +999,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
.del = amd_uncore_del,
.start = amd_uncore_umc_start,
.stop = amd_uncore_stop,
- .read = amd_uncore_read,
+ .read = amd_uncore_umc_read,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
.module = THIS_MODULE,
};
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 0d33c85da453..471eaa46d55f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.type == event->pmu->type)
event->hw.config |= x86_pmu_get_event_config(event);
- if (!event->attr.freq && x86_pmu.limit_period) {
+ if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) {
s64 left = event->attr.sample_period;
x86_pmu.limit_period(event, &left);
if (left > event->attr.sample_period)
@@ -753,7 +753,7 @@ void x86_pmu_enable_all(int added)
}
}
-static inline int is_x86_event(struct perf_event *event)
+int is_x86_event(struct perf_event *event)
{
int i;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f86e47afd560..5e43d390f7a3 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4333,7 +4333,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
arr[pebs_enable] = (struct perf_guest_switch_msr){
.msr = MSR_IA32_PEBS_ENABLE,
.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
- .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
+ .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable,
};
if (arr[pebs_enable].host) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 1b82bcc6fa55..54007174c15b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2240,8 +2240,9 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_
setup_pebs_fixed_sample_data);
}
-static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
+static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask)
{
+ u64 pebs_enabled = cpuc->pebs_enabled & mask;
struct perf_event *event;
int bit;
@@ -2252,7 +2253,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int
* It needs to call intel_pmu_save_and_restart_reload() to
* update the event->count for this case.
*/
- for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
+ for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
event = cpuc->events[bit];
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_save_and_restart_reload(event, 0);
@@ -2287,7 +2288,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
}
if (unlikely(base >= top)) {
- intel_pmu_pebs_event_update_no_drain(cpuc, size);
+ intel_pmu_pebs_event_update_no_drain(cpuc, mask);
return;
}
@@ -2397,7 +2398,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
(hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
if (unlikely(base >= top)) {
- intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
+ intel_pmu_pebs_event_update_no_drain(cpuc, mask);
return;
}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8c616656391e..76b6d8469dba 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -110,9 +110,16 @@ static inline bool is_topdown_event(struct perf_event *event)
return is_metric_event(event) || is_slots_event(event);
}
+int is_x86_event(struct perf_event *event);
+
+static inline bool check_leader_group(struct perf_event *leader, int flags)
+{
+ return is_x86_event(leader) ? !!(leader->hw.flags & flags) : false;
+}
+
static inline bool is_branch_counters_group(struct perf_event *event)
{
- return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS;
+ return check_leader_group(event->group_leader, PERF_X86_EVENT_BRANCH_COUNTERS);
}
struct amd_nb {
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ca9ae606aab9..f8dab517de8a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/stringify.h>
#include <asm/asm.h>
+#include <asm/bug.h>
#define ALT_FLAGS_SHIFT 16
@@ -134,6 +135,37 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
}
#endif
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_init_mod(struct module *mod);
+extern void its_fini_mod(struct module *mod);
+extern void its_free_mod(struct module *mod);
+extern u8 *its_static_thunk(int reg);
+#else /* CONFIG_MITIGATION_ITS */
+static inline void its_init_mod(struct module *mod) { }
+static inline void its_fini_mod(struct module *mod) { }
+static inline void its_free_mod(struct module *mod) { }
+static inline u8 *its_static_thunk(int reg)
+{
+ WARN_ONCE(1, "ITS not compiled in");
+
+ return NULL;
+}
+#endif
+
+#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL)
+extern bool cpu_wants_rethunk(void);
+extern bool cpu_wants_rethunk_at(void *addr);
+#else
+static __always_inline bool cpu_wants_rethunk(void)
+{
+ return false;
+}
+static __always_inline bool cpu_wants_rethunk_at(void *addr)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SMP
extern void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 806649c7f23d..9a0f29be1a9e 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -22,8 +22,9 @@
#define SECOND_BYTE_OPCODE_UD2 0x0b
#define BUG_NONE 0xffff
-#define BUG_UD1 0xfffe
-#define BUG_UD2 0xfffd
+#define BUG_UD2 0xfffe
+#define BUG_UD1 0xfffd
+#define BUG_UD1_UBSAN 0xfffc
#ifdef CONFIG_GENERIC_BUG
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 31d19c815f99..7dd5ab239c87 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -126,6 +126,17 @@ static inline int cfi_get_offset(void)
extern u32 cfi_get_func_hash(void *func);
+#ifdef CONFIG_FINEIBT
+extern bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type);
+#else
+static inline bool
+decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ return false;
+}
+
+#endif
+
#else
static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
{
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index aa30fd8cad7f..b6099456477c 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -69,4 +69,16 @@ int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type);
extern struct cpumask cpus_stop_mask;
+union zen_patch_rev {
+ struct {
+ __u32 rev : 8,
+ stepping : 4,
+ model : 4,
+ __reserved : 4,
+ ext_model : 4,
+ ext_fam : 8;
+ };
+ __u32 ucode_rev;
+};
+
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 913fd3a7bac6..ef5749a0d8c2 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -449,11 +449,13 @@
#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */
#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
@@ -474,6 +476,11 @@
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */
+#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32 + 6) /* Use thunk for indirect branches in lower half of cacheline */
+
+#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
/*
* BUG word(s)
@@ -525,4 +532,7 @@
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_ITS X86_BUG(1*32 + 5) /* "its" CPU is affected by Indirect Target Selection */
+#define X86_BUG_ITS_NATIVE_ONLY X86_BUG(1*32 + 6) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
+#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h
index 1e59581d500c..b778ae6e67ee 100644
--- a/arch/x86/include/asm/ibt.h
+++ b/arch/x86/include/asm/ibt.h
@@ -41,7 +41,7 @@
_ASM_PTR fname "\n\t" \
".popsection\n\t"
-static inline __attribute_const__ u32 gen_endbr(void)
+static __always_inline __attribute_const__ u32 gen_endbr(void)
{
u32 endbr;
@@ -56,7 +56,7 @@ static inline __attribute_const__ u32 gen_endbr(void)
return endbr;
}
-static inline __attribute_const__ u32 gen_endbr_poison(void)
+static __always_inline __attribute_const__ u32 gen_endbr_poison(void)
{
/*
* 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 1a42f829667a..c6198fbcc1d7 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -46,6 +46,7 @@
#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY)
#define INTEL_PENTIUM_PRO IFM(6, 0x01)
+#define INTEL_PENTIUM_III_DESCHUTES IFM(6, 0x05)
#define INTEL_CORE_YONAH IFM(6, 0x0E)
@@ -115,6 +116,8 @@
#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD)
#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE)
+#define INTEL_BARTLETTLAKE IFM(6, 0xD7) /* Raptor Cove */
+
/* "Hybrid" Processors (P-Core/E-Core) */
#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 1c2db11a2c3c..2b75fe80fcb2 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void)
static __always_inline void native_safe_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("sti; hlt": : :"memory");
}
static __always_inline void native_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("hlt": : :"memory");
}
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 695e569159c1..be7cddc414e4 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -17,10 +17,12 @@ struct ucode_cpu_info {
void load_ucode_bsp(void);
void load_ucode_ap(void);
void microcode_bsp_resume(void);
+bool __init microcode_loader_disabled(void);
#else
static inline void load_ucode_bsp(void) { }
static inline void load_ucode_ap(void) { }
static inline void microcode_bsp_resume(void) { }
+static inline bool __init microcode_loader_disabled(void) { return false; }
#endif
extern unsigned long initrd_start_early;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 61e991507353..ac25f9eb5912 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -209,6 +209,14 @@
* VERW clears CPU Register
* File.
*/
+#define ARCH_CAP_ITS_NO BIT_ULL(62) /*
+ * Not susceptible to
+ * Indirect Target Selection.
+ * This bit is not set by
+ * HW, but is synthesized by
+ * VMMs for guests to know
+ * their affected status.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 920426d691ce..7f9a97c572fe 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -44,8 +44,6 @@ static __always_inline void __monitorx(const void *eax, unsigned long ecx,
static __always_inline void __mwait(unsigned long eax, unsigned long ecx)
{
- mds_idle_clear_cpu_buffers();
-
/* "mwait %eax, %ecx;" */
asm volatile(".byte 0x0f, 0x01, 0xc9;"
:: "a" (eax), "c" (ecx));
@@ -80,7 +78,7 @@ static __always_inline void __mwait(unsigned long eax, unsigned long ecx)
static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
unsigned long ecx)
{
- /* No MDS buffer clear as this is AMD/HYGON only */
+ /* No need for TSA buffer clearing on AMD */
/* "mwaitx %eax, %ebx, %ecx;" */
asm volatile(".byte 0x0f, 0x01, 0xfb;"
@@ -98,7 +96,7 @@ static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
*/
static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
- mds_idle_clear_cpu_buffers();
+
/* "mwait %eax, %ecx;" */
asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
:: "a" (eax), "c" (ecx));
@@ -116,24 +114,29 @@ static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
*/
static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
- if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
- mb();
- clflush((void *)&current_thread_info()->flags);
- mb();
- }
+ const void *addr = &current_thread_info()->flags;
+
+ alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
+ __monitor(addr, 0, 0);
- __monitor((void *)&current_thread_info()->flags, 0, 0);
+ if (need_resched())
+ goto out;
- if (!need_resched()) {
- if (ecx & 1) {
- __mwait(eax, ecx);
- } else {
- __sti_mwait(eax, ecx);
- raw_local_irq_disable();
- }
+ if (ecx & 1) {
+ __mwait(eax, ecx);
+ } else {
+ __sti_mwait(eax, ecx);
+ raw_local_irq_disable();
}
}
+
+out:
current_clr_polling();
}
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 41a0ebb699ec..f677382093f3 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -56,6 +56,8 @@ int __register_nmi_handler(unsigned int, struct nmiaction *);
void unregister_nmi_handler(unsigned int, const char *);
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler);
+
void stop_nmi(void);
void restart_nmi(void);
void local_touch_nmi(void);
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 96b410b1d4e8..331f6a05535d 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -210,9 +210,8 @@
.endm
/*
- * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
- * to the retpoline thunk with a CS prefix when the register requires
- * a RAX prefix byte to encode. Also see apply_retpolines().
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
*/
.macro __CS_PREFIX reg:req
.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
@@ -316,25 +315,31 @@
.endm
/*
- * Macro to execute VERW instruction that mitigate transient data sampling
- * attacks such as MDS. On affected systems a microcode update overloaded VERW
- * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
- *
+ * Macro to execute VERW insns that mitigate transient data sampling
+ * attacks such as MDS or TSA. On affected systems a microcode update
+ * overloaded VERW insns to also clear the CPU buffers. VERW clobbers
+ * CFLAGS.ZF.
* Note: Only the memory operand variant of VERW clears the CPU buffers.
*/
-.macro CLEAR_CPU_BUFFERS
+.macro __CLEAR_CPU_BUFFERS feature
#ifdef CONFIG_X86_64
- ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
+ ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature
#else
/*
* In 32bit mode, the memory operand must be a %cs reference. The data
* segments may not be usable (vm86 mode), and the stack segment may not
* be flat (ESPFIX32).
*/
- ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
+ ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature
#endif
.endm
+#define CLEAR_CPU_BUFFERS \
+ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF
+
+#define VM_CLEAR_CPU_BUFFERS \
+ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM
+
#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
@@ -356,10 +361,14 @@
".long 999b\n\t" \
".popsection\n\t"
+#define ITS_THUNK_SIZE 64
+
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+typedef u8 its_thunk_t[ITS_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
+extern its_thunk_t __x86_indirect_its_thunk_array[];
#ifdef CONFIG_MITIGATION_RETHUNK
extern void __x86_return_thunk(void);
@@ -383,6 +392,12 @@ static inline void srso_return_thunk(void) {}
static inline void srso_alias_return_thunk(void) {}
#endif
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_return_thunk(void);
+#else
+static inline void its_return_thunk(void) {}
+#endif
+
extern void retbleed_return_thunk(void);
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
@@ -439,19 +454,22 @@ static inline void call_depth_return_thunk(void) {}
#ifdef CONFIG_X86_64
/*
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
+ */
+#define __CS_PREFIX(reg) \
+ ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \
+ ".ifc \\rs," reg "\n" \
+ ".byte 0x2e\n" \
+ ".endif\n" \
+ ".endr\n"
+
+/*
* Inline asm uses the %V modifier which is only in newer GCC
* which is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
*/
-# define CALL_NOSPEC \
- ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- "call __x86_indirect_thunk_%V[thunk_target]\n", \
- X86_FEATURE_RETPOLINE, \
- "lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_LFENCE)
+#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \
+ "call __x86_indirect_thunk_%V[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
@@ -570,24 +588,24 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
-extern u16 mds_verw_sel;
+extern u16 x86_verw_sel;
#include <asm/segment.h>
/**
- * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+ * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
* instruction is executed.
*/
-static __always_inline void mds_clear_cpu_buffers(void)
+static __always_inline void x86_clear_cpu_buffers(void)
{
static const u16 ds = __KERNEL_DS;
@@ -604,14 +622,15 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS
+ * and TSA vulnerabilities.
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static __always_inline void mds_idle_clear_cpu_buffers(void)
+static __always_inline void x86_idle_clear_cpu_buffers(void)
{
- if (static_branch_likely(&mds_idle_clear))
- mds_clear_cpu_buffers();
+ if (static_branch_likely(&cpu_buf_idle_clear))
+ x86_clear_cpu_buffers();
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index c55a79d5feae..2d9c250b3c8d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -349,9 +349,9 @@ do { \
\
asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
- : [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
+ : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), \
+ "+d" (old__.high)) \
: "b" (new__.low), \
"c" (new__.high), \
"S" (&(_var)) \
@@ -380,10 +380,10 @@ do { \
asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
CC_SET(z) \
- : CC_OUT(z) (success), \
- [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
+ : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ [var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), \
+ "+d" (old__.high)) \
: "b" (new__.low), \
"c" (new__.high), \
"S" (&(_var)) \
@@ -420,9 +420,9 @@ do { \
\
asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
- : [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
+ : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), \
+ "+d" (old__.high)) \
: "b" (new__.low), \
"c" (new__.high), \
"S" (&(_var)) \
@@ -451,10 +451,10 @@ do { \
asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
CC_SET(z) \
- : CC_OUT(z) (success), \
- [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
+ : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ [var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), \
+ "+d" (old__.high)) \
: "b" (new__.low), \
"c" (new__.high), \
"S" (&(_var)) \
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 7505bb5d260a..aa351c4a20ee 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -520,6 +520,7 @@ struct pebs_xmm {
*/
#define IBS_OP_CUR_CNT (0xFFF80ULL<<32)
#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32)
+#define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL<<52)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 98726c2b04f8..ddaf3c62efb5 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -116,7 +116,7 @@ enum psc_op {
#define GHCB_MSR_VMPL_REQ 0x016
#define GHCB_MSR_VMPL_REQ_LEVEL(v) \
/* GHCBData[39:32] */ \
- (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \
+ ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \
/* GHCBDdata[11:0] */ \
GHCB_MSR_VMPL_REQ)
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index e770c4fc47f4..8727c7e21dd1 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
+/*
+ * To prevent immediate repeat of single step trap on return from SIGTRAP
+ * handler if the trap flag (TF) is set without an external debugger attached,
+ * clear the software event flag in the augmented SS, ensuring no single-step
+ * trap is pending upon ERETU completion.
+ *
+ * Note, this function should be called in sigreturn() before the original
+ * state is restored to make sure the TF is read from the entry frame.
+ */
+static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs)
+{
+ /*
+ * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction
+ * is being single-stepped, do not clear the software event flag in the
+ * augmented SS, thus a debugger won't skip over the following instruction.
+ */
+#ifdef CONFIG_X86_FRED
+ if (!(regs->flags & X86_EFLAGS_TF))
+ regs->fred_ss.swevent = 0;
+#endif
+}
+
#endif /* _ASM_X86_SIGHANDLING_H */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index b5b633294061..2d13ef1f4b05 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -97,7 +97,7 @@ void tdx_init(void);
typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
-static inline u64 sc_retry(sc_func_t func, u64 fn,
+static __always_inline u64 sc_retry(sc_func_t func, u64 fn,
struct tdx_module_args *args)
{
int retry = RDRAND_RETRY_LOOPS;
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 9b82eebd7add..dafbf581c515 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -26,7 +26,7 @@
#define XLF_5LEVEL_ENABLED (1<<6)
#define XLF_MEM_ENCRYPTION (1<<7)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/screen_info.h>
@@ -210,6 +210,6 @@ enum x86_hardware_subarch {
X86_NR_SUBARCHS,
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_BOOTPARAM_H */
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index 0007ba077c0c..41da492dfb01 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -15,7 +15,26 @@
which debugging register was responsible for the trap. The other bits
are either reserved or not of interest to us. */
-/* Define reserved bits in DR6 which are always set to 1 */
+/*
+ * Define bits in DR6 which are set to 1 by default.
+ *
+ * This is also the DR6 architectural value following Power-up, Reset or INIT.
+ *
+ * Note, with the introduction of Bus Lock Detection (BLD) and Restricted
+ * Transactional Memory (RTM), the DR6 register has been modified:
+ *
+ * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports
+ * Bus Lock Detection. The assertion of a bus lock could clear it.
+ *
+ * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports
+ * restricted transactional memory. #DB occurred inside an RTM region
+ * could clear it.
+ *
+ * Apparently, DR6.BLD and DR6.RTM are active low bits.
+ *
+ * As a result, DR6_RESERVED is an incorrect name now, but it is kept for
+ * compatibility.
+ */
#define DR6_RESERVED (0xFFFF0FF0)
#define DR_TRAP0 (0x1) /* db0 */
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index 2f491efe3a12..55bc66867156 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -54,7 +54,7 @@
*/
#define E820_RESERVED_KERN 128
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
struct e820entry {
__u64 addr; /* start of memory segment */
@@ -76,7 +76,7 @@ struct e820map {
#define BIOS_ROM_BASE 0xffe00000
#define BIOS_ROM_END 0xffffffff
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_E820_H */
diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index d62ac5db093b..a82c039d8e6a 100644
--- a/arch/x86/include/uapi/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h
@@ -12,7 +12,7 @@
/* The size of each LDT entry. */
#define LDT_ENTRY_SIZE 8
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Note on 64bit base and limit is ignored and you cannot set DS/ES/CS
* not to the default values if you still want to do syscalls. This
@@ -44,5 +44,5 @@ struct user_desc {
#define MODIFY_LDT_CONTENTS_STACK 1
#define MODIFY_LDT_CONTENTS_CODE 2
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_LDT_H */
diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h
index e7516b402a00..4b8917ca28fe 100644
--- a/arch/x86/include/uapi/asm/msr.h
+++ b/arch/x86/include/uapi/asm/msr.h
@@ -2,7 +2,7 @@
#ifndef _UAPI_ASM_X86_MSR_H
#define _UAPI_ASM_X86_MSR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/ioctl.h>
@@ -10,5 +10,5 @@
#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8])
#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8])
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_MSR_H */
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 16074b9c93bb..5823584dea13 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,7 +25,7 @@
#else /* __i386__ */
-#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+#if defined(__ASSEMBLER__) || defined(__FRAME_OFFSETS)
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
@@ -57,7 +57,7 @@
#define EFLAGS 144
#define RSP 152
#define SS 160
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* top of stack page */
#define FRAME_SIZE 168
@@ -87,7 +87,7 @@
#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#endif
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index 85165c0edafc..e0b5b4f6226b 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -7,7 +7,7 @@
#include <asm/processor-flags.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef __i386__
/* this struct defines the way the registers are stored on the
@@ -81,6 +81,6 @@ struct pt_regs {
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h
index b111b0c18544..50c45ead4e7c 100644
--- a/arch/x86/include/uapi/asm/setup_data.h
+++ b/arch/x86/include/uapi/asm/setup_data.h
@@ -18,7 +18,7 @@
#define SETUP_INDIRECT (1<<31)
#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -78,6 +78,6 @@ struct ima_setup_data {
__u64 size;
} __attribute__((packed));
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index f777346450ec..1067efabf18b 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -2,7 +2,7 @@
#ifndef _UAPI_ASM_X86_SIGNAL_H
#define _UAPI_ASM_X86_SIGNAL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/compiler.h>
@@ -16,7 +16,7 @@ struct siginfo;
typedef unsigned long sigset_t;
#endif /* __KERNEL__ */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define SIGHUP 1
@@ -68,7 +68,7 @@ typedef unsigned long sigset_t;
#include <asm-generic/signal-defs.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
# ifndef __KERNEL__
@@ -106,6 +106,6 @@ typedef struct sigaltstack {
__kernel_size_t ss_size;
} stack_t;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_SIGNAL_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d17518ca19b8..6ab96bc764cf 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -18,6 +18,7 @@
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
+#include <linux/execmem.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -31,6 +32,8 @@
#include <asm/paravirt.h>
#include <asm/asm-prototypes.h>
#include <asm/cfi.h>
+#include <asm/ibt.h>
+#include <asm/set_memory.h>
int __read_mostly alternatives_patched;
@@ -124,6 +127,136 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
#endif
};
+#ifdef CONFIG_MITIGATION_ITS
+
+#ifdef CONFIG_MODULES
+static struct module *its_mod;
+#endif
+static void *its_page;
+static unsigned int its_offset;
+
+/* Initialize a thunk with the "jmp *reg; int3" instructions. */
+static void *its_init_thunk(void *thunk, int reg)
+{
+ u8 *bytes = thunk;
+ int i = 0;
+
+ if (reg >= 8) {
+ bytes[i++] = 0x41; /* REX.B prefix */
+ reg -= 8;
+ }
+ bytes[i++] = 0xff;
+ bytes[i++] = 0xe0 + reg; /* jmp *reg */
+ bytes[i++] = 0xcc;
+
+ return thunk;
+}
+
+#ifdef CONFIG_MODULES
+void its_init_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ mutex_lock(&text_mutex);
+ its_mod = mod;
+ its_page = NULL;
+}
+
+void its_fini_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ WARN_ON_ONCE(its_mod != mod);
+
+ its_mod = NULL;
+ its_page = NULL;
+ mutex_unlock(&text_mutex);
+
+ for (int i = 0; i < mod->its_num_pages; i++) {
+ void *page = mod->its_page_array[i];
+ set_memory_rox((unsigned long)page, 1);
+ }
+}
+
+void its_free_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ for (int i = 0; i < mod->its_num_pages; i++) {
+ void *page = mod->its_page_array[i];
+ execmem_free(page);
+ }
+ kfree(mod->its_page_array);
+}
+#endif /* CONFIG_MODULES */
+
+static void *its_alloc(void)
+{
+ void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE);
+
+ if (!page)
+ return NULL;
+
+#ifdef CONFIG_MODULES
+ if (its_mod) {
+ void *tmp = krealloc(its_mod->its_page_array,
+ (its_mod->its_num_pages+1) * sizeof(void *),
+ GFP_KERNEL);
+ if (!tmp)
+ return NULL;
+
+ its_mod->its_page_array = tmp;
+ its_mod->its_page_array[its_mod->its_num_pages++] = page;
+ }
+#endif /* CONFIG_MODULES */
+
+ return no_free_ptr(page);
+}
+
+static void *its_allocate_thunk(int reg)
+{
+ int size = 3 + (reg / 8);
+ void *thunk;
+
+ if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) {
+ its_page = its_alloc();
+ if (!its_page) {
+ pr_err("ITS page allocation failed\n");
+ return NULL;
+ }
+ memset(its_page, INT3_INSN_OPCODE, PAGE_SIZE);
+ its_offset = 32;
+ }
+
+ /*
+ * If the indirect branch instruction will be in the lower half
+ * of a cacheline, then update the offset to reach the upper half.
+ */
+ if ((its_offset + size - 1) % 64 < 32)
+ its_offset = ((its_offset - 1) | 0x3F) + 33;
+
+ thunk = its_page + its_offset;
+ its_offset += size;
+
+ set_memory_rw((unsigned long)its_page, 1);
+ thunk = its_init_thunk(thunk, reg);
+ set_memory_rox((unsigned long)its_page, 1);
+
+ return thunk;
+}
+
+u8 *its_static_thunk(int reg)
+{
+ u8 *thunk = __x86_indirect_its_thunk_array[reg];
+
+ return thunk;
+}
+
+#endif
+
/*
* Nomenclature for variable names to simplify and clarify this code and ease
* any potential staring at it:
@@ -581,7 +714,8 @@ static int emit_indirect(int op, int reg, u8 *bytes)
return i;
}
-static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+static int __emit_trampoline(void *addr, struct insn *insn, u8 *bytes,
+ void *call_dest, void *jmp_dest)
{
u8 op = insn->opcode.bytes[0];
int i = 0;
@@ -602,7 +736,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8
switch (op) {
case CALL_INSN_OPCODE:
__text_gen_insn(bytes+i, op, addr+i,
- __x86_indirect_call_thunk_array[reg],
+ call_dest,
CALL_INSN_SIZE);
i += CALL_INSN_SIZE;
break;
@@ -610,7 +744,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8
case JMP32_INSN_OPCODE:
clang_jcc:
__text_gen_insn(bytes+i, op, addr+i,
- __x86_indirect_jump_thunk_array[reg],
+ jmp_dest,
JMP32_INSN_SIZE);
i += JMP32_INSN_SIZE;
break;
@@ -625,6 +759,39 @@ clang_jcc:
return i;
}
+static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+{
+ return __emit_trampoline(addr, insn, bytes,
+ __x86_indirect_call_thunk_array[reg],
+ __x86_indirect_jump_thunk_array[reg]);
+}
+
+#ifdef CONFIG_MITIGATION_ITS
+static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+{
+ u8 *thunk = __x86_indirect_its_thunk_array[reg];
+ u8 *tmp = its_allocate_thunk(reg);
+
+ if (tmp)
+ thunk = tmp;
+
+ return __emit_trampoline(addr, insn, bytes, thunk, thunk);
+}
+
+/* Check if an indirect branch is at ITS-unsafe address */
+static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return false;
+
+ /* Indirect branch opcode is 2 or 3 bytes depending on reg */
+ addr += 1 + reg / 8;
+
+ /* Lower-half of the cacheline? */
+ return !(addr & 0x20);
+}
+#endif
+
/*
* Rewrite the compiler generated retpoline thunk calls.
*
@@ -699,6 +866,15 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
bytes[i++] = 0xe8; /* LFENCE */
}
+#ifdef CONFIG_MITIGATION_ITS
+ /*
+ * Check if the address of last byte of emitted-indirect is in
+ * lower-half of the cacheline. Such branches need ITS mitigation.
+ */
+ if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + i, reg))
+ return emit_its_trampoline(addr, insn, reg, bytes);
+#endif
+
ret = emit_indirect(op, reg, bytes + i);
if (ret < 0)
return ret;
@@ -770,6 +946,21 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
#ifdef CONFIG_MITIGATION_RETHUNK
+bool cpu_wants_rethunk(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_RETHUNK);
+}
+
+bool cpu_wants_rethunk_at(void *addr)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ return false;
+ if (x86_return_thunk != its_return_thunk)
+ return true;
+
+ return !((unsigned long)addr & 0x20);
+}
+
/*
* Rewrite the compiler generated return thunk tail-calls.
*
@@ -786,7 +977,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
int i = 0;
/* Patch the custom return thunks... */
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ if (cpu_wants_rethunk_at(addr)) {
i = JMP32_INSN_SIZE;
__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
} else {
@@ -803,7 +994,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
{
s32 *s;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk())
static_call_force_reinit();
for (s = start; s < end; s++) {
@@ -1063,6 +1254,7 @@ asm( ".pushsection .rodata \n"
" endbr64 \n"
" subl $0x12345678, %r10d \n"
" je fineibt_preamble_end \n"
+ "fineibt_preamble_ud2: \n"
" ud2 \n"
" nop \n"
"fineibt_preamble_end: \n"
@@ -1070,9 +1262,11 @@ asm( ".pushsection .rodata \n"
);
extern u8 fineibt_preamble_start[];
+extern u8 fineibt_preamble_ud2[];
extern u8 fineibt_preamble_end[];
#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
+#define fineibt_preamble_ud2 (fineibt_preamble_ud2 - fineibt_preamble_start)
#define fineibt_preamble_hash 7
asm( ".pushsection .rodata \n"
@@ -1377,6 +1571,33 @@ static void poison_cfi(void *addr)
}
}
+/*
+ * regs->ip points to a UD2 instruction, return true and fill out target and
+ * type when this UD2 is from a FineIBT preamble.
+ *
+ * We check the preamble by checking for the ENDBR instruction relative to the
+ * UD2 instruction.
+ */
+bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ unsigned long addr = regs->ip - fineibt_preamble_ud2;
+ u32 endbr, hash;
+
+ __get_kernel_nofault(&endbr, addr, u32, Efault);
+ if (endbr != gen_endbr())
+ return false;
+
+ *target = addr + fineibt_preamble_size;
+
+ __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault);
+ *type = (u32)regs->r10 + hash;
+
+ return true;
+
+Efault:
+ return false;
+}
+
#else
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
@@ -1665,6 +1886,8 @@ static noinline void __init alt_reloc_selftest(void)
void __init alternative_instructions(void)
{
+ u64 ibt;
+
int3_selftest();
/*
@@ -1691,6 +1914,9 @@ void __init alternative_instructions(void)
*/
paravirt_set_cap();
+ /* Keep CET-IBT disabled until caller/callee are patched */
+ ibt = ibt_save(/*disable*/ true);
+
__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
__cfi_sites, __cfi_sites_end, true);
@@ -1714,6 +1940,8 @@ void __init alternative_instructions(void)
*/
apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
+ ibt_restore(ibt);
+
#ifdef CONFIG_SMP
/* Patch to UP if other cpus not imminent. */
if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
diff --git a/arch/x86/kernel/cfi.c b/arch/x86/kernel/cfi.c
index e6bf78fac146..f6905bef0af8 100644
--- a/arch/x86/kernel/cfi.c
+++ b/arch/x86/kernel/cfi.c
@@ -70,11 +70,25 @@ enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
unsigned long target;
u32 type;
- if (!is_cfi_trap(regs->ip))
- return BUG_TRAP_TYPE_NONE;
+ switch (cfi_mode) {
+ case CFI_KCFI:
+ if (!is_cfi_trap(regs->ip))
+ return BUG_TRAP_TYPE_NONE;
+
+ if (!decode_cfi_insn(regs, &target, &type))
+ return report_cfi_failure_noaddr(regs, regs->ip);
+
+ break;
- if (!decode_cfi_insn(regs, &target, &type))
- return report_cfi_failure_noaddr(regs, regs->ip);
+ case CFI_FINEIBT:
+ if (!decode_fineibt_insn(regs, &target, &type))
+ return BUG_TRAP_TYPE_NONE;
+
+ break;
+
+ default:
+ return BUG_TRAP_TYPE_NONE;
+ }
return report_cfi_failure(regs, regs->ip, &target, type);
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e432910859cb..8a740e92e483 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -368,6 +368,63 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c)
#endif
}
+static bool amd_check_tsa_microcode(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ union zen_patch_rev p;
+ u32 min_rev = 0;
+
+ p.ext_fam = c->x86 - 0xf;
+ p.model = c->x86_model;
+ p.stepping = c->x86_stepping;
+
+ if (cpu_has(c, X86_FEATURE_ZEN3) ||
+ cpu_has(c, X86_FEATURE_ZEN4)) {
+ switch (p.ucode_rev >> 8) {
+ case 0xa0011: min_rev = 0x0a0011d7; break;
+ case 0xa0012: min_rev = 0x0a00123b; break;
+ case 0xa0082: min_rev = 0x0a00820d; break;
+ case 0xa1011: min_rev = 0x0a10114c; break;
+ case 0xa1012: min_rev = 0x0a10124c; break;
+ case 0xa1081: min_rev = 0x0a108109; break;
+ case 0xa2010: min_rev = 0x0a20102e; break;
+ case 0xa2012: min_rev = 0x0a201211; break;
+ case 0xa4041: min_rev = 0x0a404108; break;
+ case 0xa5000: min_rev = 0x0a500012; break;
+ case 0xa6012: min_rev = 0x0a60120a; break;
+ case 0xa7041: min_rev = 0x0a704108; break;
+ case 0xa7052: min_rev = 0x0a705208; break;
+ case 0xa7080: min_rev = 0x0a708008; break;
+ case 0xa70c0: min_rev = 0x0a70c008; break;
+ case 0xaa002: min_rev = 0x0aa00216; break;
+ default:
+ pr_debug("%s: ucode_rev: 0x%x, current revision: 0x%x\n",
+ __func__, p.ucode_rev, c->microcode);
+ return false;
+ }
+ }
+
+ if (!min_rev)
+ return false;
+
+ return c->microcode >= min_rev;
+}
+
+static void tsa_init(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return;
+
+ if (cpu_has(c, X86_FEATURE_ZEN3) ||
+ cpu_has(c, X86_FEATURE_ZEN4)) {
+ if (amd_check_tsa_microcode())
+ setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR);
+ } else {
+ setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO);
+ setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO);
+ }
+}
+
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -475,6 +532,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
}
bsp_determine_snp(c);
+
+ tsa_init(c);
+
return;
warn:
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5fba44a4f988..c2c7b76d953f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -49,6 +49,8 @@ static void __init srbds_select_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
static void __init srso_select_mitigation(void);
static void __init gds_select_mitigation(void);
+static void __init its_select_mitigation(void);
+static void __init tsa_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -67,6 +69,14 @@ static DEFINE_MUTEX(spec_ctrl_mutex);
void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
+static void __init set_return_thunk(void *thunk)
+{
+ if (x86_return_thunk != __x86_return_thunk)
+ pr_warn("x86/bugs: return thunk changed\n");
+
+ x86_return_thunk = thunk;
+}
+
/* Update SPEC_CTRL MSR and its cached copy unconditionally */
static void update_spec_ctrl(u64 val)
{
@@ -113,9 +123,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-/* Control MDS CPU buffer clear before idling (halt, mwait) */
-DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
-EXPORT_SYMBOL_GPL(mds_idle_clear);
+/* Control CPU buffer clear before idling (halt, mwait) */
+DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
+EXPORT_SYMBOL_GPL(cpu_buf_idle_clear);
/*
* Controls whether l1d flush based mitigations are enabled,
@@ -175,6 +185,8 @@ void __init cpu_select_mitigations(void)
*/
srso_select_mitigation();
gds_select_mitigation();
+ its_select_mitigation();
+ tsa_select_mitigation();
}
/*
@@ -438,7 +450,7 @@ static void __init mmio_select_mitigation(void)
* is required irrespective of SMT state.
*/
if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
- static_branch_enable(&mds_idle_clear);
+ static_branch_enable(&cpu_buf_idle_clear);
/*
* Check if the system has the right microcode.
@@ -1104,7 +1116,7 @@ do_cmd_auto:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_UNRET);
- x86_return_thunk = retbleed_return_thunk;
+ set_return_thunk(retbleed_return_thunk);
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
@@ -1139,7 +1151,7 @@ do_cmd_auto:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH);
- x86_return_thunk = call_depth_return_thunk;
+ set_return_thunk(call_depth_return_thunk);
break;
default:
@@ -1174,6 +1186,145 @@ do_cmd_auto:
}
#undef pr_fmt
+#define pr_fmt(fmt) "ITS: " fmt
+
+enum its_mitigation_cmd {
+ ITS_CMD_OFF,
+ ITS_CMD_ON,
+ ITS_CMD_VMEXIT,
+ ITS_CMD_RSB_STUFF,
+};
+
+enum its_mitigation {
+ ITS_MITIGATION_OFF,
+ ITS_MITIGATION_VMEXIT_ONLY,
+ ITS_MITIGATION_ALIGNED_THUNKS,
+ ITS_MITIGATION_RETPOLINE_STUFF,
+};
+
+static const char * const its_strings[] = {
+ [ITS_MITIGATION_OFF] = "Vulnerable",
+ [ITS_MITIGATION_VMEXIT_ONLY] = "Mitigation: Vulnerable, KVM: Not affected",
+ [ITS_MITIGATION_ALIGNED_THUNKS] = "Mitigation: Aligned branch/return thunks",
+ [ITS_MITIGATION_RETPOLINE_STUFF] = "Mitigation: Retpolines, Stuffing RSB",
+};
+
+static enum its_mitigation its_mitigation __ro_after_init = ITS_MITIGATION_ALIGNED_THUNKS;
+
+static enum its_mitigation_cmd its_cmd __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_ITS) ? ITS_CMD_ON : ITS_CMD_OFF;
+
+static int __init its_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!IS_ENABLED(CONFIG_MITIGATION_ITS)) {
+ pr_err("Mitigation disabled at compile time, ignoring option (%s)", str);
+ return 0;
+ }
+
+ if (!strcmp(str, "off")) {
+ its_cmd = ITS_CMD_OFF;
+ } else if (!strcmp(str, "on")) {
+ its_cmd = ITS_CMD_ON;
+ } else if (!strcmp(str, "force")) {
+ its_cmd = ITS_CMD_ON;
+ setup_force_cpu_bug(X86_BUG_ITS);
+ } else if (!strcmp(str, "vmexit")) {
+ its_cmd = ITS_CMD_VMEXIT;
+ } else if (!strcmp(str, "stuff")) {
+ its_cmd = ITS_CMD_RSB_STUFF;
+ } else {
+ pr_err("Ignoring unknown indirect_target_selection option (%s).", str);
+ }
+
+ return 0;
+}
+early_param("indirect_target_selection", its_parse_cmdline);
+
+static void __init its_select_mitigation(void)
+{
+ enum its_mitigation_cmd cmd = its_cmd;
+
+ if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) {
+ its_mitigation = ITS_MITIGATION_OFF;
+ return;
+ }
+
+ /* Retpoline+CDT mitigates ITS, bail out */
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+ boot_cpu_has(X86_FEATURE_CALL_DEPTH)) {
+ its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF;
+ goto out;
+ }
+
+ /* Exit early to avoid irrelevant warnings */
+ if (cmd == ITS_CMD_OFF) {
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+ if (spectre_v2_enabled == SPECTRE_V2_NONE) {
+ pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+ if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) ||
+ !IS_ENABLED(CONFIG_MITIGATION_RETHUNK)) {
+ pr_err("WARNING: ITS mitigation depends on retpoline and rethunk support\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+ if (IS_ENABLED(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)) {
+ pr_err("WARNING: ITS mitigation is not compatible with CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) {
+ pr_err("WARNING: ITS mitigation is not compatible with lfence mitigation\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+
+ if (cmd == ITS_CMD_RSB_STUFF &&
+ (!boot_cpu_has(X86_FEATURE_RETPOLINE) || !IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING))) {
+ pr_err("RSB stuff mitigation not supported, using default\n");
+ cmd = ITS_CMD_ON;
+ }
+
+ switch (cmd) {
+ case ITS_CMD_OFF:
+ its_mitigation = ITS_MITIGATION_OFF;
+ break;
+ case ITS_CMD_VMEXIT:
+ if (boot_cpu_has_bug(X86_BUG_ITS_NATIVE_ONLY)) {
+ its_mitigation = ITS_MITIGATION_VMEXIT_ONLY;
+ goto out;
+ }
+ fallthrough;
+ case ITS_CMD_ON:
+ its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS;
+ if (!boot_cpu_has(X86_FEATURE_RETPOLINE))
+ setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS);
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ set_return_thunk(its_return_thunk);
+ break;
+ case ITS_CMD_RSB_STUFF:
+ its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF;
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH);
+ set_return_thunk(call_depth_return_thunk);
+ if (retbleed_mitigation == RETBLEED_MITIGATION_NONE) {
+ retbleed_mitigation = RETBLEED_MITIGATION_STUFF;
+ pr_info("Retbleed mitigation updated to stuffing\n");
+ }
+ break;
+ }
+out:
+ pr_info("%s\n", its_strings[its_mitigation]);
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
@@ -1293,9 +1444,13 @@ static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
static enum spectre_v2_user_cmd __init
spectre_v2_parse_user_cmdline(void)
{
+ enum spectre_v2_user_cmd mode;
char arg[20];
int ret, i;
+ mode = IS_ENABLED(CONFIG_MITIGATION_SPECTRE_V2) ?
+ SPECTRE_V2_USER_CMD_AUTO : SPECTRE_V2_USER_CMD_NONE;
+
switch (spectre_v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return SPECTRE_V2_USER_CMD_NONE;
@@ -1308,7 +1463,7 @@ spectre_v2_parse_user_cmdline(void)
ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
arg, sizeof(arg));
if (ret < 0)
- return SPECTRE_V2_USER_CMD_AUTO;
+ return mode;
for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
if (match_option(arg, ret, v2_user_options[i].option)) {
@@ -1318,8 +1473,8 @@ spectre_v2_parse_user_cmdline(void)
}
}
- pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
- return SPECTRE_V2_USER_CMD_AUTO;
+ pr_err("Unknown user space protection option (%s). Switching to default\n", arg);
+ return mode;
}
static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
@@ -1578,7 +1733,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
rrsba_disabled = true;
}
-static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
+static void __init spectre_v2_select_rsb_mitigation(enum spectre_v2_mitigation mode)
{
/*
* Similar to context switches, there are two types of RSB attacks
@@ -1602,27 +1757,30 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
*/
switch (mode) {
case SPECTRE_V2_NONE:
- return;
+ break;
- case SPECTRE_V2_EIBRS_LFENCE:
case SPECTRE_V2_EIBRS:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
- setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
+ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
}
- return;
+ break;
- case SPECTRE_V2_EIBRS_RETPOLINE:
case SPECTRE_V2_RETPOLINE:
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_IBRS:
+ pr_info("Spectre v2 / SpectreRSB: Filling RSB on context switch and VMEXIT\n");
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
- pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
- return;
- }
+ break;
- pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
- dump_stack();
+ default:
+ pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation\n");
+ dump_stack();
+ break;
+ }
}
/*
@@ -1681,11 +1839,11 @@ static void __init bhi_select_mitigation(void)
return;
}
- /* Mitigate in hardware if supported */
- if (spec_ctrl_bhi_dis())
+ if (!IS_ENABLED(CONFIG_X86_64))
return;
- if (!IS_ENABLED(CONFIG_X86_64))
+ /* Mitigate in hardware if supported */
+ if (spec_ctrl_bhi_dis())
return;
if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) {
@@ -1854,10 +2012,7 @@ static void __init spectre_v2_select_mitigation(void)
*
* FIXME: Is this pointless for retbleed-affected AMD?
*/
- setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
- pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
-
- spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
+ spectre_v2_select_rsb_mitigation(mode);
/*
* Retpoline protects the kernel, but doesn't protect firmware. IBRS
@@ -1939,10 +2094,10 @@ static void update_mds_branch_idle(void)
return;
if (sched_smt_active()) {
- static_branch_enable(&mds_idle_clear);
+ static_branch_enable(&cpu_buf_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
- static_branch_disable(&mds_idle_clear);
+ static_branch_disable(&cpu_buf_idle_clear);
}
}
@@ -1950,6 +2105,94 @@ static void update_mds_branch_idle(void)
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
+#undef pr_fmt
+#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt
+
+enum tsa_mitigations {
+ TSA_MITIGATION_NONE,
+ TSA_MITIGATION_UCODE_NEEDED,
+ TSA_MITIGATION_USER_KERNEL,
+ TSA_MITIGATION_VM,
+ TSA_MITIGATION_FULL,
+};
+
+static const char * const tsa_strings[] = {
+ [TSA_MITIGATION_NONE] = "Vulnerable",
+ [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
+ [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary",
+ [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM",
+ [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers",
+};
+
+static enum tsa_mitigations tsa_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_FULL : TSA_MITIGATION_NONE;
+
+static int __init tsa_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ tsa_mitigation = TSA_MITIGATION_NONE;
+ else if (!strcmp(str, "on"))
+ tsa_mitigation = TSA_MITIGATION_FULL;
+ else if (!strcmp(str, "user"))
+ tsa_mitigation = TSA_MITIGATION_USER_KERNEL;
+ else if (!strcmp(str, "vm"))
+ tsa_mitigation = TSA_MITIGATION_VM;
+ else
+ pr_err("Ignoring unknown tsa=%s option.\n", str);
+
+ return 0;
+}
+early_param("tsa", tsa_parse_cmdline);
+
+static void __init tsa_select_mitigation(void)
+{
+ if (tsa_mitigation == TSA_MITIGATION_NONE)
+ return;
+
+ if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) {
+ tsa_mitigation = TSA_MITIGATION_NONE;
+ return;
+ }
+
+ if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR))
+ tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED;
+
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ break;
+
+ case TSA_MITIGATION_VM:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
+ break;
+
+ case TSA_MITIGATION_UCODE_NEEDED:
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ goto out;
+
+ pr_notice("Forcing mitigation on in a VM\n");
+
+ /*
+ * On the off-chance that microcode has been updated
+ * on the host, enable the mitigation in the guest just
+ * in case.
+ */
+ fallthrough;
+ case TSA_MITIGATION_FULL:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
+ break;
+ default:
+ break;
+ }
+
+out:
+ pr_info("%s\n", tsa_strings[tsa_mitigation]);
+}
+
void cpu_bugs_smt_update(void)
{
mutex_lock(&spec_ctrl_mutex);
@@ -2003,6 +2246,24 @@ void cpu_bugs_smt_update(void)
break;
}
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ case TSA_MITIGATION_VM:
+ case TSA_MITIGATION_FULL:
+ case TSA_MITIGATION_UCODE_NEEDED:
+ /*
+ * TSA-SQ can potentially lead to info leakage between
+ * SMT threads.
+ */
+ if (sched_smt_active())
+ static_branch_enable(&cpu_buf_idle_clear);
+ else
+ static_branch_disable(&cpu_buf_idle_clear);
+ break;
+ case TSA_MITIGATION_NONE:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
@@ -2624,10 +2885,10 @@ static void __init srso_select_mitigation(void)
if (boot_cpu_data.x86 == 0x19) {
setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
- x86_return_thunk = srso_alias_return_thunk;
+ set_return_thunk(srso_alias_return_thunk);
} else {
setup_force_cpu_cap(X86_FEATURE_SRSO);
- x86_return_thunk = srso_return_thunk;
+ set_return_thunk(srso_return_thunk);
}
if (has_microcode)
srso_mitigation = SRSO_MITIGATION_SAFE_RET;
@@ -2802,6 +3063,11 @@ static ssize_t rfds_show_state(char *buf)
return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]);
}
+static ssize_t its_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", its_strings[its_mitigation]);
+}
+
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
@@ -2926,6 +3192,11 @@ static ssize_t gds_show_state(char *buf)
return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
}
+static ssize_t tsa_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -2984,6 +3255,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_RFDS:
return rfds_show_state(buf);
+ case X86_BUG_ITS:
+ return its_show_state(buf);
+
+ case X86_BUG_TSA:
+ return tsa_show_state(buf);
+
default:
break;
}
@@ -3063,6 +3340,16 @@ ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attrib
{
return cpu_show_common(dev, attr, buf, X86_BUG_RFDS);
}
+
+ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_ITS);
+}
+
+ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_TSA);
+}
#endif
void __warn_thunk(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f439763f45ae..ed072b126111 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1007,17 +1007,18 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_D_1_EAX] = eax;
}
- /* AMD-defined flags: level 0x80000001 */
+ /*
+ * Check if extended CPUID leaves are implemented: Max extended
+ * CPUID leaf must be in the 0x80000001-0x8000ffff range.
+ */
eax = cpuid_eax(0x80000000);
- c->extended_cpuid_level = eax;
+ c->extended_cpuid_level = ((eax & 0xffff0000) == 0x80000000) ? eax : 0;
- if ((eax & 0xffff0000) == 0x80000000) {
- if (eax >= 0x80000001) {
- cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+ if (c->extended_cpuid_level >= 0x80000001) {
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_8000_0001_ECX] = ecx;
- c->x86_capability[CPUID_8000_0001_EDX] = edx;
- }
+ c->x86_capability[CPUID_8000_0001_ECX] = ecx;
+ c->x86_capability[CPUID_8000_0001_EDX] = edx;
}
if (c->extended_cpuid_level >= 0x80000007) {
@@ -1228,6 +1229,12 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define GDS BIT(6)
/* CPU is affected by Register File Data Sampling */
#define RFDS BIT(7)
+/* CPU is affected by Indirect Target Selection */
+#define ITS BIT(8)
+/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */
+#define ITS_NATIVE_ONLY BIT(9)
+/* CPU is affected by Transient Scheduler Attacks */
+#define TSA BIT(10)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(INTEL_IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@@ -1239,22 +1246,25 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL_G, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(INTEL_BROADWELL, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPINGS(0x0, 0x5), MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | ITS),
VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
VULNBL_INTEL_STEPPINGS(INTEL_SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE_L, X86_STEPPINGS(0x0, 0xb), MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | ITS),
+ VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE, X86_STEPPINGS(0x0, 0xc), MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPPINGS(INTEL_KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | ITS),
VULNBL_INTEL_STEPPINGS(INTEL_CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
- VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS),
- VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS),
- VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPPINGS(INTEL_TIGERLAKE_L, X86_STEPPING_ANY, GDS),
- VULNBL_INTEL_STEPPINGS(INTEL_TIGERLAKE, X86_STEPPING_ANY, GDS),
+ VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPPINGS(INTEL_ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED | ITS),
+ VULNBL_INTEL_STEPPINGS(INTEL_COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPPINGS(INTEL_TIGERLAKE_L, X86_STEPPING_ANY, GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPPINGS(INTEL_TIGERLAKE, X86_STEPPING_ANY, GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPPINGS(INTEL_LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(INTEL_ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(INTEL_ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPPINGS(INTEL_ALDERLAKE, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(INTEL_ALDERLAKE_L, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(INTEL_RAPTORLAKE, X86_STEPPING_ANY, RFDS),
@@ -1272,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_AMD(0x16, RETBLEED),
VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
- VULNBL_AMD(0x19, SRSO),
+ VULNBL_AMD(0x19, SRSO | TSA),
{}
};
@@ -1318,6 +1328,32 @@ static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
return cpu_matches(cpu_vuln_blacklist, RFDS);
}
+static bool __init vulnerable_to_its(u64 x86_arch_cap_msr)
+{
+ /* The "immunity" bit trumps everything else: */
+ if (x86_arch_cap_msr & ARCH_CAP_ITS_NO)
+ return false;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ /* None of the affected CPUs have BHI_CTRL */
+ if (boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ /*
+ * If a VMM did not expose ITS_NO, assume that a guest could
+ * be running on a vulnerable hardware or may migrate to such
+ * hardware.
+ */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
+
+ if (cpu_matches(cpu_vuln_blacklist, ITS))
+ return true;
+
+ return false;
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
@@ -1437,9 +1473,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
- /* When virtualized, eIBRS could be hidden, assume vulnerable */
- if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
- !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ /*
+ * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with
+ * BHI_NO still need to use the BHI mitigation to prevent Intra-mode
+ * attacks. When virtualized, eIBRS could be hidden, assume vulnerable.
+ */
+ if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
(boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
setup_force_cpu_bug(X86_BUG_BHI);
@@ -1447,6 +1486,22 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET))
setup_force_cpu_bug(X86_BUG_IBPB_NO_RET);
+ if (vulnerable_to_its(x86_arch_cap_msr)) {
+ setup_force_cpu_bug(X86_BUG_ITS);
+ if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY))
+ setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY);
+ }
+
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) ||
+ !cpu_has(c, X86_FEATURE_TSA_L1_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, TSA) ||
+ /* Enable bug on Zen guests to allow for live migration. */
+ (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN)))
+ setup_force_cpu_bug(X86_BUG_TSA);
+ }
+ }
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -2102,20 +2157,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif /* CONFIG_X86_64 */
-/*
- * Clear all 6 debug registers:
- */
-static void clear_all_debug_regs(void)
+static void initialize_debug_regs(void)
{
- int i;
-
- for (i = 0; i < 8; i++) {
- /* Ignore db4, db5 */
- if ((i == 4) || (i == 5))
- continue;
-
- set_debugreg(0, i);
- }
+ /* Control register first -- to make sure everything is disabled. */
+ set_debugreg(0, 7);
+ set_debugreg(DR6_RESERVED, 6);
+ /* dr5 and dr4 don't exist */
+ set_debugreg(0, 3);
+ set_debugreg(0, 2);
+ set_debugreg(0, 1);
+ set_debugreg(0, 0);
}
#ifdef CONFIG_KGDB
@@ -2276,7 +2327,7 @@ void cpu_init(void)
load_mm_ldt(&init_mm);
- clear_all_debug_regs();
+ initialize_debug_regs();
dbg_restore_debug_regs();
doublefault_init_cpu_tss();
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 093d3ca43c46..765b4646648f 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -94,18 +94,6 @@ static struct equiv_cpu_table {
struct equiv_cpu_entry *entry;
} equiv_table;
-union zen_patch_rev {
- struct {
- __u32 rev : 8,
- stepping : 4,
- model : 4,
- __reserved : 4,
- ext_model : 4,
- ext_fam : 8;
- };
- __u32 ucode_rev;
-};
-
union cpuid_1_eax {
struct {
__u32 stepping : 4,
@@ -1099,15 +1087,17 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz
static int __init save_microcode_in_initrd(void)
{
- unsigned int cpuid_1_eax = native_cpuid_eax(1);
struct cpuinfo_x86 *c = &boot_cpu_data;
struct cont_desc desc = { 0 };
+ unsigned int cpuid_1_eax;
enum ucode_state ret;
struct cpio_data cp;
- if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10)
+ if (microcode_loader_disabled() || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10)
return 0;
+ cpuid_1_eax = native_cpuid_eax(1);
+
if (!find_blobs_in_containers(&cp))
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c
index 2a1655b1fdd8..1fd349cfc802 100644
--- a/arch/x86/kernel/cpu/microcode/amd_shas.c
+++ b/arch/x86/kernel/cpu/microcode/amd_shas.c
@@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = {
0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21,
}
},
+ { 0xa0011d7, {
+ 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b,
+ 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12,
+ 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2,
+ 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36,
+ }
+ },
{ 0xa001223, {
0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8,
0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4,
@@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = {
0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59,
}
},
+ { 0xa00123b, {
+ 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2,
+ 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3,
+ 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28,
+ 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72,
+ }
+ },
{ 0xa00820c, {
0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3,
0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63,
@@ -301,6 +315,13 @@ static const struct patch_digest phashes[] = {
0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2,
}
},
+ { 0xa00820d, {
+ 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4,
+ 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca,
+ 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1,
+ 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7,
+ }
+ },
{ 0xa10113e, {
0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10,
0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0,
@@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = {
0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4,
}
},
+ { 0xa10114c, {
+ 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64,
+ 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74,
+ 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a,
+ 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0,
+ }
+ },
{ 0xa10123e, {
0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18,
0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d,
@@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = {
0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75,
}
},
+ { 0xa10124c, {
+ 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90,
+ 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46,
+ 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc,
+ 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2,
+ }
+ },
{ 0xa108108, {
0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9,
0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6,
@@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = {
0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16,
}
},
+ { 0xa108109, {
+ 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa,
+ 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b,
+ 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35,
+ 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59,
+ }
+ },
{ 0xa20102d, {
0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11,
0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89,
@@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = {
0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4,
}
},
+ { 0xa20102e, {
+ 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd,
+ 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6,
+ 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5,
+ 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe,
+ }
+ },
{ 0xa201210, {
0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe,
0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9,
@@ -364,6 +413,13 @@ static const struct patch_digest phashes[] = {
0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41,
}
},
+ { 0xa201211, {
+ 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95,
+ 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27,
+ 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff,
+ 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27,
+ }
+ },
{ 0xa404107, {
0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45,
0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0,
@@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = {
0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99,
}
},
+ { 0xa404108, {
+ 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc,
+ 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb,
+ 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19,
+ 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00,
+ }
+ },
{ 0xa500011, {
0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4,
0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1,
@@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = {
0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74,
}
},
+ { 0xa500012, {
+ 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4,
+ 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16,
+ 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f,
+ 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63,
+ }
+ },
{ 0xa601209, {
0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32,
0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30,
@@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = {
0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d,
}
},
+ { 0xa60120a, {
+ 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d,
+ 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b,
+ 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d,
+ 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c,
+ }
+ },
{ 0xa704107, {
0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6,
0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93,
@@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = {
0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39,
}
},
+ { 0xa704108, {
+ 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93,
+ 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98,
+ 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49,
+ 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a,
+ }
+ },
{ 0xa705206, {
0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4,
0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7,
@@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = {
0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc,
}
},
+ { 0xa705208, {
+ 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19,
+ 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2,
+ 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11,
+ 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff,
+ }
+ },
{ 0xa708007, {
0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3,
0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2,
@@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = {
0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93,
}
},
+ { 0xa708008, {
+ 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46,
+ 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab,
+ 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16,
+ 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b,
+ }
+ },
{ 0xa70c005, {
0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b,
0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f,
@@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = {
0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13,
}
},
+ { 0xa70c008, {
+ 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21,
+ 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e,
+ 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66,
+ 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0,
+ }
+ },
{ 0xaa00116, {
0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63,
0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5,
@@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = {
0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef,
}
},
+ { 0xaa00216, {
+ 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5,
+ 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08,
+ 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2,
+ 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1,
+ }
+ },
};
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index b3658d11e7b6..e8021d3e5882 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -41,8 +41,8 @@
#include "internal.h"
-static struct microcode_ops *microcode_ops;
-bool dis_ucode_ldr = true;
+static struct microcode_ops *microcode_ops;
+static bool dis_ucode_ldr = false;
bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV);
module_param(force_minrev, bool, S_IRUSR | S_IWUSR);
@@ -84,6 +84,9 @@ static bool amd_check_current_patch_level(void)
u32 lvl, dummy, i;
u32 *levels;
+ if (x86_cpuid_vendor() != X86_VENDOR_AMD)
+ return false;
+
native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
levels = final_levels;
@@ -95,27 +98,29 @@ static bool amd_check_current_patch_level(void)
return false;
}
-static bool __init check_loader_disabled_bsp(void)
+bool __init microcode_loader_disabled(void)
{
- static const char *__dis_opt_str = "dis_ucode_ldr";
- const char *cmdline = boot_command_line;
- const char *option = __dis_opt_str;
+ if (dis_ucode_ldr)
+ return true;
/*
- * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
- * completely accurate as xen pv guests don't see that CPUID bit set but
- * that's good enough as they don't land on the BSP path anyway.
+ * Disable when:
+ *
+ * 1) The CPU does not support CPUID.
+ *
+ * 2) Bit 31 in CPUID[1]:ECX is clear
+ * The bit is reserved for hypervisor use. This is still not
+ * completely accurate as XEN PV guests don't see that CPUID bit
+ * set, but that's good enough as they don't land on the BSP
+ * path anyway.
+ *
+ * 3) Certain AMD patch levels are not allowed to be
+ * overwritten.
*/
- if (native_cpuid_ecx(1) & BIT(31))
- return true;
-
- if (x86_cpuid_vendor() == X86_VENDOR_AMD) {
- if (amd_check_current_patch_level())
- return true;
- }
-
- if (cmdline_find_option_bool(cmdline, option) <= 0)
- dis_ucode_ldr = false;
+ if (!have_cpuid_p() ||
+ native_cpuid_ecx(1) & BIT(31) ||
+ amd_check_current_patch_level())
+ dis_ucode_ldr = true;
return dis_ucode_ldr;
}
@@ -125,7 +130,10 @@ void __init load_ucode_bsp(void)
unsigned int cpuid_1_eax;
bool intel = true;
- if (!have_cpuid_p())
+ if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0)
+ dis_ucode_ldr = true;
+
+ if (microcode_loader_disabled())
return;
cpuid_1_eax = native_cpuid_eax(1);
@@ -146,9 +154,6 @@ void __init load_ucode_bsp(void)
return;
}
- if (check_loader_disabled_bsp())
- return;
-
if (intel)
load_ucode_intel_bsp(&early_data);
else
@@ -159,6 +164,11 @@ void load_ucode_ap(void)
{
unsigned int cpuid_1_eax;
+ /*
+ * Can't use microcode_loader_disabled() here - .init section
+ * hell. It doesn't have to either - the BSP variant must've
+ * parsed cmdline already anyway.
+ */
if (dis_ucode_ldr)
return;
@@ -686,6 +696,8 @@ static int load_late_locked(void)
return load_late_stop_cpus(true);
case UCODE_NFOUND:
return -ENOENT;
+ case UCODE_OK:
+ return 0;
default:
return -EBADFD;
}
@@ -810,7 +822,7 @@ static int __init microcode_init(void)
struct cpuinfo_x86 *c = &boot_cpu_data;
int error;
- if (dis_ucode_ldr)
+ if (microcode_loader_disabled())
return -EINVAL;
if (c->x86_vendor == X86_VENDOR_INTEL)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 815fa67356a2..362cc71bbc86 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -74,7 +74,7 @@ void intel_collect_cpu_info(struct cpu_signature *sig)
sig->pf = 0;
sig->rev = intel_get_microcode_revision();
- if (x86_model(sig->sig) >= 5 || x86_family(sig->sig) > 6) {
+ if (IFM(x86_family(sig->sig), x86_model(sig->sig)) >= INTEL_PENTIUM_III_DESCHUTES) {
unsigned int val[2];
/* get processor flags from MSR 0x17 */
@@ -395,7 +395,7 @@ static int __init save_builtin_microcode(void)
if (xchg(&ucode_patch_va, NULL) != UCODE_BSP_LOADED)
return 0;
- if (dis_ucode_ldr || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ if (microcode_loader_disabled() || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return 0;
uci.mc = get_microcode_blob(&uci, true);
diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h
index 5df621752fef..50a9702ae4e2 100644
--- a/arch/x86/kernel/cpu/microcode/internal.h
+++ b/arch/x86/kernel/cpu/microcode/internal.h
@@ -94,7 +94,6 @@ static inline unsigned int x86_cpuid_family(void)
return x86_family(eax);
}
-extern bool dis_ucode_ldr;
extern bool force_minrev;
#ifdef CONFIG_CPU_SUP_AMD
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 7b29ebda024f..1ececfce7a46 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -591,7 +591,7 @@ static void get_fixed_ranges(mtrr_type *frs)
void mtrr_save_fixed_ranges(void *info)
{
- if (boot_cpu_has(X86_FEATURE_MTRR))
+ if (mtrr_state.have_fixed)
get_fixed_ranges(mtrr_state.fixed_ranges);
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c84c30188fdf..bc4993aa41ed 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -49,6 +49,8 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+ { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 },
+ { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 9ace84486499..147ea26dfdad 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -719,6 +719,8 @@ int arch_memory_failure(unsigned long pfn, int flags)
goto out;
}
+ sgx_unmark_page_reclaimable(page);
+
/*
* TBD: Add additional plumbing to enable pre-emptive
* action for asynchronous poison notification. Until
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 8f62e0666dea..8abe60919e2f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -119,7 +119,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
{
struct xregs_state __user *x = buf;
struct _fpx_sw_bytes sw_bytes = {};
- u32 xfeatures;
int err;
/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
@@ -133,12 +132,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
(__u32 __user *)(buf + fpstate->user_size));
/*
- * Read the xfeatures which we copied (directly from the cpu or
- * from the state in task struct) to the user buffers.
- */
- err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
-
- /*
* For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context. This will
* enable us capturing any changes(during sigreturn) to
@@ -149,9 +142,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
- xfeatures |= XFEATURE_MASK_FPSSE;
-
- err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
+ err |= set_xfeature_in_sigframe(x, XFEATURE_MASK_FPSSE);
return !err;
}
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index aa16f1a1bbcf..f7d8f3d73599 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -69,21 +69,31 @@ static inline u64 xfeatures_mask_independent(void)
return fpu_kernel_cfg.independent_features;
}
+static inline int set_xfeature_in_sigframe(struct xregs_state __user *xbuf, u64 mask)
+{
+ u64 xfeatures;
+ int err;
+
+ /* Read the xfeatures value already saved in the user buffer */
+ err = __get_user(xfeatures, &xbuf->header.xfeatures);
+ xfeatures |= mask;
+ err |= __put_user(xfeatures, &xbuf->header.xfeatures);
+
+ return err;
+}
+
/*
* Update the value of PKRU register that was already pushed onto the signal frame.
*/
-static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru)
+static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru)
{
- u64 xstate_bv;
int err;
if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
return 0;
/* Mark PKRU as in-use so that it is restored correctly. */
- xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU;
-
- err = __put_user(xstate_bv, &buf->header.xfeatures);
+ err = set_xfeature_in_sigframe(buf, XFEATURE_MASK_PKRU);
if (err)
return err;
@@ -304,7 +314,7 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkr
clac();
if (!err)
- err = update_pkru_in_sigframe(buf, mask, pkru);
+ err = update_pkru_in_sigframe(buf, pkru);
return err;
}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 8da0e66ca22d..bfab966ea56e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -354,7 +354,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
goto fail;
ip = trampoline + size;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk_at(ip))
__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
else
memcpy(ip, retq, sizeof(retq));
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index de001b2146ab..375f2d7f1762 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -145,10 +145,6 @@ void __init __no_stack_protector mk_early_pgtbl_32(void)
*ptr = (unsigned long)ptep + PAGE_OFFSET;
#ifdef CONFIG_MICROCODE_INITRD32
- /* Running on a hypervisor? */
- if (native_cpuid_ecx(1) & BIT(31))
- return;
-
params = (struct boot_params *)__pa_nodebug(&boot_params);
if (!params->hdr.ramdisk_size || !params->hdr.ramdisk_image)
return;
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 80e262bb627f..cb9852ad6098 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -46,7 +46,8 @@ bool __init pit_timer_init(void)
* VMMs otherwise steal CPU time just to pointlessly waggle
* the (masked) IRQ.
*/
- clockevent_i8253_disable();
+ scoped_guard(irq)
+ clockevent_i8253_disable();
return false;
}
clockevent_i8253_init(true);
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index e2fab3ceb09f..9a101150376d 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct *tsk)
set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
}
-static void task_update_io_bitmap(struct task_struct *tsk)
+static void task_update_io_bitmap(void)
{
+ struct task_struct *tsk = current;
struct thread_struct *t = &tsk->thread;
if (t->iopl_emul == 3 || t->io_bitmap) {
@@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *tsk)
struct io_bitmap *iobm = tsk->thread.io_bitmap;
tsk->thread.io_bitmap = NULL;
- task_update_io_bitmap(tsk);
+ /*
+ * Don't touch the TSS when invoked on a failed fork(). TSS
+ * reflects the state of @current and not the state of @tsk.
+ */
+ if (tsk == current)
+ task_update_io_bitmap();
if (iobm && refcount_dec_and_test(&iobm->refcnt))
kfree(iobm);
}
@@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
}
t->iopl_emul = level;
- task_update_io_bitmap(current);
-
+ task_update_io_bitmap();
return 0;
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index feca4f20b06a..85fa2db38dc4 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -414,7 +414,7 @@ static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs)
bool handled = false;
for (i = 0; i < 4; i++)
- pir_copy[i] = pir[i];
+ pir_copy[i] = READ_ONCE(pir[i]);
for (i = 0; i < 4; i++) {
if (!pir_copy[i])
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 837450b6e882..1e231dac61e3 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -251,6 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr,
ibt_endbr = s;
}
+ its_init_mod(me);
+
if (retpolines || cfi) {
void *rseg = NULL, *cseg = NULL;
unsigned int rsize = 0, csize = 0;
@@ -271,6 +273,9 @@ int module_finalize(const Elf_Ehdr *hdr,
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
+
+ its_fini_mod(me);
+
if (returns) {
void *rseg = (void *)returns->sh_addr;
apply_returns(rseg, rseg + returns->sh_size);
@@ -318,4 +323,5 @@ int module_finalize(const Elf_Ehdr *hdr,
void module_arch_cleanup(struct module *mod)
{
alternatives_smp_module_del(mod);
+ its_free_mod(mod);
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ed163c8c8604..9a95d00f1423 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -40,8 +40,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
+/*
+ * An emergency handler can be set in any context including NMI
+ */
struct nmi_desc {
raw_spinlock_t lock;
+ nmi_handler_t emerg_handler;
struct list_head head;
};
@@ -132,9 +136,22 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration)
static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
struct nmi_desc *desc = nmi_to_desc(type);
+ nmi_handler_t ehandler;
struct nmiaction *a;
int handled=0;
+ /*
+ * Call the emergency handler, if set
+ *
+ * In the case of crash_nmi_callback() emergency handler, it will
+ * return in the case of the crashing CPU to enable it to complete
+ * other necessary crashing actions ASAP. Other handlers in the
+ * linked list won't need to be run.
+ */
+ ehandler = desc->emerg_handler;
+ if (ehandler)
+ return ehandler(type, regs);
+
rcu_read_lock();
/*
@@ -224,6 +241,31 @@ void unregister_nmi_handler(unsigned int type, const char *name)
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
+/**
+ * set_emergency_nmi_handler - Set emergency handler
+ * @type: NMI type
+ * @handler: the emergency handler to be stored
+ *
+ * Set an emergency NMI handler which, if set, will preempt all the other
+ * handlers in the linked list. If a NULL handler is passed in, it will clear
+ * it. It is expected that concurrent calls to this function will not happen
+ * or the system is screwed beyond repair.
+ */
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler)
+{
+ struct nmi_desc *desc = nmi_to_desc(type);
+
+ if (WARN_ON_ONCE(desc->emerg_handler == handler))
+ return;
+ desc->emerg_handler = handler;
+
+ /*
+ * Ensure the emergency handler is visible to other CPUs before
+ * function return
+ */
+ smp_wmb();
+}
+
static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c7ce3655b707..4c9c98c5deab 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -180,6 +180,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
frame->ret_addr = (unsigned long) ret_from_fork_asm;
p->thread.sp = (unsigned long) fork_frame;
p->thread.io_bitmap = NULL;
+ clear_tsk_thread_flag(p, TIF_IO_BITMAP);
p->thread.iopl_warn = 0;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -468,6 +469,11 @@ void native_tss_update_io_bitmap(void)
} else {
struct io_bitmap *iobm = t->io_bitmap;
+ if (WARN_ON_ONCE(!iobm)) {
+ clear_thread_flag(TIF_IO_BITMAP);
+ native_tss_invalidate_io_bitmap();
+ }
+
/*
* Only copy bitmap data when the sequence number differs. The
* update time is accounted to the incoming task.
@@ -905,19 +911,24 @@ static __init bool prefer_mwait_c1_over_halt(void)
*/
static __cpuidle void mwait_idle(void)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (!current_set_polling_and_test()) {
- if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
- mb(); /* quirk */
- clflush((void *)&current_thread_info()->flags);
- mb(); /* quirk */
- }
+ const void *addr = &current_thread_info()->flags;
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- if (!need_resched()) {
- __sti_mwait(0, 0);
- raw_local_irq_disable();
- }
+ alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
+ __monitor(addr, 0, 0);
+ if (need_resched())
+ goto out;
+
+ __sti_mwait(0, 0);
+ raw_local_irq_disable();
}
+
+out:
__current_clr_polling();
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index dc1dd3f3e67f..9aaac1f9f45b 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -926,15 +926,11 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
shootdown_callback = callback;
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- /* Would it be better to replace the trap vector here? */
- if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
- NMI_FLAG_FIRST, "crash"))
- return; /* Return what? */
+
/*
- * Ensure the new callback function is set before sending
- * out the NMI
+ * Set emergency handler to preempt other handlers.
*/
- wmb();
+ set_emergency_nmi_handler(NMI_LOCAL, crash_nmi_callback);
apic_send_IPI_allbutself(NMI_VECTOR);
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 98123ff10506..42bbc42bd350 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn)
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
@@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn)
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
if (!access_ok(frame, sizeof(*frame)))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ee9453891901..d483b585c6c6 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
@@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
sigset_t set;
unsigned long uc_flags;
+ prevent_single_step_upon_eretu(regs);
+
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
if (!access_ok(frame, sizeof(*frame)))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f1fac08fdef2..2c451de702c8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -681,9 +681,9 @@ static void __init smp_quirk_init_udelay(void)
return;
/* if modern processor, use no delay */
- if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
- ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) ||
- ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86_vfm >= INTEL_PENTIUM_PRO) ||
+ (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && boot_cpu_data.x86 >= 0x18) ||
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && boot_cpu_data.x86 >= 0xF)) {
init_udelay = 0;
return;
}
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 9e51242ed125..aae909d4ed78 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -81,7 +81,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
break;
case RET:
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk_at(insn))
code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
else
code = &retinsn;
@@ -90,7 +90,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
case JCC:
if (!func) {
func = __static_call_return;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk())
func = x86_return_thunk;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5e3e036e6e53..243f3bc9b4dc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -94,10 +94,17 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
/*
* Check for UD1 or UD2, accounting for Address Size Override Prefixes.
- * If it's a UD1, get the ModRM byte to pass along to UBSan.
+ * If it's a UD1, further decode to determine its use:
+ *
+ * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax
+ * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax
+ * static_call: 0f b9 cc ud1 %esp,%ecx
+ *
+ * Notably UBSAN uses EAX, static_call uses ECX.
*/
-__always_inline int decode_bug(unsigned long addr, u32 *imm)
+__always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
{
+ unsigned long start = addr;
u8 v;
if (addr < TASK_SIZE_MAX)
@@ -110,24 +117,42 @@ __always_inline int decode_bug(unsigned long addr, u32 *imm)
return BUG_NONE;
v = *(u8 *)(addr++);
- if (v == SECOND_BYTE_OPCODE_UD2)
+ if (v == SECOND_BYTE_OPCODE_UD2) {
+ *len = addr - start;
return BUG_UD2;
+ }
- if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1)
+ if (v != SECOND_BYTE_OPCODE_UD1)
return BUG_NONE;
- /* Retrieve the immediate (type value) for the UBSAN UD1 */
- v = *(u8 *)(addr++);
- if (X86_MODRM_RM(v) == 4)
- addr++;
-
*imm = 0;
- if (X86_MODRM_MOD(v) == 1)
- *imm = *(u8 *)addr;
- else if (X86_MODRM_MOD(v) == 2)
- *imm = *(u32 *)addr;
- else
- WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v));
+ v = *(u8 *)(addr++); /* ModRM */
+
+ if (X86_MODRM_MOD(v) != 3 && X86_MODRM_RM(v) == 4)
+ addr++; /* SIB */
+
+ /* Decode immediate, if present */
+ switch (X86_MODRM_MOD(v)) {
+ case 0: if (X86_MODRM_RM(v) == 5)
+ addr += 4; /* RIP + disp32 */
+ break;
+
+ case 1: *imm = *(s8 *)addr;
+ addr += 1;
+ break;
+
+ case 2: *imm = *(s32 *)addr;
+ addr += 4;
+ break;
+
+ case 3: break;
+ }
+
+ /* record instruction length */
+ *len = addr - start;
+
+ if (X86_MODRM_REG(v) == 0) /* EAX */
+ return BUG_UD1_UBSAN;
return BUG_UD1;
}
@@ -258,10 +283,10 @@ static inline void handle_invalid_op(struct pt_regs *regs)
static noinstr bool handle_bug(struct pt_regs *regs)
{
bool handled = false;
- int ud_type;
- u32 imm;
+ int ud_type, ud_len;
+ s32 ud_imm;
- ud_type = decode_bug(regs->ip, &imm);
+ ud_type = decode_bug(regs->ip, &ud_imm, &ud_len);
if (ud_type == BUG_NONE)
return handled;
@@ -281,15 +306,28 @@ static noinstr bool handle_bug(struct pt_regs *regs)
*/
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_enable();
- if (ud_type == BUG_UD2) {
+
+ switch (ud_type) {
+ case BUG_UD2:
if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
- regs->ip += LEN_UD2;
+ regs->ip += ud_len;
handled = true;
}
- } else if (IS_ENABLED(CONFIG_UBSAN_TRAP)) {
- pr_crit("%s at %pS\n", report_ubsan_failure(regs, imm), (void *)regs->ip);
+ break;
+
+ case BUG_UD1_UBSAN:
+ if (IS_ENABLED(CONFIG_UBSAN_TRAP)) {
+ pr_crit("%s at %pS\n",
+ report_ubsan_failure(regs, ud_imm),
+ (void *)regs->ip);
+ }
+ break;
+
+ default:
+ break;
}
+
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_disable();
instrumentation_end();
@@ -939,24 +977,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
#endif
}
-static __always_inline unsigned long debug_read_clear_dr6(void)
+static __always_inline unsigned long debug_read_reset_dr6(void)
{
unsigned long dr6;
+ get_debugreg(dr6, 6);
+ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
+
/*
* The Intel SDM says:
*
- * Certain debug exceptions may clear bits 0-3. The remaining
- * contents of the DR6 register are never cleared by the
- * processor. To avoid confusion in identifying debug
- * exceptions, debug handlers should clear the register before
- * returning to the interrupted task.
+ * Certain debug exceptions may clear bits 0-3 of DR6.
+ *
+ * BLD induced #DB clears DR6.BLD and any other debug
+ * exception doesn't modify DR6.BLD.
*
- * Keep it simple: clear DR6 immediately.
+ * RTM induced #DB clears DR6.RTM and any other debug
+ * exception sets DR6.RTM.
+ *
+ * To avoid confusion in identifying debug exceptions,
+ * debug handlers should set DR6.BLD and DR6.RTM, and
+ * clear other DR6 bits before returning.
+ *
+ * Keep it simple: write DR6 with its architectural reset
+ * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately.
*/
- get_debugreg(dr6, 6);
set_debugreg(DR6_RESERVED, 6);
- dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
return dr6;
}
@@ -1156,13 +1202,13 @@ out:
/* IST stack entry */
DEFINE_IDTENTRY_DEBUG(exc_debug)
{
- exc_debug_kernel(regs, debug_read_clear_dr6());
+ exc_debug_kernel(regs, debug_read_reset_dr6());
}
/* User entry, runs on regular task stack */
DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
{
- exc_debug_user(regs, debug_read_clear_dr6());
+ exc_debug_user(regs, debug_read_reset_dr6());
}
#ifdef CONFIG_X86_FRED
@@ -1181,7 +1227,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
{
/*
* FRED #DB stores DR6 on the stack in the format which
- * debug_read_clear_dr6() returns for the IDT entry points.
+ * debug_read_reset_dr6() returns for the IDT entry points.
*/
unsigned long dr6 = fred_event_data(regs);
@@ -1196,7 +1242,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug)
/* 32 bit does not have separate entry points. */
DEFINE_IDTENTRY_RAW(exc_debug)
{
- unsigned long dr6 = debug_read_clear_dr6();
+ unsigned long dr6 = debug_read_reset_dr6();
if (user_mode(regs))
exc_debug_user(regs, dr6);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index feb8102a9ca7..e2567c8f6bac 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -530,4 +530,14 @@ INIT_PER_CPU(irq_stack_backing_store);
"SRSO function pair won't alias");
#endif
+#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)
+. = ASSERT(__x86_indirect_its_thunk_rax & 0x20, "__x86_indirect_thunk_rax not in second half of cacheline");
+. = ASSERT(((__x86_indirect_its_thunk_rcx - __x86_indirect_its_thunk_rax) % 64) == 0, "Indirect thunks are not cacheline apart");
+. = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array");
+#endif
+
+#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)
+. = ASSERT(its_return_thunk & 0x20, "its_return_thunk not in second half of cacheline");
+#endif
+
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c92e43f2d0c4..02196db26a08 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -814,6 +814,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_mask(CPUID_8000_0021_EAX,
F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ |
+ F(VERW_CLEAR) |
F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ |
F(WRMSR_XX_BASE_NS)
);
@@ -826,6 +827,10 @@ void kvm_set_cpu_caps(void)
F(PERFMON_V2)
);
+ kvm_cpu_cap_init_kvm_defined(CPUID_8000_0021_ECX,
+ F(TSA_SQ_NO) | F(TSA_L1_NO)
+ );
+
/*
* Synthesize "LFENCE is serializing" into the AMD-defined entry in
* KVM's supported CPUID if the feature is reported as supported by the
@@ -1376,8 +1381,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x80000021:
- entry->ebx = entry->ecx = entry->edx = 0;
+ entry->ebx = entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0021_EAX);
+ cpuid_entry_override(entry, CPUID_8000_0021_ECX);
break;
/* AMD Extended Performance Monitoring and Debug */
case 0x80000022: {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 9242c0649adf..4607610ef062 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7616,9 +7616,30 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
}
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
+ int level)
+{
+ return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG;
+}
+
+static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
+ int level)
+{
+ lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG;
+}
+
+static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
+ int level)
+{
+ lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG;
+}
+
bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
struct kvm_gfn_range *range)
{
+ struct kvm_memory_slot *slot = range->slot;
+ int level;
+
/*
* Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only
* supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM
@@ -7633,27 +7654,49 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
return false;
- return kvm_unmap_gfn_range(kvm, range);
-}
+ if (WARN_ON_ONCE(range->end <= range->start))
+ return false;
-static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
- int level)
-{
- return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG;
-}
+ /*
+ * If the head and tail pages of the range currently allow a hugepage,
+ * i.e. reside fully in the slot and don't have mixed attributes, then
+ * add each corresponding hugepage range to the ongoing invalidation,
+ * e.g. to prevent KVM from creating a hugepage in response to a fault
+ * for a gfn whose attributes aren't changing. Note, only the range
+ * of gfns whose attributes are being modified needs to be explicitly
+ * unmapped, as that will unmap any existing hugepages.
+ */
+ for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+ gfn_t start = gfn_round_for_level(range->start, level);
+ gfn_t end = gfn_round_for_level(range->end - 1, level);
+ gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level);
-static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
- int level)
-{
- lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG;
-}
+ if ((start != range->start || start + nr_pages > range->end) &&
+ start >= slot->base_gfn &&
+ start + nr_pages <= slot->base_gfn + slot->npages &&
+ !hugepage_test_mixed(slot, start, level))
+ kvm_mmu_invalidate_range_add(kvm, start, start + nr_pages);
-static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
- int level)
-{
- lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG;
+ if (end == start)
+ continue;
+
+ if ((end + nr_pages) > range->end &&
+ (end + nr_pages) <= (slot->base_gfn + slot->npages) &&
+ !hugepage_test_mixed(slot, end, level))
+ kvm_mmu_invalidate_range_add(kvm, end, end + nr_pages);
+ }
+
+ /* Unmap the old attribute page. */
+ if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE)
+ range->attr_filter = KVM_FILTER_SHARED;
+ else
+ range->attr_filter = KVM_FILTER_PRIVATE;
+
+ return kvm_unmap_gfn_range(kvm, range);
}
+
+
static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, int level, unsigned long attrs)
{
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 0d17d6b70639..0ea847b82354 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -18,6 +18,7 @@ enum kvm_only_cpuid_leafs {
CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
CPUID_24_0_EBX,
+ CPUID_8000_0021_ECX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -68,6 +69,10 @@ enum kvm_only_cpuid_leafs {
/* CPUID level 0x80000022 (EAX) */
#define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0)
+/* CPUID level 0x80000021 (ECX) */
+#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1)
+#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2)
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -98,6 +103,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX},
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
[CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX},
+ [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX},
};
/*
@@ -137,6 +143,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO);
+ KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 85241c0c7f56..b0cab215b08f 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -131,6 +131,7 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
kvm_mmu_reset_context(vcpu);
}
+EXPORT_SYMBOL_GPL(kvm_smm_changed);
void process_smi(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 4b74ea91f4e6..63dea8ecd7ef 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -820,7 +820,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
* Allocating new amd_iommu_pi_data, which will get
* add to the per-vcpu ir_list.
*/
- ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
+ ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT);
if (!ir) {
ret = -ENOMEM;
goto out;
@@ -896,6 +896,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
{
struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_routing_table *irq_rt;
+ bool enable_remapped_mode = true;
int idx, ret = 0;
if (!kvm_arch_has_assigned_device(kvm) ||
@@ -933,6 +934,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
kvm_vcpu_apicv_active(&svm->vcpu)) {
struct amd_iommu_pi_data pi;
+ enable_remapped_mode = false;
+
/* Try to enable guest_mode in IRTE */
pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
AVIC_HPA_MASK);
@@ -951,33 +954,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
*/
if (!ret && pi.is_guest_mode)
svm_ir_list_add(svm, &pi);
- } else {
- /* Use legacy mode in IRTE */
- struct amd_iommu_pi_data pi;
-
- /**
- * Here, pi is used to:
- * - Tell IOMMU to use legacy mode for this interrupt.
- * - Retrieve ga_tag of prior interrupt remapping data.
- */
- pi.prev_ga_tag = 0;
- pi.is_guest_mode = false;
- ret = irq_set_vcpu_affinity(host_irq, &pi);
-
- /**
- * Check if the posted interrupt was previously
- * setup with the guest_mode by checking if the ga_tag
- * was cached. If so, we need to clean up the per-vcpu
- * ir_list.
- */
- if (!ret && pi.prev_ga_tag) {
- int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
- struct kvm_vcpu *vcpu;
-
- vcpu = kvm_get_vcpu_by_id(kvm, id);
- if (vcpu)
- svm_ir_list_del(to_svm(vcpu), &pi);
- }
}
if (!ret && svm) {
@@ -993,6 +969,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
}
ret = 0;
+ if (enable_remapped_mode) {
+ /* Use legacy mode in IRTE */
+ struct amd_iommu_pi_data pi;
+
+ /**
+ * Here, pi is used to:
+ * - Tell IOMMU to use legacy mode for this interrupt.
+ * - Retrieve ga_tag of prior interrupt remapping data.
+ */
+ pi.prev_ga_tag = 0;
+ pi.is_guest_mode = false;
+ ret = irq_set_vcpu_affinity(host_irq, &pi);
+
+ /**
+ * Check if the posted interrupt was previously
+ * setup with the guest_mode by checking if the ga_tag
+ * was cached. If so, we need to clean up the per-vcpu
+ * ir_list.
+ */
+ if (!ret && pi.prev_ga_tag) {
+ int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kvm_get_vcpu_by_id(kvm, id);
+ if (vcpu)
+ svm_ir_list_del(to_svm(vcpu), &pi);
+ }
+ }
out:
srcu_read_unlock(&kvm->irq_srcu, idx);
return ret;
@@ -1199,6 +1203,12 @@ bool avic_hardware_setup(void)
return false;
}
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
+ !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) {
+ pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n");
+ return false;
+ }
+
if (boot_cpu_has(X86_FEATURE_AVIC)) {
pr_info("AVIC enabled\n");
} else if (force_avic) {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e39ab7c0be4e..1f42a71b15c0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1483,7 +1483,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb)
{
int i;
- for_each_online_cpu(i)
+ for_each_possible_cpu(i)
cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
}
@@ -2222,6 +2222,10 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
*/
if (!sev_es_guest(vcpu->kvm)) {
clear_page(svm->vmcb);
+#ifdef CONFIG_KVM_SMM
+ if (is_smm(vcpu))
+ kvm_smm_changed(vcpu, false);
+#endif
kvm_vcpu_reset(vcpu, true);
}
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 0c61153b275f..235c4af6b692 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
mov VCPU_RDI(%_ASM_DI), %_ASM_DI
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+
/* Enter guest mode */
3: vmrun %_ASM_AX
4:
@@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
mov SVM_current_vmcb(%rdi), %rax
mov KVM_VMCB_pa(%rax), %rax
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+
/* Enter guest mode */
1: vmrun %rax
2:
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index ec08fa3caf43..6b803324a981 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -274,6 +274,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
{
struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_routing_table *irq_rt;
+ bool enable_remapped_mode = true;
struct kvm_lapic_irq irq;
struct kvm_vcpu *vcpu;
struct vcpu_data vcpu_info;
@@ -312,21 +313,8 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
kvm_set_msi_irq(kvm, e, &irq);
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
- !kvm_irq_is_postable(&irq)) {
- /*
- * Make sure the IRTE is in remapped mode if
- * we don't handle it in posted mode.
- */
- ret = irq_set_vcpu_affinity(host_irq, NULL);
- if (ret < 0) {
- printk(KERN_INFO
- "failed to back to remapped mode, irq: %u\n",
- host_irq);
- goto out;
- }
-
+ !kvm_irq_is_postable(&irq))
continue;
- }
vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
vcpu_info.vector = irq.vector;
@@ -334,11 +322,12 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
vcpu_info.vector, vcpu_info.pi_desc_addr, set);
- if (set)
- ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
- else
- ret = irq_set_vcpu_affinity(host_irq, NULL);
+ if (!set)
+ continue;
+ enable_remapped_mode = false;
+
+ ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
if (ret < 0) {
printk(KERN_INFO "%s: failed to update PI IRTE\n",
__func__);
@@ -346,6 +335,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
}
}
+ if (enable_remapped_mode)
+ ret = irq_set_vcpu_affinity(host_irq, NULL);
+
ret = 0;
out:
srcu_read_unlock(&kvm->irq_srcu, idx);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a3d45b01dbad..029fbf3791f1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -770,8 +770,11 @@ void vmx_emergency_disable_virtualization_cpu(void)
return;
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
- loaded_vmcss_on_cpu_link)
+ loaded_vmcss_on_cpu_link) {
vmcs_clear(v->vmcs);
+ if (v->shadow_vmcs)
+ vmcs_clear(v->shadow_vmcs);
+ }
kvm_cpu_vmxoff();
}
@@ -7310,7 +7313,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
- mds_clear_cpu_buffers();
+ x86_clear_cpu_buffers();
vmx_disable_fb_clear(vmx);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1a4ca471d63d..f378d479fea3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1623,7 +1623,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO | ARCH_CAP_ITS_NO)
static u64 kvm_get_arch_capabilities(void)
{
@@ -1657,6 +1657,8 @@ static u64 kvm_get_arch_capabilities(void)
data |= ARCH_CAP_MDS_NO;
if (!boot_cpu_has_bug(X86_BUG_RFDS))
data |= ARCH_CAP_RFDS_NO;
+ if (!boot_cpu_has_bug(X86_BUG_ITS))
+ data |= ARCH_CAP_ITS_NO;
if (!boot_cpu_has(X86_FEATURE_RTM)) {
/*
@@ -13555,15 +13557,22 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm *kvm = irqfd->kvm;
int ret;
- irqfd->producer = prod;
kvm_arch_start_assignment(irqfd->kvm);
+
+ spin_lock_irq(&kvm->irqfds.lock);
+ irqfd->producer = prod;
+
ret = kvm_x86_call(pi_update_irte)(irqfd->kvm,
prod->irq, irqfd->gsi, 1);
if (ret)
kvm_arch_end_assignment(irqfd->kvm);
+ spin_unlock_irq(&kvm->irqfds.lock);
+
+
return ret;
}
@@ -13573,9 +13582,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
int ret;
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm *kvm = irqfd->kvm;
WARN_ON(irqfd->producer != prod);
- irqfd->producer = NULL;
/*
* When producer of consumer is unregistered, we change back to
@@ -13583,12 +13592,18 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
* when the irq is masked/disabled or the consumer side (KVM
* int this case doesn't want to receive the interrupts.
*/
+ spin_lock_irq(&kvm->irqfds.lock);
+ irqfd->producer = NULL;
+
ret = kvm_x86_call(pi_update_irte)(irqfd->kvm,
prod->irq, irqfd->gsi, 0);
if (ret)
printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
" fails: %d\n", irqfd->consumer.token, ret);
+ spin_unlock_irq(&kvm->irqfds.lock);
+
+
kvm_arch_end_assignment(irqfd->kvm);
}
@@ -13601,7 +13616,8 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
struct kvm_kernel_irq_routing_entry *new)
{
- if (new->type != KVM_IRQ_ROUTING_MSI)
+ if (old->type != KVM_IRQ_ROUTING_MSI ||
+ new->type != KVM_IRQ_ROUTING_MSI)
return true;
return !!memcmp(&old->msi, &new->msi, sizeof(new->msi));
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 391059b2c6fb..614fb9aee2ff 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -366,6 +366,45 @@ SYM_FUNC_END(call_depth_return_thunk)
#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */
+#ifdef CONFIG_MITIGATION_ITS
+
+.macro ITS_THUNK reg
+
+SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL)
+ UNWIND_HINT_UNDEFINED
+ ANNOTATE_NOENDBR
+ ANNOTATE_RETPOLINE_SAFE
+ jmp *%\reg
+ int3
+ .align 32, 0xcc /* fill to the end of the line */
+ .skip 32, 0xcc /* skip to the next upper half */
+.endm
+
+/* ITS mitigation requires thunks be aligned to upper half of cacheline */
+.align 64, 0xcc
+.skip 32, 0xcc
+SYM_CODE_START(__x86_indirect_its_thunk_array)
+
+#define GEN(reg) ITS_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+ .align 64, 0xcc
+SYM_CODE_END(__x86_indirect_its_thunk_array)
+
+.align 64, 0xcc
+.skip 32, 0xcc
+SYM_CODE_START(its_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_CODE_END(its_return_thunk)
+EXPORT_SYMBOL(its_return_thunk)
+
+#endif /* CONFIG_MITIGATION_ITS */
+
/*
* This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it.
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index caedb3ef6688..cd3fd5155f6e 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -35,7 +35,7 @@
# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
#
-# REX2 Prefix
+# REX2 Prefix Superscripts
# - (!REX2): REX2 is not allowed
# - (REX2): REX2 variant e.g. JMPABS
@@ -286,10 +286,10 @@ df: ESC
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
-e0: LOOPNE/LOOPNZ Jb (f64) (!REX2)
-e1: LOOPE/LOOPZ Jb (f64) (!REX2)
-e2: LOOP Jb (f64) (!REX2)
-e3: JrCXZ Jb (f64) (!REX2)
+e0: LOOPNE/LOOPNZ Jb (f64),(!REX2)
+e1: LOOPE/LOOPZ Jb (f64),(!REX2)
+e2: LOOP Jb (f64),(!REX2)
+e3: JrCXZ Jb (f64),(!REX2)
e4: IN AL,Ib (!REX2)
e5: IN eAX,Ib (!REX2)
e6: OUT Ib,AL (!REX2)
@@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2)
# in "near" jumps and calls is 16-bit. For CALL,
# push of return address is 16-bit wide, RSP is decremented by 2
# but is not truncated to 16 bits, unlike RIP.
-e8: CALL Jz (f64) (!REX2)
-e9: JMP-near Jz (f64) (!REX2)
-ea: JMP-far Ap (i64) (!REX2)
-eb: JMP-short Jb (f64) (!REX2)
+e8: CALL Jz (f64),(!REX2)
+e9: JMP-near Jz (f64),(!REX2)
+ea: JMP-far Ap (i64),(!REX2)
+eb: JMP-short Jb (f64),(!REX2)
ec: IN AL,DX (!REX2)
ed: IN eAX,DX (!REX2)
ee: OUT DX,AL (!REX2)
@@ -478,22 +478,22 @@ AVXcode: 1
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
-80: JO Jz (f64) (!REX2)
-81: JNO Jz (f64) (!REX2)
-82: JB/JC/JNAE Jz (f64) (!REX2)
-83: JAE/JNB/JNC Jz (f64) (!REX2)
-84: JE/JZ Jz (f64) (!REX2)
-85: JNE/JNZ Jz (f64) (!REX2)
-86: JBE/JNA Jz (f64) (!REX2)
-87: JA/JNBE Jz (f64) (!REX2)
-88: JS Jz (f64) (!REX2)
-89: JNS Jz (f64) (!REX2)
-8a: JP/JPE Jz (f64) (!REX2)
-8b: JNP/JPO Jz (f64) (!REX2)
-8c: JL/JNGE Jz (f64) (!REX2)
-8d: JNL/JGE Jz (f64) (!REX2)
-8e: JLE/JNG Jz (f64) (!REX2)
-8f: JNLE/JG Jz (f64) (!REX2)
+80: JO Jz (f64),(!REX2)
+81: JNO Jz (f64),(!REX2)
+82: JB/JC/JNAE Jz (f64),(!REX2)
+83: JAE/JNB/JNC Jz (f64),(!REX2)
+84: JE/JZ Jz (f64),(!REX2)
+85: JNE/JNZ Jz (f64),(!REX2)
+86: JBE/JNA Jz (f64),(!REX2)
+87: JA/JNBE Jz (f64),(!REX2)
+88: JS Jz (f64),(!REX2)
+89: JNS Jz (f64),(!REX2)
+8a: JP/JPE Jz (f64),(!REX2)
+8b: JNP/JPO Jz (f64),(!REX2)
+8c: JL/JNGE Jz (f64),(!REX2)
+8d: JNL/JGE Jz (f64),(!REX2)
+8e: JLE/JNG Jz (f64),(!REX2)
+8f: JNLE/JG Jz (f64),(!REX2)
# 0x0f 0x90-0x9f
90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
@@ -996,8 +996,8 @@ AVXcode: 4
83: Grp1 Ev,Ib (1A),(es)
# CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL,
# CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ
-84: CTESTSCC (ev)
-85: CTESTSCC (es) | CTESTSCC (66),(es)
+84: CTESTSCC Eb,Gb (ev)
+85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es)
88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es)
8f: POP2 Bq,Rq (000),(11B),(ev)
a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 101725c149c4..9cbc1e6057d3 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -645,8 +645,13 @@ static void __init memory_map_top_down(unsigned long map_start,
*/
addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start,
map_end);
- memblock_phys_free(addr, PMD_SIZE);
- real_end = addr + PMD_SIZE;
+ if (!addr) {
+ pr_warn("Failed to release memory for alloc_low_pages()");
+ real_end = max(map_start, ALIGN_DOWN(map_end, PMD_SIZE));
+ } else {
+ memblock_phys_free(addr, PMD_SIZE);
+ real_end = addr + PMD_SIZE;
+ }
/* step_size need to be small so pgt_buf from BRK could cover it */
step_size = PMD_SIZE;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ff253648706f..d8853afd314b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -967,9 +967,18 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
ret = __add_pages(nid, start_pfn, nr_pages, params);
WARN_ON_ONCE(ret);
- /* update max_pfn, max_low_pfn and high_memory */
- update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
- nr_pages << PAGE_SHIFT);
+ /*
+ * Special case: add_pages() is called by memremap_pages() for adding device
+ * private pages. Do not bump up max_pfn in the device private path,
+ * because max_pfn changes affect dma_addressing_limited().
+ *
+ * dma_addressing_limited() returning true when max_pfn is the device's
+ * addressable memory can force device drivers to use bounce buffers
+ * and impact their performance negatively:
+ */
+ if (!params->pgmap)
+ /* update max_pfn, max_low_pfn and high_memory */
+ update_end_of_memory_vars(start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
return ret;
}
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 230f1dee4f09..e0b0ec0f8245 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -109,8 +109,14 @@ void __init kernel_randomize_memory(void)
memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
- /* Adapt physical memory region size based on available memory */
- if (memory_tb < kaslr_regions[0].size_tb)
+ /*
+ * Adapt physical memory region size based on available memory,
+ * except when CONFIG_PCI_P2PDMA is enabled. P2PDMA exposes the
+ * device BAR space assuming the direct map space is large enough
+ * for creating a ZONE_DEVICE mapping in the direct map corresponding
+ * to the physical BAR address.
+ */
+ if (!IS_ENABLED(CONFIG_PCI_P2PDMA) && (memory_tb < kaslr_regions[0].size_tb))
kaslr_regions[0].size_tb = memory_tb;
/*
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 00ffa74d0dd0..8629d90fdcd9 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -389,9 +389,9 @@ static void cond_mitigation(struct task_struct *next)
prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);
/*
- * Avoid user/user BTB poisoning by flushing the branch predictor
- * when switching between processes. This stops one process from
- * doing Spectre-v2 attacks on another.
+ * Avoid user->user BTB/RSB poisoning by flushing them when switching
+ * between processes. This stops one process from doing Spectre-v2
+ * attacks on another.
*
* Both, the conditional and the always IBPB mode use the mm
* pointer to avoid the IBPB when switching between tasks of the
@@ -624,7 +624,11 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
- /* Let nmi_uaccess_okay() know that we're changing CR3. */
+ /*
+ * Indicate that CR3 is about to change. nmi_uaccess_okay()
+ * and others are sensitive to the window where mm_cpumask(),
+ * CR3 and cpu_tlbstate.loaded_mm are not all in sync.
+ */
this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
barrier();
}
@@ -895,8 +899,16 @@ done:
static bool should_flush_tlb(int cpu, void *data)
{
+ struct mm_struct *loaded_mm = per_cpu(cpu_tlbstate.loaded_mm, cpu);
struct flush_tlb_info *info = data;
+ /*
+ * Order the 'loaded_mm' and 'is_lazy' against their
+ * write ordering in switch_mm_irqs_off(). Ensure
+ * 'is_lazy' is at least as new as 'loaded_mm'.
+ */
+ smp_rmb();
+
/* Lazy TLB will get flushed at the next context switch. */
if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
return false;
@@ -905,8 +917,15 @@ static bool should_flush_tlb(int cpu, void *data)
if (!info->mm)
return true;
+ /*
+ * While switching, the remote CPU could have state from
+ * either the prev or next mm. Assume the worst and flush.
+ */
+ if (loaded_mm == LOADED_MM_SWITCHING)
+ return true;
+
/* The target mm is loaded, and the CPU is not lazy. */
- if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm)
+ if (loaded_mm == info->mm)
return true;
/* In cpumask, but not the loaded mm? Periodically remove by flushing. */
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 06b080b61aa5..ccb2f7703c33 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -41,6 +41,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+#define EMIT5(b1, b2, b3, b4, b5) \
+ do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0)
#define EMIT1_off32(b1, off) \
do { EMIT1(b1); EMIT(off, 4); } while (0)
@@ -637,7 +639,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
u8 *prog = *pprog;
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
+ if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) {
+ OPTIMIZER_HIDE_VAR(reg);
+ emit_jump(&prog, its_static_thunk(reg), ip);
+ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
@@ -659,7 +664,7 @@ static void emit_return(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ if (cpu_wants_rethunk()) {
emit_jump(&prog, x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
@@ -1412,6 +1417,48 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
#define LOAD_TAIL_CALL_CNT_PTR(stack) \
__LOAD_TCC_PTR(BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack))
+static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
+ struct bpf_prog *bpf_prog)
+{
+ u8 *prog = *pprog;
+ u8 *func;
+
+ if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) {
+ /* The clearing sequence clobbers eax and ecx. */
+ EMIT1(0x50); /* push rax */
+ EMIT1(0x51); /* push rcx */
+ ip += 2;
+
+ func = (u8 *)clear_bhb_loop;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+
+ if (emit_call(&prog, func, ip))
+ return -EINVAL;
+ EMIT1(0x59); /* pop rcx */
+ EMIT1(0x58); /* pop rax */
+ }
+ /* Insert IBHF instruction */
+ if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) &&
+ cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) ||
+ cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) {
+ /*
+ * Add an Indirect Branch History Fence (IBHF). IBHF acts as a
+ * fence preventing branch history from before the fence from
+ * affecting indirect branches after the fence. This is
+ * specifically used in cBPF jitted code to prevent Intra-mode
+ * BHI attacks. The IBHF instruction is designed to be a NOP on
+ * hardware that doesn't need or support it. The REP and REX.W
+ * prefixes are required by the microcode, and they also ensure
+ * that the NOP is unlikely to be used in existing code.
+ *
+ * IBHF is not a valid instruction in 32-bit mode.
+ */
+ EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */
+ }
+ *pprog = prog;
+ return 0;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
@@ -2402,6 +2449,13 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
+ if (bpf_prog_was_classic(bpf_prog) &&
+ !capable(CAP_SYS_ADMIN)) {
+ u8 *ip = image + addrs[i - 1];
+
+ if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog))
+ return -EINVAL;
+ }
if (bpf_prog->aux->exception_boundary) {
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 0f2fe524f60d..b8755cde2419 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -436,7 +436,8 @@ static struct msi_domain_ops xen_pci_msi_domain_ops = {
};
static struct msi_domain_info xen_pci_msi_domain_info = {
- .flags = MSI_FLAG_PCI_MSIX | MSI_FLAG_FREE_MSI_DESCS | MSI_FLAG_DEV_SYSFS,
+ .flags = MSI_FLAG_PCI_MSIX | MSI_FLAG_FREE_MSI_DESCS |
+ MSI_FLAG_DEV_SYSFS | MSI_FLAG_NO_MASK,
.ops = &xen_pci_msi_domain_ops,
};
@@ -484,11 +485,6 @@ static __init void xen_setup_pci_msi(void)
* in allocating the native domain and never use it.
*/
x86_init.irqs.create_pci_msi_domain = xen_create_pci_msi_domain;
- /*
- * With XEN PIRQ/Eventchannels in use PCI/MSI[-X] masking is solely
- * controlled by the hypervisor.
- */
- pci_msi_ignore_mask = 1;
}
#else /* CONFIG_PCI_MSI */
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 63230ff8cf4f..08e76a5ca155 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -27,6 +27,7 @@
#include <asm/mmu_context.h>
#include <asm/cpu_device_id.h>
#include <asm/microcode.h>
+#include <asm/fred.h>
#ifdef CONFIG_X86_32
__visible unsigned long saved_context_ebx;
@@ -231,6 +232,19 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
*/
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+
+ /*
+ * Reinitialize FRED to ensure the FRED MSRs contain the same values
+ * as before hibernation.
+ *
+ * Note, the setup of FRED RSPs requires access to percpu data
+ * structures. Therefore, FRED reinitialization can only occur after
+ * the percpu access pointer (i.e., MSR_GS_BASE) is restored.
+ */
+ if (ctxt->cr4 & X86_CR4_FRED) {
+ cpu_init_fred_exceptions();
+ cpu_init_fred_rsps();
+ }
#else
loadsegment(fs, __KERNEL_PERCPU);
#endif
diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c
index 472540aeabc2..08cd913cbd4e 100644
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -10,8 +10,7 @@
#include <assert.h>
#include <unistd.h>
#include <stdarg.h>
-
-#define unlikely(cond) (cond)
+#include <linux/kallsyms.h>
#include <asm/insn.h>
#include <inat.c>
@@ -106,7 +105,7 @@ static void parse_args(int argc, char **argv)
}
}
-#define BUFSIZE 256
+#define BUFSIZE (256 + KSYM_NAME_LEN)
int main(int argc, char **argv)
{
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index b07824500363..ddc144657efa 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -20,6 +20,9 @@
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+/* Do not call this directly. Declared for export type visibility. */
+extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
+
/**
* csum_fold - Fold and invert a 32bit checksum.
* sum: 32bit unfolded sum
diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c
index e80ab7d28117..1b0d95328b2c 100644
--- a/arch/x86/um/os-Linux/mcontext.c
+++ b/arch/x86/um/os-Linux/mcontext.c
@@ -27,7 +27,6 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
COPY(RIP);
COPY2(EFLAGS, EFL);
COPY2(CS, CSGSFS);
- regs->gp[CS / sizeof(unsigned long)] &= 0xffff;
- regs->gp[CS / sizeof(unsigned long)] |= 3;
+ regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48;
#endif
}
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 4e2b2e2ac9f9..eb91bc5448de 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -69,8 +69,9 @@ static inline void seamcall_err_ret(u64 fn, u64 err,
args->r9, args->r10, args->r11);
}
-static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func,
- u64 fn, struct tdx_module_args *args)
+static __always_inline int sc_retry_prerr(sc_func_t func,
+ sc_err_func_t err_func,
+ u64 fn, struct tdx_module_args *args)
{
u64 sret = sc_retry(func, fn, args);
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 0e3d930bcb89..9d25d9373945 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/acpi.h>
+#include <linux/cpufreq.h>
+#include <linux/cpuidle.h>
#include <linux/export.h>
#include <linux/mm.h>
@@ -123,8 +125,23 @@ static void __init pvh_arch_setup(void)
{
pvh_reserve_extra_memory();
- if (xen_initial_domain())
+ if (xen_initial_domain()) {
xen_add_preferred_consoles();
+
+ /*
+ * Disable usage of CPU idle and frequency drivers: when
+ * running as hardware domain the exposed native ACPI tables
+ * causes idle and/or frequency drivers to attach and
+ * malfunction. It's Xen the entity that controls the idle and
+ * frequency states.
+ *
+ * For unprivileged domains the exposed ACPI tables are
+ * fabricated and don't contain such data.
+ */
+ disable_cpuidle();
+ disable_cpufreq();
+ WARN_ON(xen_set_default_idle());
+ }
}
void __init xen_pvh_init(struct boot_params *boot_params)