summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/entry/common.c10
-rw-r--r--arch/x86/include/asm/fpu/api.h15
-rw-r--r--arch/x86/include/asm/idtentry.h1
-rw-r--r--arch/x86/include/asm/intel-family.h1
-rw-r--r--arch/x86/include/asm/msr.h4
-rw-r--r--arch/x86/include/asm/topology.h4
-rw-r--r--arch/x86/kernel/cpu/amd.c4
-rw-r--r--arch/x86/kernel/cpu/mce/core.c7
-rw-r--r--arch/x86/kernel/cpu/topology.c2
-rw-r--r--arch/x86/kernel/fpu/core.c9
-rw-r--r--arch/x86/kernel/setup.c20
-rw-r--r--arch/x86/kernel/sev-es.c14
-rw-r--r--arch/x86/kernel/smpboot.c19
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h51
-rw-r--r--arch/x86/kvm/mmu.h9
-rw-r--r--arch/x86/kvm/svm/nested.c3
-rw-r--r--arch/x86/kvm/svm/sev.c15
-rw-r--r--arch/x86/kvm/svm/svm.c2
-rw-r--r--arch/x86/kvm/vmx/nested.c44
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c6
-rw-r--r--arch/x86/kvm/vmx/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c11
-rw-r--r--arch/x86/lib/mmx_32.c20
-rw-r--r--arch/x86/xen/enlighten_pv.c15
-rw-r--r--arch/x86/xen/xen-asm.S1
25 files changed, 199 insertions, 90 deletions
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 18d8f17f755c..0904f5676e4d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -73,10 +73,8 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
unsigned int nr)
{
if (likely(nr < IA32_NR_syscalls)) {
- instrumentation_begin();
nr = array_index_nospec(nr, IA32_NR_syscalls);
regs->ax = ia32_sys_call_table[nr](regs);
- instrumentation_end();
}
}
@@ -91,8 +89,11 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
* or may not be necessary, but it matches the old asm behavior.
*/
nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
+ instrumentation_begin();
do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
syscall_exit_to_user_mode(regs);
}
@@ -121,11 +122,12 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
res = get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp);
}
- instrumentation_end();
if (res) {
/* User code screwed up. */
regs->ax = -EFAULT;
+
+ instrumentation_end();
syscall_exit_to_user_mode(regs);
return false;
}
@@ -135,6 +137,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
/* Now this is just like a normal syscall. */
do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
syscall_exit_to_user_mode(regs);
return true;
}
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index a5aba4ab0224..67a4f1cb2aac 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -16,14 +16,25 @@
* Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It
* disables preemption so be careful if you intend to use it for long periods
* of time.
- * If you intend to use the FPU in softirq you need to check first with
+ * If you intend to use the FPU in irq/softirq you need to check first with
* irq_fpu_usable() if it is possible.
*/
-extern void kernel_fpu_begin(void);
+
+/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */
+#define KFPU_387 _BITUL(0) /* 387 state will be initialized */
+#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */
+
+extern void kernel_fpu_begin_mask(unsigned int kfpu_mask);
extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);
extern void fpregs_mark_activate(void);
+/* Code that is unaware of kernel_fpu_begin_mask() can use this */
+static inline void kernel_fpu_begin(void)
+{
+ kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR);
+}
+
/*
* Use fpregs_lock() while editing CPU's FPU registers or fpu->state.
* A context switch will (and softirq might) save CPU's FPU registers to
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 247a60a47331..f656aabd1545 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -613,6 +613,7 @@ DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication);
#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback);
+DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER, exc_xen_unknown_trap);
#endif
/* Device interrupts common/spurious */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 5e658ba2654a..9abe842dbd84 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -97,6 +97,7 @@
#define INTEL_FAM6_LAKEFIELD 0x8A
#define INTEL_FAM6_ALDERLAKE 0x97
+#define INTEL_FAM6_ALDERLAKE_L 0x9A
/* "Small Core" Processors (Atom) */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 0b4920a7238e..e16cccdd0420 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -86,7 +86,7 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
* think of extending them - you will be slapped with a stinking trout or a frozen
* shark will reach you, wherever you are! You've been warned.
*/
-static inline unsigned long long notrace __rdmsr(unsigned int msr)
+static __always_inline unsigned long long __rdmsr(unsigned int msr)
{
DECLARE_ARGS(val, low, high);
@@ -98,7 +98,7 @@ static inline unsigned long long notrace __rdmsr(unsigned int msr)
return EAX_EDX_VAL(val, low, high);
}
-static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high)
+static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
{
asm volatile("1: wrmsr\n"
"2:\n"
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 488a8e848754..9239399e5491 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -110,6 +110,8 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
+extern unsigned int __max_die_per_package;
+
#ifdef CONFIG_SMP
#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
@@ -118,8 +120,6 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
-extern unsigned int __max_die_per_package;
-
static inline int topology_max_die_per_package(void)
{
return __max_die_per_package;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f8ca66f3d861..347a956f71ca 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -542,12 +542,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
u32 ecx;
ecx = cpuid_ecx(0x8000001e);
- nodes_per_socket = ((ecx >> 8) & 7) + 1;
+ __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1;
} else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
- nodes_per_socket = ((value >> 3) & 7) + 1;
+ __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1;
}
if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 13d3f1cbda17..e133ce1e562b 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1992,10 +1992,9 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
* that out because it's an indirect call. Annotate it.
*/
instrumentation_begin();
- trace_hardirqs_off_finish();
+
machine_check_vector(regs);
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
+
instrumentation_end();
irqentry_nmi_exit(regs, irq_state);
}
@@ -2004,7 +2003,9 @@ static __always_inline void exc_machine_check_user(struct pt_regs *regs)
{
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
+
machine_check_vector(regs);
+
instrumentation_end();
irqentry_exit_to_user_mode(regs);
}
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 1068002c8532..8678864ce712 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -25,10 +25,10 @@
#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
-#ifdef CONFIG_SMP
unsigned int __max_die_per_package __read_mostly = 1;
EXPORT_SYMBOL(__max_die_per_package);
+#ifdef CONFIG_SMP
/*
* Check if given CPUID extended toplogy "leaf" is implemented
*/
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index eb86a2b831b1..571220ac8bea 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -121,7 +121,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu)
}
EXPORT_SYMBOL(copy_fpregs_to_fpstate);
-void kernel_fpu_begin(void)
+void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
preempt_disable();
@@ -141,13 +141,14 @@ void kernel_fpu_begin(void)
}
__cpu_invalidate_fpregs_state();
- if (boot_cpu_has(X86_FEATURE_XMM))
+ /* Put sane initial values into the control registers. */
+ if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM))
ldmxcsr(MXCSR_DEFAULT);
- if (boot_cpu_has(X86_FEATURE_FPU))
+ if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
asm volatile ("fninit");
}
-EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
void kernel_fpu_end(void)
{
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 740f3bdb3f61..3412c4595efd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -661,17 +661,6 @@ static void __init trim_platform_memory_ranges(void)
static void __init trim_bios_range(void)
{
/*
- * A special case is the first 4Kb of memory;
- * This is a BIOS owned area, not kernel ram, but generally
- * not listed as such in the E820 table.
- *
- * This typically reserves additional memory (64KiB by default)
- * since some BIOSes are known to corrupt low memory. See the
- * Kconfig help text for X86_RESERVE_LOW.
- */
- e820__range_update(0, PAGE_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
-
- /*
* special case: Some BIOSes report the PC BIOS
* area (640Kb -> 1Mb) as RAM even though it is not.
* take them out.
@@ -728,6 +717,15 @@ early_param("reservelow", parse_reservelow);
static void __init trim_low_memory_range(void)
{
+ /*
+ * A special case is the first 4Kb of memory;
+ * This is a BIOS owned area, not kernel ram, but generally
+ * not listed as such in the E820 table.
+ *
+ * This typically reserves additional memory (64KiB by default)
+ * since some BIOSes are known to corrupt low memory. See the
+ * Kconfig help text for X86_RESERVE_LOW.
+ */
memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
}
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 0bd1a0fc587e..84c1821819af 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -225,7 +225,7 @@ static inline u64 sev_es_rd_ghcb_msr(void)
return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}
-static inline void sev_es_wr_ghcb_msr(u64 val)
+static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
u32 low, high;
@@ -286,6 +286,12 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
+ /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
+ if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
+ memcpy(dst, buf, size);
+ return ES_OK;
+ }
+
switch (size) {
case 1:
memcpy(&d1, buf, 1);
@@ -335,6 +341,12 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
u16 d2;
u8 d1;
+ /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
+ if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
+ memcpy(buf, src, size);
+ return ES_OK;
+ }
+
switch (size) {
case 1:
if (get_user(d1, s))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8ca66af96a54..117e24fbfd8a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -56,6 +56,7 @@
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/overflow.h>
+#include <linux/syscore_ops.h>
#include <asm/acpi.h>
#include <asm/desc.h>
@@ -2083,6 +2084,23 @@ static void init_counter_refs(void)
this_cpu_write(arch_prev_mperf, mperf);
}
+#ifdef CONFIG_PM_SLEEP
+static struct syscore_ops freq_invariance_syscore_ops = {
+ .resume = init_counter_refs,
+};
+
+static void register_freq_invariance_syscore_ops(void)
+{
+ /* Bail out if registered already. */
+ if (freq_invariance_syscore_ops.node.prev)
+ return;
+
+ register_syscore_ops(&freq_invariance_syscore_ops);
+}
+#else
+static inline void register_freq_invariance_syscore_ops(void) {}
+#endif
+
static void init_freq_invariance(bool secondary, bool cppc_ready)
{
bool ret = false;
@@ -2109,6 +2127,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready)
if (ret) {
init_counter_refs();
static_branch_enable(&arch_scale_freq_key);
+ register_freq_invariance_syscore_ops();
pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
} else {
pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index f15bc16de07c..a889563ad02d 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,31 +9,6 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
-static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
-{
- return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
-{
- return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
-static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
-{
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
-{
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
{ \
@@ -43,7 +18,6 @@ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \
unsigned long val) \
{ \
vcpu->arch.regs[VCPU_REGS_##uname] = val; \
- kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname); \
}
BUILD_KVM_GPR_ACCESSORS(rax, RAX)
BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
@@ -63,6 +37,31 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
BUILD_KVM_GPR_ACCESSORS(r15, R15)
#endif
+static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
{
if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 581925e476d6..261be1d2032b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,8 +44,15 @@
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
-static inline u64 rsvd_bits(int s, int e)
+static __always_inline u64 rsvd_bits(int s, int e)
{
+ BUILD_BUG_ON(__builtin_constant_p(e) && __builtin_constant_p(s) && e < s);
+
+ if (__builtin_constant_p(e))
+ BUILD_BUG_ON(e > 63);
+ else
+ e &= 63;
+
if (e < s)
return 0;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index cb4c6ee10029..7a605ad8254d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -200,6 +200,9 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ if (WARN_ON(!is_guest_mode(vcpu)))
+ return true;
+
if (!nested_svm_vmrun_msrpm(svm)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror =
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c8ffdbc81709..ac652bc476ae 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1415,16 +1415,13 @@ static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
* to be returned:
* GPRs RAX, RBX, RCX, RDX
*
- * Copy their values to the GHCB if they are dirty.
+ * Copy their values, even if they may not have been written during the
+ * VM-Exit. It's the guest's responsibility to not consume random data.
*/
- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
- ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
- ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
- ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
- ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
+ ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
+ ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
+ ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
+ ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
}
static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7ef171790d02..f923e14e87df 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3739,6 +3739,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ trace_kvm_entry(vcpu);
+
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0fbb46990dfc..f2b9bfb58206 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3124,13 +3124,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
return 0;
}
-static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
+static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
{
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_host_map *map;
- struct page *page;
- u64 hpa;
/*
* hv_evmcs may end up being not mapped after migration (when
@@ -3153,6 +3149,17 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
}
}
+ return true;
+}
+
+static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_host_map *map;
+ struct page *page;
+ u64 hpa;
+
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
/*
* Translate L1 physical address to host physical
@@ -3221,6 +3228,18 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
else
exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
+
+ return true;
+}
+
+static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
+{
+ if (!nested_get_evmcs_page(vcpu))
+ return false;
+
+ if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
+ return false;
+
return true;
}
@@ -6077,11 +6096,14 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu)) {
sync_vmcs02_to_vmcs12(vcpu, vmcs12);
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
- } else if (!vmx->nested.need_vmcs12_to_shadow_sync) {
- if (vmx->nested.hv_evmcs)
- copy_enlightened_to_vmcs12(vmx);
- else if (enable_shadow_vmcs)
- copy_shadow_to_vmcs12(vmx);
+ } else {
+ copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
+ if (!vmx->nested.need_vmcs12_to_shadow_sync) {
+ if (vmx->nested.hv_evmcs)
+ copy_enlightened_to_vmcs12(vmx);
+ else if (enable_shadow_vmcs)
+ copy_shadow_to_vmcs12(vmx);
+ }
}
BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
@@ -6602,7 +6624,7 @@ struct kvm_x86_nested_ops vmx_nested_ops = {
.hv_timer_pending = nested_vmx_preemption_timer_pending,
.get_state = vmx_get_nested_state,
.set_state = vmx_set_nested_state,
- .get_nested_state_pages = nested_get_vmcs12_pages,
+ .get_nested_state_pages = vmx_get_nested_state_pages,
.write_log_dirty = nested_vmx_write_pml_buffer,
.enable_evmcs = nested_enable_evmcs,
.get_evmcs_version = nested_get_evmcs_version,
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a886a47daebd..cdf5f34518f4 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -29,7 +29,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = {
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
- [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
+ [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};
/* mapping between fixed pmc index and intel_arch_events array */
@@ -345,7 +345,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
x86_pmu.num_counters_gp);
+ eax.split.bit_width = min_t(int, eax.split.bit_width, x86_pmu.bit_width_gp);
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+ eax.split.mask_length = min_t(int, eax.split.mask_length, x86_pmu.events_mask_len);
pmu->available_event_types = ~entry->ebx &
((1ull << eax.split.mask_length) - 1);
@@ -355,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->nr_arch_fixed_counters =
min_t(int, edx.split.num_counters_fixed,
x86_pmu.num_counters_fixed);
+ edx.split.bit_width_fixed = min_t(int,
+ edx.split.bit_width_fixed, x86_pmu.bit_width_fixed);
pmu->counter_bitmask[KVM_PMC_FIXED] =
((u64)1 << edx.split.bit_width_fixed) - 1;
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 2af05d3b0590..cc60b1fc3ee7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6653,6 +6653,8 @@ reenter_guest:
if (vmx->emulation_required)
return EXIT_FASTPATH_NONE;
+ trace_kvm_entry(vcpu);
+
if (vmx->ple_window_dirty) {
vmx->ple_window_dirty = false;
vmcs_write32(PLE_WINDOW, vmx->ple_window);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9a8969a6dd06..76bce832cade 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -105,6 +105,7 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
+static void process_smi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
@@ -4230,6 +4231,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
{
process_nmi(vcpu);
+ if (kvm_check_request(KVM_REQ_SMI, vcpu))
+ process_smi(vcpu);
+
/*
* In guest mode, payload delivery should be deferred,
* so that the L1 hypervisor can intercept #PF before
@@ -8802,9 +8806,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
- if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
- ;
- else if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
+ if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
r = 0;
goto out;
}
@@ -8988,8 +8990,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops.request_immediate_exit(vcpu);
}
- trace_kvm_entry(vcpu);
-
fpregs_assert_state_consistent();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
switch_fpu_return();
@@ -11556,6 +11556,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
}
EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index 4321fa02e18d..419365c48b2a 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -26,6 +26,16 @@
#include <asm/fpu/api.h>
#include <asm/asm.h>
+/*
+ * Use KFPU_387. MMX instructions are not affected by MXCSR,
+ * but both AMD and Intel documentation states that even integer MMX
+ * operations will result in #MF if an exception is pending in FCW.
+ *
+ * EMMS is not needed afterwards because, after calling kernel_fpu_end(),
+ * any subsequent user of the 387 stack will reinitialize it using
+ * KFPU_387.
+ */
+
void *_mmx_memcpy(void *to, const void *from, size_t len)
{
void *p;
@@ -37,7 +47,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
p = to;
i = len >> 6; /* len/64 */
- kernel_fpu_begin();
+ kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ (
"1: prefetch (%0)\n" /* This set is 28 bytes */
@@ -127,7 +137,7 @@ static void fast_clear_page(void *page)
{
int i;
- kernel_fpu_begin();
+ kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ (
" pxor %%mm0, %%mm0\n" : :
@@ -160,7 +170,7 @@ static void fast_copy_page(void *to, void *from)
{
int i;
- kernel_fpu_begin();
+ kernel_fpu_begin_mask(KFPU_387);
/*
* maybe the prefetch stuff can go before the expensive fnsave...
@@ -247,7 +257,7 @@ static void fast_clear_page(void *page)
{
int i;
- kernel_fpu_begin();
+ kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ (
" pxor %%mm0, %%mm0\n" : :
@@ -282,7 +292,7 @@ static void fast_copy_page(void *to, void *from)
{
int i;
- kernel_fpu_begin();
+ kernel_fpu_begin_mask(KFPU_387);
__asm__ __volatile__ (
"1: prefetch (%0)\n"
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4409306364dc..9a5a50cdaab5 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -583,6 +583,13 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
exc_debug(regs);
}
+DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap)
+{
+ /* This should never happen and there is no way to handle it. */
+ pr_err("Unknown trap in Xen PV mode.");
+ BUG();
+}
+
struct trap_array_entry {
void (*orig)(void);
void (*xen)(void);
@@ -631,6 +638,7 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
{
unsigned int nr;
bool ist_okay = false;
+ bool found = false;
/*
* Replace trap handler addresses by Xen specific ones.
@@ -645,6 +653,7 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
if (*addr == entry->orig) {
*addr = entry->xen;
ist_okay = entry->ist_okay;
+ found = true;
break;
}
}
@@ -655,9 +664,13 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
nr = (*addr - (void *)early_idt_handler_array[0]) /
EARLY_IDT_HANDLER_SIZE;
*addr = (void *)xen_early_idt_handler_array[nr];
+ found = true;
}
- if (WARN_ON(ist != 0 && !ist_okay))
+ if (!found)
+ *addr = (void *)xen_asm_exc_xen_unknown_trap;
+
+ if (WARN_ON(found && ist != 0 && !ist_okay))
return false;
return true;
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 1cb0e84b9161..53cf8aa35032 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -178,6 +178,7 @@ xen_pv_trap asm_exc_simd_coprocessor_error
#ifdef CONFIG_IA32_EMULATION
xen_pv_trap entry_INT80_compat
#endif
+xen_pv_trap asm_exc_xen_unknown_trap
xen_pv_trap asm_exc_xen_hypervisor_callback
__INIT