diff options
Diffstat (limited to 'arch/x86')
29 files changed, 375 insertions, 220 deletions
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 29774126e931..f49bc3ec76e6 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -444,7 +444,7 @@ void __init hyperv_init(void) if (hv_root_partition) { struct page *pg; - void *src, *dst; + void *src; /* * For the root partition, the hypervisor will set up its @@ -459,13 +459,11 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); pg = vmalloc_to_page(hv_hypercall_pg); - dst = kmap_local_page(pg); src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, MEMREMAP_WB); - BUG_ON(!(src && dst)); - memcpy(dst, src, HV_HYP_PAGE_SIZE); + BUG_ON(!src); + memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE); memunmap(src); - kunmap_local(dst); } else { hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -537,6 +535,7 @@ common_free: void hyperv_cleanup(void) { union hv_x64_msr_hypercall_contents hypercall_msr; + union hv_reference_tsc_msr tsc_msr; unregister_syscore_ops(&hv_syscore_ops); @@ -552,12 +551,14 @@ void hyperv_cleanup(void) hv_hypercall_pg = NULL; /* Reset the hypercall page */ - hypercall_msr.as_uint64 = 0; - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.as_uint64 = hv_get_register(HV_X64_MSR_HYPERCALL); + hypercall_msr.enable = 0; + hv_set_register(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); /* Reset the TSC page */ - hypercall_msr.as_uint64 = 0; - wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); + tsc_msr.as_uint64 = hv_get_register(HV_X64_MSR_REFERENCE_TSC); + tsc_msr.enable = 0; + hv_set_register(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); } void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7551b6f9c31c..f05ebaa26f0f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -501,7 +501,12 @@ struct kvm_pmc { bool intr; }; +/* More counters may conflict with other existing Architectural MSRs */ +#define KVM_INTEL_PMC_MAX_GENERIC 8 +#define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1) +#define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1) #define KVM_PMC_MAX_FIXED 3 +#define KVM_AMD_PMC_MAX_GENERIC 6 struct kvm_pmu { unsigned nr_arch_gp_counters; unsigned nr_arch_fixed_counters; @@ -516,7 +521,7 @@ struct kvm_pmu { u64 reserved_bits; u64 raw_event_mask; u8 version; - struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; + struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; struct irq_work irq_work; DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 10ac52705892..4a2af82553e4 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -535,6 +535,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -640,9 +645,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 5393babc0598..cb0386fc4dc3 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -13,7 +13,7 @@ * Takes the guest view of SPEC_CTRL MSR as a parameter and also * the guest's version of VIRT_SPEC_CTRL, if emulated. */ -extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); +extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest); /** * x86_spec_ctrl_set_guest - Set speculation control registers for the guest @@ -24,9 +24,9 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bo * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, true); } /** @@ -38,9 +38,9 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) * Avoids writing to the MSR if the content/bits are the same */ static inline -void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl) { - x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); + x86_virt_spec_ctrl(guest_virt_spec_ctrl, false); } /* AMD specific Speculative Store Bypass MSR data */ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index cb50589a7102..437308004ef2 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -19,7 +19,6 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> #include <asm/tdx.h> -#include "../kvm/vmx/vmx.h" #ifdef CONFIG_XEN #include <xen/interface/xen.h> @@ -108,9 +107,4 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); - - if (IS_ENABLED(CONFIG_KVM_INTEL)) { - BLANK(); - OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); - } } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 860b60273df3..c75d75b9f11a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -770,8 +770,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); } -#define MSR_AMD64_DE_CFG 0xC0011029 - static void init_amd_ln(struct cpuinfo_x86 *c) { /* @@ -965,8 +963,8 @@ static void init_amd(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index da7c361f47e0..3e3230cccaa7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -196,22 +196,15 @@ void __init check_bugs(void) } /* - * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is - * done in vmenter.S. + * NOTE: This function is *only* called for SVM, since Intel uses + * MSR_IA32_SPEC_CTRL for SSBD. */ void -x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) +x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); + u64 guestval, hostval; struct thread_info *ti = current_thread_info(); - if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - if (hostval != guestval) { - msrval = setguest ? guestval : hostval; - wrmsrl(MSR_IA32_SPEC_CTRL, msrval); - } - } - /* * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 21fd425088fe..c393b8773ace 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -326,8 +326,8 @@ static void init_hygon(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 178015a820f0..d3fdec706f1d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -15,6 +15,7 @@ #include <linux/context_tracking.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> +#include <linux/kmsan.h> #include <linux/spinlock.h> #include <linux/kprobes.h> #include <linux/uaccess.h> @@ -301,6 +302,12 @@ static noinstr bool handle_bug(struct pt_regs *regs) { bool handled = false; + /* + * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug() + * is a rare case that uses @regs without passing them to + * irqentry_enter(). + */ + kmsan_unpoison_entry_regs(regs); if (!is_valid_bugaddr(regs->ip)) return handled; diff --git a/arch/x86/kvm/.gitignore b/arch/x86/kvm/.gitignore new file mode 100644 index 000000000000..615d6ff35c00 --- /dev/null +++ b/arch/x86/kvm/.gitignore @@ -0,0 +1,2 @@ +/kvm-asm-offsets.s +/kvm-asm-offsets.h diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 30f244b64523..f453a0f96e24 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -34,3 +34,15 @@ endif obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o obj-$(CONFIG_KVM_AMD) += kvm-amd.o + +AFLAGS_svm/vmenter.o := -iquote $(obj) +$(obj)/svm/vmenter.o: $(obj)/kvm-asm-offsets.h + +AFLAGS_vmx/vmenter.o := -iquote $(obj) +$(obj)/vmx/vmenter.o: $(obj)/kvm-asm-offsets.h + +$(obj)/kvm-asm-offsets.h: $(obj)/kvm-asm-offsets.s FORCE + $(call filechk,offsets,__KVM_ASM_OFFSETS_H__) + +targets += kvm-asm-offsets.s +clean-files += kvm-asm-offsets.h diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c new file mode 100644 index 000000000000..24a710d37323 --- /dev/null +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. + */ +#define COMPILE_OFFSETS + +#include <linux/kbuild.h> +#include "vmx/vmx.h" +#include "svm/svm.h" + +static void __used common(void) +{ + if (IS_ENABLED(CONFIG_KVM_AMD)) { + BLANK(); + OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); + OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); + OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl); + OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); + OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); + OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa); + } + + if (IS_ENABLED(CONFIG_KVM_INTEL)) { + BLANK(); + OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); + } +} diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f81539061d6..1ccb769f62af 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6056,7 +6056,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) write_lock(&kvm->mmu_lock); - kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_begin(kvm, 0, -1ul); flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); @@ -6070,7 +6070,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end - gfn_start); - kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end); + kvm_mmu_invalidate_end(kvm, 0, -1ul); write_unlock(&kvm->mmu_lock); } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index d9b9a0f0db17..de1fd7369736 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -56,7 +56,7 @@ static const struct x86_cpu_id vmx_icl_pebs_cpu[] = { * code. Each pmc, stored in kvm_pmc.idx field, is unique across * all perf counters (both gp and fixed). The mapping relationship * between pmc and perf counters is as the following: - * * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters + * * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters * [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed * * AMD: [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H * and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index b68956299fa8..9d65cd095691 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC); + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > AMD64_NUM_COUNTERS_CORE); + BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC); - for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) { + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; @@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) { + for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) { struct kvm_pmc *pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 28064060413a..efaaef2b7ae1 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -196,7 +196,7 @@ static void sev_asid_free(struct kvm_sev_info *sev) __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { - sd = per_cpu(svm_data, cpu); + sd = per_cpu_ptr(&svm_data, cpu); sd->sev_vmcbs[sev->asid] = NULL; } @@ -605,7 +605,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->dr6 = svm->vcpu.arch.dr6; pr_debug("Virtual Machine Save Area (VMSA):\n"); - print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); return 0; } @@ -2600,7 +2600,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void pre_sev_run(struct vcpu_svm *svm, int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 58f0077d9357..4b6d2b050e57 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -245,7 +245,7 @@ struct kvm_ldttss_desc { u32 zero1; } __attribute__((packed)); -DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); +DEFINE_PER_CPU(struct svm_cpu_data, svm_data); /* * Only MSR_TSC_AUX is switched via the user return hook. EFER is switched via @@ -581,12 +581,7 @@ static int svm_hardware_enable(void) pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); return -EINVAL; } - sd = per_cpu(svm_data, me); - if (!sd) { - pr_err("%s: svm_data is NULL on %d\n", __func__, me); - return -EINVAL; - } - + sd = per_cpu_ptr(&svm_data, me); sd->asid_generation = 1; sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; sd->next_asid = sd->max_asid + 1; @@ -597,7 +592,7 @@ static int svm_hardware_enable(void) wrmsrl(MSR_EFER, efer | EFER_SVME); - wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area)); + wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa); if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { /* @@ -646,42 +641,37 @@ static int svm_hardware_enable(void) static void svm_cpu_uninit(int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); - if (!sd) + if (!sd->save_area) return; - per_cpu(svm_data, cpu) = NULL; kfree(sd->sev_vmcbs); __free_page(sd->save_area); - kfree(sd); + sd->save_area_pa = 0; + sd->save_area = NULL; } static int svm_cpu_init(int cpu) { - struct svm_cpu_data *sd; + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); int ret = -ENOMEM; - sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); - if (!sd) - return ret; - sd->cpu = cpu; + memset(sd, 0, sizeof(struct svm_cpu_data)); sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!sd->save_area) - goto free_cpu_data; + return ret; ret = sev_cpu_init(sd); if (ret) goto free_save_area; - per_cpu(svm_data, cpu) = sd; - + sd->save_area_pa = __sme_page_pa(sd->save_area); return 0; free_save_area: __free_page(sd->save_area); -free_cpu_data: - kfree(sd); + sd->save_area = NULL; return ret; } @@ -730,6 +720,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) u32 offset; u32 *msrpm; + /* + * For non-nested case: + * If the L01 MSR bitmap does not intercept the MSR, then we need to + * save it. + * + * For nested case: + * If the L02 MSR bitmap does not intercept the MSR, then we need to + * save it. + */ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: to_svm(vcpu)->msrpm; @@ -1425,7 +1424,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb) int i; for_each_online_cpu(i) - cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); + cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL); } static void svm_vcpu_free(struct kvm_vcpu *vcpu) @@ -1450,7 +1449,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); if (sev_es_guest(vcpu->kvm)) sev_es_unmap_ghcb(svm); @@ -1462,7 +1461,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) * Save additional host state that will be restored on VMEXIT (sev-es) * or subsequent vmload of host save area. */ - vmsave(__sme_page_pa(sd->save_area)); + vmsave(sd->save_area_pa); if (sev_es_guest(vcpu->kvm)) { struct sev_es_save_area *hostsa; hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); @@ -1487,7 +1486,7 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu) static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); if (sd->current_vmcb != svm->vmcb) { sd->current_vmcb = svm->vmcb; @@ -2710,9 +2709,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) msr->data = 0; switch (msr->index) { - case MSR_F10H_DECFG: - if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) - msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + case MSR_AMD64_DE_CFG: + if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; case MSR_IA32_PERF_CAPABILITIES: return 0; @@ -2813,7 +2812,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; - case MSR_F10H_DECFG: + case MSR_AMD64_DE_CFG: msr_info->data = svm->msr_decfg; break; default: @@ -3042,7 +3041,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_F10H_DECFG: { + case MSR_AMD64_DE_CFG: { struct kvm_msr_entry msr_entry; msr_entry.index = msr->index; @@ -3443,7 +3442,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) static void reload_tss(struct kvm_vcpu *vcpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); sd->tss_desc->type = 9; /* available 32/64-bit TSS */ load_TR_desc(); @@ -3451,7 +3450,7 @@ static void reload_tss(struct kvm_vcpu *vcpu) static void pre_svm_run(struct kvm_vcpu *vcpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); /* @@ -3911,30 +3910,16 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; } -static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) +static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) { struct vcpu_svm *svm = to_svm(vcpu); - unsigned long vmcb_pa = svm->current_vmcb->pa; guest_state_enter_irqoff(); - if (sev_es_guest(vcpu->kvm)) { - __svm_sev_es_vcpu_run(vmcb_pa); - } else { - struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); - - /* - * Use a single vmcb (vmcb01 because it's always valid) for - * context switching guest state via VMLOAD/VMSAVE, that way - * the state doesn't need to be copied between vmcb01 and - * vmcb02 when switching vmcbs for nested virtualization. - */ - vmload(svm->vmcb01.pa); - __svm_vcpu_run(vmcb_pa, (unsigned long *)&vcpu->arch.regs); - vmsave(svm->vmcb01.pa); - - vmload(__sme_page_pa(sd->save_area)); - } + if (sev_es_guest(vcpu->kvm)) + __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); + else + __svm_vcpu_run(svm, spec_ctrl_intercepted); guest_state_exit_irqoff(); } @@ -3942,6 +3927,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL); trace_kvm_entry(vcpu); @@ -3998,34 +3984,15 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) - x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + x86_spec_ctrl_set_guest(svm->virt_spec_ctrl); - svm_vcpu_enter_exit(vcpu); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) && - unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) - svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); + svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted); if (!sev_es_guest(vcpu->kvm)) reload_tss(vcpu); if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) - x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + x86_spec_ctrl_restore_host(svm->virt_spec_ctrl); if (!sev_es_guest(vcpu->kvm)) { vcpu->arch.cr2 = svm->vmcb->save.cr2; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 6a7686bf6900..199a2ecef1ce 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -209,7 +209,6 @@ struct vcpu_svm { struct vmcb *vmcb; struct kvm_vmcb_info vmcb01; struct kvm_vmcb_info *current_vmcb; - struct svm_cpu_data *svm_data; u32 asid; u32 sysenter_esp_hi; u32 sysenter_eip_hi; @@ -281,8 +280,6 @@ struct vcpu_svm { }; struct svm_cpu_data { - int cpu; - u64 asid_generation; u32 max_asid; u32 next_asid; @@ -290,13 +287,15 @@ struct svm_cpu_data { struct kvm_ldttss_desc *tss_desc; struct page *save_area; + unsigned long save_area_pa; + struct vmcb *current_vmcb; /* index = sev_asid, value = vmcb pointer */ struct vmcb **sev_vmcbs; }; -DECLARE_PER_CPU(struct svm_cpu_data *, svm_data); +DECLARE_PER_CPU(struct svm_cpu_data, svm_data); void recalc_intercepts(struct vcpu_svm *svm); @@ -683,7 +682,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ -void __svm_sev_es_vcpu_run(unsigned long vmcb_pa); -void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); #endif diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 9430d6437c9f..36c8af87a707 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -61,9 +61,4 @@ static __always_inline void vmsave(unsigned long pa) svm_asm1(vmsave, "a" (pa), "memory"); } -static __always_inline void vmload(unsigned long pa) -{ - svm_asm1(vmload, "a" (pa), "memory"); -} - #endif /* __KVM_X86_SVM_OPS_H */ diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 723f8534986c..34367dc203f2 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -4,35 +4,97 @@ #include <asm/bitsperlong.h> #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> +#include "kvm-asm-offsets.h" #define WORD_SIZE (BITS_PER_LONG / 8) /* Intentionally omit RAX as it's context switched by hardware */ -#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE -#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE -#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE +#define VCPU_RCX (SVM_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE) +#define VCPU_RDX (SVM_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE) +#define VCPU_RBX (SVM_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE) /* Intentionally omit RSP as it's context switched by hardware */ -#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE -#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE -#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE +#define VCPU_RBP (SVM_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE) +#define VCPU_RSI (SVM_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE) +#define VCPU_RDI (SVM_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE) #ifdef CONFIG_X86_64 -#define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE -#define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE -#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE -#define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE -#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE -#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE -#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE -#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE +#define VCPU_R8 (SVM_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE) +#define VCPU_R9 (SVM_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE) +#define VCPU_R10 (SVM_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE) +#define VCPU_R11 (SVM_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE) +#define VCPU_R12 (SVM_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE) +#define VCPU_R13 (SVM_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE) +#define VCPU_R14 (SVM_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE) +#define VCPU_R15 (SVM_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE) #endif +#define SVM_vmcb01_pa (SVM_vmcb01 + KVM_VMCB_pa) + .section .noinstr.text, "ax" +.macro RESTORE_GUEST_SPEC_CTRL + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ + ALTERNATIVE_2 "", \ + "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_V_SPEC_CTRL +801: +.endm +.macro RESTORE_GUEST_SPEC_CTRL_BODY +800: + /* + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the + * host's, write the MSR. This is kept out-of-line so that the common + * case does not have to jump. + * + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, + * there must not be any returns or indirect branches between this code + * and vmentry. + */ + movl SVM_spec_ctrl(%_ASM_DI), %eax + cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax + je 801b + mov $MSR_IA32_SPEC_CTRL, %ecx + xor %edx, %edx + wrmsr + jmp 801b +.endm + +.macro RESTORE_HOST_SPEC_CTRL + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ + ALTERNATIVE_2 "", \ + "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_V_SPEC_CTRL +901: +.endm +.macro RESTORE_HOST_SPEC_CTRL_BODY +900: + /* Same for after vmexit. */ + mov $MSR_IA32_SPEC_CTRL, %ecx + + /* + * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, + * if it was not intercepted during guest execution. + */ + cmpb $0, (%_ASM_SP) + jnz 998f + rdmsr + movl %eax, SVM_spec_ctrl(%_ASM_DI) +998: + + /* Now restore the host value of the MSR if different from the guest's. */ + movl PER_CPU_VAR(x86_spec_ctrl_current), %eax + cmp SVM_spec_ctrl(%_ASM_DI), %eax + je 901b + xor %edx, %edx + wrmsr + jmp 901b +.endm + + /** * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode - * @vmcb_pa: unsigned long - * @regs: unsigned long * (to guest registers) + * @svm: struct vcpu_svm * + * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP @@ -47,49 +109,71 @@ SYM_FUNC_START(__svm_vcpu_run) #endif push %_ASM_BX - /* Save @regs. */ + /* + * Save variables needed after vmexit on the stack, in inverse + * order compared to when they are needed. + */ + + /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ push %_ASM_ARG2 - /* Save @vmcb. */ + /* Needed to restore access to percpu variables. */ + __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa) + + /* Finally save @svm. */ push %_ASM_ARG1 - /* Move @regs to RAX. */ - mov %_ASM_ARG2, %_ASM_AX +.ifnc _ASM_ARG1, _ASM_DI + /* + * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX + * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. + */ + mov %_ASM_ARG1, %_ASM_DI +.endif + + /* Clobbers RAX, RCX, RDX. */ + RESTORE_GUEST_SPEC_CTRL + + /* + * Use a single vmcb (vmcb01 because it's always valid) for + * context switching guest state via VMLOAD/VMSAVE, that way + * the state doesn't need to be copied between vmcb01 and + * vmcb02 when switching vmcbs for nested virtualization. + */ + mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX +1: vmload %_ASM_AX +2: + + /* Get svm->current_vmcb->pa into RAX. */ + mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX + mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX /* Load guest registers. */ - mov VCPU_RCX(%_ASM_AX), %_ASM_CX - mov VCPU_RDX(%_ASM_AX), %_ASM_DX - mov VCPU_RBX(%_ASM_AX), %_ASM_BX - mov VCPU_RBP(%_ASM_AX), %_ASM_BP - mov VCPU_RSI(%_ASM_AX), %_ASM_SI - mov VCPU_RDI(%_ASM_AX), %_ASM_DI + mov VCPU_RCX(%_ASM_DI), %_ASM_CX + mov VCPU_RDX(%_ASM_DI), %_ASM_DX + mov VCPU_RBX(%_ASM_DI), %_ASM_BX + mov VCPU_RBP(%_ASM_DI), %_ASM_BP + mov VCPU_RSI(%_ASM_DI), %_ASM_SI #ifdef CONFIG_X86_64 - mov VCPU_R8 (%_ASM_AX), %r8 - mov VCPU_R9 (%_ASM_AX), %r9 - mov VCPU_R10(%_ASM_AX), %r10 - mov VCPU_R11(%_ASM_AX), %r11 - mov VCPU_R12(%_ASM_AX), %r12 - mov VCPU_R13(%_ASM_AX), %r13 - mov VCPU_R14(%_ASM_AX), %r14 - mov VCPU_R15(%_ASM_AX), %r15 + mov VCPU_R8 (%_ASM_DI), %r8 + mov VCPU_R9 (%_ASM_DI), %r9 + mov VCPU_R10(%_ASM_DI), %r10 + mov VCPU_R11(%_ASM_DI), %r11 + mov VCPU_R12(%_ASM_DI), %r12 + mov VCPU_R13(%_ASM_DI), %r13 + mov VCPU_R14(%_ASM_DI), %r14 + mov VCPU_R15(%_ASM_DI), %r15 #endif - - /* "POP" @vmcb to RAX. */ - pop %_ASM_AX + mov VCPU_RDI(%_ASM_DI), %_ASM_DI /* Enter guest mode */ sti -1: vmrun %_ASM_AX - -2: cli - -#ifdef CONFIG_RETPOLINE - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE -#endif +3: vmrun %_ASM_AX +4: + cli - /* "POP" @regs to RAX. */ + /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX /* Save all guest registers. */ @@ -110,6 +194,26 @@ SYM_FUNC_START(__svm_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif + /* @svm can stay in RDI from now on. */ + mov %_ASM_AX, %_ASM_DI + + mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX +5: vmsave %_ASM_AX +6: + + /* Restores GSBASE among other things, allowing access to percpu data. */ + pop %_ASM_AX +7: vmload %_ASM_AX +8: + +#ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +#endif + + /* Clobbers RAX, RCX, RDX. */ + RESTORE_HOST_SPEC_CTRL + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -145,6 +249,9 @@ SYM_FUNC_START(__svm_vcpu_run) xor %r15d, %r15d #endif + /* "Pop" @spec_ctrl_intercepted. */ + pop %_ASM_BX + pop %_ASM_BX #ifdef CONFIG_X86_64 @@ -159,17 +266,33 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP RET -3: cmpb $0, kvm_rebooting + RESTORE_GUEST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY + +10: cmpb $0, kvm_rebooting jne 2b ud2 +30: cmpb $0, kvm_rebooting + jne 4b + ud2 +50: cmpb $0, kvm_rebooting + jne 6b + ud2 +70: cmpb $0, kvm_rebooting + jne 8b + ud2 - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE(1b, 10b) + _ASM_EXTABLE(3b, 30b) + _ASM_EXTABLE(5b, 50b) + _ASM_EXTABLE(7b, 70b) SYM_FUNC_END(__svm_vcpu_run) /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode - * @vmcb_pa: unsigned long + * @svm: struct vcpu_svm * + * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_sev_es_vcpu_run) push %_ASM_BP @@ -184,8 +307,31 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) #endif push %_ASM_BX - /* Move @vmcb to RAX. */ - mov %_ASM_ARG1, %_ASM_AX + /* + * Save variables needed after vmexit on the stack, in inverse + * order compared to when they are needed. + */ + + /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ + push %_ASM_ARG2 + + /* Save @svm. */ + push %_ASM_ARG1 + +.ifnc _ASM_ARG1, _ASM_DI + /* + * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX + * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. + */ + mov %_ASM_ARG1, %_ASM_DI +.endif + + /* Clobbers RAX, RCX, RDX. */ + RESTORE_GUEST_SPEC_CTRL + + /* Get svm->current_vmcb->pa into RAX. */ + mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX + mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX /* Enter guest mode */ sti @@ -194,11 +340,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) 2: cli + /* Pop @svm to RDI, guest registers have been saved already. */ + pop %_ASM_DI + #ifdef CONFIG_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif + /* Clobbers RAX, RCX, RDX. */ + RESTORE_HOST_SPEC_CTRL + /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be * untrained as soon as we exit the VM and are back to the @@ -208,6 +360,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) */ UNTRAIN_RET + /* "Pop" @spec_ctrl_intercepted. */ + pop %_ASM_BX + pop %_ASM_BX #ifdef CONFIG_X86_64 @@ -222,6 +377,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) pop %_ASM_BP RET + RESTORE_GUEST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY + 3: cmpb $0, kvm_rebooting jne 2b ud2 diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 25b70a85bef5..10b33da9bd05 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -617,7 +617,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu); - for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { + for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; @@ -643,7 +643,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmc *pmc = NULL; int i; - for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { + for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) { pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 8477d8bdd69c..0b5db4de4d09 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/linkage.h> #include <asm/asm.h> -#include <asm/asm-offsets.h> #include <asm/bitsperlong.h> #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> #include <asm/percpu.h> #include <asm/segment.h> +#include "kvm-asm-offsets.h" #include "run_flags.h" #define WORD_SIZE (BITS_PER_LONG / 8) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f5eb577d583..490ec23c8450 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1438,32 +1438,27 @@ static const u32 msrs_to_save_all[] = { MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, + MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, + + /* This part of MSRs should match KVM_INTEL_PMC_MAX_GENERIC. */ MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, - MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9, - MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11, - MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, - MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, - MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, - MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9, - MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11, - MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, - MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, - MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, - MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG, MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, + + /* This part of MSRs should match KVM_AMD_PMC_MAX_GENERIC. */ MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2, MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, + MSR_IA32_XFD, MSR_IA32_XFD_ERR, }; @@ -1562,7 +1557,7 @@ static const u32 msr_based_features_all[] = { MSR_IA32_VMX_EPT_VPID_CAP, MSR_IA32_VMX_VMFUNC, - MSR_F10H_DECFG, + MSR_AMD64_DE_CFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_PERF_CAPABILITIES, @@ -7041,14 +7036,14 @@ static void kvm_init_msr_list(void) intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) continue; break; - case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: + case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR_MAX: if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >= - min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) + min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) continue; break; - case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17: + case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL_MAX: if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= - min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) + min(KVM_INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp)) continue; break; case MSR_IA32_XFD: diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index f1bb18617156..24b48af27417 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -6,6 +6,7 @@ #include <linux/uaccess.h> #include <linux/export.h> +#include <linux/instrumented.h> #include <asm/tlbflush.h> @@ -44,7 +45,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) * called from other contexts. */ pagefault_disable(); + instrument_copy_from_user_before(to, from, n); ret = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, ret); pagefault_enable(); return ret; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 6b3033845c6d..5804bbae4f01 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -37,8 +37,12 @@ int pmd_huge(pmd_t pmd) */ int pud_huge(pud_t pud) { +#if CONFIG_PGTABLE_LEVELS > 2 return !pud_none(pud) && (pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT; +#else + return 0; +#endif } #ifdef CONFIG_HUGETLB_PAGE diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index cec5195602bc..36ffe67ad6e5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,7 +11,6 @@ #include <linux/bpf.h> #include <linux/memory.h> #include <linux/sort.h> -#include <linux/init.h> #include <asm/extable.h> #include <asm/set_memory.h> #include <asm/nospec-branch.h> @@ -389,18 +388,6 @@ out: return ret; } -int __init bpf_arch_init_dispatcher_early(void *ip) -{ - const u8 *nop_insn = x86_nops[5]; - - if (is_endbr(*(u32 *)ip)) - ip += ENDBR_INSN_SIZE; - - if (memcmp(ip, nop_insn, X86_PATCH_SIZE)) - text_poke_early(ip, nop_insn, X86_PATCH_SIZE); - return 0; -} - int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index bb176c72891c..4cd39f304e20 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -519,6 +519,7 @@ static void pm_save_spec_msr(void) MSR_TSX_FORCE_ABORT, MSR_IA32_MCU_OPT_CTRL, MSR_AMD64_LS_CFG, + MSR_AMD64_DE_CFG, }; msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f82857e48815..038da45f057a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -23,6 +23,7 @@ #include <linux/start_kernel.h> #include <linux/sched.h> #include <linux/kprobes.h> +#include <linux/kstrtox.h> #include <linux/memblock.h> #include <linux/export.h> #include <linux/mm.h> @@ -113,7 +114,7 @@ static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE); static int __init parse_xen_msr_safe(char *str) { if (str) - return strtobool(str, &xen_msr_safe); + return kstrtobool(str, &xen_msr_safe); return -EINVAL; } early_param("xen_msr_safe", parse_xen_msr_safe); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 4f4309500559..8db26f10fb1d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/sched.h> +#include <linux/kstrtox.h> #include <linux/mm.h> #include <linux/pm.h> #include <linux/memblock.h> @@ -85,7 +86,7 @@ static void __init xen_parse_512gb(void) arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit="); if (!arg) val = true; - else if (strtobool(arg + strlen("xen_512gb_limit="), &val)) + else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val)) return; xen_512gb_limit = val; |