30 files changed, 419 insertions, 156 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index edc070c6e19b..061ec93d9ecb 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8107,6 +8107,28 @@ KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM
                                     or moved memslot isn't reachable, i.e KVM
                                     _may_ invalidate only SPTEs related to the
                                     memslot.
+
+KVM_X86_QUIRK_STUFF_FEATURE_MSRS    By default, at vCPU creation, KVM sets the
+                                    vCPU's MSR_IA32_PERF_CAPABILITIES (0x345),
+                                    MSR_IA32_ARCH_CAPABILITIES (0x10a),
+                                    MSR_PLATFORM_INFO (0xce), and all VMX MSRs
+                                    (0x480..0x492) to the maximal capabilities
+                                    supported by KVM. KVM also sets
+                                    MSR_IA32_UCODE_REV (0x8b) to an arbitrary
+                                    value (which is different for Intel vs.
+                                    AMD). Lastly, when guest CPUID is set (by
+                                    userspace), KVM modifies select VMX MSR
+                                    fields to force consistency between guest
+                                    CPUID and L2's effective ISA. When this
+                                    quirk is disabled, KVM zeroes the vCPU's MSR
+                                    values (with two exceptions, see below),
+                                    i.e. treats the feature MSRs like CPUID
+                                    leaves and gives userspace full control of
+                                    the vCPU model definition. This quirk does
+                                    not affect VMX MSRs CR0/CR4_FIXED1 (0x487
+                                    and 0x489), as KVM does not allow them to
+                                    be set by userspace (KVM sets them based on
+                                    guest CPUID, for safety purposes).
 =================================== ============================================

 7.32 KVM_CAP_MAX_VCPU_ID
diff --git a/Documentation/virt/kvm/x86/errata.rst b/Documentation/virt/kvm/x86/errata.rst
index 4116045a8744..37c79362a48f 100644
--- a/Documentation/virt/kvm/x86/errata.rst
+++ b/Documentation/virt/kvm/x86/errata.rst
@@ -33,6 +33,18 @@ Note however that any software (e.g ``WIN87EM.DLL``) expecting these features
 to be present likely predates these CPUID feature bits, and therefore doesn't
 know to check for them anyway.

+``KVM_SET_VCPU_EVENTS`` issue
+-----------------------------
+
+Invalid KVM_SET_VCPU_EVENTS input with respect to error codes *may* result in
+failed VM-Entry on Intel CPUs. Pre-CET Intel CPUs require that exception
+injection through the VMCS correctly set the "error code valid" flag, e.g.
+require the flag be set when injecting a #GP, clear when injecting a #UD,
+clear when injecting a soft exception, etc. Intel CPUs that enumerate
+IA32_VMX_BASIC[56] as '1' relax VMX's consistency checks, and AMD CPUs have no
+restrictions whatsoever. KVM_SET_VCPU_EVENTS doesn't sanity check the vector
+versus "has_error_code", i.e. KVM's ABI follows AMD behavior.
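The quirk documented above is exercised entirely from userspace: it has to be disabled per VM through KVM_CAP_DISABLE_QUIRKS2 before any vCPU exists, after which the feature MSRs read back as zero and the VMM defines the vCPU model itself. The sketch below is illustrative only and is not part of this series; it assumes kernel headers new enough to define KVM_X86_QUIRK_STUFF_FEATURE_MSRS, spells MSR_PLATFORM_INFO (0xce) and its CPUID-fault bit as literals, and reduces error handling to bare exits.

/* Illustrative userspace sketch, not from this series. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_DISABLE_QUIRKS2,
		.args[0] = KVM_X86_QUIRK_STUFF_FEATURE_MSRS,
	};
	struct kvm_msrs *msrs;
	int kvm, vm, vcpu;

	kvm = open("/dev/kvm", O_RDWR);
	vm = ioctl(kvm, KVM_CREATE_VM, 0);
	if (kvm < 0 || vm < 0)
		exit(1);

	/* Bail if this kernel doesn't know about the quirk at all. */
	if (!(ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_DISABLE_QUIRKS2) &
	      KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
		exit(1);

	/* Disable the quirk *before* creating vCPUs. */
	if (ioctl(vm, KVM_ENABLE_CAP, &cap))
		exit(1);

	vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
	if (vcpu < 0)
		exit(1);

	/*
	 * With the quirk disabled, MSR_PLATFORM_INFO starts out as 0, so opt
	 * in to CPUID faulting explicitly instead of inheriting KVM's default.
	 */
	msrs = calloc(1, sizeof(*msrs) + sizeof(struct kvm_msr_entry));
	msrs->nmsrs = 1;
	msrs->entries[0].index = 0xce;       /* MSR_PLATFORM_INFO */
	msrs->entries[0].data = 1ULL << 31;  /* CPUID faulting supported */
	if (ioctl(vcpu, KVM_SET_MSRS, msrs) != 1)
		exit(1);

	printf("feature MSRs are under userspace control\n");
	return 0;
}

Ordering matters: KVM stuffs the feature MSRs at KVM_CREATE_VCPU time, so enabling the cap only after vCPUs have been created leaves the quirked defaults in place.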
+ Nested virtualization features ------------------------------ diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 861d080ed4c6..5aff7222e40f 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -34,6 +34,7 @@ KVM_X86_OP(set_msr) KVM_X86_OP(get_segment_base) KVM_X86_OP(get_segment) KVM_X86_OP(get_cpl) +KVM_X86_OP(get_cpl_no_cache) KVM_X86_OP(set_segment) KVM_X86_OP(get_cs_db_l_bits) KVM_X86_OP(is_valid_cr0) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 433e65974e2b..3e8afc82ae2f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1655,6 +1655,7 @@ struct kvm_x86_ops { void (*get_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int (*get_cpl)(struct kvm_vcpu *vcpu); + int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu); void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); @@ -2358,7 +2359,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ - KVM_X86_QUIRK_SLOT_ZAP_ALL) + KVM_X86_QUIRK_SLOT_ZAP_ALL | \ + KVM_X86_QUIRK_STUFF_FEATURE_MSRS) /* * KVM previously used a u32 field in kvm_run to indicate the hypercall was diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a8debbf2f702..88585c1de416 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -440,6 +440,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) +#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 41786b834b16..d695e7bc41ed 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -690,7 +690,9 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_TSC_ADJUST); kvm_cpu_cap_set(X86_FEATURE_ARCH_CAPABILITIES); - if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS)) + if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) && + boot_cpu_has(X86_FEATURE_AMD_IBPB) && + boot_cpu_has(X86_FEATURE_AMD_IBRS)) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL); if (boot_cpu_has(X86_FEATURE_STIBP)) kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP); @@ -755,7 +757,7 @@ void kvm_set_cpu_caps(void) F(CLZERO) | F(XSAVEERPTR) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) | - F(AMD_PSFD) + F(AMD_PSFD) | F(AMD_IBPB_RET) ); /* @@ -763,8 +765,12 @@ void kvm_set_cpu_caps(void) * arch/x86/kernel/cpu/bugs.c is kind enough to * record that in cpufeatures so use them. 
*/ - if (boot_cpu_has(X86_FEATURE_IBPB)) + if (boot_cpu_has(X86_FEATURE_IBPB)) { kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && + !boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) + kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB_RET); + } if (boot_cpu_has(X86_FEATURE_IBRS)) kvm_cpu_cap_set(X86_FEATURE_AMD_IBRS); if (boot_cpu_has(X86_FEATURE_STIBP)) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 41697cca354e..c8dc66eddefd 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -2,7 +2,6 @@ #ifndef ARCH_X86_KVM_CPUID_H #define ARCH_X86_KVM_CPUID_H -#include "x86.h" #include "reverse_cpuid.h" #include <asm/cpu.h> #include <asm/processor.h> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e72aed25d721..60986f67c35a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -651,9 +651,10 @@ static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt) } static inline bool emul_is_noncanonical_address(u64 la, - struct x86_emulate_ctxt *ctxt) + struct x86_emulate_ctxt *ctxt, + unsigned int flags) { - return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt)); + return !ctxt->ops->is_canonical_addr(ctxt, la, flags); } /* @@ -1733,7 +1734,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (ret != X86EMUL_CONTINUE) return ret; if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | - ((u64)base3 << 32), ctxt)) + ((u64)base3 << 32), ctxt, + X86EMUL_F_DT_LOAD)) return emulate_gp(ctxt, err_code); } @@ -2516,8 +2518,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ss_sel = cs_sel + 8; cs.d = 0; cs.l = 1; - if (emul_is_noncanonical_address(rcx, ctxt) || - emul_is_noncanonical_address(rdx, ctxt)) + if (emul_is_noncanonical_address(rcx, ctxt, 0) || + emul_is_noncanonical_address(rdx, ctxt, 0)) return emulate_gp(ctxt, 0); break; } @@ -3494,7 +3496,8 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) if (rc != X86EMUL_CONTINUE) return rc; if (ctxt->mode == X86EMUL_MODE_PROT64 && - emul_is_noncanonical_address(desc_ptr.address, ctxt)) + emul_is_noncanonical_address(desc_ptr.address, ctxt, + X86EMUL_F_DT_LOAD)) return emulate_gp(ctxt, 0); if (lgdt) ctxt->ops->set_gdt(ctxt, &desc_ptr); diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index b1eb46e26b2e..36a8786db291 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -44,6 +44,18 @@ BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif /* + * Using the register cache from interrupt context is generally not allowed, as + * caching a register and marking it available/dirty can't be done atomically, + * i.e. accesses from interrupt context may clobber state or read stale data if + * the vCPU task is in the process of updating the cache. The exception is if + * KVM is handling a PMI IRQ/NMI VM-Exit, as that bound code sequence doesn't + * touch the cache, it runs after the cache is reset (post VM-Exit), and PMIs + * need to access several registers that are cacheable. 
+ */ +#define kvm_assert_register_caching_allowed(vcpu) \ + lockdep_assert_once(in_task() || kvm_arch_pmi_in_guest(vcpu)) + +/* * avail dirty * 0 0 register in VMCS/VMCB * 0 1 *INVALID* @@ -53,24 +65,28 @@ BUILD_KVM_GPR_ACCESSORS(r15, R15) static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { + kvm_assert_register_caching_allowed(vcpu); return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, enum kvm_reg reg) { + kvm_assert_register_caching_allowed(vcpu); return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { + kvm_assert_register_caching_allowed(vcpu); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, enum kvm_reg reg) { + kvm_assert_register_caching_allowed(vcpu); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); } @@ -84,6 +100,7 @@ static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu *vcpu, enum kvm_reg reg) { + kvm_assert_register_caching_allowed(vcpu); return arch___test_and_set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); } diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 55a18e2f2dcd..10495fffb890 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -94,6 +94,8 @@ struct x86_instruction_info { #define X86EMUL_F_FETCH BIT(1) #define X86EMUL_F_IMPLICIT BIT(2) #define X86EMUL_F_INVLPG BIT(3) +#define X86EMUL_F_MSR BIT(4) +#define X86EMUL_F_DT_LOAD BIT(5) struct x86_emulate_ops { void (*vm_bugged)(struct x86_emulate_ctxt *ctxt); @@ -235,6 +237,9 @@ struct x86_emulate_ops { gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr, unsigned int flags); + + bool (*is_canonical_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr, + unsigned int flags); }; /* Type, address-of, and value of an instruction's operand. */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 65412640cfc7..59a64b703aad 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -382,7 +382,7 @@ enum { DIRTY }; -void kvm_recalculate_apic_map(struct kvm *kvm) +static void kvm_recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; struct kvm_vcpu *vcpu; @@ -2577,7 +2577,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) return (tpr & 0xf0) >> 4; } -void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) +static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value) { u64 old_value = vcpu->arch.apic_base; struct kvm_lapic *apic = vcpu->arch.apic; @@ -2625,6 +2625,31 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) } } +int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated) +{ + enum lapic_mode old_mode = kvm_get_apic_mode(vcpu); + enum lapic_mode new_mode = kvm_apic_mode(value); + + if (vcpu->arch.apic_base == value) + return 0; + + u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff | + (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 
0 : X2APIC_ENABLE); + + if ((value & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID) + return 1; + if (!host_initiated) { + if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC) + return 1; + if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC) + return 1; + } + + __kvm_apic_set_base(vcpu, value); + kvm_recalculate_apic_map(vcpu->kvm); + return 0; +} + void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -2716,7 +2741,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_reset_bsp(vcpu)) msr_val |= MSR_IA32_APICBASE_BSP; - kvm_lapic_set_base(vcpu, msr_val); + + /* + * Use the inner helper to avoid an extra recalcuation of the + * optimized APIC map if some other task has dirtied the map. + * The recalculation needed for this vCPU will be done after + * all APIC state has been initialized (see below). + */ + __kvm_apic_set_base(vcpu, msr_val); } if (!apic) @@ -3057,7 +3089,6 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_x86_call(apicv_pre_state_restore)(vcpu); - kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); /* set SPIV separately to get count of SW disabled APICs right */ apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1b8ef9856422..24add38beaf0 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -95,8 +95,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu); -void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); -void kvm_recalculate_apic_map(struct kvm *kvm); void kvm_apic_set_version(struct kvm_vcpu *vcpu); void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, @@ -117,11 +115,9 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); -u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); -int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); +int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated); int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); -enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); @@ -271,6 +267,11 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base) return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); } +static inline enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) +{ + return kvm_apic_mode(vcpu->arch.apic_base); +} + static inline u8 kvm_xapic_id(struct kvm_lapic *apic) { return kvm_lapic_get_reg(apic, APIC_ID) >> 24; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 9dc5dd43ae7f..e9322358678b 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -4,6 +4,7 @@ #include <linux/kvm_host.h> #include "kvm_cache_regs.h" +#include "x86.h" #include "cpuid.h" extern bool __read_mostly enable_mmio_caching; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 38128e7b9af1..d7b391fe2c23 
100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6179,7 +6179,7 @@ void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, /* It's actually a GPA for vcpu->arch.guest_mmu. */ if (mmu != &vcpu->arch.guest_mmu) { /* INVLPG on a non-canonical address is a NOP according to the SDM. */ - if (is_noncanonical_address(addr, vcpu)) + if (is_noncanonical_invlpg_address(addr, vcpu)) return; kvm_x86_call(flush_tlb_gva)(vcpu, addr); diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 05490b9d8a43..6f74e2b27c1e 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -19,6 +19,7 @@ #include <asm/mtrr.h> #include "cpuid.h" +#include "x86.h" static u64 *find_mtrr(struct kvm_vcpu *vcpu, unsigned int msr) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c1e29307826b..dd15cc635655 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1390,7 +1390,9 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu) svm_vcpu_init_msrpm(vcpu, svm->msrpm); svm_init_osvw(vcpu); - vcpu->arch.microcode_version = 0x01000065; + + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + vcpu->arch.microcode_version = 0x01000065; svm->tsc_ratio_msr = kvm_caps.default_tsc_scaling_ratio; svm->nmi_masked = false; @@ -5031,6 +5033,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .get_segment = svm_get_segment, .set_segment = svm_set_segment, .get_cpl = svm_get_cpl, + .get_cpl_no_cache = svm_get_cpl, .get_cs_db_l_bits = svm_get_cs_db_l_bits, .is_valid_cr0 = svm_is_valid_cr0, .set_cr0 = svm_set_cr0, diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c index fab6a1ad98dc..fa41d036acd4 100644 --- a/arch/x86/kvm/vmx/hyperv.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -4,6 +4,7 @@ #include <linux/errno.h> #include <linux/smp.h> +#include "x86.h" #include "../cpuid.h" #include "hyperv.h" #include "nested.h" diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 7668e2fb8043..92d35cc6cd15 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -50,6 +50,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .get_segment = vmx_get_segment, .set_segment = vmx_set_segment, .get_cpl = vmx_get_cpl, + .get_cpl_no_cache = vmx_get_cpl_no_cache, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, .is_valid_cr0 = vmx_is_valid_cr0, .set_cr0 = vmx_set_cr0, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 259fe445e695..746cb41c5b98 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7,6 +7,7 @@ #include <asm/debugreg.h> #include <asm/mmu_context.h> +#include "x86.h" #include "cpuid.h" #include "hyperv.h" #include "mmu.h" @@ -16,7 +17,6 @@ #include "sgx.h" #include "trace.h" #include "vmx.h" -#include "x86.h" #include "smm.h" static bool __read_mostly enable_shadow_vmcs = 1; @@ -2996,6 +2996,17 @@ static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu, return 0; } +static bool is_l1_noncanonical_address_on_vmexit(u64 la, struct vmcs12 *vmcs12) +{ + /* + * Check that the given linear address is canonical after a VM exit + * from L2, based on HOST_CR4.LA57 value that will be loaded for L1. + */ + u8 l1_address_bits_on_exit = (vmcs12->host_cr4 & X86_CR4_LA57) ? 
57 : 48; + + return !__is_canonical_address(la, l1_address_bits_on_exit); +} + static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -3006,8 +3017,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3))) return -EINVAL; - if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))) + if (CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || + CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_eip, vcpu))) return -EINVAL; if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && @@ -3041,12 +3052,12 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(vmcs12->host_ss_selector == 0 && !ia32e)) return -EINVAL; - if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) + if (CC(is_noncanonical_base_address(vmcs12->host_fs_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_gs_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_gdtr_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_idtr_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_tr_base, vcpu)) || + CC(is_l1_noncanonical_address_on_vmexit(vmcs12->host_rip, vmcs12))) return -EINVAL; /* @@ -3164,7 +3175,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && - (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || + (CC(is_noncanonical_msr_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))) return -EINVAL; @@ -5149,7 +5160,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, * non-canonical form. This is the only check on the memory * destination for long mode! */ - exn = is_noncanonical_address(*ret, vcpu); + exn = is_noncanonical_address(*ret, vcpu, 0); } else { /* * When not in long mode, the virtual/linear address is @@ -5954,7 +5965,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) * invalidation. 
*/ if (!operand.vpid || - is_noncanonical_address(operand.gla, vcpu)) + is_noncanonical_invlpg_address(operand.gla, vcpu)) return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); vpid_sync_vcpu_addr(vpid02, operand.gla); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 83382a4d1d66..9c9d4a336166 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -365,7 +365,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_IA32_DS_AREA: - if (is_noncanonical_address(data, vcpu)) + if (is_noncanonical_msr_address(data, vcpu)) return 1; pmu->ds_area = data; diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index a3c3d2a51f47..b352a3ba7354 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -4,12 +4,11 @@ #include <asm/sgx.h> -#include "cpuid.h" +#include "x86.h" #include "kvm_cache_regs.h" #include "nested.h" #include "sgx.h" #include "vmx.h" -#include "x86.h" bool __read_mostly enable_sgx = 1; module_param_named(sgx, enable_sgx, bool, 0444); @@ -38,7 +37,7 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset, fault = true; } else if (likely(is_64_bit_mode(vcpu))) { *gva = vmx_get_untagged_addr(vcpu, *gva, 0); - fault = is_noncanonical_address(*gva, vcpu); + fault = is_noncanonical_address(*gva, vcpu, 0); } else { *gva &= 0xffffffff; fault = (s.unusable) || diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b1bb64890cb2..6ed801ffe33f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2282,7 +2282,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) return 1; - if (is_noncanonical_address(data & PAGE_MASK, vcpu) || + if (is_noncanonical_msr_address(data & PAGE_MASK, vcpu) || (data & MSR_IA32_BNDCFGS_RSVD)) return 1; @@ -2447,7 +2447,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; if (index >= 2 * vmx->pt_desc.num_address_ranges) return 1; - if (is_noncanonical_address(data, vcpu)) + if (is_noncanonical_msr_address(data, vcpu)) return 1; if (index % 2) vmx->pt_desc.guest.addr_b[index / 2] = data; @@ -2455,8 +2455,6 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx->pt_desc.guest.addr_a[index / 2] = data; break; case MSR_IA32_PERF_CAPABILITIES: - if (data && !vcpu_to_pmu(vcpu)->version) - return 1; if (data & PMU_CAP_LBR_FMT) { if ((data & PMU_CAP_LBR_FMT) != (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT)) @@ -3567,16 +3565,29 @@ u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) return vmx_read_guest_seg_base(to_vmx(vcpu), seg); } -int vmx_get_cpl(struct kvm_vcpu *vcpu) +static int __vmx_get_cpl(struct kvm_vcpu *vcpu, bool no_cache) { struct vcpu_vmx *vmx = to_vmx(vcpu); + int ar; if (unlikely(vmx->rmode.vm86_active)) return 0; - else { - int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); - return VMX_AR_DPL(ar); - } + + if (no_cache) + ar = vmcs_read32(GUEST_SS_AR_BYTES); + else + ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); + return VMX_AR_DPL(ar); +} + +int vmx_get_cpl(struct kvm_vcpu *vcpu) +{ + return __vmx_get_cpl(vcpu, false); +} + +int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu) +{ + return __vmx_get_cpl(vcpu, true); } static u32 vmx_segment_access_rights(struct kvm_segment *var) @@ -4558,7 +4569,8 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, * Update the nested MSR 
settings so that a nested VMM can/can't set * controls for features that are/aren't exposed to the guest. */ - if (nested) { + if (nested && + kvm_check_has_quirk(vmx->vcpu.kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) { /* * All features that can be added or removed to VMX MSRs must * be supported in the first place for nested virtualization. @@ -4848,7 +4860,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) init_vmcs(vmx); - if (nested) + if (nested && + kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs)); vcpu_setup_sgx_lepubkeyhash(vcpu); @@ -4861,7 +4874,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID; #endif - vcpu->arch.microcode_version = 0x100000000ULL; + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + vcpu->arch.microcode_version = 0x100000000ULL; vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED; /* diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 40303b43da6c..43f573f6ca46 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -383,6 +383,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, unsigned long fs_base, unsigned long gs_base); int vmx_get_cpl(struct kvm_vcpu *vcpu); +int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu); bool vmx_emulation_required(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8507b300ff43..8637bc001096 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -451,6 +451,7 @@ static const u32 msr_based_features_all_except_vmx[] = { MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_PERF_CAPABILITIES, + MSR_PLATFORM_INFO, }; static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) + @@ -667,38 +668,6 @@ static void drop_user_return_notifiers(void) kvm_on_user_return(&msrs->urn); } -u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.apic_base; -} - -enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) -{ - return kvm_apic_mode(kvm_get_apic_base(vcpu)); -} -EXPORT_SYMBOL_GPL(kvm_get_apic_mode); - -int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -{ - enum lapic_mode old_mode = kvm_get_apic_mode(vcpu); - enum lapic_mode new_mode = kvm_apic_mode(msr_info->data); - u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff | - (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); - - if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID) - return 1; - if (!msr_info->host_initiated) { - if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC) - return 1; - if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC) - return 1; - } - - kvm_lapic_set_base(vcpu, msr_info->data); - kvm_recalculate_apic_map(vcpu->kvm); - return 0; -} - /* * Handle a fault on a hardware virtualization (VMX or SVM) instruction. 
* @@ -1854,7 +1823,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, case MSR_KERNEL_GS_BASE: case MSR_CSTAR: case MSR_LSTAR: - if (is_noncanonical_address(data, vcpu)) + if (is_noncanonical_msr_address(data, vcpu)) return 1; break; case MSR_IA32_SYSENTER_EIP: @@ -1871,7 +1840,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, * value, and that something deterministic happens if the guest * invokes 64-bit SYSENTER. */ - data = __canonical_address(data, vcpu_virt_addr_bits(vcpu)); + data = __canonical_address(data, max_host_virt_addr_bits()); break; case MSR_TSC_AUX: if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) @@ -2144,8 +2113,9 @@ EXPORT_SYMBOL_GPL(kvm_emulate_monitor); static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) { xfer_to_guest_mode_prepare(); - return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || - xfer_to_guest_mode_work_pending(); + + return READ_ONCE(vcpu->mode) == EXITING_GUEST_MODE || + kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); } /* @@ -3793,13 +3763,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.microcode_version = data; break; case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated) - return 1; + if (!msr_info->host_initiated || + !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + return KVM_MSR_RET_UNSUPPORTED; vcpu->arch.arch_capabilities = data; break; case MSR_IA32_PERF_CAPABILITIES: - if (!msr_info->host_initiated) - return 1; + if (!msr_info->host_initiated || + !guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + return KVM_MSR_RET_UNSUPPORTED; + if (data & ~kvm_caps.supported_perf_cap) return 1; @@ -3890,7 +3863,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_MTRRdefType: return kvm_mtrr_set_msr(vcpu, msr, data); case MSR_IA32_APICBASE: - return kvm_set_apic_base(vcpu, msr_info); + return kvm_apic_set_base(vcpu, data, msr_info->host_initiated); case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_write(vcpu, msr, data); case MSR_IA32_TSC_DEADLINE: @@ -4111,9 +4084,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.osvw.status = data; break; case MSR_PLATFORM_INFO: - if (!msr_info->host_initiated || - (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) && - cpuid_fault_enabled(vcpu))) + if (!msr_info->host_initiated) return 1; vcpu->arch.msr_platform_info = data; break; @@ -4252,15 +4223,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.microcode_version; break; case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) - return 1; + if (!guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + return KVM_MSR_RET_UNSUPPORTED; msr_info->data = vcpu->arch.arch_capabilities; break; case MSR_IA32_PERF_CAPABILITIES: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) - return 1; + if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + return KVM_MSR_RET_UNSUPPORTED; msr_info->data = vcpu->arch.perf_capabilities; break; case MSR_IA32_POWER_CTL: @@ -4314,7 +4283,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 1 << 24; break; case MSR_IA32_APICBASE: - msr_info->data = kvm_get_apic_base(vcpu); + msr_info->data = vcpu->arch.apic_base; break; case APIC_BASE_MSR ... 
APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); @@ -5094,7 +5063,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) int idx; if (vcpu->preempted) { - vcpu->arch.preempted_in_kernel = kvm_arch_vcpu_in_kernel(vcpu); + /* + * Assume protected guests are in-kernel. Inefficient yielding + * due to false positives is preferable to never yielding due + * to false negatives. + */ + vcpu->arch.preempted_in_kernel = vcpu->arch.guest_state_protected || + !kvm_x86_call(get_cpl_no_cache)(vcpu); /* * Take the srcu lock as memslots will be accessed to check the gfn @@ -8612,6 +8587,12 @@ static gva_t emulator_get_untagged_addr(struct x86_emulate_ctxt *ctxt, addr, flags); } +static bool emulator_is_canonical_addr(struct x86_emulate_ctxt *ctxt, + gva_t addr, unsigned int flags) +{ + return !is_noncanonical_address(addr, emul_to_vcpu(ctxt), flags); +} + static const struct x86_emulate_ops emulate_ops = { .vm_bugged = emulator_vm_bugged, .read_gpr = emulator_read_gpr, @@ -8658,6 +8639,7 @@ static const struct x86_emulate_ops emulate_ops = { .triple_fault = emulator_triple_fault, .set_xcr = emulator_set_xcr, .get_untagged_addr = emulator_get_untagged_addr, + .is_canonical_addr = emulator_is_canonical_addr, }; static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) @@ -10159,7 +10141,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->if_flag = kvm_x86_call(get_if_flag)(vcpu); kvm_run->cr8 = kvm_get_cr8(vcpu); - kvm_run->apic_base = kvm_get_apic_base(vcpu); + kvm_run->apic_base = vcpu->arch.apic_base; kvm_run->ready_for_interrupt_injection = pic_in_kernel(vcpu->kvm) || @@ -10576,8 +10558,8 @@ static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) * deleted if any vCPU has xAPIC virtualization and x2APIC enabled, but * and hardware doesn't support x2APIC virtualization. E.g. some AMD * CPUs support AVIC but not x2APIC. KVM still allows enabling AVIC in - * this case so that KVM can the AVIC doorbell to inject interrupts to - * running vCPUs, but KVM must not create SPTEs for the APIC base as + * this case so that KVM can use the AVIC doorbell to inject interrupts + * to running vCPUs, but KVM must not create SPTEs for the APIC base as * the vCPU would incorrectly be able to access the vAPIC page via MMIO * despite being in x2APIC mode. For simplicity, inhibiting the APIC * access page is sticky. @@ -10606,11 +10588,11 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, if (!!old != !!new) { /* * Kick all vCPUs before setting apicv_inhibit_reasons to avoid - * false positives in the sanity check WARN in svm_vcpu_run(). + * false positives in the sanity check WARN in vcpu_enter_guest(). * This task will wait for all vCPUs to ack the kick IRQ before * updating apicv_inhibit_reasons, and all other vCPUs will * block on acquiring apicv_update_lock so that vCPUs can't - * redo svm_vcpu_run() without seeing the new inhibit state. + * redo vcpu_enter_guest() without seeing the new inhibit state. 
* * Note, holding apicv_update_lock and taking it in the read * side (handling the request) also prevents other vCPUs from @@ -11711,7 +11693,7 @@ skip_protected_regs: sregs->cr4 = kvm_read_cr4(vcpu); sregs->cr8 = kvm_get_cr8(vcpu); sregs->efer = vcpu->arch.efer; - sregs->apic_base = kvm_get_apic_base(vcpu); + sregs->apic_base = vcpu->arch.apic_base; } static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) @@ -11888,16 +11870,13 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs, int *mmu_reset_needed, bool update_pdptrs) { - struct msr_data apic_base_msr; int idx; struct desc_ptr dt; if (!kvm_is_valid_sregs(vcpu, sregs)) return -EINVAL; - apic_base_msr.data = sregs->apic_base; - apic_base_msr.host_initiated = true; - if (kvm_set_apic_base(vcpu, &apic_base_msr)) + if (kvm_apic_set_base(vcpu, sregs->apic_base, true)) return -EINVAL; if (vcpu->arch.guest_state_protected) @@ -12299,7 +12278,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_async_pf_hash_reset(vcpu); - vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) { + vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); + vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; + vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; + } kvm_pmu_init(vcpu); vcpu->arch.pending_external_vector = -1; @@ -12313,8 +12296,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (r) goto free_guest_fpu; - vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); - vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; kvm_xen_init_vcpu(vcpu); vcpu_load(vcpu); kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz); @@ -13203,6 +13184,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { + WARN_ON_ONCE(!kvm_arch_pmi_in_guest(vcpu)); + if (vcpu->arch.guest_state_protected) return true; @@ -13211,6 +13194,11 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) { + WARN_ON_ONCE(!kvm_arch_pmi_in_guest(vcpu)); + + if (vcpu->arch.guest_state_protected) + return 0; + return kvm_rip_read(vcpu); } @@ -13726,7 +13714,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) * invalidation. */ if ((!pcid_enabled && (operand.pcid != 0)) || - is_noncanonical_address(operand.gla, vcpu)) { + is_noncanonical_invlpg_address(operand.gla, vcpu)) { kvm_inject_gp(vcpu, 0); return 1; } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a84c48ef5278..ec623d23d13d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -8,6 +8,7 @@ #include <asm/pvclock.h> #include "kvm_cache_regs.h" #include "kvm_emulate.h" +#include "cpuid.h" struct kvm_caps { /* control of guest tsc rate supported? */ @@ -233,9 +234,52 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) return kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 57 : 48; } -static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu) +static inline u8 max_host_virt_addr_bits(void) { - return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu)); + return kvm_cpu_cap_has(X86_FEATURE_LA57) ? 
57 : 48; +} + +/* + * x86 MSRs which contain linear addresses, x86 hidden segment bases, and + * IDT/GDT bases have static canonicality checks, the size of which depends + * only on the CPU's support for 5-level paging, rather than on the state of + * CR4.LA57. This applies to both WRMSR and to other instructions that set + * their values, e.g. SGDT. + * + * KVM passes through most of these MSRS and also doesn't intercept the + * instructions that set the hidden segment bases. + * + * Because of this, to be consistent with hardware, even if the guest doesn't + * have LA57 enabled in its CPUID, perform canonicality checks based on *host* + * support for 5 level paging. + * + * Finally, instructions which are related to MMU invalidation of a given + * linear address, also have a similar static canonical check on address. + * This allows for example to invalidate 5-level addresses of a guest from a + * host which uses 4-level paging. + */ +static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu, + unsigned int flags) +{ + if (flags & (X86EMUL_F_INVLPG | X86EMUL_F_MSR | X86EMUL_F_DT_LOAD)) + return !__is_canonical_address(la, max_host_virt_addr_bits()); + else + return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu)); +} + +static inline bool is_noncanonical_msr_address(u64 la, struct kvm_vcpu *vcpu) +{ + return is_noncanonical_address(la, vcpu, X86EMUL_F_MSR); +} + +static inline bool is_noncanonical_base_address(u64 la, struct kvm_vcpu *vcpu) +{ + return is_noncanonical_address(la, vcpu, X86EMUL_F_DT_LOAD); +} + +static inline bool is_noncanonical_invlpg_address(u64 la, struct kvm_vcpu *vcpu) +{ + return is_noncanonical_address(la, vcpu, X86EMUL_F_INVLPG); } static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 5e5e0b5aae60..01a000a41693 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -68,7 +68,7 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features +TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c new file mode 100644 index 000000000000..a72f13ae2edb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static bool is_kvm_controlled_msr(uint32_t msr) +{ + return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; +} + +/* + * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" + * MSR, and doesn't allow setting the hidden version. 
+ */ +static bool is_hidden_vmx_msr(uint32_t msr) +{ + switch (msr) { + case MSR_IA32_VMX_PINBASED_CTLS: + case MSR_IA32_VMX_PROCBASED_CTLS: + case MSR_IA32_VMX_EXIT_CTLS: + case MSR_IA32_VMX_ENTRY_CTLS: + return true; + default: + return false; + } +} + +static bool is_quirked_msr(uint32_t msr) +{ + return msr != MSR_AMD64_DE_CFG; +} + +static void test_feature_msr(uint32_t msr) +{ + const uint64_t supported_mask = kvm_get_feature_msr(msr); + uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* + * Don't bother testing KVM-controlled MSRs beyond verifying that the + * MSR can be read from userspace. Any value is effectively legal, as + * KVM is bound by x86 architecture, not by ABI. + */ + if (is_kvm_controlled_msr(msr)) + return; + + /* + * More goofy behavior. KVM reports the host CPU's actual revision ID, + * but initializes the vCPU's revision ID to an arbitrary value. + */ + if (msr == MSR_IA32_UCODE_REV) + reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065; + + /* + * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the + * full set of features supported by KVM. For non-quirked MSRs, and + * when the quirk is disabled, KVM must zero-initialize the MSR and let + * userspace do the configuration. + */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value, + "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx", + reset_value, is_quirked_msr(msr) ? "" : "non-", msr, + vcpu_get_msr(vcpu, msr)); + if (!is_hidden_vmx_msr(msr)) + vcpu_set_msr(vcpu, msr, supported_mask); + kvm_vm_free(vm); + + if (is_hidden_vmx_msr(msr)) + return; + + if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) || + !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + return; + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_get_msr(vcpu, msr), + "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx", + msr, vcpu_get_msr(vcpu, msr)); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + const struct kvm_msr_list *feature_list; + int i; + + /* + * Skip the entire test if MSR_FEATURES isn't supported, other tests + * will cover the "regular" list of MSRs, the coverage here is purely + * opportunistic and not interesting on its own. + */ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); + + (void)kvm_get_msr_index_list(); + + feature_list = kvm_get_feature_msr_index_list(); + for (i = 0; i < feature_list->nmsrs; i++) + test_feature_msr(feature_list->indices[i]); +} diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c deleted file mode 100644 index d09b3cbcadc6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test that KVM_GET_MSR_INDEX_LIST and - * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended - * - * Copyright (C) 2020, Red Hat, Inc. 
- */ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -int main(int argc, char *argv[]) -{ - const struct kvm_msr_list *feature_list; - int i; - - /* - * Skip the entire test if MSR_FEATURES isn't supported, other tests - * will cover the "regular" list of MSRs, the coverage here is purely - * opportunistic and not interesting on its own. - */ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); - - (void)kvm_get_msr_index_list(); - - feature_list = kvm_get_feature_msr_index_list(); - for (i = 0; i < feature_list->nmsrs; i++) - kvm_get_feature_msr(feature_list->indices[i]); -} diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index eda88080c186..9cbf283ebc55 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -72,8 +72,6 @@ int main(int argc, char *argv[]) } done: - vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info); - kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 7c92536551cc..a1f5ff45d518 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -207,6 +207,29 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code) TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU"); } +KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) +{ + uint64_t val; + int i, r; + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, host_cap.capabilities); + + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM); + + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, 0); + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + + for (i = 0; i < 64; i++) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i)); + TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM", + i, BIT_ULL(i)); + } +} + int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_is_pmu_enabled()); |
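Much of the churn in this series replaces the single is_noncanonical_address() helper with flag-qualified variants: linear addresses headed into MSRs, hidden segment or descriptor-table bases, and INVLPG/INVVPID/INVPCID targets are checked against the host's maximum linear-address width (57 bits when the host supports LA57, else 48), while ordinary emulated accesses keep following the guest's CR4.LA57, per the comment block added to x86.h. The fragment below is a standalone userspace illustration of that selection logic, not the kernel helpers; the flag values mirror the X86EMUL_F_* bits added in kvm_emulate.h and the sign-extension test mirrors __is_canonical_address().

/* Standalone illustration; mirrors, but is not, the kernel's helpers. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define F_INVLPG   (1u << 3)	/* mirrors X86EMUL_F_INVLPG */
#define F_MSR      (1u << 4)	/* mirrors X86EMUL_F_MSR */
#define F_DT_LOAD  (1u << 5)	/* mirrors X86EMUL_F_DT_LOAD */

/* Canonical <=> bits 63:vaddr_bits are a sign extension of bit vaddr_bits-1. */
static bool is_canonical(uint64_t la, unsigned int vaddr_bits)
{
	return ((int64_t)(la << (64 - vaddr_bits)) >> (64 - vaddr_bits)) == (int64_t)la;
}

static bool is_noncanonical(uint64_t la, bool guest_la57, bool host_la57,
			    unsigned int flags)
{
	/*
	 * MSR values, segment/descriptor-table bases and TLB-invalidation
	 * targets get a static check sized by the host's paging capability;
	 * everything else follows the guest's CR4.LA57.
	 */
	if (flags & (F_MSR | F_DT_LOAD | F_INVLPG))
		return !is_canonical(la, host_la57 ? 57 : 48);

	return !is_canonical(la, guest_la57 ? 57 : 48);
}

int main(void)
{
	/* Canonical under 57-bit rules, non-canonical under 48-bit rules. */
	uint64_t la = 0x0000800000000000ULL;

	printf("%d\n", is_noncanonical(la, false, true, 0));      /* 1: data access */
	printf("%d\n", is_noncanonical(la, false, true, F_MSR));  /* 0: MSR write  */
	return 0;
}

Run as-is it prints 1 then 0: with a 4-level guest on a 5-level host, the example address is rejected as a normal data access yet accepted when written to a base-address MSR, which is the hardware behavior the new helpers are meant to match.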