-rw-r--r--  Documentation/virt/kvm/api.rst                                |  22
-rw-r--r--  Documentation/virt/kvm/x86/errata.rst                         |  12
-rw-r--r--  arch/x86/include/asm/kvm-x86-ops.h                            |   1
-rw-r--r--  arch/x86/include/asm/kvm_host.h                               |   4
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h                               |   1
-rw-r--r--  arch/x86/kvm/cpuid.c                                          |  12
-rw-r--r--  arch/x86/kvm/cpuid.h                                          |   1
-rw-r--r--  arch/x86/kvm/emulate.c                                        |  15
-rw-r--r--  arch/x86/kvm/kvm_cache_regs.h                                 |  17
-rw-r--r--  arch/x86/kvm/kvm_emulate.h                                    |   5
-rw-r--r--  arch/x86/kvm/lapic.c                                          |  39
-rw-r--r--  arch/x86/kvm/lapic.h                                          |  11
-rw-r--r--  arch/x86/kvm/mmu.h                                            |   1
-rw-r--r--  arch/x86/kvm/mmu/mmu.c                                        |   2
-rw-r--r--  arch/x86/kvm/mtrr.c                                           |   1
-rw-r--r--  arch/x86/kvm/svm/svm.c                                        |   5
-rw-r--r--  arch/x86/kvm/vmx/hyperv.c                                     |   1
-rw-r--r--  arch/x86/kvm/vmx/main.c                                       |   1
-rw-r--r--  arch/x86/kvm/vmx/nested.c                                     |  35
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c                                  |   2
-rw-r--r--  arch/x86/kvm/vmx/sgx.c                                        |   5
-rw-r--r--  arch/x86/kvm/vmx/vmx.c                                        |  38
-rw-r--r--  arch/x86/kvm/vmx/vmx.h                                        |   1
-rw-r--r--  arch/x86/kvm/x86.c                                            | 120
-rw-r--r--  arch/x86/kvm/x86.h                                            |  48
-rw-r--r--  tools/testing/selftests/kvm/Makefile                          |   2
-rw-r--r--  tools/testing/selftests/kvm/x86_64/feature_msrs_test.c        | 113
-rw-r--r--  tools/testing/selftests/kvm/x86_64/get_msr_index_features.c   |  35
-rw-r--r--  tools/testing/selftests/kvm/x86_64/platform_info_test.c       |   2
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c        |  23
30 files changed, 419 insertions, 156 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index edc070c6e19b..061ec93d9ecb 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8107,6 +8107,28 @@ KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM
or moved memslot isn't reachable, i.e KVM
_may_ invalidate only SPTEs related to the
memslot.
+
+KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the
+ vCPU's MSR_IA32_PERF_CAPABILITIES (0x345),
+ MSR_IA32_ARCH_CAPABILITIES (0x10a),
+ MSR_PLATFORM_INFO (0xce), and all VMX MSRs
+ (0x480..0x492) to the maximal capabilities
+ supported by KVM. KVM also sets
+ MSR_IA32_UCODE_REV (0x8b) to an arbitrary
+ value (which is different for Intel vs.
+ AMD). Lastly, when guest CPUID is set (by
+ userspace), KVM modifies select VMX MSR
+ fields to force consistency between guest
+ CPUID and L2's effective ISA. When this
+ quirk is disabled, KVM zeroes the vCPU's MSR
+ values (with two exceptions, see below),
+ i.e. treats the feature MSRs like CPUID
+ leaves and gives userspace full control of
+ the vCPU model definition. This quirk does
+ not affect VMX MSRs CR0/CR4_FIXED1 (0x487
and 0x489), as KVM does not allow them to
+ be set by userspace (KVM sets them based on
+ guest CPUID, for safety purposes).
=================================== ============================================
7.32 KVM_CAP_MAX_VCPU_ID
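
For illustration only (this is not part of the patch): a userspace VMM opts out of
the quirk documented above by disabling it via KVM_CAP_DISABLE_QUIRKS2 before any
vCPU is created, exactly as the new selftest further down does through the selftest
wrappers. A minimal raw-ioctl sketch follows; the vm_fd handle and helper name are
assumptions, and KVM_X86_QUIRK_STUFF_FEATURE_MSRS requires UAPI headers that contain
this series.

/*
 * Hypothetical helper: disable KVM_X86_QUIRK_STUFF_FEATURE_MSRS on a VM so
 * that feature MSRs reset to 0 and userspace fully defines the vCPU model.
 */
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int disable_feature_msr_stuffing(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_DISABLE_QUIRKS2,
		.args[0] = KVM_X86_QUIRK_STUFF_FEATURE_MSRS,
	};
	/* KVM_CHECK_EXTENSION reports which quirks this kernel can disable. */
	int supported = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_DISABLE_QUIRKS2);

	if (supported <= 0 || !(supported & KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
		return -ENOTTY;

	/* Disable the quirk before KVM_CREATE_VCPU so the zeroed reset values apply. */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

With the quirk disabled, userspace then sets MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES, MSR_PLATFORM_INFO and the VMX MSRs explicitly via
KVM_SET_MSRS, within the values advertised by KVM_GET_MSR_FEATURE_INDEX_LIST.
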
diff --git a/Documentation/virt/kvm/x86/errata.rst b/Documentation/virt/kvm/x86/errata.rst
index 4116045a8744..37c79362a48f 100644
--- a/Documentation/virt/kvm/x86/errata.rst
+++ b/Documentation/virt/kvm/x86/errata.rst
@@ -33,6 +33,18 @@ Note however that any software (e.g ``WIN87EM.DLL``) expecting these features
to be present likely predates these CPUID feature bits, and therefore
doesn't know to check for them anyway.
+``KVM_SET_VCPU_EVENTS`` issue
+-----------------------------
+
+Invalid KVM_SET_VCPU_EVENTS input with respect to error codes *may* result in
+failed VM-Entry on Intel CPUs. Pre-CET Intel CPUs require that exception
+injection through the VMCS correctly set the "error code valid" flag, e.g.
+require the flag be set when injecting a #GP, clear when injecting a #UD,
+clear when injecting a soft exception, etc. Intel CPUs that enumerate
+IA32_VMX_BASIC[56] as '1' relax VMX's consistency checks, and AMD CPUs have no
+restrictions whatsoever. KVM_SET_VCPU_EVENTS doesn't sanity check the vector
+versus "has_error_code", i.e. KVM's ABI follows AMD behavior.
+
Nested virtualization features
------------------------------
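
As a concrete illustration of the consistency the KVM_SET_VCPU_EVENTS erratum above
asks userspace to maintain (not part of this patch; the helper and vector table are
assumptions built on the documented kvm_vcpu_events UAPI): derive "has_error_code"
from the injected vector instead of passing it through blindly.

/*
 * Hypothetical VMM-side helper: request exception injection while keeping
 * has_error_code consistent with the vector, per the erratum above.
 */
#include <stdbool.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static bool vector_has_error_code(__u8 vector)
{
	switch (vector) {
	case 8:  /* #DF */
	case 10: /* #TS */
	case 11: /* #NP */
	case 12: /* #SS */
	case 13: /* #GP */
	case 14: /* #PF */
	case 17: /* #AC */
	case 21: /* #CP, only exists on CET-capable CPUs */
		return true;
	default:
		return false;
	}
}

static int inject_exception(int vcpu_fd, __u8 vector, __u32 error_code)
{
	struct kvm_vcpu_events events;
	int r = ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events);

	if (r)
		return r;

	/* Read/modify/write so unrelated event state is preserved. */
	events.exception.injected = 1;
	events.exception.nr = vector;
	events.exception.has_error_code = vector_has_error_code(vector);
	events.exception.error_code = error_code;

	return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}
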
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 861d080ed4c6..5aff7222e40f 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -34,6 +34,7 @@ KVM_X86_OP(set_msr)
KVM_X86_OP(get_segment_base)
KVM_X86_OP(get_segment)
KVM_X86_OP(get_cpl)
+KVM_X86_OP(get_cpl_no_cache)
KVM_X86_OP(set_segment)
KVM_X86_OP(get_cs_db_l_bits)
KVM_X86_OP(is_valid_cr0)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 433e65974e2b..3e8afc82ae2f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1655,6 +1655,7 @@ struct kvm_x86_ops {
void (*get_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
int (*get_cpl)(struct kvm_vcpu *vcpu);
+ int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu);
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
@@ -2358,7 +2359,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \
- KVM_X86_QUIRK_SLOT_ZAP_ALL)
+ KVM_X86_QUIRK_SLOT_ZAP_ALL | \
+ KVM_X86_QUIRK_STUFF_FEATURE_MSRS)
/*
* KVM previously used a u32 field in kvm_run to indicate the hypercall was
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a8debbf2f702..88585c1de416 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -440,6 +440,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
+#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 41786b834b16..d695e7bc41ed 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -690,7 +690,9 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_TSC_ADJUST);
kvm_cpu_cap_set(X86_FEATURE_ARCH_CAPABILITIES);
- if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
+ if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) &&
+ boot_cpu_has(X86_FEATURE_AMD_IBPB) &&
+ boot_cpu_has(X86_FEATURE_AMD_IBRS))
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL);
if (boot_cpu_has(X86_FEATURE_STIBP))
kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP);
@@ -755,7 +757,7 @@ void kvm_set_cpu_caps(void)
F(CLZERO) | F(XSAVEERPTR) |
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
- F(AMD_PSFD)
+ F(AMD_PSFD) | F(AMD_IBPB_RET)
);
/*
@@ -763,8 +765,12 @@ void kvm_set_cpu_caps(void)
* arch/x86/kernel/cpu/bugs.c is kind enough to
* record that in cpufeatures so use them.
*/
- if (boot_cpu_has(X86_FEATURE_IBPB))
+ if (boot_cpu_has(X86_FEATURE_IBPB)) {
kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB);
+ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
+ !boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB))
+ kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB_RET);
+ }
if (boot_cpu_has(X86_FEATURE_IBRS))
kvm_cpu_cap_set(X86_FEATURE_AMD_IBRS);
if (boot_cpu_has(X86_FEATURE_STIBP))
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 41697cca354e..c8dc66eddefd 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -2,7 +2,6 @@
#ifndef ARCH_X86_KVM_CPUID_H
#define ARCH_X86_KVM_CPUID_H
-#include "x86.h"
#include "reverse_cpuid.h"
#include <asm/cpu.h>
#include <asm/processor.h>
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e72aed25d721..60986f67c35a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -651,9 +651,10 @@ static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
}
static inline bool emul_is_noncanonical_address(u64 la,
- struct x86_emulate_ctxt *ctxt)
+ struct x86_emulate_ctxt *ctxt,
+ unsigned int flags)
{
- return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
+ return !ctxt->ops->is_canonical_addr(ctxt, la, flags);
}
/*
@@ -1733,7 +1734,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (ret != X86EMUL_CONTINUE)
return ret;
if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
- ((u64)base3 << 32), ctxt))
+ ((u64)base3 << 32), ctxt,
+ X86EMUL_F_DT_LOAD))
return emulate_gp(ctxt, err_code);
}
@@ -2516,8 +2518,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ss_sel = cs_sel + 8;
cs.d = 0;
cs.l = 1;
- if (emul_is_noncanonical_address(rcx, ctxt) ||
- emul_is_noncanonical_address(rdx, ctxt))
+ if (emul_is_noncanonical_address(rcx, ctxt, 0) ||
+ emul_is_noncanonical_address(rdx, ctxt, 0))
return emulate_gp(ctxt, 0);
break;
}
@@ -3494,7 +3496,8 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
if (rc != X86EMUL_CONTINUE)
return rc;
if (ctxt->mode == X86EMUL_MODE_PROT64 &&
- emul_is_noncanonical_address(desc_ptr.address, ctxt))
+ emul_is_noncanonical_address(desc_ptr.address, ctxt,
+ X86EMUL_F_DT_LOAD))
return emulate_gp(ctxt, 0);
if (lgdt)
ctxt->ops->set_gdt(ctxt, &desc_ptr);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index b1eb46e26b2e..36a8786db291 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -44,6 +44,18 @@ BUILD_KVM_GPR_ACCESSORS(r15, R15)
#endif
/*
+ * Using the register cache from interrupt context is generally not allowed, as
+ * caching a register and marking it available/dirty can't be done atomically,
+ * i.e. accesses from interrupt context may clobber state or read stale data if
+ * the vCPU task is in the process of updating the cache. The exception is if
* KVM is handling a PMI IRQ/NMI VM-Exit, as that bounded code sequence doesn't
+ * touch the cache, it runs after the cache is reset (post VM-Exit), and PMIs
+ * need to access several registers that are cacheable.
+ */
+#define kvm_assert_register_caching_allowed(vcpu) \
+ lockdep_assert_once(in_task() || kvm_arch_pmi_in_guest(vcpu))
+
+/*
* avail dirty
* 0 0 register in VMCS/VMCB
* 0 1 *INVALID*
@@ -53,24 +65,28 @@ BUILD_KVM_GPR_ACCESSORS(r15, R15)
static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
+ kvm_assert_register_caching_allowed(vcpu);
return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}
static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
+ kvm_assert_register_caching_allowed(vcpu);
return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
}
static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
+ kvm_assert_register_caching_allowed(vcpu);
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}
static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
+ kvm_assert_register_caching_allowed(vcpu);
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
}
@@ -84,6 +100,7 @@ static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
+ kvm_assert_register_caching_allowed(vcpu);
return arch___test_and_set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 55a18e2f2dcd..10495fffb890 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -94,6 +94,8 @@ struct x86_instruction_info {
#define X86EMUL_F_FETCH BIT(1)
#define X86EMUL_F_IMPLICIT BIT(2)
#define X86EMUL_F_INVLPG BIT(3)
+#define X86EMUL_F_MSR BIT(4)
+#define X86EMUL_F_DT_LOAD BIT(5)
struct x86_emulate_ops {
void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
@@ -235,6 +237,9 @@ struct x86_emulate_ops {
gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
unsigned int flags);
+
+ bool (*is_canonical_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
+ unsigned int flags);
};
/* Type, address-of, and value of an instruction's operand. */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 65412640cfc7..59a64b703aad 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -382,7 +382,7 @@ enum {
DIRTY
};
-void kvm_recalculate_apic_map(struct kvm *kvm)
+static void kvm_recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
struct kvm_vcpu *vcpu;
@@ -2577,7 +2577,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
return (tpr & 0xf0) >> 4;
}
-void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
+static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
u64 old_value = vcpu->arch.apic_base;
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -2625,6 +2625,31 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
}
}
+int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated)
+{
+ enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
+ enum lapic_mode new_mode = kvm_apic_mode(value);
+
+ if (vcpu->arch.apic_base == value)
+ return 0;
+
+ u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
+ (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
+
+ if ((value & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
+ return 1;
+ if (!host_initiated) {
+ if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
+ return 1;
+ if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
+ return 1;
+ }
+
+ __kvm_apic_set_base(vcpu, value);
+ kvm_recalculate_apic_map(vcpu->kvm);
+ return 0;
+}
+
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -2716,7 +2741,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_reset_bsp(vcpu))
msr_val |= MSR_IA32_APICBASE_BSP;
- kvm_lapic_set_base(vcpu, msr_val);
+
+ /*
* Use the inner helper to avoid an extra recalculation of the
+ * optimized APIC map if some other task has dirtied the map.
+ * The recalculation needed for this vCPU will be done after
+ * all APIC state has been initialized (see below).
+ */
+ __kvm_apic_set_base(vcpu, msr_val);
}
if (!apic)
@@ -3057,7 +3089,6 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_x86_call(apicv_pre_state_restore)(vcpu);
- kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
/* set SPIV separately to get count of SW disabled APICs right */
apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 1b8ef9856422..24add38beaf0 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -95,8 +95,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
-void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
-void kvm_recalculate_apic_map(struct kvm *kvm);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu);
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
@@ -117,11 +115,9 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high);
-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
-int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
+int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated);
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
-enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
@@ -271,6 +267,11 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
}
+static inline enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
+{
+ return kvm_apic_mode(vcpu->arch.apic_base);
+}
+
static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
{
return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 9dc5dd43ae7f..e9322358678b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -4,6 +4,7 @@
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
+#include "x86.h"
#include "cpuid.h"
extern bool __read_mostly enable_mmio_caching;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 38128e7b9af1..d7b391fe2c23 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6179,7 +6179,7 @@ void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
/* It's actually a GPA for vcpu->arch.guest_mmu. */
if (mmu != &vcpu->arch.guest_mmu) {
/* INVLPG on a non-canonical address is a NOP according to the SDM. */
- if (is_noncanonical_address(addr, vcpu))
+ if (is_noncanonical_invlpg_address(addr, vcpu))
return;
kvm_x86_call(flush_tlb_gva)(vcpu, addr);
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 05490b9d8a43..6f74e2b27c1e 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -19,6 +19,7 @@
#include <asm/mtrr.h>
#include "cpuid.h"
+#include "x86.h"
static u64 *find_mtrr(struct kvm_vcpu *vcpu, unsigned int msr)
{
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c1e29307826b..dd15cc635655 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1390,7 +1390,9 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
svm_vcpu_init_msrpm(vcpu, svm->msrpm);
svm_init_osvw(vcpu);
- vcpu->arch.microcode_version = 0x01000065;
+
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
+ vcpu->arch.microcode_version = 0x01000065;
svm->tsc_ratio_msr = kvm_caps.default_tsc_scaling_ratio;
svm->nmi_masked = false;
@@ -5031,6 +5033,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_segment = svm_get_segment,
.set_segment = svm_set_segment,
.get_cpl = svm_get_cpl,
+ .get_cpl_no_cache = svm_get_cpl,
.get_cs_db_l_bits = svm_get_cs_db_l_bits,
.is_valid_cr0 = svm_is_valid_cr0,
.set_cr0 = svm_set_cr0,
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c
index fab6a1ad98dc..fa41d036acd4 100644
--- a/arch/x86/kvm/vmx/hyperv.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -4,6 +4,7 @@
#include <linux/errno.h>
#include <linux/smp.h>
+#include "x86.h"
#include "../cpuid.h"
#include "hyperv.h"
#include "nested.h"
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 7668e2fb8043..92d35cc6cd15 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -50,6 +50,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.get_segment = vmx_get_segment,
.set_segment = vmx_set_segment,
.get_cpl = vmx_get_cpl,
+ .get_cpl_no_cache = vmx_get_cpl_no_cache,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.is_valid_cr0 = vmx_is_valid_cr0,
.set_cr0 = vmx_set_cr0,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 259fe445e695..746cb41c5b98 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -7,6 +7,7 @@
#include <asm/debugreg.h>
#include <asm/mmu_context.h>
+#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"
#include "mmu.h"
@@ -16,7 +17,6 @@
#include "sgx.h"
#include "trace.h"
#include "vmx.h"
-#include "x86.h"
#include "smm.h"
static bool __read_mostly enable_shadow_vmcs = 1;
@@ -2996,6 +2996,17 @@ static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
return 0;
}
+static bool is_l1_noncanonical_address_on_vmexit(u64 la, struct vmcs12 *vmcs12)
+{
+ /*
+ * Check that the given linear address is canonical after a VM exit
+ * from L2, based on HOST_CR4.LA57 value that will be loaded for L1.
+ */
+ u8 l1_address_bits_on_exit = (vmcs12->host_cr4 & X86_CR4_LA57) ? 57 : 48;
+
+ return !__is_canonical_address(la, l1_address_bits_on_exit);
+}
+
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -3006,8 +3017,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3)))
return -EINVAL;
- if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
+ if (CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
+ CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
return -EINVAL;
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
@@ -3041,12 +3052,12 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
CC(vmcs12->host_ss_selector == 0 && !ia32e))
return -EINVAL;
- if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
+ if (CC(is_noncanonical_base_address(vmcs12->host_fs_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_gs_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_gdtr_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_idtr_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_tr_base, vcpu)) ||
+ CC(is_l1_noncanonical_address_on_vmexit(vmcs12->host_rip, vmcs12)))
return -EINVAL;
/*
@@ -3164,7 +3175,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
- (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
+ (CC(is_noncanonical_msr_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
return -EINVAL;
@@ -5149,7 +5160,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
* non-canonical form. This is the only check on the memory
* destination for long mode!
*/
- exn = is_noncanonical_address(*ret, vcpu);
+ exn = is_noncanonical_address(*ret, vcpu, 0);
} else {
/*
* When not in long mode, the virtual/linear address is
@@ -5954,7 +5965,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
* invalidation.
*/
if (!operand.vpid ||
- is_noncanonical_address(operand.gla, vcpu))
+ is_noncanonical_invlpg_address(operand.gla, vcpu))
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
vpid_sync_vcpu_addr(vpid02, operand.gla);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 83382a4d1d66..9c9d4a336166 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -365,7 +365,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_IA32_DS_AREA:
- if (is_noncanonical_address(data, vcpu))
+ if (is_noncanonical_msr_address(data, vcpu))
return 1;
pmu->ds_area = data;
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index a3c3d2a51f47..b352a3ba7354 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -4,12 +4,11 @@
#include <asm/sgx.h>
-#include "cpuid.h"
+#include "x86.h"
#include "kvm_cache_regs.h"
#include "nested.h"
#include "sgx.h"
#include "vmx.h"
-#include "x86.h"
bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);
@@ -38,7 +37,7 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
fault = true;
} else if (likely(is_64_bit_mode(vcpu))) {
*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
- fault = is_noncanonical_address(*gva, vcpu);
+ fault = is_noncanonical_address(*gva, vcpu, 0);
} else {
*gva &= 0xffffffff;
fault = (s.unusable) ||
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b1bb64890cb2..6ed801ffe33f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2282,7 +2282,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
(!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
return 1;
- if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
+ if (is_noncanonical_msr_address(data & PAGE_MASK, vcpu) ||
(data & MSR_IA32_BNDCFGS_RSVD))
return 1;
@@ -2447,7 +2447,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
if (index >= 2 * vmx->pt_desc.num_address_ranges)
return 1;
- if (is_noncanonical_address(data, vcpu))
+ if (is_noncanonical_msr_address(data, vcpu))
return 1;
if (index % 2)
vmx->pt_desc.guest.addr_b[index / 2] = data;
@@ -2455,8 +2455,6 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->pt_desc.guest.addr_a[index / 2] = data;
break;
case MSR_IA32_PERF_CAPABILITIES:
- if (data && !vcpu_to_pmu(vcpu)->version)
- return 1;
if (data & PMU_CAP_LBR_FMT) {
if ((data & PMU_CAP_LBR_FMT) !=
(kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT))
@@ -3567,16 +3565,29 @@ u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
}
-int vmx_get_cpl(struct kvm_vcpu *vcpu)
+static int __vmx_get_cpl(struct kvm_vcpu *vcpu, bool no_cache)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int ar;
if (unlikely(vmx->rmode.vm86_active))
return 0;
- else {
- int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
- return VMX_AR_DPL(ar);
- }
+
+ if (no_cache)
+ ar = vmcs_read32(GUEST_SS_AR_BYTES);
+ else
+ ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
+ return VMX_AR_DPL(ar);
+}
+
+int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+ return __vmx_get_cpl(vcpu, false);
+}
+
+int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu)
+{
+ return __vmx_get_cpl(vcpu, true);
}
static u32 vmx_segment_access_rights(struct kvm_segment *var)
@@ -4558,7 +4569,8 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
* Update the nested MSR settings so that a nested VMM can/can't set
* controls for features that are/aren't exposed to the guest.
*/
- if (nested) {
+ if (nested &&
+ kvm_check_has_quirk(vmx->vcpu.kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
/*
* All features that can be added or removed to VMX MSRs must
* be supported in the first place for nested virtualization.
@@ -4848,7 +4860,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
init_vmcs(vmx);
- if (nested)
+ if (nested &&
+ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs));
vcpu_setup_sgx_lepubkeyhash(vcpu);
@@ -4861,7 +4874,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
#endif
- vcpu->arch.microcode_version = 0x100000000ULL;
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
+ vcpu->arch.microcode_version = 0x100000000ULL;
vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
/*
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 40303b43da6c..43f573f6ca46 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -383,6 +383,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
+int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu);
bool vmx_emulation_required(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8507b300ff43..8637bc001096 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -451,6 +451,7 @@ static const u32 msr_based_features_all_except_vmx[] = {
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
+ MSR_PLATFORM_INFO,
};
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
@@ -667,38 +668,6 @@ static void drop_user_return_notifiers(void)
kvm_on_user_return(&msrs->urn);
}
-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.apic_base;
-}
-
-enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
-{
- return kvm_apic_mode(kvm_get_apic_base(vcpu));
-}
-EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
-
-int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
-{
- enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
- enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
- u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
- (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
-
- if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
- return 1;
- if (!msr_info->host_initiated) {
- if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
- return 1;
- if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
- return 1;
- }
-
- kvm_lapic_set_base(vcpu, msr_info->data);
- kvm_recalculate_apic_map(vcpu->kvm);
- return 0;
-}
-
/*
* Handle a fault on a hardware virtualization (VMX or SVM) instruction.
*
@@ -1854,7 +1823,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
case MSR_KERNEL_GS_BASE:
case MSR_CSTAR:
case MSR_LSTAR:
- if (is_noncanonical_address(data, vcpu))
+ if (is_noncanonical_msr_address(data, vcpu))
return 1;
break;
case MSR_IA32_SYSENTER_EIP:
@@ -1871,7 +1840,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
* value, and that something deterministic happens if the guest
* invokes 64-bit SYSENTER.
*/
- data = __canonical_address(data, vcpu_virt_addr_bits(vcpu));
+ data = __canonical_address(data, max_host_virt_addr_bits());
break;
case MSR_TSC_AUX:
if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
@@ -2144,8 +2113,9 @@ EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
{
xfer_to_guest_mode_prepare();
- return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
- xfer_to_guest_mode_work_pending();
+
+ return READ_ONCE(vcpu->mode) == EXITING_GUEST_MODE ||
+ kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending();
}
/*
@@ -3793,13 +3763,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.microcode_version = data;
break;
case MSR_IA32_ARCH_CAPABILITIES:
- if (!msr_info->host_initiated)
- return 1;
+ if (!msr_info->host_initiated ||
+ !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
+ return KVM_MSR_RET_UNSUPPORTED;
vcpu->arch.arch_capabilities = data;
break;
case MSR_IA32_PERF_CAPABILITIES:
- if (!msr_info->host_initiated)
- return 1;
+ if (!msr_info->host_initiated ||
+ !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+ return KVM_MSR_RET_UNSUPPORTED;
+
if (data & ~kvm_caps.supported_perf_cap)
return 1;
@@ -3890,7 +3863,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_MTRRdefType:
return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE:
- return kvm_set_apic_base(vcpu, msr_info);
+ return kvm_apic_set_base(vcpu, data, msr_info->host_initiated);
case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_write(vcpu, msr, data);
case MSR_IA32_TSC_DEADLINE:
@@ -4111,9 +4084,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.osvw.status = data;
break;
case MSR_PLATFORM_INFO:
- if (!msr_info->host_initiated ||
- (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
- cpuid_fault_enabled(vcpu)))
+ if (!msr_info->host_initiated)
return 1;
vcpu->arch.msr_platform_info = data;
break;
@@ -4252,15 +4223,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.microcode_version;
break;
case MSR_IA32_ARCH_CAPABILITIES:
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
- return 1;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
+ return KVM_MSR_RET_UNSUPPORTED;
msr_info->data = vcpu->arch.arch_capabilities;
break;
case MSR_IA32_PERF_CAPABILITIES:
- if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
- return 1;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+ return KVM_MSR_RET_UNSUPPORTED;
msr_info->data = vcpu->arch.perf_capabilities;
break;
case MSR_IA32_POWER_CTL:
@@ -4314,7 +4283,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 1 << 24;
break;
case MSR_IA32_APICBASE:
- msr_info->data = kvm_get_apic_base(vcpu);
+ msr_info->data = vcpu->arch.apic_base;
break;
case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
@@ -5094,7 +5063,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
int idx;
if (vcpu->preempted) {
- vcpu->arch.preempted_in_kernel = kvm_arch_vcpu_in_kernel(vcpu);
+ /*
+ * Assume protected guests are in-kernel. Inefficient yielding
+ * due to false positives is preferable to never yielding due
+ * to false negatives.
+ */
+ vcpu->arch.preempted_in_kernel = vcpu->arch.guest_state_protected ||
+ !kvm_x86_call(get_cpl_no_cache)(vcpu);
/*
* Take the srcu lock as memslots will be accessed to check the gfn
@@ -8612,6 +8587,12 @@ static gva_t emulator_get_untagged_addr(struct x86_emulate_ctxt *ctxt,
addr, flags);
}
+static bool emulator_is_canonical_addr(struct x86_emulate_ctxt *ctxt,
+ gva_t addr, unsigned int flags)
+{
+ return !is_noncanonical_address(addr, emul_to_vcpu(ctxt), flags);
+}
+
static const struct x86_emulate_ops emulate_ops = {
.vm_bugged = emulator_vm_bugged,
.read_gpr = emulator_read_gpr,
@@ -8658,6 +8639,7 @@ static const struct x86_emulate_ops emulate_ops = {
.triple_fault = emulator_triple_fault,
.set_xcr = emulator_set_xcr,
.get_untagged_addr = emulator_get_untagged_addr,
+ .is_canonical_addr = emulator_is_canonical_addr,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -10159,7 +10141,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->if_flag = kvm_x86_call(get_if_flag)(vcpu);
kvm_run->cr8 = kvm_get_cr8(vcpu);
- kvm_run->apic_base = kvm_get_apic_base(vcpu);
+ kvm_run->apic_base = vcpu->arch.apic_base;
kvm_run->ready_for_interrupt_injection =
pic_in_kernel(vcpu->kvm) ||
@@ -10576,8 +10558,8 @@ static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
* deleted if any vCPU has xAPIC virtualization and x2APIC enabled, but
* and hardware doesn't support x2APIC virtualization. E.g. some AMD
* CPUs support AVIC but not x2APIC. KVM still allows enabling AVIC in
- * this case so that KVM can the AVIC doorbell to inject interrupts to
- * running vCPUs, but KVM must not create SPTEs for the APIC base as
+ * this case so that KVM can use the AVIC doorbell to inject interrupts
+ * to running vCPUs, but KVM must not create SPTEs for the APIC base as
* the vCPU would incorrectly be able to access the vAPIC page via MMIO
* despite being in x2APIC mode. For simplicity, inhibiting the APIC
* access page is sticky.
@@ -10606,11 +10588,11 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
if (!!old != !!new) {
/*
* Kick all vCPUs before setting apicv_inhibit_reasons to avoid
- * false positives in the sanity check WARN in svm_vcpu_run().
+ * false positives in the sanity check WARN in vcpu_enter_guest().
* This task will wait for all vCPUs to ack the kick IRQ before
* updating apicv_inhibit_reasons, and all other vCPUs will
* block on acquiring apicv_update_lock so that vCPUs can't
- * redo svm_vcpu_run() without seeing the new inhibit state.
+ * redo vcpu_enter_guest() without seeing the new inhibit state.
*
* Note, holding apicv_update_lock and taking it in the read
* side (handling the request) also prevents other vCPUs from
@@ -11711,7 +11693,7 @@ skip_protected_regs:
sregs->cr4 = kvm_read_cr4(vcpu);
sregs->cr8 = kvm_get_cr8(vcpu);
sregs->efer = vcpu->arch.efer;
- sregs->apic_base = kvm_get_apic_base(vcpu);
+ sregs->apic_base = vcpu->arch.apic_base;
}
static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
@@ -11888,16 +11870,13 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
int *mmu_reset_needed, bool update_pdptrs)
{
- struct msr_data apic_base_msr;
int idx;
struct desc_ptr dt;
if (!kvm_is_valid_sregs(vcpu, sregs))
return -EINVAL;
- apic_base_msr.data = sregs->apic_base;
- apic_base_msr.host_initiated = true;
- if (kvm_set_apic_base(vcpu, &apic_base_msr))
+ if (kvm_apic_set_base(vcpu, sregs->apic_base, true))
return -EINVAL;
if (vcpu->arch.guest_state_protected)
@@ -12299,7 +12278,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_async_pf_hash_reset(vcpu);
- vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
+ vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
+ vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
+ vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
+ }
kvm_pmu_init(vcpu);
vcpu->arch.pending_external_vector = -1;
@@ -12313,8 +12296,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (r)
goto free_guest_fpu;
- vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
- vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
kvm_xen_init_vcpu(vcpu);
vcpu_load(vcpu);
kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
@@ -13203,6 +13184,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
+ WARN_ON_ONCE(!kvm_arch_pmi_in_guest(vcpu));
+
if (vcpu->arch.guest_state_protected)
return true;
@@ -13211,6 +13194,11 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
+ WARN_ON_ONCE(!kvm_arch_pmi_in_guest(vcpu));
+
+ if (vcpu->arch.guest_state_protected)
+ return 0;
+
return kvm_rip_read(vcpu);
}
@@ -13726,7 +13714,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
* invalidation.
*/
if ((!pcid_enabled && (operand.pcid != 0)) ||
- is_noncanonical_address(operand.gla, vcpu)) {
+ is_noncanonical_invlpg_address(operand.gla, vcpu)) {
kvm_inject_gp(vcpu, 0);
return 1;
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a84c48ef5278..ec623d23d13d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,6 +8,7 @@
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
+#include "cpuid.h"
struct kvm_caps {
/* control of guest tsc rate supported? */
@@ -233,9 +234,52 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
return kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 57 : 48;
}
-static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
+static inline u8 max_host_virt_addr_bits(void)
{
- return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
+ return kvm_cpu_cap_has(X86_FEATURE_LA57) ? 57 : 48;
+}
+
+/*
+ * x86 MSRs which contain linear addresses, x86 hidden segment bases, and
+ * IDT/GDT bases have static canonicality checks, the size of which depends
+ * only on the CPU's support for 5-level paging, rather than on the state of
+ * CR4.LA57. This applies to both WRMSR and to other instructions that set
+ * their values, e.g. SGDT.
+ *
+ * KVM passes through most of these MSRs and also doesn't intercept the
+ * instructions that set the hidden segment bases.
+ *
+ * Because of this, to be consistent with hardware, even if the guest doesn't
+ * have LA57 enabled in its CPUID, perform canonicality checks based on *host*
+ * support for 5 level paging.
+ *
+ * Finally, instructions related to MMU invalidation of a given linear
+ * address also have a similar static canonicality check on the address.
+ * This allows, for example, invalidating 5-level addresses of a guest from
+ * a host which uses 4-level paging.
+ */
+static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu,
+ unsigned int flags)
+{
+ if (flags & (X86EMUL_F_INVLPG | X86EMUL_F_MSR | X86EMUL_F_DT_LOAD))
+ return !__is_canonical_address(la, max_host_virt_addr_bits());
+ else
+ return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
+}
+
+static inline bool is_noncanonical_msr_address(u64 la, struct kvm_vcpu *vcpu)
+{
+ return is_noncanonical_address(la, vcpu, X86EMUL_F_MSR);
+}
+
+static inline bool is_noncanonical_base_address(u64 la, struct kvm_vcpu *vcpu)
+{
+ return is_noncanonical_address(la, vcpu, X86EMUL_F_DT_LOAD);
+}
+
+static inline bool is_noncanonical_invlpg_address(u64 la, struct kvm_vcpu *vcpu)
+{
+ return is_noncanonical_address(la, vcpu, X86EMUL_F_INVLPG);
}
static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 5e5e0b5aae60..01a000a41693 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -68,7 +68,7 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
-TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
+TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c
new file mode 100644
index 000000000000..a72f13ae2edb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+static bool is_kvm_controlled_msr(uint32_t msr)
+{
+ return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
+}
+
+/*
+ * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
+ * MSR, and doesn't allow setting the hidden version.
+ */
+static bool is_hidden_vmx_msr(uint32_t msr)
+{
+ switch (msr) {
+ case MSR_IA32_VMX_PINBASED_CTLS:
+ case MSR_IA32_VMX_PROCBASED_CTLS:
+ case MSR_IA32_VMX_EXIT_CTLS:
+ case MSR_IA32_VMX_ENTRY_CTLS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_quirked_msr(uint32_t msr)
+{
+ return msr != MSR_AMD64_DE_CFG;
+}
+
+static void test_feature_msr(uint32_t msr)
+{
+ const uint64_t supported_mask = kvm_get_feature_msr(msr);
+ uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /*
+ * Don't bother testing KVM-controlled MSRs beyond verifying that the
+ * MSR can be read from userspace. Any value is effectively legal, as
+ * KVM is bound by x86 architecture, not by ABI.
+ */
+ if (is_kvm_controlled_msr(msr))
+ return;
+
+ /*
+ * More goofy behavior. KVM reports the host CPU's actual revision ID,
+ * but initializes the vCPU's revision ID to an arbitrary value.
+ */
+ if (msr == MSR_IA32_UCODE_REV)
+ reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065;
+
+ /*
+ * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the
+ * full set of features supported by KVM. For non-quirked MSRs, and
+ * when the quirk is disabled, KVM must zero-initialize the MSR and let
+ * userspace do the configuration.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value,
+ "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx",
+ reset_value, is_quirked_msr(msr) ? "" : "non-", msr,
+ vcpu_get_msr(vcpu, msr));
+ if (!is_hidden_vmx_msr(msr))
+ vcpu_set_msr(vcpu, msr, supported_mask);
+ kvm_vm_free(vm);
+
+ if (is_hidden_vmx_msr(msr))
+ return;
+
+ if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) ||
+ !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
+ return;
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS);
+
+ vcpu = vm_vcpu_add(vm, 0, NULL);
+ TEST_ASSERT(!vcpu_get_msr(vcpu, msr),
+ "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx",
+ msr, vcpu_get_msr(vcpu, msr));
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ const struct kvm_msr_list *feature_list;
+ int i;
+
+ /*
+ * Skip the entire test if MSR_FEATURES isn't supported, other tests
+ * will cover the "regular" list of MSRs, the coverage here is purely
+ * opportunistic and not interesting on its own.
+ */
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
+
+ (void)kvm_get_msr_index_list();
+
+ feature_list = kvm_get_feature_msr_index_list();
+ for (i = 0; i < feature_list->nmsrs; i++)
+ test_feature_msr(feature_list->indices[i]);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
deleted file mode 100644
index d09b3cbcadc6..000000000000
--- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
+++ /dev/null
@@ -1,35 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Test that KVM_GET_MSR_INDEX_LIST and
- * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended
- *
- * Copyright (C) 2020, Red Hat, Inc.
- */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-
-int main(int argc, char *argv[])
-{
- const struct kvm_msr_list *feature_list;
- int i;
-
- /*
- * Skip the entire test if MSR_FEATURES isn't supported, other tests
- * will cover the "regular" list of MSRs, the coverage here is purely
- * opportunistic and not interesting on its own.
- */
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
-
- (void)kvm_get_msr_index_list();
-
- feature_list = kvm_get_feature_msr_index_list();
- for (i = 0; i < feature_list->nmsrs; i++)
- kvm_get_feature_msr(feature_list->indices[i]);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index eda88080c186..9cbf283ebc55 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -72,8 +72,6 @@ int main(int argc, char *argv[])
}
done:
- vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info);
-
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
index 7c92536551cc..a1f5ff45d518 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -207,6 +207,29 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
}
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
+{
+ uint64_t val;
+ int i, r;
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+ TEST_ASSERT_EQ(val, host_cap.capabilities);
+
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM);
+
+ val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES);
+ TEST_ASSERT_EQ(val, 0);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+
+ for (i = 0; i < 64; i++) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i));
+ TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM",
+ i, BIT_ULL(i));
+ }
+}
+
int main(int argc, char *argv[])
{
TEST_REQUIRE(kvm_is_pmu_enabled());