summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/cpuid.c12
-rw-r--r--arch/x86/kvm/cpuid.h1
-rw-r--r--arch/x86/kvm/emulate.c15
-rw-r--r--arch/x86/kvm/hyperv.c3
-rw-r--r--arch/x86/kvm/kvm_emulate.h5
-rw-r--r--arch/x86/kvm/mmu.h1
-rw-r--r--arch/x86/kvm/mmu/mmu.c12
-rw-r--r--arch/x86/kvm/mtrr.c1
-rw-r--r--arch/x86/kvm/reverse_cpuid.h8
-rw-r--r--arch/x86/kvm/svm/sev.c4
-rw-r--r--arch/x86/kvm/svm/svm.c16
-rw-r--r--arch/x86/kvm/svm/vmenter.S6
-rw-r--r--arch/x86/kvm/vmx/hyperv.c1
-rw-r--r--arch/x86/kvm/vmx/main.c3
-rw-r--r--arch/x86/kvm/vmx/nested.c45
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c10
-rw-r--r--arch/x86/kvm/vmx/sgx.c5
-rw-r--r--arch/x86/kvm/vmx/vmx.c68
-rw-r--r--arch/x86/kvm/vmx/vmx.h26
-rw-r--r--arch/x86/kvm/vmx/x86_ops.h2
-rw-r--r--arch/x86/kvm/x86.c44
-rw-r--r--arch/x86/kvm/x86.h48
-rw-r--r--arch/x86/kvm/xen.c17
23 files changed, 261 insertions, 92 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c92e43f2d0c4..8f587c5bb6bc 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -814,6 +814,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_mask(CPUID_8000_0021_EAX,
F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ |
+ F(VERW_CLEAR) |
F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ |
F(WRMSR_XX_BASE_NS)
);
@@ -821,11 +822,19 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_check_and_set(X86_FEATURE_SBPB);
kvm_cpu_cap_check_and_set(X86_FEATURE_IBPB_BRTYPE);
kvm_cpu_cap_check_and_set(X86_FEATURE_SRSO_NO);
+ kvm_cpu_cap_check_and_set(X86_FEATURE_VERW_CLEAR);
kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
F(PERFMON_V2)
);
+ kvm_cpu_cap_init_kvm_defined(CPUID_8000_0021_ECX,
+ F(TSA_SQ_NO) | F(TSA_L1_NO)
+ );
+
+ kvm_cpu_cap_check_and_set(X86_FEATURE_TSA_SQ_NO);
+ kvm_cpu_cap_check_and_set(X86_FEATURE_TSA_L1_NO);
+
/*
* Synthesize "LFENCE is serializing" into the AMD-defined entry in
* KVM's supported CPUID if the feature is reported as supported by the
@@ -1376,8 +1385,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x80000021:
- entry->ebx = entry->ecx = entry->edx = 0;
+ entry->ebx = entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0021_EAX);
+ cpuid_entry_override(entry, CPUID_8000_0021_ECX);
break;
/* AMD Extended Performance Monitoring and Debug */
case 0x80000022: {
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index ad479cfb91bc..f16a7b2c2adc 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -2,7 +2,6 @@
#ifndef ARCH_X86_KVM_CPUID_H
#define ARCH_X86_KVM_CPUID_H
-#include "x86.h"
#include "reverse_cpuid.h"
#include <asm/cpu.h>
#include <asm/processor.h>
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e72aed25d721..60986f67c35a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -651,9 +651,10 @@ static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
}
static inline bool emul_is_noncanonical_address(u64 la,
- struct x86_emulate_ctxt *ctxt)
+ struct x86_emulate_ctxt *ctxt,
+ unsigned int flags)
{
- return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
+ return !ctxt->ops->is_canonical_addr(ctxt, la, flags);
}
/*
@@ -1733,7 +1734,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (ret != X86EMUL_CONTINUE)
return ret;
if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
- ((u64)base3 << 32), ctxt))
+ ((u64)base3 << 32), ctxt,
+ X86EMUL_F_DT_LOAD))
return emulate_gp(ctxt, err_code);
}
@@ -2516,8 +2518,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ss_sel = cs_sel + 8;
cs.d = 0;
cs.l = 1;
- if (emul_is_noncanonical_address(rcx, ctxt) ||
- emul_is_noncanonical_address(rdx, ctxt))
+ if (emul_is_noncanonical_address(rcx, ctxt, 0) ||
+ emul_is_noncanonical_address(rdx, ctxt, 0))
return emulate_gp(ctxt, 0);
break;
}
@@ -3494,7 +3496,8 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
if (rc != X86EMUL_CONTINUE)
return rc;
if (ctxt->mode == X86EMUL_MODE_PROT64 &&
- emul_is_noncanonical_address(desc_ptr.address, ctxt))
+ emul_is_noncanonical_address(desc_ptr.address, ctxt,
+ X86EMUL_F_DT_LOAD))
return emulate_gp(ctxt, 0);
if (lgdt)
ctxt->ops->set_gdt(ctxt, &desc_ptr);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 44c88537448c..79d06a8a5b7d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1980,6 +1980,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
goto out_flush_all;
+ if (is_noncanonical_invlpg_address(entries[i], vcpu))
+ continue;
+
/*
* Lower 12 bits of 'address' encode the number of additional
* pages to flush.
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 55a18e2f2dcd..10495fffb890 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -94,6 +94,8 @@ struct x86_instruction_info {
#define X86EMUL_F_FETCH BIT(1)
#define X86EMUL_F_IMPLICIT BIT(2)
#define X86EMUL_F_INVLPG BIT(3)
+#define X86EMUL_F_MSR BIT(4)
+#define X86EMUL_F_DT_LOAD BIT(5)
struct x86_emulate_ops {
void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
@@ -235,6 +237,9 @@ struct x86_emulate_ops {
gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
unsigned int flags);
+
+ bool (*is_canonical_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
+ unsigned int flags);
};
/* Type, address-of, and value of an instruction's operand. */
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 9dc5dd43ae7f..e9322358678b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -4,6 +4,7 @@
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
+#include "x86.h"
#include "cpuid.h"
extern bool __read_mostly enable_mmio_caching;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4607610ef062..700926eb77df 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6234,7 +6234,7 @@ void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
/* It's actually a GPA for vcpu->arch.guest_mmu. */
if (mmu != &vcpu->arch.guest_mmu) {
/* INVLPG on a non-canonical address is a NOP according to the SDM. */
- if (is_noncanonical_address(addr, vcpu))
+ if (is_noncanonical_invlpg_address(addr, vcpu))
return;
kvm_x86_call(flush_tlb_gva)(vcpu, addr);
@@ -7578,7 +7578,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
return true;
}
-static void kvm_mmu_start_lpage_recovery(struct once *once)
+static int kvm_mmu_start_lpage_recovery(struct once *once)
{
struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
struct kvm *kvm = container_of(ka, struct kvm, arch);
@@ -7590,12 +7590,13 @@ static void kvm_mmu_start_lpage_recovery(struct once *once)
kvm, "kvm-nx-lpage-recovery");
if (IS_ERR(nx_thread))
- return;
+ return PTR_ERR(nx_thread);
vhost_task_start(nx_thread);
/* Make the task visible only once it is fully started. */
WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread);
+ return 0;
}
int kvm_mmu_post_init_vm(struct kvm *kvm)
@@ -7603,10 +7604,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
if (nx_hugepage_mitigation_hard_disabled)
return 0;
- call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
- if (!kvm->arch.nx_huge_page_recovery_thread)
- return -ENOMEM;
- return 0;
+ return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
}
void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 05490b9d8a43..6f74e2b27c1e 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -19,6 +19,7 @@
#include <asm/mtrr.h>
#include "cpuid.h"
+#include "x86.h"
static u64 *find_mtrr(struct kvm_vcpu *vcpu, unsigned int msr)
{
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 0d17d6b70639..0ea847b82354 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -18,6 +18,7 @@ enum kvm_only_cpuid_leafs {
CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
CPUID_24_0_EBX,
+ CPUID_8000_0021_ECX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -68,6 +69,10 @@ enum kvm_only_cpuid_leafs {
/* CPUID level 0x80000022 (EAX) */
#define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0)
+/* CPUID level 0x80000021 (ECX) */
+#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1)
+#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2)
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -98,6 +103,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX},
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
[CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX},
+ [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX},
};
/*
@@ -137,6 +143,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO);
+ KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6154cb450b44..c4ae73541fc5 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2058,6 +2058,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
struct kvm_vcpu *src_vcpu;
unsigned long i;
+ if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
+ dst->created_vcpus != atomic_read(&dst->online_vcpus))
+ return -EBUSY;
+
if (!sev_es_guest(src))
return 0;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7cbacd043921..800f781475c0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1483,7 +1483,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb)
{
int i;
- for_each_online_cpu(i)
+ for_each_possible_cpu(i)
cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
}
@@ -4226,9 +4226,9 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
guest_state_exit_irqoff();
}
-static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
- bool force_immediate_exit)
+static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
{
+ bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT;
struct vcpu_svm *svm = to_svm(vcpu);
bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
@@ -4270,10 +4270,13 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
svm_hv_update_vp_id(svm->vmcb, vcpu);
/*
- * Run with all-zero DR6 unless needed, so that we can get the exact cause
- * of a #DB.
+ * Run with all-zero DR6 unless the guest can write DR6 freely, so that
+ * KVM can get the exact cause of a #DB. Note, loading guest DR6 from
+ * KVM's snapshot is only necessary when DR accesses won't exit.
*/
- if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)))
+ if (unlikely(run_flags & KVM_RUN_LOAD_GUEST_DR6))
+ svm_set_dr6(vcpu, vcpu->arch.dr6);
+ else if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)))
svm_set_dr6(vcpu, DR6_ACTIVE_LOW);
clgi();
@@ -5084,7 +5087,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_idt = svm_set_idt,
.get_gdt = svm_get_gdt,
.set_gdt = svm_set_gdt,
- .set_dr6 = svm_set_dr6,
.set_dr7 = svm_set_dr7,
.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 0c61153b275f..235c4af6b692 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
mov VCPU_RDI(%_ASM_DI), %_ASM_DI
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+
/* Enter guest mode */
3: vmrun %_ASM_AX
4:
@@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
mov SVM_current_vmcb(%rdi), %rax
mov KVM_VMCB_pa(%rax), %rax
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+
/* Enter guest mode */
1: vmrun %rax
2:
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c
index fab6a1ad98dc..fa41d036acd4 100644
--- a/arch/x86/kvm/vmx/hyperv.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -4,6 +4,7 @@
#include <linux/errno.h>
#include <linux/smp.h>
+#include "x86.h"
#include "../cpuid.h"
#include "hyperv.h"
#include "nested.h"
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 47476fcc179a..3f83e36a657b 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -42,6 +42,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
+ .HOST_OWNED_DEBUGCTL = DEBUGCTLMSR_FREEZE_IN_SMM,
+
.update_exception_bitmap = vmx_update_exception_bitmap,
.get_feature_msr = vmx_get_feature_msr,
.get_msr = vmx_get_msr,
@@ -60,7 +62,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.set_idt = vmx_set_idt,
.get_gdt = vmx_get_gdt,
.set_gdt = vmx_set_gdt,
- .set_dr6 = vmx_set_dr6,
.set_dr7 = vmx_set_dr7,
.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
.cache_reg = vmx_cache_reg,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 22bee8a71144..60bd2791d933 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -7,6 +7,7 @@
#include <asm/debugreg.h>
#include <asm/mmu_context.h>
+#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"
#include "mmu.h"
@@ -16,7 +17,6 @@
#include "sgx.h"
#include "trace.h"
#include "vmx.h"
-#include "x86.h"
#include "smm.h"
static bool __read_mostly enable_shadow_vmcs = 1;
@@ -2653,10 +2653,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
- vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+ vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
+ vmx_get_supported_debugctl(vcpu, false));
} else {
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
- vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl);
+ vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
}
if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
@@ -3020,8 +3021,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3)))
return -EINVAL;
- if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
+ if (CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
+ CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
return -EINVAL;
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
@@ -3055,12 +3056,12 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
CC(vmcs12->host_ss_selector == 0 && !ia32e))
return -EINVAL;
- if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
- CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
+ if (CC(is_noncanonical_base_address(vmcs12->host_fs_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_gs_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_gdtr_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_idtr_base, vcpu)) ||
+ CC(is_noncanonical_base_address(vmcs12->host_tr_base, vcpu)) ||
+ CC(is_noncanonical_address(vmcs12->host_rip, vcpu, 0)))
return -EINVAL;
/*
@@ -3135,7 +3136,8 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
- CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
+ (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
+ CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
return -EINVAL;
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
@@ -3178,7 +3180,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
- (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
+ (CC(is_noncanonical_msr_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
return -EINVAL;
@@ -3525,7 +3527,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
if (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
- vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ vmx->nested.pre_vmenter_debugctl = vmx_guest_debugctl_read();
if (kvm_mpx_supported() &&
(!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
@@ -4576,6 +4578,12 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
+ /*
+ * Note! Save DR7, but intentionally don't grab DEBUGCTL from vmcs02.
+ * Writes to DEBUGCTL that aren't intercepted by L1 are immediately
+ * propagated to vmcs12 (see vmx_set_msr()), as the value loaded into
+ * vmcs02 doesn't strictly track vmcs12.
+ */
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
vmcs12->guest_dr7 = vcpu->arch.dr7;
@@ -4766,7 +4774,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
__vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
kvm_set_dr(vcpu, 7, 0x400);
- vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+ vmx_guest_debugctl_write(vcpu, 0);
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
vmcs12->vm_exit_msr_load_count))
@@ -4821,6 +4829,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
}
+ /* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */
+ vmx_reload_guest_debugctl(vcpu);
+
/*
* Note that calling vmx_set_{efer,cr0,cr4} is important as they
* handle a variety of side effects to KVM's software model.
@@ -5172,7 +5183,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
* non-canonical form. This is the only check on the memory
* destination for long mode!
*/
- exn = is_noncanonical_address(*ret, vcpu);
+ exn = is_noncanonical_address(*ret, vcpu, 0);
} else {
/*
* When not in long mode, the virtual/linear address is
@@ -5983,7 +5994,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
* invalidation.
*/
if (!operand.vpid ||
- is_noncanonical_address(operand.gla, vcpu))
+ is_noncanonical_invlpg_address(operand.gla, vcpu))
return nested_vmx_fail(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
vpid_sync_vcpu_addr(vpid02, operand.gla);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 83382a4d1d66..a5edc623166a 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -365,7 +365,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_IA32_DS_AREA:
- if (is_noncanonical_address(data, vcpu))
+ if (is_noncanonical_msr_address(data, vcpu))
return 1;
pmu->ds_area = data;
@@ -605,11 +605,11 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
*/
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
- u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ u64 data = vmx_guest_debugctl_read();
if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
data &= ~DEBUGCTLMSR_LBR;
- vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+ vmx_guest_debugctl_write(vcpu, data);
}
}
@@ -679,7 +679,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
if (!lbr_desc->event) {
vmx_disable_lbr_msrs_passthrough(vcpu);
- if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
+ if (vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR)
goto warn;
if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
goto warn;
@@ -701,7 +701,7 @@ warn:
static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
- if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
+ if (!(vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR))
intel_pmu_release_guest_lbr_event(vcpu);
}
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index a3c3d2a51f47..b352a3ba7354 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -4,12 +4,11 @@
#include <asm/sgx.h>
-#include "cpuid.h"
+#include "x86.h"
#include "kvm_cache_regs.h"
#include "nested.h"
#include "sgx.h"
#include "vmx.h"
-#include "x86.h"
bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);
@@ -38,7 +37,7 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
fault = true;
} else if (likely(is_64_bit_mode(vcpu))) {
*gva = vmx_get_untagged_addr(vcpu, *gva, 0);
- fault = is_noncanonical_address(*gva, vcpu);
+ fault = is_noncanonical_address(*gva, vcpu, 0);
} else {
*gva &= 0xffffffff;
fault = (s.unusable) ||
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a3d45b01dbad..6c185a260c5b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -770,8 +770,11 @@ void vmx_emergency_disable_virtualization_cpu(void)
return;
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
- loaded_vmcss_on_cpu_link)
+ loaded_vmcss_on_cpu_link) {
vmcs_clear(v->vmcs);
+ if (v->shadow_vmcs)
+ vmcs_clear(v->shadow_vmcs);
+ }
kvm_cpu_vmxoff();
}
@@ -2145,7 +2148,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
break;
case MSR_IA32_DEBUGCTLMSR:
- msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ msr_info->data = vmx_guest_debugctl_read();
break;
default:
find_uret_msr:
@@ -2170,7 +2173,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
return (unsigned long)data;
}
-static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
+u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
{
u64 debugctl = 0;
@@ -2182,9 +2185,25 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
(host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+ if (boot_cpu_has(X86_FEATURE_RTM) &&
+ (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_RTM)))
+ debugctl |= DEBUGCTLMSR_RTM_DEBUG;
+
return debugctl;
}
+bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated)
+{
+ u64 invalid;
+
+ invalid = data & ~vmx_get_supported_debugctl(vcpu, host_initiated);
+ if (invalid & (DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) {
+ kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data);
+ invalid &= ~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR);
+ }
+ return !invalid;
+}
+
/*
* Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
@@ -2253,35 +2272,28 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
- case MSR_IA32_DEBUGCTLMSR: {
- u64 invalid;
-
- invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
- if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
- kvm_pr_unimpl_wrmsr(vcpu, msr_index, data);
- data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
- invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
- }
-
- if (invalid)
+ case MSR_IA32_DEBUGCTLMSR:
+ if (!vmx_is_valid_debugctl(vcpu, data, msr_info->host_initiated))
return 1;
+ data &= vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
+
if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
VM_EXIT_SAVE_DEBUG_CONTROLS)
get_vmcs12(vcpu)->guest_ia32_debugctl = data;
- vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+ vmx_guest_debugctl_write(vcpu, data);
+
if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
(data & DEBUGCTLMSR_LBR))
intel_pmu_create_guest_lbr_event(vcpu);
return 0;
- }
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
return 1;
- if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
+ if (is_noncanonical_msr_address(data & PAGE_MASK, vcpu) ||
(data & MSR_IA32_BNDCFGS_RSVD))
return 1;
@@ -2446,7 +2458,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
if (index >= 2 * vmx->pt_desc.num_address_ranges)
return 1;
- if (is_noncanonical_address(data, vcpu))
+ if (is_noncanonical_msr_address(data, vcpu))
return 1;
if (index % 2)
vmx->pt_desc.guest.addr_b[index / 2] = data;
@@ -4820,7 +4832,8 @@ static void init_vmcs(struct vcpu_vmx *vmx)
vmcs_write32(GUEST_SYSENTER_CS, 0);
vmcs_writel(GUEST_SYSENTER_ESP, 0);
vmcs_writel(GUEST_SYSENTER_EIP, 0);
- vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+ vmx_guest_debugctl_write(&vmx->vcpu, 0);
if (cpu_has_vmx_tpr_shadow()) {
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
@@ -5627,12 +5640,6 @@ void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
set_debugreg(DR6_RESERVED, 6);
}
-void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
-{
- lockdep_assert_irqs_disabled();
- set_debugreg(vcpu->arch.dr6, 6);
-}
-
void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{
vmcs_writel(GUEST_DR7, val);
@@ -7310,7 +7317,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
- mds_clear_cpu_buffers();
+ x86_clear_cpu_buffers();
vmx_disable_fb_clear(vmx);
@@ -7350,8 +7357,9 @@ out:
guest_state_exit_irqoff();
}
-fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
+fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
{
+ bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT;
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr3, cr4;
@@ -7396,6 +7404,12 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
vcpu->arch.regs_dirty = 0;
+ if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
+ set_debugreg(vcpu->arch.dr6, 6);
+
+ if (run_flags & KVM_RUN_LOAD_DEBUGCTL)
+ vmx_reload_guest_debugctl(vcpu);
+
/*
* Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
* prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index cf57fbf12104..a7e2de50d27f 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -435,6 +435,32 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
+bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated);
+
+static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val)
+{
+ WARN_ON_ONCE(val & DEBUGCTLMSR_FREEZE_IN_SMM);
+
+ val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM;
+ vmcs_write64(GUEST_IA32_DEBUGCTL, val);
+}
+
+static inline u64 vmx_guest_debugctl_read(void)
+{
+ return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM;
+}
+
+static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu)
+{
+ u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL);
+
+ if (!((val ^ vcpu->arch.host_debugctl) & DEBUGCTLMSR_FREEZE_IN_SMM))
+ return;
+
+ vmx_guest_debugctl_write(vcpu, val & ~DEBUGCTLMSR_FREEZE_IN_SMM);
+}
+
/*
* Note, early Intel manuals have the write-low and read-high bitmap offsets
* the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 4aba200f435d..5e4ce13ab305 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -21,7 +21,7 @@ void vmx_vm_destroy(struct kvm *kvm);
int vmx_vcpu_precreate(struct kvm *kvm);
int vmx_vcpu_create(struct kvm_vcpu *vcpu);
int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu);
-fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit);
+fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags);
void vmx_vcpu_free(struct kvm_vcpu *vcpu);
void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f378d479fea3..dbd295ef3eba 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1845,7 +1845,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
case MSR_KERNEL_GS_BASE:
case MSR_CSTAR:
case MSR_LSTAR:
- if (is_noncanonical_address(data, vcpu))
+ if (is_noncanonical_msr_address(data, vcpu))
return 1;
break;
case MSR_IA32_SYSENTER_EIP:
@@ -1862,7 +1862,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
* value, and that something deterministic happens if the guest
* invokes 64-bit SYSENTER.
*/
- data = __canonical_address(data, vcpu_virt_addr_bits(vcpu));
+ data = __canonical_address(data, max_host_virt_addr_bits());
break;
case MSR_TSC_AUX:
if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
@@ -8608,6 +8608,12 @@ static gva_t emulator_get_untagged_addr(struct x86_emulate_ctxt *ctxt,
addr, flags);
}
+static bool emulator_is_canonical_addr(struct x86_emulate_ctxt *ctxt,
+ gva_t addr, unsigned int flags)
+{
+ return !is_noncanonical_address(addr, emul_to_vcpu(ctxt), flags);
+}
+
static const struct x86_emulate_ops emulate_ops = {
.vm_bugged = emulator_vm_bugged,
.read_gpr = emulator_read_gpr,
@@ -8654,6 +8660,7 @@ static const struct x86_emulate_ops emulate_ops = {
.triple_fault = emulator_triple_fault,
.set_xcr = emulator_set_xcr,
.get_untagged_addr = emulator_get_untagged_addr,
+ .is_canonical_addr = emulator_is_canonical_addr,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -10704,6 +10711,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
dm_request_for_irq_injection(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu);
fastpath_t exit_fastpath;
+ u64 run_flags, debug_ctl;
bool req_immediate_exit = false;
@@ -10948,8 +10956,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
- if (req_immediate_exit)
+ run_flags = 0;
+ if (req_immediate_exit) {
+ run_flags |= KVM_RUN_FORCE_IMMEDIATE_EXIT;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
fpregs_assert_state_consistent();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
@@ -10959,19 +10970,29 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
if (unlikely(vcpu->arch.switch_db_regs)) {
- set_debugreg(0, 7);
+ set_debugreg(DR7_FIXED_1, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
set_debugreg(vcpu->arch.eff_db[1], 1);
set_debugreg(vcpu->arch.eff_db[2], 2);
set_debugreg(vcpu->arch.eff_db[3], 3);
/* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
- kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6);
+ run_flags |= KVM_RUN_LOAD_GUEST_DR6;
} else if (unlikely(hw_breakpoint_active())) {
- set_debugreg(0, 7);
+ set_debugreg(DR7_FIXED_1, 7);
}
- vcpu->arch.host_debugctl = get_debugctlmsr();
+ /*
+ * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL
+ * can be modified in IRQ context, e.g. via SMP function calls. Inform
+ * vendor code if any host-owned bits were changed, e.g. so that the
+ * value loaded into hardware while running the guest can be updated.
+ */
+ debug_ctl = get_debugctlmsr();
+ if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL &&
+ !vcpu->arch.guest_state_protected)
+ run_flags |= KVM_RUN_LOAD_DEBUGCTL;
+ vcpu->arch.host_debugctl = debug_ctl;
guest_timing_enter_irqoff();
@@ -10985,8 +11006,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
(kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
- exit_fastpath = kvm_x86_call(vcpu_run)(vcpu,
- req_immediate_exit);
+ exit_fastpath = kvm_x86_call(vcpu_run)(vcpu, run_flags);
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
@@ -10998,6 +11018,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
break;
}
+ run_flags = 0;
+
/* Note, VM-Exits that go down the "slow" path are accounted below. */
++vcpu->stat.exits;
}
@@ -12888,11 +12910,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->slots_lock);
}
kvm_unload_vcpu_mmus(kvm);
+ kvm_destroy_vcpus(kvm);
kvm_x86_call(vm_destroy)(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
- kvm_destroy_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
kvm_mmu_uninit_vm(kvm);
@@ -13756,7 +13778,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
* invalidation.
*/
if ((!pcid_enabled && (operand.pcid != 0)) ||
- is_noncanonical_address(operand.gla, vcpu)) {
+ is_noncanonical_invlpg_address(operand.gla, vcpu)) {
kvm_inject_gp(vcpu, 0);
return 1;
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a84c48ef5278..ec623d23d13d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,6 +8,7 @@
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
+#include "cpuid.h"
struct kvm_caps {
/* control of guest tsc rate supported? */
@@ -233,9 +234,52 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
return kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 57 : 48;
}
-static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
+static inline u8 max_host_virt_addr_bits(void)
{
- return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
+ return kvm_cpu_cap_has(X86_FEATURE_LA57) ? 57 : 48;
+}
+
+/*
+ * x86 MSRs which contain linear addresses, x86 hidden segment bases, and
+ * IDT/GDT bases have static canonicality checks, the size of which depends
+ * only on the CPU's support for 5-level paging, rather than on the state of
+ * CR4.LA57. This applies to both WRMSR and to other instructions that set
+ * their values, e.g. SGDT.
+ *
+ * KVM passes through most of these MSRS and also doesn't intercept the
+ * instructions that set the hidden segment bases.
+ *
+ * Because of this, to be consistent with hardware, even if the guest doesn't
+ * have LA57 enabled in its CPUID, perform canonicality checks based on *host*
+ * support for 5 level paging.
+ *
+ * Finally, instructions which are related to MMU invalidation of a given
+ * linear address, also have a similar static canonical check on address.
+ * This allows for example to invalidate 5-level addresses of a guest from a
+ * host which uses 4-level paging.
+ */
+static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu,
+ unsigned int flags)
+{
+ if (flags & (X86EMUL_F_INVLPG | X86EMUL_F_MSR | X86EMUL_F_DT_LOAD))
+ return !__is_canonical_address(la, max_host_virt_addr_bits());
+ else
+ return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu));
+}
+
+static inline bool is_noncanonical_msr_address(u64 la, struct kvm_vcpu *vcpu)
+{
+ return is_noncanonical_address(la, vcpu, X86EMUL_F_MSR);
+}
+
+static inline bool is_noncanonical_base_address(u64 la, struct kvm_vcpu *vcpu)
+{
+ return is_noncanonical_address(la, vcpu, X86EMUL_F_DT_LOAD);
+}
+
+static inline bool is_noncanonical_invlpg_address(u64 la, struct kvm_vcpu *vcpu)
+{
+ return is_noncanonical_address(la, vcpu, X86EMUL_F_INVLPG);
}
static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 622fe24da910..1fc2035df404 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1472,7 +1472,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports,
sched_poll.nr_ports * sizeof(*ports), &e)) {
*r = -EFAULT;
- return true;
+ goto out;
}
for (i = 0; i < sched_poll.nr_ports; i++) {
@@ -1916,8 +1916,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm,
{
struct kvm_vcpu *vcpu;
- if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
- return -EINVAL;
+ /*
+ * Don't check for the port being within range of max_evtchn_port().
+ * Userspace can configure what ever targets it likes; events just won't
+ * be delivered if/while the target is invalid, just like userspace can
+ * configure MSIs which target non-existent APICs.
+ *
+ * This allow on Live Migration and Live Update, the IRQ routing table
+ * can be restored *independently* of other things like creating vCPUs,
+ * without imposing an ordering dependency on userspace. In this
+ * particular case, the problematic ordering would be with setting the
+ * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096
+ * instead of 1024 event channels.
+ */
/* We only support 2 level event channels for now */
if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)