diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/cpuid.c | 12 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/emulate.c | 15 | ||||
-rw-r--r-- | arch/x86/kvm/hyperv.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/kvm_emulate.h | 5 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 12 | ||||
-rw-r--r-- | arch/x86/kvm/mtrr.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/reverse_cpuid.h | 8 | ||||
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 16 | ||||
-rw-r--r-- | arch/x86/kvm/svm/vmenter.S | 6 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/hyperv.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/main.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 45 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/pmu_intel.c | 10 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/sgx.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 68 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 26 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/x86_ops.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 44 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 48 | ||||
-rw-r--r-- | arch/x86/kvm/xen.c | 17 |
23 files changed, 261 insertions, 92 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c92e43f2d0c4..8f587c5bb6bc 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -814,6 +814,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_8000_0021_EAX, F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ | + F(VERW_CLEAR) | F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ | F(WRMSR_XX_BASE_NS) ); @@ -821,11 +822,19 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_check_and_set(X86_FEATURE_SBPB); kvm_cpu_cap_check_and_set(X86_FEATURE_IBPB_BRTYPE); kvm_cpu_cap_check_and_set(X86_FEATURE_SRSO_NO); + kvm_cpu_cap_check_and_set(X86_FEATURE_VERW_CLEAR); kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX, F(PERFMON_V2) ); + kvm_cpu_cap_init_kvm_defined(CPUID_8000_0021_ECX, + F(TSA_SQ_NO) | F(TSA_L1_NO) + ); + + kvm_cpu_cap_check_and_set(X86_FEATURE_TSA_SQ_NO); + kvm_cpu_cap_check_and_set(X86_FEATURE_TSA_L1_NO); + /* * Synthesize "LFENCE is serializing" into the AMD-defined entry in * KVM's supported CPUID if the feature is reported as supported by the @@ -1376,8 +1385,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; case 0x80000021: - entry->ebx = entry->ecx = entry->edx = 0; + entry->ebx = entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0021_EAX); + cpuid_entry_override(entry, CPUID_8000_0021_ECX); break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index ad479cfb91bc..f16a7b2c2adc 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -2,7 +2,6 @@ #ifndef ARCH_X86_KVM_CPUID_H #define ARCH_X86_KVM_CPUID_H -#include "x86.h" #include "reverse_cpuid.h" #include <asm/cpu.h> #include <asm/processor.h> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e72aed25d721..60986f67c35a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -651,9 +651,10 @@ static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt) } static inline bool emul_is_noncanonical_address(u64 la, - struct x86_emulate_ctxt *ctxt) + struct x86_emulate_ctxt *ctxt, + unsigned int flags) { - return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt)); + return !ctxt->ops->is_canonical_addr(ctxt, la, flags); } /* @@ -1733,7 +1734,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (ret != X86EMUL_CONTINUE) return ret; if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | - ((u64)base3 << 32), ctxt)) + ((u64)base3 << 32), ctxt, + X86EMUL_F_DT_LOAD)) return emulate_gp(ctxt, err_code); } @@ -2516,8 +2518,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ss_sel = cs_sel + 8; cs.d = 0; cs.l = 1; - if (emul_is_noncanonical_address(rcx, ctxt) || - emul_is_noncanonical_address(rdx, ctxt)) + if (emul_is_noncanonical_address(rcx, ctxt, 0) || + emul_is_noncanonical_address(rdx, ctxt, 0)) return emulate_gp(ctxt, 0); break; } @@ -3494,7 +3496,8 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) if (rc != X86EMUL_CONTINUE) return rc; if (ctxt->mode == X86EMUL_MODE_PROT64 && - emul_is_noncanonical_address(desc_ptr.address, ctxt)) + emul_is_noncanonical_address(desc_ptr.address, ctxt, + X86EMUL_F_DT_LOAD)) return emulate_gp(ctxt, 0); if (lgdt) ctxt->ops->set_gdt(ctxt, &desc_ptr); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 44c88537448c..79d06a8a5b7d 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1980,6 +1980,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; + if (is_noncanonical_invlpg_address(entries[i], vcpu)) + continue; + /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 55a18e2f2dcd..10495fffb890 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -94,6 +94,8 @@ struct x86_instruction_info { #define X86EMUL_F_FETCH BIT(1) #define X86EMUL_F_IMPLICIT BIT(2) #define X86EMUL_F_INVLPG BIT(3) +#define X86EMUL_F_MSR BIT(4) +#define X86EMUL_F_DT_LOAD BIT(5) struct x86_emulate_ops { void (*vm_bugged)(struct x86_emulate_ctxt *ctxt); @@ -235,6 +237,9 @@ struct x86_emulate_ops { gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr, unsigned int flags); + + bool (*is_canonical_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr, + unsigned int flags); }; /* Type, address-of, and value of an instruction's operand. */ diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 9dc5dd43ae7f..e9322358678b 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -4,6 +4,7 @@ #include <linux/kvm_host.h> #include "kvm_cache_regs.h" +#include "x86.h" #include "cpuid.h" extern bool __read_mostly enable_mmio_caching; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4607610ef062..700926eb77df 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6234,7 +6234,7 @@ void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, /* It's actually a GPA for vcpu->arch.guest_mmu. */ if (mmu != &vcpu->arch.guest_mmu) { /* INVLPG on a non-canonical address is a NOP according to the SDM. */ - if (is_noncanonical_address(addr, vcpu)) + if (is_noncanonical_invlpg_address(addr, vcpu)) return; kvm_x86_call(flush_tlb_gva)(vcpu, addr); @@ -7578,7 +7578,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data) return true; } -static void kvm_mmu_start_lpage_recovery(struct once *once) +static int kvm_mmu_start_lpage_recovery(struct once *once) { struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); struct kvm *kvm = container_of(ka, struct kvm, arch); @@ -7590,12 +7590,13 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) kvm, "kvm-nx-lpage-recovery"); if (IS_ERR(nx_thread)) - return; + return PTR_ERR(nx_thread); vhost_task_start(nx_thread); /* Make the task visible only once it is fully started. */ WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread); + return 0; } int kvm_mmu_post_init_vm(struct kvm *kvm) @@ -7603,10 +7604,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm) if (nx_hugepage_mitigation_hard_disabled) return 0; - call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); - if (!kvm->arch.nx_huge_page_recovery_thread) - return -ENOMEM; - return 0; + return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 05490b9d8a43..6f74e2b27c1e 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -19,6 +19,7 @@ #include <asm/mtrr.h> #include "cpuid.h" +#include "x86.h" static u64 *find_mtrr(struct kvm_vcpu *vcpu, unsigned int msr) { diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 0d17d6b70639..0ea847b82354 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -18,6 +18,7 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0022_EAX, CPUID_7_2_EDX, CPUID_24_0_EBX, + CPUID_8000_0021_ECX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -68,6 +69,10 @@ enum kvm_only_cpuid_leafs { /* CPUID level 0x80000022 (EAX) */ #define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0) +/* CPUID level 0x80000021 (ECX) */ +#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1) +#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2) + struct cpuid_reg { u32 function; u32 index; @@ -98,6 +103,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, + [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; /* @@ -137,6 +143,8 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO); + KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6154cb450b44..c4ae73541fc5 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2058,6 +2058,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) struct kvm_vcpu *src_vcpu; unsigned long i; + if (src->created_vcpus != atomic_read(&src->online_vcpus) || + dst->created_vcpus != atomic_read(&dst->online_vcpus)) + return -EBUSY; + if (!sev_es_guest(src)) return 0; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7cbacd043921..800f781475c0 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1483,7 +1483,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb) { int i; - for_each_online_cpu(i) + for_each_possible_cpu(i) cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL); } @@ -4226,9 +4226,9 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in guest_state_exit_irqoff(); } -static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, - bool force_immediate_exit) +static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) { + bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT; struct vcpu_svm *svm = to_svm(vcpu); bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL); @@ -4270,10 +4270,13 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, svm_hv_update_vp_id(svm->vmcb, vcpu); /* - * Run with all-zero DR6 unless needed, so that we can get the exact cause - * of a #DB. + * Run with all-zero DR6 unless the guest can write DR6 freely, so that + * KVM can get the exact cause of a #DB. Note, loading guest DR6 from + * KVM's snapshot is only necessary when DR accesses won't exit. */ - if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))) + if (unlikely(run_flags & KVM_RUN_LOAD_GUEST_DR6)) + svm_set_dr6(vcpu, vcpu->arch.dr6); + else if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))) svm_set_dr6(vcpu, DR6_ACTIVE_LOW); clgi(); @@ -5084,7 +5087,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, - .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0c61153b275f..235c4af6b692 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run) #endif mov VCPU_RDI(%_ASM_DI), %_ASM_DI + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 3: vmrun %_ASM_AX 4: @@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov SVM_current_vmcb(%rdi), %rax mov KVM_VMCB_pa(%rax), %rax + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 1: vmrun %rax 2: diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c index fab6a1ad98dc..fa41d036acd4 100644 --- a/arch/x86/kvm/vmx/hyperv.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -4,6 +4,7 @@ #include <linux/errno.h> #include <linux/smp.h> +#include "x86.h" #include "../cpuid.h" #include "hyperv.h" #include "nested.h" diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 47476fcc179a..3f83e36a657b 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -42,6 +42,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, + .HOST_OWNED_DEBUGCTL = DEBUGCTLMSR_FREEZE_IN_SMM, + .update_exception_bitmap = vmx_update_exception_bitmap, .get_feature_msr = vmx_get_feature_msr, .get_msr = vmx_get_msr, @@ -60,7 +62,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, - .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, .cache_reg = vmx_cache_reg, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 22bee8a71144..60bd2791d933 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7,6 +7,7 @@ #include <asm/debugreg.h> #include <asm/mmu_context.h> +#include "x86.h" #include "cpuid.h" #include "hyperv.h" #include "mmu.h" @@ -16,7 +17,6 @@ #include "sgx.h" #include "trace.h" #include "vmx.h" -#include "x86.h" #include "smm.h" static bool __read_mostly enable_shadow_vmcs = 1; @@ -2653,10 +2653,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl & + vmx_get_supported_debugctl(vcpu, false)); } else { kvm_set_dr(vcpu, 7, vcpu->arch.dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl); + vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl); } if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) @@ -3020,8 +3021,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3))) return -EINVAL; - if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))) + if (CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || + CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_eip, vcpu))) return -EINVAL; if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && @@ -3055,12 +3056,12 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(vmcs12->host_ss_selector == 0 && !ia32e)) return -EINVAL; - if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) + if (CC(is_noncanonical_base_address(vmcs12->host_fs_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_gs_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_gdtr_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_idtr_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_tr_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_rip, vcpu, 0))) return -EINVAL; /* @@ -3135,7 +3136,8 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) && - CC(!kvm_dr7_valid(vmcs12->guest_dr7))) + (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) || + CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false)))) return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && @@ -3178,7 +3180,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && - (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || + (CC(is_noncanonical_msr_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))) return -EINVAL; @@ -3525,7 +3527,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, if (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) - vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + vmx->nested.pre_vmenter_debugctl = vmx_guest_debugctl_read(); if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) @@ -4576,6 +4578,12 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); + /* + * Note! Save DR7, but intentionally don't grab DEBUGCTL from vmcs02. + * Writes to DEBUGCTL that aren't intercepted by L1 are immediately + * propagated to vmcs12 (see vmx_set_msr()), as the value loaded into + * vmcs02 doesn't strictly track vmcs12. + */ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) vmcs12->guest_dr7 = vcpu->arch.dr7; @@ -4766,7 +4774,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); kvm_set_dr(vcpu, 7, 0x400); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + vmx_guest_debugctl_write(vcpu, 0); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) @@ -4821,6 +4829,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); } + /* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */ + vmx_reload_guest_debugctl(vcpu); + /* * Note that calling vmx_set_{efer,cr0,cr4} is important as they * handle a variety of side effects to KVM's software model. @@ -5172,7 +5183,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, * non-canonical form. This is the only check on the memory * destination for long mode! */ - exn = is_noncanonical_address(*ret, vcpu); + exn = is_noncanonical_address(*ret, vcpu, 0); } else { /* * When not in long mode, the virtual/linear address is @@ -5983,7 +5994,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) * invalidation. */ if (!operand.vpid || - is_noncanonical_address(operand.gla, vcpu)) + is_noncanonical_invlpg_address(operand.gla, vcpu)) return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); vpid_sync_vcpu_addr(vpid02, operand.gla); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 83382a4d1d66..a5edc623166a 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -365,7 +365,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_IA32_DS_AREA: - if (is_noncanonical_address(data, vcpu)) + if (is_noncanonical_msr_address(data, vcpu)) return 1; pmu->ds_area = data; @@ -605,11 +605,11 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) */ static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu) { - u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL); + u64 data = vmx_guest_debugctl_read(); if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) { data &= ~DEBUGCTLMSR_LBR; - vmcs_write64(GUEST_IA32_DEBUGCTL, data); + vmx_guest_debugctl_write(vcpu, data); } } @@ -679,7 +679,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu) if (!lbr_desc->event) { vmx_disable_lbr_msrs_passthrough(vcpu); - if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR) + if (vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR) goto warn; if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use)) goto warn; @@ -701,7 +701,7 @@ warn: static void intel_pmu_cleanup(struct kvm_vcpu *vcpu) { - if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)) + if (!(vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR)) intel_pmu_release_guest_lbr_event(vcpu); } diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index a3c3d2a51f47..b352a3ba7354 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -4,12 +4,11 @@ #include <asm/sgx.h> -#include "cpuid.h" +#include "x86.h" #include "kvm_cache_regs.h" #include "nested.h" #include "sgx.h" #include "vmx.h" -#include "x86.h" bool __read_mostly enable_sgx = 1; module_param_named(sgx, enable_sgx, bool, 0444); @@ -38,7 +37,7 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset, fault = true; } else if (likely(is_64_bit_mode(vcpu))) { *gva = vmx_get_untagged_addr(vcpu, *gva, 0); - fault = is_noncanonical_address(*gva, vcpu); + fault = is_noncanonical_address(*gva, vcpu, 0); } else { *gva &= 0xffffffff; fault = (s.unusable) || diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a3d45b01dbad..6c185a260c5b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -770,8 +770,11 @@ void vmx_emergency_disable_virtualization_cpu(void) return; list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) + loaded_vmcss_on_cpu_link) { vmcs_clear(v->vmcs); + if (v->shadow_vmcs) + vmcs_clear(v->shadow_vmcs); + } kvm_cpu_vmxoff(); } @@ -2145,7 +2148,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; break; case MSR_IA32_DEBUGCTLMSR: - msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL); + msr_info->data = vmx_guest_debugctl_read(); break; default: find_uret_msr: @@ -2170,7 +2173,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, return (unsigned long)data; } -static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) { u64 debugctl = 0; @@ -2182,9 +2185,25 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated (host_initiated || intel_pmu_lbr_is_enabled(vcpu))) debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; + if (boot_cpu_has(X86_FEATURE_RTM) && + (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_RTM))) + debugctl |= DEBUGCTLMSR_RTM_DEBUG; + return debugctl; } +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated) +{ + u64 invalid; + + invalid = data & ~vmx_get_supported_debugctl(vcpu, host_initiated); + if (invalid & (DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) { + kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data); + invalid &= ~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR); + } + return !invalid; +} + /* * Writes msr value into the appropriate "register". * Returns 0 on success, non-0 otherwise. @@ -2253,35 +2272,28 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } vmcs_writel(GUEST_SYSENTER_ESP, data); break; - case MSR_IA32_DEBUGCTLMSR: { - u64 invalid; - - invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated); - if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) { - kvm_pr_unimpl_wrmsr(vcpu, msr_index, data); - data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); - invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); - } - - if (invalid) + case MSR_IA32_DEBUGCTLMSR: + if (!vmx_is_valid_debugctl(vcpu, data, msr_info->host_initiated)) return 1; + data &= vmx_get_supported_debugctl(vcpu, msr_info->host_initiated); + if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) get_vmcs12(vcpu)->guest_ia32_debugctl = data; - vmcs_write64(GUEST_IA32_DEBUGCTL, data); + vmx_guest_debugctl_write(vcpu, data); + if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event && (data & DEBUGCTLMSR_LBR)) intel_pmu_create_guest_lbr_event(vcpu); return 0; - } case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) return 1; - if (is_noncanonical_address(data & PAGE_MASK, vcpu) || + if (is_noncanonical_msr_address(data & PAGE_MASK, vcpu) || (data & MSR_IA32_BNDCFGS_RSVD)) return 1; @@ -2446,7 +2458,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) index = msr_info->index - MSR_IA32_RTIT_ADDR0_A; if (index >= 2 * vmx->pt_desc.num_address_ranges) return 1; - if (is_noncanonical_address(data, vcpu)) + if (is_noncanonical_msr_address(data, vcpu)) return 1; if (index % 2) vmx->pt_desc.guest.addr_b[index / 2] = data; @@ -4820,7 +4832,8 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write32(GUEST_SYSENTER_CS, 0); vmcs_writel(GUEST_SYSENTER_ESP, 0); vmcs_writel(GUEST_SYSENTER_EIP, 0); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + + vmx_guest_debugctl_write(&vmx->vcpu, 0); if (cpu_has_vmx_tpr_shadow()) { vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); @@ -5627,12 +5640,6 @@ void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) set_debugreg(DR6_RESERVED, 6); } -void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) -{ - lockdep_assert_irqs_disabled(); - set_debugreg(vcpu->arch.dr6, 6); -} - void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); @@ -7310,7 +7317,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) - mds_clear_cpu_buffers(); + x86_clear_cpu_buffers(); vmx_disable_fb_clear(vmx); @@ -7350,8 +7357,9 @@ out: guest_state_exit_irqoff(); } -fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) +fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) { + bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT; struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long cr3, cr4; @@ -7396,6 +7404,12 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vcpu->arch.regs_dirty = 0; + if (run_flags & KVM_RUN_LOAD_GUEST_DR6) + set_debugreg(vcpu->arch.dr6, 6); + + if (run_flags & KVM_RUN_LOAD_DEBUGCTL) + vmx_reload_guest_debugctl(vcpu); + /* * Refresh vmcs.HOST_CR3 if necessary. This must be done immediately * prior to VM-Enter, as the kernel may load a new ASID (PCID) any time diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index cf57fbf12104..a7e2de50d27f 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -435,6 +435,32 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu); +u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated); +bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated); + +static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val) +{ + WARN_ON_ONCE(val & DEBUGCTLMSR_FREEZE_IN_SMM); + + val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM; + vmcs_write64(GUEST_IA32_DEBUGCTL, val); +} + +static inline u64 vmx_guest_debugctl_read(void) +{ + return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM; +} + +static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu) +{ + u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL); + + if (!((val ^ vcpu->arch.host_debugctl) & DEBUGCTLMSR_FREEZE_IN_SMM)) + return; + + vmx_guest_debugctl_write(vcpu, val & ~DEBUGCTLMSR_FREEZE_IN_SMM); +} + /* * Note, early Intel manuals have the write-low and read-high bitmap offsets * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index 4aba200f435d..5e4ce13ab305 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -21,7 +21,7 @@ void vmx_vm_destroy(struct kvm *kvm); int vmx_vcpu_precreate(struct kvm *kvm); int vmx_vcpu_create(struct kvm_vcpu *vcpu); int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu); -fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit); +fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags); void vmx_vcpu_free(struct kvm_vcpu *vcpu); void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f378d479fea3..dbd295ef3eba 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1845,7 +1845,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, case MSR_KERNEL_GS_BASE: case MSR_CSTAR: case MSR_LSTAR: - if (is_noncanonical_address(data, vcpu)) + if (is_noncanonical_msr_address(data, vcpu)) return 1; break; case MSR_IA32_SYSENTER_EIP: @@ -1862,7 +1862,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, * value, and that something deterministic happens if the guest * invokes 64-bit SYSENTER. */ - data = __canonical_address(data, vcpu_virt_addr_bits(vcpu)); + data = __canonical_address(data, max_host_virt_addr_bits()); break; case MSR_TSC_AUX: if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) @@ -8608,6 +8608,12 @@ static gva_t emulator_get_untagged_addr(struct x86_emulate_ctxt *ctxt, addr, flags); } +static bool emulator_is_canonical_addr(struct x86_emulate_ctxt *ctxt, + gva_t addr, unsigned int flags) +{ + return !is_noncanonical_address(addr, emul_to_vcpu(ctxt), flags); +} + static const struct x86_emulate_ops emulate_ops = { .vm_bugged = emulator_vm_bugged, .read_gpr = emulator_read_gpr, @@ -8654,6 +8660,7 @@ static const struct x86_emulate_ops emulate_ops = { .triple_fault = emulator_triple_fault, .set_xcr = emulator_set_xcr, .get_untagged_addr = emulator_get_untagged_addr, + .is_canonical_addr = emulator_is_canonical_addr, }; static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) @@ -10704,6 +10711,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); fastpath_t exit_fastpath; + u64 run_flags, debug_ctl; bool req_immediate_exit = false; @@ -10948,8 +10956,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } - if (req_immediate_exit) + run_flags = 0; + if (req_immediate_exit) { + run_flags |= KVM_RUN_FORCE_IMMEDIATE_EXIT; kvm_make_request(KVM_REQ_EVENT, vcpu); + } fpregs_assert_state_consistent(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) @@ -10959,19 +10970,29 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err); if (unlikely(vcpu->arch.switch_db_regs)) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); set_debugreg(vcpu->arch.eff_db[0], 0); set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6); + run_flags |= KVM_RUN_LOAD_GUEST_DR6; } else if (unlikely(hw_breakpoint_active())) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); } - vcpu->arch.host_debugctl = get_debugctlmsr(); + /* + * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL + * can be modified in IRQ context, e.g. via SMP function calls. Inform + * vendor code if any host-owned bits were changed, e.g. so that the + * value loaded into hardware while running the guest can be updated. + */ + debug_ctl = get_debugctlmsr(); + if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL && + !vcpu->arch.guest_state_protected) + run_flags |= KVM_RUN_LOAD_DEBUGCTL; + vcpu->arch.host_debugctl = debug_ctl; guest_timing_enter_irqoff(); @@ -10985,8 +11006,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) && (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED)); - exit_fastpath = kvm_x86_call(vcpu_run)(vcpu, - req_immediate_exit); + exit_fastpath = kvm_x86_call(vcpu_run)(vcpu, run_flags); if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) break; @@ -10998,6 +11018,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) break; } + run_flags = 0; + /* Note, VM-Exits that go down the "slow" path are accounted below. */ ++vcpu->stat.exits; } @@ -12888,11 +12910,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->slots_lock); } kvm_unload_vcpu_mmus(kvm); + kvm_destroy_vcpus(kvm); kvm_x86_call(vm_destroy)(kvm); kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); - kvm_destroy_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); @@ -13756,7 +13778,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) * invalidation. */ if ((!pcid_enabled && (operand.pcid != 0)) || - is_noncanonical_address(operand.gla, vcpu)) { + is_noncanonical_invlpg_address(operand.gla, vcpu)) { kvm_inject_gp(vcpu, 0); return 1; } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a84c48ef5278..ec623d23d13d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -8,6 +8,7 @@ #include <asm/pvclock.h> #include "kvm_cache_regs.h" #include "kvm_emulate.h" +#include "cpuid.h" struct kvm_caps { /* control of guest tsc rate supported? */ @@ -233,9 +234,52 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) return kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 57 : 48; } -static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu) +static inline u8 max_host_virt_addr_bits(void) { - return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu)); + return kvm_cpu_cap_has(X86_FEATURE_LA57) ? 57 : 48; +} + +/* + * x86 MSRs which contain linear addresses, x86 hidden segment bases, and + * IDT/GDT bases have static canonicality checks, the size of which depends + * only on the CPU's support for 5-level paging, rather than on the state of + * CR4.LA57. This applies to both WRMSR and to other instructions that set + * their values, e.g. SGDT. + * + * KVM passes through most of these MSRS and also doesn't intercept the + * instructions that set the hidden segment bases. + * + * Because of this, to be consistent with hardware, even if the guest doesn't + * have LA57 enabled in its CPUID, perform canonicality checks based on *host* + * support for 5 level paging. + * + * Finally, instructions which are related to MMU invalidation of a given + * linear address, also have a similar static canonical check on address. + * This allows for example to invalidate 5-level addresses of a guest from a + * host which uses 4-level paging. + */ +static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu, + unsigned int flags) +{ + if (flags & (X86EMUL_F_INVLPG | X86EMUL_F_MSR | X86EMUL_F_DT_LOAD)) + return !__is_canonical_address(la, max_host_virt_addr_bits()); + else + return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu)); +} + +static inline bool is_noncanonical_msr_address(u64 la, struct kvm_vcpu *vcpu) +{ + return is_noncanonical_address(la, vcpu, X86EMUL_F_MSR); +} + +static inline bool is_noncanonical_base_address(u64 la, struct kvm_vcpu *vcpu) +{ + return is_noncanonical_address(la, vcpu, X86EMUL_F_DT_LOAD); +} + +static inline bool is_noncanonical_invlpg_address(u64 la, struct kvm_vcpu *vcpu) +{ + return is_noncanonical_address(la, vcpu, X86EMUL_F_INVLPG); } static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 622fe24da910..1fc2035df404 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1472,7 +1472,7 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports, sched_poll.nr_ports * sizeof(*ports), &e)) { *r = -EFAULT; - return true; + goto out; } for (i = 0; i < sched_poll.nr_ports; i++) { @@ -1916,8 +1916,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) - return -EINVAL; + /* + * Don't check for the port being within range of max_evtchn_port(). + * Userspace can configure what ever targets it likes; events just won't + * be delivered if/while the target is invalid, just like userspace can + * configure MSIs which target non-existent APICs. + * + * This allow on Live Migration and Live Update, the IRQ routing table + * can be restored *independently* of other things like creating vCPUs, + * without imposing an ordering dependency on userspace. In this + * particular case, the problematic ordering would be with setting the + * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096 + * instead of 1024 event channels. + */ /* We only support 2 level event channels for now */ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) |