Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--   arch/x86/kvm/cpuid.c   |   4
-rw-r--r--   arch/x86/kvm/emulate.c |  10
-rw-r--r--   arch/x86/kvm/mmu.c     |  25
-rw-r--r--   arch/x86/kvm/svm.c     | 129
-rw-r--r--   arch/x86/kvm/vmx.c     |  37
-rw-r--r--   arch/x86/kvm/x86.c     | 168
-rw-r--r--   arch/x86/kvm/x86.h     |   6
7 files changed, 151 insertions, 228 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2fbea2544f24..156441bcaac8 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -30,7 +30,7 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
 	int feature_bit = 0;
 	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
-	xstate_bv &= XSTATE_EXTEND_MASK;
+	xstate_bv &= XFEATURE_MASK_EXTEND;
 	while (xstate_bv) {
 		if (xstate_bv & 0x1) {
 			u32 eax, ebx, ecx, edx, offset;
@@ -51,7 +51,7 @@ u64 kvm_supported_xcr0(void)
 	u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
 
 	if (!kvm_x86_ops->mpx_supported())
-		xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+		xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
 
 	return xcr0;
 }
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b372a7557c16..9da95b9daf8d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2418,7 +2418,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 	u64 val, cr0, cr4;
 	u32 base3;
 	u16 selector;
-	int i;
+	int i, r;
 
 	for (i = 0; i < 16; i++)
 		*reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
@@ -2460,13 +2460,17 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 	dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
 	ctxt->ops->set_gdt(ctxt, &dt);
 
+	r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+	if (r != X86EMUL_CONTINUE)
+		return r;
+
 	for (i = 0; i < 6; i++) {
-		int r = rsm_load_seg_64(ctxt, smbase, i);
+		r = rsm_load_seg_64(ctxt, smbase, i);
 		if (r != X86EMUL_CONTINUE)
 			return r;
 	}
 
-	return rsm_enter_protected_mode(ctxt, cr0, cr4);
+	return X86EMUL_CONTINUE;
 }
 
 static int em_rsm(struct x86_emulate_ctxt *ctxt)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 69088a1ba509..ff606f507913 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3322,7 +3322,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 			break;
 
 		reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
-						    leaf);
+						    iterator.level);
 	}
 
 	walk_shadow_page_lockless_end(vcpu);
@@ -3614,7 +3614,7 @@ static void
 __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 			struct rsvd_bits_validate *rsvd_check,
 			int maxphyaddr, int level, bool nx, bool gbpages,
-			bool pse)
+			bool pse, bool amd)
 {
 	u64 exb_bit_rsvd = 0;
 	u64 gbpages_bit_rsvd = 0;
@@ -3631,7 +3631,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	 * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
 	 * leaf entries) on AMD CPUs only.
 	 */
-	if (guest_cpuid_is_amd(vcpu))
+	if (amd)
 		nonleaf_bit8_rsvd = rsvd_bits(8, 8);
 
 	switch (level) {
@@ -3699,7 +3699,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
 				cpuid_maxphyaddr(vcpu), context->root_level,
 				context->nx, guest_cpuid_has_gbpages(vcpu),
-				is_pse(vcpu));
+				is_pse(vcpu), guest_cpuid_is_amd(vcpu));
 }
 
 static void
@@ -3749,13 +3749,24 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
+	/*
+	 * Passing "true" to the last argument is okay; it adds a check
+	 * on bit 8 of the SPTEs which KVM doesn't use anyway.
+	 */
 	__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
 				boot_cpu_data.x86_phys_bits,
 				context->shadow_root_level, context->nx,
-				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu));
+				guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
+				true);
 }
 EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
 
+static inline bool boot_cpu_is_amd(void)
+{
+	WARN_ON_ONCE(!tdp_enabled);
+	return shadow_x_mask == 0;
+}
+
 /*
  * the direct page table on host, use as much mmu features as
  * possible, however, kvm currently does not do execution-protection.
  */
@@ -3764,11 +3775,11 @@ static void
 reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 				struct kvm_mmu *context)
 {
-	if (guest_cpuid_is_amd(vcpu))
+	if (boot_cpu_is_amd())
 		__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
 					boot_cpu_data.x86_phys_bits,
 					context->shadow_root_level, false,
-					cpu_has_gbpages, true);
+					cpu_has_gbpages, true, true);
 	else
 		__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
 					    boot_cpu_data.x86_phys_bits,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fdb8cb63a6c0..2f9ed1ff0632 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -202,6 +202,7 @@ module_param(npt, int, S_IRUGO);
 static int nested = true;
 module_param(nested, int, S_IRUGO);
 
+static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
 
@@ -513,7 +514,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (svm->vmcb->control.next_rip != 0) {
-		WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
+		WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
 		svm->next_rip = svm->vmcb->control.next_rip;
 	}
 
@@ -865,64 +866,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
-#define MTRR_TYPE_UC_MINUS	7
-#define MTRR2PROTVAL_INVALID 0xff
-
-static u8 mtrr2protval[8];
-
-static u8 fallback_mtrr_type(int mtrr)
-{
-	/*
-	 * WT and WP aren't always available in the host PAT.  Treat
-	 * them as UC and UC- respectively.  Everything else should be
-	 * there.
-	 */
-	switch (mtrr)
-	{
-	case MTRR_TYPE_WRTHROUGH:
-		return MTRR_TYPE_UNCACHABLE;
-	case MTRR_TYPE_WRPROT:
-		return MTRR_TYPE_UC_MINUS;
-	default:
-		BUG();
-	}
-}
-
-static void build_mtrr2protval(void)
-{
-	int i;
-	u64 pat;
-
-	for (i = 0; i < 8; i++)
-		mtrr2protval[i] = MTRR2PROTVAL_INVALID;
-
-	/* Ignore the invalid MTRR types.  */
-	mtrr2protval[2] = 0;
-	mtrr2protval[3] = 0;
-
-	/*
-	 * Use host PAT value to figure out the mapping from guest MTRR
-	 * values to nested page table PAT/PCD/PWT values.  We do not
-	 * want to change the host PAT value every time we enter the
-	 * guest.
-	 */
-	rdmsrl(MSR_IA32_CR_PAT, pat);
-	for (i = 0; i < 8; i++) {
-		u8 mtrr = pat >> (8 * i);
-
-		if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
-			mtrr2protval[mtrr] = __cm_idx2pte(i);
-	}
-
-	for (i = 0; i < 8; i++) {
-		if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
-			u8 fallback = fallback_mtrr_type(i);
-			mtrr2protval[i] = mtrr2protval[fallback];
-			BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
-		}
-	}
-}
-
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
@@ -989,7 +932,6 @@ static __init int svm_hardware_setup(void)
 	} else
 		kvm_disable_tdp();
 
-	build_mtrr2protval();
 	return 0;
 
 err:
@@ -1144,43 +1086,6 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }
 
-static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
-{
-	struct kvm_vcpu *vcpu = &svm->vcpu;
-
-	/* Unlike Intel, AMD takes the guest's CR0.CD into account.
-	 *
-	 * AMD does not have IPAT.  To emulate it for the case of guests
-	 * with no assigned devices, just set everything to WB.  If guests
-	 * have assigned devices, however, we cannot force WB for RAM
-	 * pages only, so use the guest PAT directly.
-	 */
-	if (!kvm_arch_has_assigned_device(vcpu->kvm))
-		*g_pat = 0x0606060606060606;
-	else
-		*g_pat = vcpu->arch.pat;
-}
-
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	u8 mtrr;
-
-	/*
-	 * 1. MMIO: trust guest MTRR, so same as item 3.
-	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
-	 * 3. Passthrough: can't guarantee the result, try to trust guest.
-	 */
-	if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
-		return 0;
-
-	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
-	    kvm_read_cr0(vcpu) & X86_CR0_CD)
-		return _PAGE_NOCACHE;
-
-	mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
-	return mtrr2protval[mtrr];
-}
-
 static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1263,7 +1168,8 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
 	 * It also updates the guest-visible cr0 value.
 	 */
-	(void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+	svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+	kvm_mmu_reset_context(&svm->vcpu);
 
 	save->cr4 = X86_CR4_PAE;
 	/* rdx = ?? */
@@ -1276,7 +1182,6 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = svm->vcpu.arch.pat;
-		svm_set_guest_pat(svm, &save->g_pat);
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -1671,10 +1576,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-
-	/* These are emulated via page tables.  */
-	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
-
+	/*
+	 * re-enable caching here because the QEMU bios
+	 * does not do it - this results in some delay at
+	 * reboot
+	 */
+	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
 	mark_dirty(svm->vmcb, VMCB_CR);
 	update_cr0_intercept(svm);
@@ -3349,16 +3257,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_VM_IGNNE:
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
-	case MSR_IA32_CR_PAT:
-		if (npt_enabled) {
-			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
-				return 1;
-			vcpu->arch.pat = data;
-			svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
-			mark_dirty(svm->vmcb, VMCB_NPT);
-			break;
-		}
-		/* fall through */
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
@@ -4193,6 +4091,11 @@ static bool svm_has_high_real_mode_segbase(void)
 	return true;
 }
 
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	return 0;
+}
+
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64076740251e..6a8bc64566ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4105,17 +4105,13 @@ static void seg_setup(int seg)
 static int alloc_apic_access_page(struct kvm *kvm)
 {
 	struct page *page;
-	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
 	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page_done)
 		goto out;
-	kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
-	kvm_userspace_mem.flags = 0;
-	kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
-	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+	r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+				    APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
 	if (r)
 		goto out;
 
@@ -4140,17 +4136,12 @@ static int alloc_apic_access_page(struct kvm *kvm)
 static int alloc_identity_pagetable(struct kvm *kvm)
 {
 	/* Called with kvm->slots_lock held. */
 
-	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
 	BUG_ON(kvm->arch.ept_identity_pagetable_done);
 
-	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
-	kvm_userspace_mem.flags = 0;
-	kvm_userspace_mem.guest_phys_addr =
-		kvm->arch.ept_identity_map_addr;
-	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+	r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+				    kvm->arch.ept_identity_map_addr, PAGE_SIZE);
 
 	return r;
 }
@@ -4949,14 +4940,9 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
-	struct kvm_userspace_memory_region tss_mem = {
-		.slot = TSS_PRIVATE_MEMSLOT,
-		.guest_phys_addr = addr,
-		.memory_size = PAGE_SIZE * 3,
-		.flags = 0,
-	};
 
-	ret = x86_set_memory_region(kvm, &tss_mem);
+	ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
+				    PAGE_SIZE * 3);
 	if (ret)
 		return ret;
 	kvm->arch.tss_addr = addr;
@@ -8617,17 +8603,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	u64 ipat = 0;
 
 	/* For VT-d and EPT combination
-	 * 1. MMIO: guest may want to apply WC, trust it.
+	 * 1. MMIO: always map as UC
 	 * 2. EPT with VT-d:
 	 *   a. VT-d without snooping control feature: can't guarantee the
-	 *	result, try to trust guest.  So the same as item 1.
+	 *	result, try to trust guest.
 	 *   b. VT-d with snooping control feature: snooping control feature of
 	 *	VT-d engine can guarantee the cache correctness. Just set it
 	 *	to WB to keep consistent with host. So the same as item 3.
 	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
 	 *    consistent with host MTRR
 	 */
-	if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+	if (is_mmio) {
+		cache = MTRR_TYPE_UNCACHABLE;
+		goto exit;
+	}
+
+	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
 		ipat = VMX_EPT_IPAT_BIT;
 		cache = MTRR_TYPE_WRBACK;
 		goto exit;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6bbb0dfb99d0..bda65690788e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -663,9 +663,9 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
 	if (index != XCR_XFEATURE_ENABLED_MASK)
 		return 1;
-	if (!(xcr0 & XSTATE_FP))
+	if (!(xcr0 & XFEATURE_MASK_FP))
 		return 1;
-	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
+	if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
 		return 1;
 
 	/*
@@ -673,23 +673,24 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 	 * saving.  However, xcr0 bit 0 is always set, even if the
 	 * emulated CPU does not support XSAVE (see fx_init).
 	 */
-	valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
+	valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
 	if (xcr0 & ~valid_bits)
 		return 1;
 
-	if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
+	if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
+	    (!(xcr0 & XFEATURE_MASK_BNDCSR)))
 		return 1;
 
-	if (xcr0 & XSTATE_AVX512) {
-		if (!(xcr0 & XSTATE_YMM))
+	if (xcr0 & XFEATURE_MASK_AVX512) {
+		if (!(xcr0 & XFEATURE_MASK_YMM))
 			return 1;
-		if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
+		if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
 			return 1;
 	}
 	kvm_put_guest_xcr0(vcpu);
 	vcpu->arch.xcr0 = xcr0;
 
-	if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
+	if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
 		kvm_update_cpuid(vcpu);
 	return 0;
 }
@@ -1708,8 +1709,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->pvclock_set_guest_stopped_request = false;
 	}
 
-	pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
-
 	/* If the host uses TSC clocksource, then it is stable */
 	if (use_master_clock)
 		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@ -2007,8 +2006,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 					&vcpu->requests);
 
 			ka->boot_vcpu_runs_old_kvmclock = tmp;
-
-			ka->kvmclock_offset = -get_kernel_ns();
 		}
 
 		vcpu->arch.time = data;
@@ -2190,6 +2187,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTTOIP:
 	case MSR_K8_SYSCFG:
+	case MSR_K8_TSEG_ADDR:
+	case MSR_K8_TSEG_MASK:
 	case MSR_K7_HWCR:
 	case MSR_VM_HSAVE_PA:
 	case MSR_K8_INT_PENDING_MSG:
@@ -2907,7 +2906,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 	 * Copy each region from the possibly compacted offset to the
 	 * non-compacted offset.
 	 */
-	valid = xstate_bv & ~XSTATE_FPSSE;
+	valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
 	while (valid) {
 		u64 feature = valid & -valid;
 		int index = fls64(feature) - 1;
@@ -2945,7 +2944,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 	 * Copy each region from the non-compacted offset to the
 	 * possibly compacted offset.
 	 */
-	valid = xstate_bv & ~XSTATE_FPSSE;
+	valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
 	while (valid) {
 		u64 feature = valid & -valid;
 		int index = fls64(feature) - 1;
@@ -2973,7 +2972,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 			&vcpu->arch.guest_fpu.state.fxsave,
 			sizeof(struct fxregs_state));
 		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
-			XSTATE_FPSSE;
+			XFEATURE_MASK_FPSSE;
 	}
 }
 
@@ -2993,7 +2992,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 			return -EINVAL;
 		load_xsave(vcpu, (u8 *)guest_xsave->region);
 	} else {
-		if (xstate_bv & ~XSTATE_FPSSE)
+		if (xstate_bv & ~XFEATURE_MASK_FPSSE)
 			return -EINVAL;
 		memcpy(&vcpu->arch.guest_fpu.state.fxsave,
 			guest_xsave->region, sizeof(struct fxregs_state));
@@ -6455,6 +6454,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+		!vcpu->arch.apf.halted);
+}
+
 static int vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -6463,8 +6468,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
 	for (;;) {
-		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
-		    !vcpu->arch.apf.halted)
+		if (kvm_vcpu_running(vcpu))
 			r = vcpu_enter_guest(vcpu);
 		else
 			r = vcpu_block(kvm, vcpu);
@@ -7002,7 +7006,7 @@ static void fx_init(struct kvm_vcpu *vcpu)
 	/*
 	 * Ensure guest xcr0 is valid for loading
 	 */
-	vcpu->arch.xcr0 = XSTATE_FP;
+	vcpu->arch.xcr0 = XFEATURE_MASK_FP;
 
 	vcpu->arch.cr0 |= X86_CR0_ET;
 }
@@ -7476,34 +7480,66 @@ void kvm_arch_sync_events(struct kvm *kvm)
 	kvm_free_pit(kvm);
 }
 
-int __x86_set_memory_region(struct kvm *kvm,
-			    const struct kvm_userspace_memory_region *mem)
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 {
 	int i, r;
+	unsigned long hva;
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *slot, old;
 
 	/* Called with kvm->slots_lock held.  */
-	BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+	if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
+		return -EINVAL;
 
+	slot = id_to_memslot(slots, id);
+	if (size) {
+		if (WARN_ON(slot->npages))
+			return -EEXIST;
+
+		/*
+		 * MAP_SHARED to prevent internal slot pages from being moved
+		 * by fork()/COW.
+		 */
+		hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+			      MAP_SHARED | MAP_ANONYMOUS, 0);
+		if (IS_ERR((void *)hva))
+			return PTR_ERR((void *)hva);
+	} else {
+		if (!slot->npages)
+			return 0;
+
+		hva = 0;
+	}
+
+	old = *slot;
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-		struct kvm_userspace_memory_region m = *mem;
+		struct kvm_userspace_memory_region m;
 
-		m.slot |= i << 16;
+		m.slot = id | (i << 16);
+		m.flags = 0;
+		m.guest_phys_addr = gpa;
+		m.userspace_addr = hva;
+		m.memory_size = size;
 		r = __kvm_set_memory_region(kvm, &m);
 		if (r < 0)
 			return r;
 	}
 
+	if (!size) {
+		r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
+		WARN_ON(r < 0);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__x86_set_memory_region);
 
-int x86_set_memory_region(struct kvm *kvm,
-			  const struct kvm_userspace_memory_region *mem)
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 {
 	int r;
 
 	mutex_lock(&kvm->slots_lock);
-	r = __x86_set_memory_region(kvm, mem);
+	r = __x86_set_memory_region(kvm, id, gpa, size);
 	mutex_unlock(&kvm->slots_lock);
 
 	return r;
@@ -7518,16 +7554,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		 * unless the the memory map has changed due to process exit
 		 * or fd copying.
 		 */
-		struct kvm_userspace_memory_region mem;
-		memset(&mem, 0, sizeof(mem));
-		mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
-		x86_set_memory_region(kvm, &mem);
-
-		mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
-		x86_set_memory_region(kvm, &mem);
-
-		mem.slot = TSS_PRIVATE_MEMSLOT;
-		x86_set_memory_region(kvm, &mem);
+		x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
+		x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
+		x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
 	}
 	kvm_iommu_unmap_guest(kvm);
 	kfree(kvm->arch.vpic);
@@ -7630,27 +7659,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				const struct kvm_userspace_memory_region *mem,
 				enum kvm_mr_change change)
 {
-	/*
-	 * Only private memory slots need to be mapped here since
-	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
-	 */
-	if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
-		unsigned long userspace_addr;
-
-		/*
-		 * MAP_SHARED to prevent internal slot pages from being moved
-		 * by fork()/COW.
-		 */
-		userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
-					 PROT_READ | PROT_WRITE,
-					 MAP_SHARED | MAP_ANONYMOUS, 0);
-
-		if (IS_ERR((void *)userspace_addr))
-			return PTR_ERR((void *)userspace_addr);
-
-		memslot->userspace_addr = userspace_addr;
-	}
-
 	return 0;
 }
 
@@ -7712,17 +7720,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 {
 	int nr_mmu_pages = 0;
 
-	if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
-		int ret;
-
-		ret = vm_munmap(old->userspace_addr,
-				old->npages * PAGE_SIZE);
-		if (ret < 0)
-			printk(KERN_WARNING
-			       "kvm_vm_ioctl_set_memory_region: "
-			       "failed to munmap memory\n");
-	}
-
 	if (!kvm->arch.n_requested_mmu_pages)
 		nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 
@@ -7771,19 +7768,36 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 	kvm_mmu_invalidate_zap_all_pages(kvm);
 }
 
+static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+{
+	if (!list_empty_careful(&vcpu->async_pf.done))
+		return true;
+
+	if (kvm_apic_has_events(vcpu))
+		return true;
+
+	if (vcpu->arch.pv.pv_unhalted)
+		return true;
+
+	if (atomic_read(&vcpu->arch.nmi_queued))
+		return true;
+
+	if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+		return true;
+
+	if (kvm_arch_interrupt_allowed(vcpu) &&
+	    kvm_cpu_has_interrupt(vcpu))
+		return true;
+
+	return false;
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
 		kvm_x86_ops->check_nested_events(vcpu, false);
 
-	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
-		!vcpu->arch.apf.halted)
-		|| !list_empty_careful(&vcpu->async_pf.done)
-		|| kvm_apic_has_events(vcpu)
-		|| vcpu->arch.pv.pv_unhalted
-		|| atomic_read(&vcpu->arch.nmi_queued) ||
-		(kvm_arch_interrupt_allowed(vcpu) &&
-		 kvm_cpu_has_interrupt(vcpu));
+	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
 }
 
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2f822cd886c2..f2afa5fe48a6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -180,9 +180,9 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 					  int page_num);
 
-#define KVM_SUPPORTED_XCR0	(XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
-				| XSTATE_BNDREGS | XSTATE_BNDCSR \
-				| XSTATE_AVX512)
+#define KVM_SUPPORTED_XCR0	(XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
+				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
+				| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512)
 extern u64 host_xcr0;
 
 extern u64 kvm_supported_xcr0(void);
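
A note on the private-memslot helper changed in the x86.c and vmx.c hunks above: x86_set_memory_region() now takes the slot id, guest physical address, and size directly, creates the anonymous backing mapping itself when size is nonzero, and deletes the slot (including vm_munmap() of the old mapping) when size is zero. The following caller-side sketch only illustrates that calling convention; it is not code from the patch, and the slot id and base address macros are made up for the example.

	#include <linux/kvm_host.h>
	#include <linux/mm.h>

	/* Hypothetical slot id and base address, for illustration only;
	 * real callers use APIC_ACCESS_PAGE_PRIVATE_MEMSLOT and friends. */
	#define EXAMPLE_PRIVATE_MEMSLOT		(KVM_USER_MEM_SLOTS + 3)
	#define EXAMPLE_PRIVATE_SLOT_BASE	0xfffbc000ull

	static int example_private_slot_cycle(struct kvm *kvm)
	{
		int r;

		/* Nonzero size: the helper vm_mmap()s anonymous memory and
		 * registers the slot in every KVM address space. */
		r = x86_set_memory_region(kvm, EXAMPLE_PRIVATE_MEMSLOT,
					  EXAMPLE_PRIVATE_SLOT_BASE, PAGE_SIZE);
		if (r)
			return r;

		/* Zero size: the helper removes the slot and vm_munmap()s the
		 * backing memory it created above. */
		return x86_set_memory_region(kvm, EXAMPLE_PRIVATE_MEMSLOT, 0, 0);
	}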