diff options
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 183 |
1 files changed, 100 insertions, 83 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1107626938cc..2c1cfe68a9af 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -280,9 +280,9 @@ module_param(avic, int, S_IRUGO); static int vls = true; module_param(vls, int, 0444); -/* AVIC VM ID bit masks and lock */ -static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR); -static DEFINE_SPINLOCK(avic_vm_id_lock); +/* enable/disable Virtual GIF */ +static int vgif = true; +module_param(vgif, int, 0444); static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); @@ -479,19 +479,33 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit) recalc_intercepts(svm); } +static inline bool vgif_enabled(struct vcpu_svm *svm) +{ + return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK); +} + static inline void enable_gif(struct vcpu_svm *svm) { - svm->vcpu.arch.hflags |= HF_GIF_MASK; + if (vgif_enabled(svm)) + svm->vmcb->control.int_ctl |= V_GIF_MASK; + else + svm->vcpu.arch.hflags |= HF_GIF_MASK; } static inline void disable_gif(struct vcpu_svm *svm) { - svm->vcpu.arch.hflags &= ~HF_GIF_MASK; + if (vgif_enabled(svm)) + svm->vmcb->control.int_ctl &= ~V_GIF_MASK; + else + svm->vcpu.arch.hflags &= ~HF_GIF_MASK; } static inline bool gif_set(struct vcpu_svm *svm) { - return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); + if (vgif_enabled(svm)) + return !!(svm->vmcb->control.int_ctl & V_GIF_MASK); + else + return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); } static unsigned long iopm_base; @@ -567,10 +581,10 @@ static inline void invlpga(unsigned long addr, u32 asid) asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); } -static int get_npt_level(void) +static int get_npt_level(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 - return PT64_ROOT_LEVEL; + return PT64_ROOT_4LEVEL; #else return PT32E_ROOT_LEVEL; #endif @@ -641,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); unsigned nr = vcpu->arch.exception.nr; bool has_error_code = vcpu->arch.exception.has_error_code; - bool reinject = vcpu->arch.exception.reinject; + bool reinject = vcpu->arch.exception.injected; u32 error_code = vcpu->arch.exception.error_code; /* @@ -973,6 +987,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) static void disable_nmi_singlestep(struct vcpu_svm *svm) { svm->nmi_singlestep = false; + if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { /* Clear our flags if they were not set by the guest */ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) @@ -989,6 +1004,8 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm) */ #define SVM_VM_DATA_HASH_BITS 8 static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); +static u32 next_vm_id = 0; +static bool next_vm_id_wrapped = 0; static DEFINE_SPINLOCK(svm_vm_data_hash_lock); /* Note: @@ -1100,7 +1117,7 @@ static __init int svm_hardware_setup(void) if (vls) { if (!npt_enabled || - !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) || + !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) || !IS_ENABLED(CONFIG_X86_64)) { vls = false; } else { @@ -1108,6 +1125,13 @@ static __init int svm_hardware_setup(void) } } + if (vgif) { + if (!boot_cpu_has(X86_FEATURE_VGIF)) + vgif = false; + else + pr_info("Virtual GIF supported\n"); + } + return 0; err: @@ -1167,9 +1191,9 @@ static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; - phys_addr_t bpa = page_to_phys(svm->avic_backing_page); - phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page); - phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page); + phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); + phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page)); + phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page)); vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; @@ -1232,8 +1256,8 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_MWAIT); } - control->iopm_base_pa = iopm_base; - control->msrpm_base_pa = __pa(svm->msrpm); + control->iopm_base_pa = __sme_set(iopm_base); + control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); control->int_ctl = V_INTR_MASKING_MASK; init_seg(&save->es); @@ -1305,6 +1329,12 @@ static void init_vmcb(struct vcpu_svm *svm) svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; } + if (vgif) { + clr_intercept(svm, INTERCEPT_STGI); + clr_intercept(svm, INTERCEPT_CLGI); + svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; + } + mark_all_dirty(svm->vmcb); enable_gif(svm); @@ -1377,9 +1407,9 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return -EINVAL; new_entry = READ_ONCE(*entry); - new_entry = (page_to_phys(svm->avic_backing_page) & - AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | - AVIC_PHYSICAL_ID_ENTRY_VALID_MASK; + new_entry = __sme_set((page_to_phys(svm->avic_backing_page) & + AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | + AVIC_PHYSICAL_ID_ENTRY_VALID_MASK); WRITE_ONCE(*entry, new_entry); svm->avic_physical_id_cache = entry; @@ -1387,34 +1417,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return 0; } -static inline int avic_get_next_vm_id(void) -{ - int id; - - spin_lock(&avic_vm_id_lock); - - /* AVIC VM ID is one-based. */ - id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1); - if (id <= AVIC_VM_ID_MASK) - __set_bit(id, avic_vm_id_bitmap); - else - id = -EAGAIN; - - spin_unlock(&avic_vm_id_lock); - return id; -} - -static inline int avic_free_vm_id(int id) -{ - if (id <= 0 || id > AVIC_VM_ID_MASK) - return -EINVAL; - - spin_lock(&avic_vm_id_lock); - __clear_bit(id, avic_vm_id_bitmap); - spin_unlock(&avic_vm_id_lock); - return 0; -} - static void avic_vm_destroy(struct kvm *kvm) { unsigned long flags; @@ -1423,8 +1425,6 @@ static void avic_vm_destroy(struct kvm *kvm) if (!avic) return; - avic_free_vm_id(vm_data->avic_vm_id); - if (vm_data->avic_logical_id_table_page) __free_page(vm_data->avic_logical_id_table_page); if (vm_data->avic_physical_id_table_page) @@ -1438,19 +1438,16 @@ static void avic_vm_destroy(struct kvm *kvm) static int avic_vm_init(struct kvm *kvm) { unsigned long flags; - int vm_id, err = -ENOMEM; + int err = -ENOMEM; struct kvm_arch *vm_data = &kvm->arch; struct page *p_page; struct page *l_page; + struct kvm_arch *ka; + u32 vm_id; if (!avic) return 0; - vm_id = avic_get_next_vm_id(); - if (vm_id < 0) - return vm_id; - vm_data->avic_vm_id = (u32)vm_id; - /* Allocating physical APIC ID table (4KB) */ p_page = alloc_page(GFP_KERNEL); if (!p_page) @@ -1468,6 +1465,22 @@ static int avic_vm_init(struct kvm *kvm) clear_page(page_address(l_page)); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); + again: + vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK; + if (vm_id == 0) { /* id is 1-based, zero is not okay */ + next_vm_id_wrapped = 1; + goto again; + } + /* Is it still in use? Only possible if wrapped at least once */ + if (next_vm_id_wrapped) { + hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { + struct kvm *k2 = container_of(ka, struct kvm, arch); + struct kvm_arch *vd2 = &k2->arch; + if (vd2->avic_vm_id == vm_id) + goto again; + } + } + vm_data->avic_vm_id = vm_id; hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); @@ -1580,7 +1593,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) } init_vmcb(svm); - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); + kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); if (kvm_vcpu_apicv_active(vcpu) && !init_event) @@ -1647,7 +1660,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->vmcb = page_address(page); clear_page(svm->vmcb); - svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; + svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT); svm->asid_generation = 0; init_vmcb(svm); @@ -1675,7 +1688,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); @@ -1777,11 +1790,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } -static u32 svm_get_pkru(struct kvm_vcpu *vcpu) -{ - return 0; -} - static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { @@ -2335,7 +2343,7 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) u64 pdpte; int ret; - ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte, + ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte, offset_in_page(cr3) + index * 8, 8); if (ret) return 0; @@ -2347,7 +2355,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.nested_cr3 = root; + svm->vmcb->control.nested_cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_NPT); svm_flush_tlb(vcpu); } @@ -2389,7 +2397,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; - vcpu->arch.mmu.shadow_root_level = get_npt_level(); + vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu); reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } @@ -2878,7 +2886,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) svm->nested.msrpm[p] = svm->msrpm[p] | value; } - svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); + svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm)); return true; } @@ -3152,6 +3160,13 @@ static int stgi_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; + /* + * If VGIF is enabled, the STGI intercept is only added to + * detect the opening of the NMI window; remove it now. + */ + if (vgif_enabled(svm)) + clr_intercept(svm, INTERCEPT_STGI); + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; ret = kvm_skip_emulated_instruction(&svm->vcpu); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); @@ -3749,7 +3764,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm) static int pause_interception(struct vcpu_svm *svm) { - kvm_vcpu_on_spin(&(svm->vcpu)); + struct kvm_vcpu *vcpu = &svm->vcpu; + bool in_kernel = (svm_get_cpl(vcpu) == 0); + + kvm_vcpu_on_spin(vcpu, in_kernel); return 1; } @@ -4233,8 +4251,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF); - if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) @@ -4511,7 +4527,7 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, pr_debug("SVM: %s: use GA mode for irq %u\n", __func__, irq.vector); *svm = to_svm(vcpu); - vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page); + vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page)); vcpu_info->vector = irq.vector; return 0; @@ -4562,7 +4578,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, struct amd_iommu_pi_data pi; /* Try to enable guest_mode in IRTE */ - pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK; + pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & + AVIC_HPA_MASK); pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id, svm->vcpu.vcpu_id); pi.is_guest_mode = true; @@ -4686,9 +4703,11 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes * 1, because that's a separate STGI/VMRUN intercept. The next time we * get that intercept, this function will be called again though and - * we'll get the vintr intercept. + * we'll get the vintr intercept. However, if the vGIF feature is + * enabled, the STGI interception will not occur. Enable the irq + * window under the assumption that the hardware will set the GIF. */ - if (gif_set(svm) && nested_svm_intr(svm)) { + if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) { svm_set_vintr(svm); svm_inject_irq(svm, 0x0); } @@ -4702,8 +4721,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) == HF_NMI_MASK) return; /* IRET will cause a vm exit */ - if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0) + if (!gif_set(svm)) { + if (vgif_enabled(svm)) + set_intercept(svm, INTERCEPT_STGI); return; /* STGI will cause a vm exit */ + } if (svm->nested.exit_required) return; /* we're not going to run the guest yet */ @@ -5011,7 +5033,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->save.cr3 = root; + svm->vmcb->save.cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_CR); svm_flush_tlb(vcpu); } @@ -5020,7 +5042,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.nested_cr3 = root; + svm->vmcb->control.nested_cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_NPT); /* Also sync guest cr3 here in case we live migrate */ @@ -5075,17 +5097,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) static void svm_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_cpuid_entry2 *entry; /* Update nrips enabled cache */ - svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); + svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); if (!kvm_vcpu_apicv_active(vcpu)) return; - entry = kvm_find_cpuid_entry(vcpu, 1, 0); - if (entry) - entry->ecx &= ~bit(X86_FEATURE_X2APIC); + guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); } static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -5413,8 +5432,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .get_pkru = svm_get_pkru, - .tlb_flush = svm_flush_tlb, .run = svm_vcpu_run, |