diff options
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 513 |
1 files changed, 396 insertions, 117 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e545a8a613b1..3f7c1fc7a3ce 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -197,7 +197,8 @@ EXPORT_SYMBOL_GPL(host_efer); bool __read_mostly allow_smaller_maxphyaddr = 0; EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); -static u64 __read_mostly host_xss; +u64 __read_mostly host_xss; +EXPORT_SYMBOL_GPL(host_xss); u64 __read_mostly supported_xss; EXPORT_SYMBOL_GPL(supported_xss); @@ -804,11 +805,29 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(pdptrs_changed); +void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0) +{ + unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; + + if ((cr0 ^ old_cr0) & X86_CR0_PG) { + kvm_clear_async_pf_completion_queue(vcpu); + kvm_async_pf_hash_reset(vcpu); + } + + if ((cr0 ^ old_cr0) & update_bits) + kvm_mmu_reset_context(vcpu); + + if (((cr0 ^ old_cr0) & X86_CR0_CD) && + kvm_arch_has_noncoherent_dma(vcpu->kvm) && + !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); +} +EXPORT_SYMBOL_GPL(kvm_post_set_cr0); + int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { unsigned long old_cr0 = kvm_read_cr0(vcpu); unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG; - unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; cr0 |= X86_CR0_ET; @@ -847,18 +866,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_x86_ops.set_cr0(vcpu, cr0); - if ((cr0 ^ old_cr0) & X86_CR0_PG) { - kvm_clear_async_pf_completion_queue(vcpu); - kvm_async_pf_hash_reset(vcpu); - } - - if ((cr0 ^ old_cr0) & update_bits) - kvm_mmu_reset_context(vcpu); - - if (((cr0 ^ old_cr0) & X86_CR0_CD) && - kvm_arch_has_noncoherent_dma(vcpu->kvm) && - !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) - kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); + kvm_post_set_cr0(vcpu, old_cr0, cr0); return 0; } @@ -872,6 +880,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw); void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) { + if (vcpu->arch.guest_state_protected) + return; + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { if (vcpu->arch.xcr0 != host_xcr0) @@ -892,6 +903,9 @@ EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { + if (vcpu->arch.guest_state_protected) + return; + if (static_cpu_has(X86_FEATURE_PKU) && (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) { @@ -964,26 +978,36 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { if (cr4 & cr4_reserved_bits) - return -EINVAL; + return false; if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) - return -EINVAL; + return false; - return 0; + return kvm_x86_ops.is_valid_cr4(vcpu, cr4); +} +EXPORT_SYMBOL_GPL(kvm_is_valid_cr4); + +void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4) +{ + unsigned long mmu_role_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE; + + if (((cr4 ^ old_cr4) & mmu_role_bits) || + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) + kvm_mmu_reset_context(vcpu); } -EXPORT_SYMBOL_GPL(kvm_valid_cr4); +EXPORT_SYMBOL_GPL(kvm_post_set_cr4); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP; - unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE; - if (kvm_valid_cr4(vcpu, cr4)) + if (!kvm_is_valid_cr4(vcpu, cr4)) return 1; if (is_long_mode(vcpu)) { @@ -1006,15 +1030,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; } - if (kvm_x86_ops.set_cr4(vcpu, cr4)) - return 1; + kvm_x86_ops.set_cr4(vcpu, cr4); - if (((cr4 ^ old_cr4) & mmu_role_bits) || - (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) - kvm_mmu_reset_context(vcpu); - - if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) - kvm_update_cpuid_runtime(vcpu); + kvm_post_set_cr4(vcpu, old_cr4, cr4); return 0; } @@ -1638,27 +1656,20 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) } EXPORT_SYMBOL_GPL(kvm_set_msr); -static int complete_emulated_msr(struct kvm_vcpu *vcpu, bool is_read) +static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu) { - if (vcpu->run->msr.error) { - kvm_inject_gp(vcpu, 0); - return 1; - } else if (is_read) { + int err = vcpu->run->msr.error; + if (!err) { kvm_rax_write(vcpu, (u32)vcpu->run->msr.data); kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32); } - return kvm_skip_emulated_instruction(vcpu); -} - -static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu) -{ - return complete_emulated_msr(vcpu, true); + return kvm_x86_ops.complete_emulated_msr(vcpu, err); } static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu) { - return complete_emulated_msr(vcpu, false); + return kvm_x86_ops.complete_emulated_msr(vcpu, vcpu->run->msr.error); } static u64 kvm_msr_reason(int r) @@ -1721,18 +1732,16 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) return 0; } - /* MSR read failed? Inject a #GP */ - if (r) { + if (!r) { + trace_kvm_msr_read(ecx, data); + + kvm_rax_write(vcpu, data & -1u); + kvm_rdx_write(vcpu, (data >> 32) & -1u); + } else { trace_kvm_msr_read_ex(ecx); - kvm_inject_gp(vcpu, 0); - return 1; } - trace_kvm_msr_read(ecx, data); - - kvm_rax_write(vcpu, data & -1u); - kvm_rdx_write(vcpu, (data >> 32) & -1u); - return kvm_skip_emulated_instruction(vcpu); + return kvm_x86_ops.complete_emulated_msr(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); @@ -1753,15 +1762,12 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) if (r < 0) return r; - /* MSR write failed? Inject a #GP */ - if (r > 0) { + if (!r) + trace_kvm_msr_write(ecx, data); + else trace_kvm_msr_write_ex(ecx, data); - kvm_inject_gp(vcpu, 0); - return 1; - } - trace_kvm_msr_write(ecx, data); - return kvm_skip_emulated_instruction(vcpu); + return kvm_x86_ops.complete_emulated_msr(vcpu, r); } EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); @@ -3678,6 +3684,27 @@ static inline bool kvm_can_mwait_in_guest(void) boot_cpu_has(X86_FEATURE_ARAT); } +static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu, + struct kvm_cpuid2 __user *cpuid_arg) +{ + struct kvm_cpuid2 cpuid; + int r; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) + return r; + + r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries); + if (r) + return r; + + r = -EFAULT; + if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) + return r; + + return 0; +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r = 0; @@ -3714,6 +3741,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_TLBFLUSH: case KVM_CAP_HYPERV_SEND_IPI: case KVM_CAP_HYPERV_CPUID: + case KVM_CAP_SYS_HYPERV_CPUID: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -3762,7 +3790,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE); + r = kvm_x86_ops.has_emulated_msr(kvm, MSR_IA32_SMBASE); break; case KVM_CAP_VAPIC: r = !kvm_x86_ops.cpu_has_accelerated_tpr(); @@ -3899,6 +3927,9 @@ long kvm_arch_dev_ioctl(struct file *filp, case KVM_GET_MSRS: r = msr_io(NULL, argp, do_get_msr_feature, 1); break; + case KVM_GET_SUPPORTED_HV_CPUID: + r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp); + break; default: r = -EINVAL; break; @@ -3997,7 +4028,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { int idx; - if (vcpu->preempted) + if (vcpu->preempted && !vcpu->arch.guest_state_protected) vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu); /* @@ -4481,6 +4512,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { + if (!vcpu->arch.guest_fpu) + return; + if (boot_cpu_has(X86_FEATURE_XSAVE)) { memset(guest_xsave, 0, sizeof(struct kvm_xsave)); fill_xsave((u8 *) guest_xsave->region, vcpu); @@ -4498,9 +4532,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - u64 xstate_bv = - *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; - u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; + u64 xstate_bv; + u32 mxcsr; + + if (!vcpu->arch.guest_fpu) + return 0; + + xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; + mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)]; if (boot_cpu_has(X86_FEATURE_XSAVE)) { /* @@ -4977,25 +5016,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } - case KVM_GET_SUPPORTED_HV_CPUID: { - struct kvm_cpuid2 __user *cpuid_arg = argp; - struct kvm_cpuid2 cpuid; - - r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) - goto out; - - r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid, - cpuid_arg->entries); - if (r) - goto out; - - r = -EFAULT; - if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid))) - goto out; - r = 0; + case KVM_GET_SUPPORTED_HV_CPUID: + r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp); break; - } default: r = -EINVAL; } @@ -5776,7 +5799,7 @@ static void kvm_init_msr_list(void) } for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) { - if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i])) + if (!kvm_x86_ops.has_emulated_msr(NULL, emulated_msrs_all[i])) continue; emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i]; @@ -8158,7 +8181,14 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; - kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + /* + * if_flag is obsolete and useless, so do not bother + * setting it for SEV-ES guests. Userspace can just + * use kvm_run->ready_for_interrupt_injection. + */ + kvm_run->if_flag = !vcpu->arch.guest_state_protected + && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); @@ -8748,6 +8778,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; + /* Forbid vmenter if vcpu dirty ring is soft-full */ + if (unlikely(vcpu->kvm->dirty_ring_size && + kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) { + vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL; + trace_kvm_dirty_ring_exit(vcpu); + r = 0; + goto out; + } + if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { @@ -9223,9 +9262,14 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) kvm_save_current_fpu(vcpu->arch.user_fpu); - /* PKRU is separately restored in kvm_x86_ops.run. */ - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, - ~XFEATURE_MASK_PKRU); + /* + * Guests with protected state can't have it set by the hypervisor, + * so skip trying to set it. + */ + if (vcpu->arch.guest_fpu) + /* PKRU is separately restored in kvm_x86_ops.run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, + ~XFEATURE_MASK_PKRU); fpregs_mark_activate(); fpregs_unlock(); @@ -9238,7 +9282,12 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - kvm_save_current_fpu(vcpu->arch.guest_fpu); + /* + * Guests with protected state can't have it read by the hypervisor, + * so skip trying to save it. + */ + if (vcpu->arch.guest_fpu) + kvm_save_current_fpu(vcpu->arch.guest_fpu); copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); @@ -9417,6 +9466,9 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct desc_ptr dt; + if (vcpu->arch.guest_state_protected) + goto skip_protected_regs; + kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -9434,9 +9486,11 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->gdt.limit = dt.size; sregs->gdt.base = dt.address; - sregs->cr0 = kvm_read_cr0(vcpu); sregs->cr2 = vcpu->arch.cr2; sregs->cr3 = kvm_read_cr3(vcpu); + +skip_protected_regs: + sregs->cr0 = kvm_read_cr0(vcpu); sregs->cr4 = kvm_read_cr4(vcpu); sregs->cr8 = kvm_get_cr8(vcpu); sregs->efer = vcpu->arch.efer; @@ -9535,7 +9589,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); -static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* @@ -9543,31 +9597,29 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) * 64-bit mode (though maybe in a 32-bit code segment). * CR4.PAE and EFER.LMA must be set. */ - if (!(sregs->cr4 & X86_CR4_PAE) - || !(sregs->efer & EFER_LMA)) - return -EINVAL; + if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA)) + return false; } else { /* * Not in 64-bit mode: EFER.LMA is clear and the code * segment cannot be 64-bit. */ if (sregs->efer & EFER_LMA || sregs->cs.l) - return -EINVAL; + return false; } - return kvm_valid_cr4(vcpu, sregs->cr4); + return kvm_is_valid_cr4(vcpu, sregs->cr4); } static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; - int cpuid_update_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; int ret = -EINVAL; - if (kvm_valid_sregs(vcpu, sregs)) + if (!kvm_is_valid_sregs(vcpu, sregs)) goto out; apic_base_msr.data = sregs->apic_base; @@ -9575,6 +9627,9 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) if (kvm_set_apic_base(vcpu, &apic_base_msr)) goto out; + if (vcpu->arch.guest_state_protected) + goto skip_protected_regs; + dt.size = sregs->idt.limit; dt.address = sregs->idt.base; kvm_x86_ops.set_idt(vcpu, &dt); @@ -9597,11 +9652,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; - cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & - (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops.set_cr4(vcpu, sregs->cr4); - if (cpuid_update_needed) - kvm_update_cpuid_runtime(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); if (is_pae_paging(vcpu)) { @@ -9613,14 +9664,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); - max_bits = KVM_NR_INTERRUPTS; - pending_vec = find_first_bit( - (const unsigned long *)sregs->interrupt_bitmap, max_bits); - if (pending_vec < max_bits) { - kvm_queue_interrupt(vcpu, pending_vec, false); - pr_debug("Set back pending irq %d\n", pending_vec); - } - kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -9639,6 +9682,15 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) !is_protmode(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; +skip_protected_regs: + max_bits = KVM_NR_INTERRUPTS; + pending_vec = find_first_bit( + (const unsigned long *)sregs->interrupt_bitmap, max_bits); + if (pending_vec < max_bits) { + kvm_queue_interrupt(vcpu, pending_vec, false); + pr_debug("Set back pending irq %d\n", pending_vec); + } + kvm_make_request(KVM_REQ_EVENT, vcpu); ret = 0; @@ -9663,6 +9715,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, unsigned long rflags; int i, r; + if (vcpu->arch.guest_state_protected) + return -EINVAL; + vcpu_load(vcpu); if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { @@ -9742,6 +9797,9 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { struct fxregs_state *fxsave; + if (!vcpu->arch.guest_fpu) + return 0; + vcpu_load(vcpu); fxsave = &vcpu->arch.guest_fpu->state.fxsave; @@ -9762,6 +9820,9 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { struct fxregs_state *fxsave; + if (!vcpu->arch.guest_fpu) + return 0; + vcpu_load(vcpu); fxsave = &vcpu->arch.guest_fpu->state.fxsave; @@ -9820,6 +9881,9 @@ static int sync_regs(struct kvm_vcpu *vcpu) static void fx_init(struct kvm_vcpu *vcpu) { + if (!vcpu->arch.guest_fpu) + return; + fpstate_init(&vcpu->arch.guest_fpu->state); if (boot_cpu_has(X86_FEATURE_XSAVES)) vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv = @@ -9833,6 +9897,15 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } +void kvm_free_guest_fpu(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.guest_fpu) { + kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + vcpu->arch.guest_fpu = NULL; + } +} +EXPORT_SYMBOL_GPL(kvm_free_guest_fpu); + int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) @@ -9869,7 +9942,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) r = -ENOMEM; - page = alloc_page(GFP_KERNEL | __GFP_ZERO); + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!page) goto fail_free_lapic; vcpu->arch.pio_data = page_address(page); @@ -9928,7 +10001,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return 0; free_guest_fpu: - kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + kvm_free_guest_fpu(vcpu); free_user_fpu: kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); free_emulate_ctxt: @@ -9982,7 +10055,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt); free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu); - kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu); + kvm_free_guest_fpu(vcpu); kvm_hv_vcpu_uninit(vcpu); kvm_pmu_destroy(vcpu); @@ -10030,7 +10103,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_async_pf_hash_reset(vcpu); vcpu->arch.apf.halted = false; - if (kvm_mpx_supported()) { + if (vcpu->arch.guest_fpu && kvm_mpx_supported()) { void *mpx_state_buffer; /* @@ -10349,7 +10422,32 @@ void kvm_arch_sync_events(struct kvm *kvm) kvm_free_pit(kvm); } -int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) +#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) + +/** + * __x86_set_memory_region: Setup KVM internal memory slot + * + * @kvm: the kvm pointer to the VM. + * @id: the slot ID to setup. + * @gpa: the GPA to install the slot (unused when @size == 0). + * @size: the size of the slot. Set to zero to uninstall a slot. + * + * This function helps to setup a KVM internal memory slot. Specify + * @size > 0 to install a new slot, while @size == 0 to uninstall a + * slot. The return code can be one of the following: + * + * HVA: on success (uninstall will return a bogus HVA) + * -errno: on error + * + * The caller should always use IS_ERR() to check the return value + * before use. Note, the KVM internal memory slots are guaranteed to + * remain valid and unchanged until the VM is destroyed, i.e., the + * GPA->HVA translation will not change. However, the HVA is a user + * address, i.e. its accessibility is not guaranteed, and must be + * accessed via __copy_{to,from}_user(). + */ +void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, + u32 size) { int i, r; unsigned long hva, old_npages; @@ -10358,12 +10456,12 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) /* Called with kvm->slots_lock held. */ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM)) - return -EINVAL; + return ERR_PTR_USR(-EINVAL); slot = id_to_memslot(slots, id); if (size) { if (slot && slot->npages) - return -EEXIST; + return ERR_PTR_USR(-EEXIST); /* * MAP_SHARED to prevent internal slot pages from being moved @@ -10372,7 +10470,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); if (IS_ERR((void *)hva)) - return PTR_ERR((void *)hva); + return (void __user *)hva; } else { if (!slot || !slot->npages) return 0; @@ -10391,13 +10489,13 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) m.memory_size = size; r = __kvm_set_memory_region(kvm, &m); if (r < 0) - return r; + return ERR_PTR_USR(r); } if (!size) vm_munmap(hva, old_npages * PAGE_SIZE); - return 0; + return (void __user *)hva; } EXPORT_SYMBOL_GPL(__x86_set_memory_region); @@ -10754,6 +10852,10 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) { + /* Can't read the RIP when guest state is protected, just return 0 */ + if (vcpu->arch.guest_state_protected) + return 0; + if (is_64_bit_mode(vcpu)) return kvm_rip_read(vcpu); return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) + @@ -11263,6 +11365,179 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_handle_invpcid); +static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_mmio_fragment *frag; + unsigned int len; + + BUG_ON(!vcpu->mmio_needed); + + /* Complete previous fragment */ + frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment]; + len = min(8u, frag->len); + if (!vcpu->mmio_is_write) + memcpy(frag->data, run->mmio.data, len); + + if (frag->len <= 8) { + /* Switch to the next fragment. */ + frag++; + vcpu->mmio_cur_fragment++; + } else { + /* Go forward to the next mmio piece. */ + frag->data += len; + frag->gpa += len; + frag->len -= len; + } + + if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) { + vcpu->mmio_needed = 0; + + // VMG change, at this point, we're always done + // RIP has already been advanced + return 1; + } + + // More MMIO is needed + run->mmio.phys_addr = frag->gpa; + run->mmio.len = min(8u, frag->len); + run->mmio.is_write = vcpu->mmio_is_write; + if (run->mmio.is_write) + memcpy(run->mmio.data, frag->data, min(8u, frag->len)); + run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} + +int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, + void *data) +{ + int handled; + struct kvm_mmio_fragment *frag; + + if (!data) + return -EINVAL; + + handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data); + if (handled == bytes) + return 1; + + bytes -= handled; + gpa += handled; + data += handled; + + /*TODO: Check if need to increment number of frags */ + frag = vcpu->mmio_fragments; + vcpu->mmio_nr_fragments = 1; + frag->len = bytes; + frag->gpa = gpa; + frag->data = data; + + vcpu->mmio_needed = 1; + vcpu->mmio_cur_fragment = 0; + + vcpu->run->mmio.phys_addr = gpa; + vcpu->run->mmio.len = min(8u, frag->len); + vcpu->run->mmio.is_write = 1; + memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len)); + vcpu->run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write); + +int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, + void *data) +{ + int handled; + struct kvm_mmio_fragment *frag; + + if (!data) + return -EINVAL; + + handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data); + if (handled == bytes) + return 1; + + bytes -= handled; + gpa += handled; + data += handled; + + /*TODO: Check if need to increment number of frags */ + frag = vcpu->mmio_fragments; + vcpu->mmio_nr_fragments = 1; + frag->len = bytes; + frag->gpa = gpa; + frag->data = data; + + vcpu->mmio_needed = 1; + vcpu->mmio_cur_fragment = 0; + + vcpu->run->mmio.phys_addr = gpa; + vcpu->run->mmio.len = min(8u, frag->len); + vcpu->run->mmio.is_write = 0; + vcpu->run->exit_reason = KVM_EXIT_MMIO; + + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio; + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); + +static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) +{ + memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data, + vcpu->arch.pio.count * vcpu->arch.pio.size); + vcpu->arch.pio.count = 0; + + return 1; +} + +static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count) +{ + int ret; + + ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port, + data, count); + if (ret) + return ret; + + vcpu->arch.pio.count = 0; + + return 0; +} + +static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count) +{ + int ret; + + ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port, + data, count); + if (ret) { + vcpu->arch.pio.count = 0; + } else { + vcpu->arch.guest_ins_data = data; + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; + } + + return 0; +} + +int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port, void *data, unsigned int count, + int in) +{ + return in ? kvm_sev_es_ins(vcpu, size, port, data, count) + : kvm_sev_es_outs(vcpu, size, port, data, count); +} +EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); @@ -11285,3 +11560,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); |