diff options
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx.c | 106 |
1 files changed, 68 insertions, 38 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 714a0673ec3c..c829d89e2e63 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -50,6 +50,7 @@ #include <asm/apic.h> #include <asm/irq_remapping.h> #include <asm/mmu_context.h> +#include <asm/nospec-branch.h> #include "trace.h" #include "pmu.h" @@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field) { BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || - vmcs_field_to_offset_table[field] == 0) + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) + return -ENOENT; + + /* + * FIXME: Mitigation for CVE-2017-5753. To be replaced with a + * generic mechanism. + */ + asm("lfence"); + + if (vmcs_field_to_offset_table[field] == 0) return -ENOENT; return vmcs_field_to_offset_table[field]; @@ -2300,7 +2309,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * processors. See 22.2.4. */ vmcs_writel(HOST_TR_BASE, - (unsigned long)this_cpu_ptr(&cpu_tss)); + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ /* @@ -5600,7 +5609,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - vmcs_writel(GUEST_RFLAGS, 0x02); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); @@ -5915,11 +5924,9 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; /* already handled by vmx_vcpu_run() */ if (is_invalid_opcode(intr_info)) { - if (is_guest_mode(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; @@ -6602,7 +6609,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; @@ -6750,16 +6757,10 @@ static __init int hardware_setup(void) goto out; } - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); @@ -7414,10 +7415,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) */ static void free_nested(struct vcpu_vmx *vmx) { - if (!vmx->nested.vmxon) + if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; vmx->nested.vmxon = false; + vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; @@ -9419,6 +9421,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" + "setbe %c[fail](%0)\n\t" "mov %%" _ASM_AX ", %c[rax](%0) \n\t" "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" __ASM_SIZE(pop) " %c[rcx](%0) \n\t" @@ -9435,12 +9438,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" + "xor %%r8d, %%r8d \n\t" + "xor %%r9d, %%r9d \n\t" + "xor %%r10d, %%r10d \n\t" + "xor %%r11d, %%r11d \n\t" + "xor %%r12d, %%r12d \n\t" + "xor %%r13d, %%r13d \n\t" + "xor %%r14d, %%r14d \n\t" + "xor %%r15d, %%r15d \n\t" #endif "mov %%cr2, %%" _ASM_AX " \n\t" "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" + "xor %%eax, %%eax \n\t" + "xor %%ebx, %%ebx \n\t" + "xor %%esi, %%esi \n\t" + "xor %%edi, %%edi \n\t" "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - "setbe %c[fail](%0) \n\t" ".pushsection .rodata \n\t" ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" @@ -9477,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (debugctlmsr) update_debugctlmsr(debugctlmsr); @@ -9800,8 +9817,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP)); cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP)); cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU)); - /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */ - cr4_fixed1_update(bit(11), ecx, bit(2)); + cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP)); #undef cr4_fixed1_update } @@ -10875,6 +10891,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 1; } + if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && + (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || + (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) + return 1; + return 0; } @@ -11099,13 +11120,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qual; - - if (kvm_event_needs_reinjection(vcpu)) - return -EBUSY; + bool block_nested_events = + vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); if (vcpu->arch.exception.pending && nested_vmx_check_exception(vcpu, &exit_qual)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); vcpu->arch.exception.pending = false; @@ -11114,14 +11134,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); return 0; } if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | @@ -11137,7 +11157,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && nested_exit_on_intr(vcpu)) { - if (vmx->nested.nested_run_pending) + if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); return 0; @@ -11324,6 +11344,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_clear_interrupt_queue(vcpu); } +static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 entry_failure_code; + + nested_ept_uninit_mmu_context(vcpu); + + /* + * Only PDPTE load can fail as the value of cr3 was checked on entry and + * couldn't have changed. + */ + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); + + if (!enable_ept) + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; +} + /* * A part of what we need to when the nested L2 guest exits and we want to * run its L1 parent, is to reset L1's guest state to the host state specified @@ -11337,7 +11375,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct kvm_segment seg; - u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; @@ -11364,17 +11401,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); vmx_set_cr4(vcpu, vmcs12->host_cr4); - nested_ept_uninit_mmu_context(vcpu); - - /* - * Only PDPTE load can fail as the value of cr3 was checked on entry and - * couldn't have changed. - */ - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) - nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); - - if (!enable_ept) - vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; + load_vmcs12_mmu_host_state(vcpu, vmcs12); if (enable_vpid) { /* @@ -11604,6 +11631,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, * accordingly. */ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + + load_vmcs12_mmu_host_state(vcpu, vmcs12); + /* * The emulated instruction was already skipped in * nested_vmx_run, but the updated RIP was never |