diff options
| -rw-r--r-- | arch/x86/kvm/kvm-asm-offsets.c | 1 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.c | 23 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.h | 4 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/vmenter.S | 103 | ||||
| -rw-r--r-- | arch/x86/kvm/vmenter.h | 80 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/run_flags.h | 9 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmenter.S | 186 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 39 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 7 |
9 files changed, 213 insertions, 239 deletions
diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c index 24a710d37323..36ac61724dd7 100644 --- a/arch/x86/kvm/kvm-asm-offsets.c +++ b/arch/x86/kvm/kvm-asm-offsets.c @@ -24,6 +24,7 @@ static void __used common(void) if (IS_ENABLED(CONFIG_KVM_INTEL)) { BLANK(); + OFFSET(VMX_vcpu_arch_regs, vcpu_vmx, vcpu.arch.regs); OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); } } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a82471a6d3ea..4519a1f92584 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -50,6 +50,7 @@ #include "trace.h" +#include "vmenter.h" #include "svm.h" #include "svm_ops.h" @@ -664,7 +665,7 @@ static void clr_dr_intercepts(struct vcpu_svm *svm) svm_mark_intercepts_dirty(svm); } -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +static bool msr_write_intercepted(struct vcpu_svm *svm, u32 msr) { /* * For non-nested case: @@ -675,8 +676,7 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - void *msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm : - to_svm(vcpu)->msrpm; + void *msrpm = is_guest_mode(&svm->vcpu) ? svm->nested.msrpm : svm->msrpm; return svm_test_msr_bitmap_write(msrpm, msr); } @@ -2783,7 +2783,7 @@ static bool sev_es_prevent_msr_access(struct kvm_vcpu *vcpu, { return is_sev_es_guest(vcpu) && vcpu->arch.guest_state_protected && msr_info->index != MSR_IA32_XSS && - !msr_write_intercepted(vcpu, msr_info->index); + !msr_write_intercepted(to_svm(vcpu), msr_info->index); } static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -4397,7 +4397,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; } -static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) +static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, unsigned enter_flags) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); @@ -4419,10 +4419,10 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in amd_clear_divider(); if (is_sev_es_guest(vcpu)) - __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted, + __svm_sev_es_vcpu_run(svm, enter_flags, sev_es_host_save_area(sd)); else - __svm_vcpu_run(svm, spec_ctrl_intercepted); + __svm_vcpu_run(svm, enter_flags); raw_local_irq_disable(); @@ -4433,7 +4433,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) { bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT; struct vcpu_svm *svm = to_svm(vcpu); - bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL); + unsigned enter_flags = 0; + + if (!msr_write_intercepted(svm, MSR_IA32_SPEC_CTRL)) + enter_flags |= KVM_ENTER_SAVE_SPEC_CTRL; trace_kvm_entry(vcpu, force_immediate_exit); @@ -4516,7 +4519,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) x86_spec_ctrl_set_guest(svm->virt_spec_ctrl); - svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted); + svm_vcpu_enter_exit(vcpu, enter_flags); if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) x86_spec_ctrl_restore_host(svm->virt_spec_ctrl); @@ -4575,7 +4578,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET; - if (!msr_write_intercepted(vcpu, MSR_AMD64_PERF_CNTR_GLOBAL_CTL)) + if (!msr_write_intercepted(svm, MSR_AMD64_PERF_CNTR_GLOBAL_CTL)) rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, vcpu_to_pmu(vcpu)->global_ctrl); trace_kvm_exit(vcpu, KVM_ISA_SVM); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index dd93b3daefa9..f63c95a20ad0 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -1008,9 +1008,9 @@ static inline void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_sa /* vmenter.S */ -void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted, +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, unsigned int flags, struct sev_es_save_area *hostsa); -void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +void __svm_vcpu_run(struct vcpu_svm *svm, unsigned int flags); #define DEFINE_KVM_GHCB_ACCESSORS(field) \ static __always_inline u64 kvm_ghcb_get_##field(struct vcpu_svm *svm) \ diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index d47c5c93c991..f523d9e49839 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -7,6 +7,7 @@ #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> #include "kvm-asm-offsets.h" +#include "vmenter.h" #define WORD_SIZE (BITS_PER_LONG / 8) @@ -39,38 +40,6 @@ ALTERNATIVE_2 "", \ "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \ "", X86_FEATURE_V_SPEC_CTRL -801: -.endm -.macro RESTORE_GUEST_SPEC_CTRL_BODY -800: - /* - * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the - * host's, write the MSR. This is kept out-of-line so that the common - * case does not have to jump. - * - * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, - * there must not be any returns or indirect branches between this code - * and vmentry. - */ -#ifdef CONFIG_X86_64 - mov SVM_spec_ctrl(%rdi), %rdx - cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx - je 801b - movl %edx, %eax - shr $32, %rdx -#else - mov SVM_spec_ctrl(%edi), %eax - mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx - xor %eax, %ecx - mov SVM_spec_ctrl + 4(%edi), %edx - mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %esi - xor %edx, %esi - or %esi, %ecx - je 801b -#endif - mov $MSR_IA32_SPEC_CTRL, %ecx - wrmsr - jmp 801b .endm .macro RESTORE_HOST_SPEC_CTRL @@ -78,42 +47,6 @@ ALTERNATIVE_2 "", \ "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \ "", X86_FEATURE_V_SPEC_CTRL -901: -.endm -.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req -900: - /* Same for after vmexit. */ - mov $MSR_IA32_SPEC_CTRL, %ecx - - /* - * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, - * if it was not intercepted during guest execution. - */ - cmpb $0, \spec_ctrl_intercepted - jnz 998f - rdmsr - movl %eax, SVM_spec_ctrl(%_ASM_DI) - movl %edx, SVM_spec_ctrl + 4(%_ASM_DI) -998: - /* Now restore the host value of the MSR if different from the guest's. */ -#ifdef CONFIG_X86_64 - mov PER_CPU_VAR(x86_spec_ctrl_current), %rdx - cmp SVM_spec_ctrl(%rdi), %rdx - je 901b - movl %edx, %eax - shr $32, %rdx -#else - mov PER_CPU_VAR(x86_spec_ctrl_current), %eax - mov SVM_spec_ctrl(%edi), %esi - xor %eax, %esi - mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edx - mov SVM_spec_ctrl + 4(%edi), %edi - xor %edx, %edi - or %edi, %esi - je 901b -#endif - wrmsr - jmp 901b .endm #define SVM_CLEAR_CPU_BUFFERS \ @@ -121,8 +54,8 @@ /** * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode - * @svm: struct vcpu_svm * - * @spec_ctrl_intercepted: bool + * @svm: struct vcpu_svm * + * @enter_flags: u32 */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP @@ -162,6 +95,7 @@ SYM_FUNC_START(__svm_vcpu_run) /* Clobbers RAX, RCX, RDX (and ESI on 32-bit), consumes RDI (@svm). */ RESTORE_GUEST_SPEC_CTRL +801: /* * Use a single vmcb (vmcb01 because it's always valid) for @@ -242,6 +176,7 @@ SYM_FUNC_START(__svm_vcpu_run) * and RSP (pointer to @spec_ctrl_intercepted). */ RESTORE_HOST_SPEC_CTRL +901: /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be @@ -278,7 +213,7 @@ SYM_FUNC_START(__svm_vcpu_run) xor %r15d, %r15d #endif - /* "Pop" @spec_ctrl_intercepted. */ + /* "Pop" @enter_flags. */ pop %_ASM_BX pop %_ASM_BX @@ -295,8 +230,12 @@ SYM_FUNC_START(__svm_vcpu_run) pop %_ASM_BP RET - RESTORE_GUEST_SPEC_CTRL_BODY - RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP) +800: + RESTORE_GUEST_SPEC_CTRL_BODY SVM_spec_ctrl(%_ASM_DI), 801b + jmp 801b +900: + RESTORE_HOST_SPEC_CTRL_BODY SVM_spec_ctrl(%_ASM_DI), (%_ASM_SP), 901b + jmp 901b 10: cmpb $0, _ASM_RIP(virt_rebooting) jne 2b @@ -335,8 +274,8 @@ SYM_FUNC_END(__svm_vcpu_run) /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode - * @svm: struct vcpu_svm * - * @spec_ctrl_intercepted: bool + * @svm: struct vcpu_svm * + * @enter_flags: u32 */ SYM_FUNC_START(__svm_sev_es_vcpu_run) FRAME_BEGIN @@ -355,13 +294,14 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) /* * Save volatile registers that hold arguments that are needed after - * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted). + * #VMEXIT (RDI=@svm and RSI=@enter_flags). */ mov %rdi, SEV_ES_RDI (%rdx) mov %rsi, SEV_ES_RSI (%rdx) /* Clobbers RAX, RCX, and RDX (@hostsa), consumes RDI (@svm). */ RESTORE_GUEST_SPEC_CTRL +801: /* Get svm->current_vmcb->pa into RAX. */ mov SVM_current_vmcb(%rdi), %rax @@ -376,8 +316,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT - /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */ + /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@enter_flags). */ RESTORE_HOST_SPEC_CTRL +901: /* * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be @@ -391,8 +332,12 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) FRAME_END RET - RESTORE_GUEST_SPEC_CTRL_BODY - RESTORE_HOST_SPEC_CTRL_BODY %sil +800: + RESTORE_GUEST_SPEC_CTRL_BODY SVM_spec_ctrl(%_ASM_DI), 801b + jmp 801b +900: + RESTORE_HOST_SPEC_CTRL_BODY SVM_spec_ctrl(%_ASM_DI), %esi, 901b + jmp 901b 3: cmpb $0, virt_rebooting(%rip) jne 2b diff --git a/arch/x86/kvm/vmenter.h b/arch/x86/kvm/vmenter.h new file mode 100644 index 000000000000..ba3f71449c62 --- /dev/null +++ b/arch/x86/kvm/vmenter.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMENTER_H +#define __KVM_X86_VMENTER_H + +#define KVM_ENTER_VMRESUME BIT(0) +#define KVM_ENTER_SAVE_SPEC_CTRL BIT(1) +#define KVM_ENTER_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(2) + +#ifdef __ASSEMBLER__ +.macro RESTORE_GUEST_SPEC_CTRL_BODY guest_spec_ctrl:req, label:req + /* + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the + * host's, write the MSR. This is kept out-of-line so that the common + * case does not have to jump. + * + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, + * there must not be any returns or indirect branches between this code + * and vmentry. + */ +#ifdef CONFIG_X86_64 + mov \guest_spec_ctrl, %rdx + cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx + je \label + movl %edx, %eax + shr $32, %rdx +#else + mov \guest_spec_ctrl, %eax + mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx + xor %eax, %ecx + mov 4 + \guest_spec_ctrl, %edx + mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %esi + xor %edx, %esi + or %esi, %ecx + je \label +#endif + mov $MSR_IA32_SPEC_CTRL, %ecx + wrmsr +.endm + +.macro RESTORE_HOST_SPEC_CTRL_BODY guest_spec_ctrl:req, enter_flags:req, label:req + /* Same for after vmexit. */ + mov $MSR_IA32_SPEC_CTRL, %ecx + + /* + * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, + * if it was not intercepted during guest execution. + */ + testl $KVM_ENTER_SAVE_SPEC_CTRL, \enter_flags + jz 998f + rdmsr + movl %eax, \guest_spec_ctrl + movl %edx, 4 + \guest_spec_ctrl +998: + /* Now restore the host value of the MSR if different from the guest's. */ +#ifdef CONFIG_X86_64 + mov PER_CPU_VAR(x86_spec_ctrl_current), %rdx + cmp \guest_spec_ctrl, %rdx + /* + * For legacy IBRS, the IBRS bit always needs to be written after + * transitioning from a less privileged predictor mode, regardless of + * whether the guest/host values differ. + */ + ALTERNATIVE __stringify(je \label), "", X86_FEATURE_KERNEL_IBRS + movl %edx, %eax + shr $32, %rdx +#else + mov PER_CPU_VAR(x86_spec_ctrl_current), %eax + mov \guest_spec_ctrl, %esi + xor %eax, %esi + mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edx + mov 4 + \guest_spec_ctrl, %edi + xor %edx, %edi + or %edi, %esi + ALTERNATIVE __stringify(je \label), "", X86_FEATURE_KERNEL_IBRS +#endif + wrmsr +.endm + +#endif /* __ASSEMBLER__ */ +#endif /* __KVM_X86_VMENTER_H */ diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h deleted file mode 100644 index 6a87a12135fb..000000000000 --- a/arch/x86/kvm/vmx/run_flags.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __KVM_X86_VMX_RUN_FLAGS_H -#define __KVM_X86_VMX_RUN_FLAGS_H - -#define VMX_RUN_VMRESUME BIT(0) -#define VMX_RUN_SAVE_SPEC_CTRL BIT(1) -#define VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO BIT(2) - -#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 8a481dae9cae..7e4dc17fc0b8 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -7,28 +7,28 @@ #include <asm/percpu.h> #include <asm/segment.h> #include "kvm-asm-offsets.h" -#include "run_flags.h" +#include "vmenter.h" #define WORD_SIZE (BITS_PER_LONG / 8) -#define VCPU_RAX __VCPU_REGS_RAX * WORD_SIZE -#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE -#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE -#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE +#define VCPU_RAX (VMX_vcpu_arch_regs + __VCPU_REGS_RAX * WORD_SIZE) +#define VCPU_RCX (VMX_vcpu_arch_regs + __VCPU_REGS_RCX * WORD_SIZE) +#define VCPU_RDX (VMX_vcpu_arch_regs + __VCPU_REGS_RDX * WORD_SIZE) +#define VCPU_RBX (VMX_vcpu_arch_regs + __VCPU_REGS_RBX * WORD_SIZE) /* Intentionally omit RSP as it's context switched by hardware */ -#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE -#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE -#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE +#define VCPU_RBP (VMX_vcpu_arch_regs + __VCPU_REGS_RBP * WORD_SIZE) +#define VCPU_RSI (VMX_vcpu_arch_regs + __VCPU_REGS_RSI * WORD_SIZE) +#define VCPU_RDI (VMX_vcpu_arch_regs + __VCPU_REGS_RDI * WORD_SIZE) #ifdef CONFIG_X86_64 -#define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE -#define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE -#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE -#define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE -#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE -#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE -#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE -#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE +#define VCPU_R8 (VMX_vcpu_arch_regs + __VCPU_REGS_R8 * WORD_SIZE) +#define VCPU_R9 (VMX_vcpu_arch_regs + __VCPU_REGS_R9 * WORD_SIZE) +#define VCPU_R10 (VMX_vcpu_arch_regs + __VCPU_REGS_R10 * WORD_SIZE) +#define VCPU_R11 (VMX_vcpu_arch_regs + __VCPU_REGS_R11 * WORD_SIZE) +#define VCPU_R12 (VMX_vcpu_arch_regs + __VCPU_REGS_R12 * WORD_SIZE) +#define VCPU_R13 (VMX_vcpu_arch_regs + __VCPU_REGS_R13 * WORD_SIZE) +#define VCPU_R14 (VMX_vcpu_arch_regs + __VCPU_REGS_R14 * WORD_SIZE) +#define VCPU_R15 (VMX_vcpu_arch_regs + __VCPU_REGS_R15 * WORD_SIZE) #endif .macro VMX_DO_EVENT_IRQOFF call_insn call_target @@ -68,10 +68,9 @@ /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode * @vmx: struct vcpu_vmx * - * @regs: unsigned long * (to guest registers) - * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH - * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl - * VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO: vCPU can access host MMIO + * @flags: KVM_ENTER_VMRESUME: use VMRESUME instead of VMLAUNCH + * KVM_ENTER_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl + * KVM_ENTER_CLEAR_CPU_BUFFERS_FOR_MMIO: vCPU can access host MMIO * * Returns: * 0 on VM-Exit, 1 on VM-Fail @@ -94,76 +93,46 @@ SYM_FUNC_START(__vmx_vcpu_run) push %_ASM_ARG1 /* Save @flags (used for VMLAUNCH vs. VMRESUME and mitigations). */ - push %_ASM_ARG3 - - /* - * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and - * @regs is needed after VM-Exit to save the guest's register values. - */ push %_ASM_ARG2 lea (%_ASM_SP), %_ASM_ARG2 call vmx_update_host_rsp - ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL + /* Reload @vmx, _ASM_ARG1 may be modified by vmx_update_host_rsp(). */ + mov WORD_SIZE(%_ASM_SP), %_ASM_DI /* - * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the - * host's, write the MSR. - * - * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, - * there must not be any returns or indirect branches between this code - * and vmentry. + * Unlike AMD there's no V_SPEC_CTRL here, so do not leave the body + * out of line. Clobbers RAX, RCX, RDX, RSI. */ - mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI -#ifdef CONFIG_X86_64 - mov VMX_spec_ctrl(%rdi), %rdx - cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx - je .Lspec_ctrl_done - movl %edx, %eax - shr $32, %rdx -#else - mov VMX_spec_ctrl(%edi), %eax - mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx - xor %eax, %ecx - mov VMX_spec_ctrl + 4(%edi), %edx - mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edi - xor %edx, %edi - or %edi, %ecx - je .Lspec_ctrl_done -#endif - mov $MSR_IA32_SPEC_CTRL, %ecx - wrmsr - -.Lspec_ctrl_done: + ALTERNATIVE "jmp .Lspec_ctrl_guest_done", "", X86_FEATURE_MSR_SPEC_CTRL + RESTORE_GUEST_SPEC_CTRL_BODY VMX_spec_ctrl(%_ASM_DI), .Lspec_ctrl_guest_done +.Lspec_ctrl_guest_done: /* * Since vmentry is serializing on affected CPUs, there's no need for * an LFENCE to stop speculation from skipping the wrmsr. */ - /* Load @regs to RAX. */ - mov (%_ASM_SP), %_ASM_AX - /* Load guest registers. Don't clobber flags. */ - mov VCPU_RCX(%_ASM_AX), %_ASM_CX - mov VCPU_RDX(%_ASM_AX), %_ASM_DX - mov VCPU_RBX(%_ASM_AX), %_ASM_BX - mov VCPU_RBP(%_ASM_AX), %_ASM_BP - mov VCPU_RSI(%_ASM_AX), %_ASM_SI - mov VCPU_RDI(%_ASM_AX), %_ASM_DI + mov VCPU_RAX(%_ASM_DI), %_ASM_AX + mov VCPU_RCX(%_ASM_DI), %_ASM_CX + mov VCPU_RDX(%_ASM_DI), %_ASM_DX + mov VCPU_RBX(%_ASM_DI), %_ASM_BX + mov VCPU_RBP(%_ASM_DI), %_ASM_BP + mov VCPU_RSI(%_ASM_DI), %_ASM_SI #ifdef CONFIG_X86_64 - mov VCPU_R8 (%_ASM_AX), %r8 - mov VCPU_R9 (%_ASM_AX), %r9 - mov VCPU_R10(%_ASM_AX), %r10 - mov VCPU_R11(%_ASM_AX), %r11 - mov VCPU_R12(%_ASM_AX), %r12 - mov VCPU_R13(%_ASM_AX), %r13 - mov VCPU_R14(%_ASM_AX), %r14 - mov VCPU_R15(%_ASM_AX), %r15 + mov VCPU_R8 (%_ASM_DI), %r8 + mov VCPU_R9 (%_ASM_DI), %r9 + mov VCPU_R10(%_ASM_DI), %r10 + mov VCPU_R11(%_ASM_DI), %r11 + mov VCPU_R12(%_ASM_DI), %r12 + mov VCPU_R13(%_ASM_DI), %r13 + mov VCPU_R14(%_ASM_DI), %r14 + mov VCPU_R15(%_ASM_DI), %r15 #endif - /* Load guest RAX. This kills the @regs pointer! */ - mov VCPU_RAX(%_ASM_AX), %_ASM_AX + /* Load guest RDI. This kills the @vmx pointer! */ + mov VCPU_RDI(%_ASM_DI), %_ASM_DI /* * Note, ALTERNATIVE_2 works in reverse order. If CLEAR_CPU_BUF_VM is @@ -172,7 +141,7 @@ SYM_FUNC_START(__vmx_vcpu_run) * do VERW. Else, do nothing (no mitigations needed/enabled). */ ALTERNATIVE_2 "", \ - __stringify(testl $VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO, WORD_SIZE(%_ASM_SP); \ + __stringify(testl $KVM_ENTER_CLEAR_CPU_BUFFERS_FOR_MMIO, (%_ASM_SP); \ jz .Lskip_mmio_verw; \ VERW; \ .Lskip_mmio_verw:), \ @@ -180,7 +149,7 @@ SYM_FUNC_START(__vmx_vcpu_run) __stringify(VERW), X86_FEATURE_CLEAR_CPU_BUF_VM /* Check @flags to see if VMLAUNCH or VMRESUME is needed. */ - testl $VMX_RUN_VMRESUME, WORD_SIZE(%_ASM_SP) + testl $KVM_ENTER_VMRESUME, (%_ASM_SP) jz .Lvmlaunch /* @@ -212,38 +181,35 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) UNWIND_HINT_RESTORE ENDBR - /* Temporarily save guest's RAX. */ - push %_ASM_AX + /* Temporarily save guest's RDI. */ + push %_ASM_DI + + /* Reload @vmx to RDI. */ + mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI - /* Reload @regs to RAX. */ - mov WORD_SIZE(%_ASM_SP), %_ASM_AX - - /* Save all guest registers, including RAX from the stack */ - pop VCPU_RAX(%_ASM_AX) - mov %_ASM_CX, VCPU_RCX(%_ASM_AX) - mov %_ASM_DX, VCPU_RDX(%_ASM_AX) - mov %_ASM_BX, VCPU_RBX(%_ASM_AX) - mov %_ASM_BP, VCPU_RBP(%_ASM_AX) - mov %_ASM_SI, VCPU_RSI(%_ASM_AX) - mov %_ASM_DI, VCPU_RDI(%_ASM_AX) + /* Save all guest registers, including RDI from the stack */ + mov %_ASM_AX, VCPU_RAX(%_ASM_DI) + mov %_ASM_CX, VCPU_RCX(%_ASM_DI) + mov %_ASM_DX, VCPU_RDX(%_ASM_DI) + mov %_ASM_BX, VCPU_RBX(%_ASM_DI) + mov %_ASM_BP, VCPU_RBP(%_ASM_DI) + mov %_ASM_SI, VCPU_RSI(%_ASM_DI) + pop VCPU_RDI(%_ASM_DI) #ifdef CONFIG_X86_64 - mov %r8, VCPU_R8 (%_ASM_AX) - mov %r9, VCPU_R9 (%_ASM_AX) - mov %r10, VCPU_R10(%_ASM_AX) - mov %r11, VCPU_R11(%_ASM_AX) - mov %r12, VCPU_R12(%_ASM_AX) - mov %r13, VCPU_R13(%_ASM_AX) - mov %r14, VCPU_R14(%_ASM_AX) - mov %r15, VCPU_R15(%_ASM_AX) + mov %r8, VCPU_R8 (%_ASM_DI) + mov %r9, VCPU_R9 (%_ASM_DI) + mov %r10, VCPU_R10(%_ASM_DI) + mov %r11, VCPU_R11(%_ASM_DI) + mov %r12, VCPU_R12(%_ASM_DI) + mov %r13, VCPU_R13(%_ASM_DI) + mov %r14, VCPU_R14(%_ASM_DI) + mov %r15, VCPU_R15(%_ASM_DI) #endif /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ xor %ebx, %ebx .Lclear_regs: - /* Discard @regs. The register is irrelevant, it just can't be RBX. */ - pop %_ASM_AX - /* * Clear all general purpose registers except RSP and RBX to prevent * speculative use of the guest's values, even those that are reloaded @@ -286,16 +252,32 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\ X86_FEATURE_RSB_VMEXIT_LITE - pop %_ASM_ARG2 /* @flags */ - pop %_ASM_ARG1 /* @vmx */ + /* Clobbers RAX, RCX, RDX, RSI. */ + ALTERNATIVE "jmp .Lspec_ctrl_host_done", "", X86_FEATURE_MSR_SPEC_CTRL + mov WORD_SIZE(%_ASM_SP), %_ASM_DI + RESTORE_HOST_SPEC_CTRL_BODY VMX_spec_ctrl(%_ASM_DI), (%_ASM_SP), .Lspec_ctrl_host_done +.Lspec_ctrl_host_done: - call vmx_spec_ctrl_restore_host + /* + * Halt speculation past a conditional wrmsr. Intel's eIBRS + * guarantees that the guest cannot control the RSB "once IBRS is + * set", but in the eIBRS case speculative execution past the 'je' + * can go all the way to the RET below while MSR_IA32_SPEC_CTRL + * still holds the guest value. + */ + ALTERNATIVE_2 "", "lfence", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_KERNEL_IBRS CLEAR_BRANCH_HISTORY_VMEXIT /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX + /* Pop our saved arguments from the stack */ + pop %_ASM_BX + pop %_ASM_BX + + /* ... and then the callee-save registers */ pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a0a7a2f267b3..cc14a6b96681 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -73,6 +73,7 @@ #include "x86_ops.h" #include "smm.h" #include "vmx_onhyperv.h" +#include "vmenter.h" #include "posted_intr.h" #include "mmu/spte.h" @@ -967,12 +968,12 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr); } -unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) +unsigned int __vmx_vcpu_enter_flags(struct vcpu_vmx *vmx) { unsigned int flags = 0; if (vmx->loaded_vmcs->launched) - flags |= VMX_RUN_VMRESUME; + flags |= KVM_ENTER_VMRESUME; /* * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free @@ -980,11 +981,11 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) * it after vmexit and store it in vmx->spec_ctrl. */ if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)) - flags |= VMX_RUN_SAVE_SPEC_CTRL; + flags |= KVM_ENTER_SAVE_SPEC_CTRL; if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF_VM_MMIO) && kvm_vcpu_can_access_host_mmio(&vmx->vcpu)) - flags |= VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO; + flags |= KVM_ENTER_CLEAR_CPU_BUFFERS_FOR_MMIO; return flags; } @@ -7428,31 +7429,6 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } -void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, - unsigned int flags) -{ - u64 hostval = this_cpu_read(x86_spec_ctrl_current); - - if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) - return; - - if (flags & VMX_RUN_SAVE_SPEC_CTRL) - vmx->spec_ctrl = native_rdmsrq(MSR_IA32_SPEC_CTRL); - - /* - * If the guest/host SPEC_CTRL values differ, restore the host value. - * - * For legacy IBRS, the IBRS bit always needs to be written after - * transitioning from a less privileged predictor mode, regardless of - * whether the guest/host values differ. - */ - if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || - vmx->spec_ctrl != hostval) - native_wrmsrq(MSR_IA32_SPEC_CTRL, hostval); - - barrier_nospec(); -} - static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu, bool force_immediate_exit) { @@ -7509,8 +7485,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, if (vcpu->arch.cr2 != native_read_cr2()) native_write_cr2(vcpu->arch.cr2); - vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - flags); + vmx->fail = __vmx_vcpu_run(vmx, flags); vcpu->arch.cr2 = native_read_cr2(); vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET; @@ -7628,7 +7603,7 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) kvm_wait_lapic_expire(vcpu); /* The actual VMENTER/EXIT is in the .noinstr.text section. */ - vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_run_flags(vmx)); + vmx_vcpu_enter_exit(vcpu, __vmx_vcpu_enter_flags(vmx)); /* All fields are clean at this point */ if (kvm_is_using_evmcs()) { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 0a4e263c4095..c5a2c7b47466 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -15,7 +15,6 @@ #include "vmcs.h" #include "vmx_ops.h" #include "../cpuid.h" -#include "run_flags.h" #include "../mmu.h" #include "common.h" @@ -368,10 +367,8 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); -void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); -unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, - unsigned int flags); +unsigned int __vmx_vcpu_enter_flags(struct vcpu_vmx *vmx); +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned int flags); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set); |
