diff options
Diffstat (limited to 'arch/x86/kvm/svm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 262 |
1 files changed, 162 insertions, 100 deletions
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5bbf76189afa..03dd7bac8034 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -254,7 +254,7 @@ static inline void invlpga(unsigned long addr, u32 asid) asm volatile (__ex("invlpga %1, %0") : : "c"(asid), "a"(addr)); } -static int get_npt_level(struct kvm_vcpu *vcpu) +static int get_max_npt_level(void) { #ifdef CONFIG_X86_64 return PT64_ROOT_4LEVEL; @@ -282,7 +282,7 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) } svm->vmcb->save.efer = efer | EFER_SVME; - mark_dirty(svm->vmcb, VMCB_CR); + vmcb_mark_dirty(svm->vmcb, VMCB_CR); } static int is_external_interrupt(u32 info) @@ -713,7 +713,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) pause_filter_count_max); if (control->pause_filter_count != old) { - mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); trace_kvm_ple_window_update(vcpu->vcpu_id, control->pause_filter_count, old); } @@ -731,7 +731,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) pause_filter_count_shrink, pause_filter_count); if (control->pause_filter_count != old) { - mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); trace_kvm_ple_window_update(vcpu->vcpu_id, control->pause_filter_count, old); } @@ -885,7 +885,7 @@ static __init int svm_hardware_setup(void) if (npt_enabled && !npt) npt_enabled = false; - kvm_configure_mmu(npt_enabled, PG_LEVEL_1G); + kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G); pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); if (nrips) { @@ -924,6 +924,21 @@ static __init int svm_hardware_setup(void) svm_set_cpu_caps(); + /* + * It seems that on AMD processors PTE's accessed bit is + * being set by the CPU hardware before the NPF vmexit. + * This is not expected behaviour and our tests fail because + * of it. + * A workaround here is to disable support for + * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled. + * In this case userspace can know if there is support using + * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle + * it + * If future AMD CPU models change the behaviour described above, + * this variable can be changed accordingly + */ + allow_smaller_maxphyaddr = !npt_enabled; + return 0; err: @@ -966,7 +981,7 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) svm->vmcb->control.tsc_offset = offset + g_tsc_offset; - mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); return svm->vmcb->control.tsc_offset; } @@ -1002,38 +1017,38 @@ static void init_vmcb(struct vcpu_svm *svm) if (enable_vmware_backdoor) set_exception_intercept(svm, GP_VECTOR); - set_intercept(svm, INTERCEPT_INTR); - set_intercept(svm, INTERCEPT_NMI); - set_intercept(svm, INTERCEPT_SMI); - set_intercept(svm, INTERCEPT_SELECTIVE_CR0); - set_intercept(svm, INTERCEPT_RDPMC); - set_intercept(svm, INTERCEPT_CPUID); - set_intercept(svm, INTERCEPT_INVD); - set_intercept(svm, INTERCEPT_INVLPG); - set_intercept(svm, INTERCEPT_INVLPGA); - set_intercept(svm, INTERCEPT_IOIO_PROT); - set_intercept(svm, INTERCEPT_MSR_PROT); - set_intercept(svm, INTERCEPT_TASK_SWITCH); - set_intercept(svm, INTERCEPT_SHUTDOWN); - set_intercept(svm, INTERCEPT_VMRUN); - set_intercept(svm, INTERCEPT_VMMCALL); - set_intercept(svm, INTERCEPT_VMLOAD); - set_intercept(svm, INTERCEPT_VMSAVE); - set_intercept(svm, INTERCEPT_STGI); - set_intercept(svm, INTERCEPT_CLGI); - set_intercept(svm, INTERCEPT_SKINIT); - set_intercept(svm, INTERCEPT_WBINVD); - set_intercept(svm, INTERCEPT_XSETBV); - set_intercept(svm, INTERCEPT_RDPRU); - set_intercept(svm, INTERCEPT_RSM); + svm_set_intercept(svm, INTERCEPT_INTR); + svm_set_intercept(svm, INTERCEPT_NMI); + svm_set_intercept(svm, INTERCEPT_SMI); + svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0); + svm_set_intercept(svm, INTERCEPT_RDPMC); + svm_set_intercept(svm, INTERCEPT_CPUID); + svm_set_intercept(svm, INTERCEPT_INVD); + svm_set_intercept(svm, INTERCEPT_INVLPG); + svm_set_intercept(svm, INTERCEPT_INVLPGA); + svm_set_intercept(svm, INTERCEPT_IOIO_PROT); + svm_set_intercept(svm, INTERCEPT_MSR_PROT); + svm_set_intercept(svm, INTERCEPT_TASK_SWITCH); + svm_set_intercept(svm, INTERCEPT_SHUTDOWN); + svm_set_intercept(svm, INTERCEPT_VMRUN); + svm_set_intercept(svm, INTERCEPT_VMMCALL); + svm_set_intercept(svm, INTERCEPT_VMLOAD); + svm_set_intercept(svm, INTERCEPT_VMSAVE); + svm_set_intercept(svm, INTERCEPT_STGI); + svm_set_intercept(svm, INTERCEPT_CLGI); + svm_set_intercept(svm, INTERCEPT_SKINIT); + svm_set_intercept(svm, INTERCEPT_WBINVD); + svm_set_intercept(svm, INTERCEPT_XSETBV); + svm_set_intercept(svm, INTERCEPT_RDPRU); + svm_set_intercept(svm, INTERCEPT_RSM); if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { - set_intercept(svm, INTERCEPT_MONITOR); - set_intercept(svm, INTERCEPT_MWAIT); + svm_set_intercept(svm, INTERCEPT_MONITOR); + svm_set_intercept(svm, INTERCEPT_MWAIT); } if (!kvm_hlt_in_guest(svm->vcpu.kvm)) - set_intercept(svm, INTERCEPT_HLT); + svm_set_intercept(svm, INTERCEPT_HLT); control->iopm_base_pa = __sme_set(iopm_base); control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); @@ -1077,7 +1092,7 @@ static void init_vmcb(struct vcpu_svm *svm) if (npt_enabled) { /* Setup VMCB for Nested Paging */ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; - clr_intercept(svm, INTERCEPT_INVLPG); + svm_clr_intercept(svm, INTERCEPT_INVLPG); clr_exception_intercept(svm, PF_VECTOR); clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); @@ -1094,9 +1109,9 @@ static void init_vmcb(struct vcpu_svm *svm) control->pause_filter_count = pause_filter_count; if (pause_filter_thresh) control->pause_filter_thresh = pause_filter_thresh; - set_intercept(svm, INTERCEPT_PAUSE); + svm_set_intercept(svm, INTERCEPT_PAUSE); } else { - clr_intercept(svm, INTERCEPT_PAUSE); + svm_clr_intercept(svm, INTERCEPT_PAUSE); } if (kvm_vcpu_apicv_active(&svm->vcpu)) @@ -1107,14 +1122,14 @@ static void init_vmcb(struct vcpu_svm *svm) * in VMCB and clear intercepts to avoid #VMEXIT. */ if (vls) { - clr_intercept(svm, INTERCEPT_VMLOAD); - clr_intercept(svm, INTERCEPT_VMSAVE); + svm_clr_intercept(svm, INTERCEPT_VMLOAD); + svm_clr_intercept(svm, INTERCEPT_VMSAVE); svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; } if (vgif) { - clr_intercept(svm, INTERCEPT_STGI); - clr_intercept(svm, INTERCEPT_CLGI); + svm_clr_intercept(svm, INTERCEPT_STGI); + svm_clr_intercept(svm, INTERCEPT_CLGI); svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; } @@ -1123,7 +1138,7 @@ static void init_vmcb(struct vcpu_svm *svm) clr_exception_intercept(svm, UD_VECTOR); } - mark_all_dirty(svm->vmcb); + vmcb_mark_all_dirty(svm->vmcb); enable_gif(svm); @@ -1257,7 +1272,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (unlikely(cpu != vcpu->cpu)) { svm->asid_generation = 0; - mark_all_dirty(svm->vmcb); + vmcb_mark_all_dirty(svm->vmcb); } #ifdef CONFIG_X86_64 @@ -1356,7 +1371,7 @@ static void svm_set_vintr(struct vcpu_svm *svm) /* The following fields are ignored when AVIC is enabled */ WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu)); - set_intercept(svm, INTERCEPT_VINTR); + svm_set_intercept(svm, INTERCEPT_VINTR); /* * This is just a dummy VINTR to actually cause a vmexit to happen. @@ -1367,13 +1382,13 @@ static void svm_set_vintr(struct vcpu_svm *svm) control->int_ctl &= ~V_INTR_PRIO_MASK; control->int_ctl |= V_IRQ_MASK | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); - mark_dirty(svm->vmcb, VMCB_INTR); + vmcb_mark_dirty(svm->vmcb, VMCB_INTR); } static void svm_clear_vintr(struct vcpu_svm *svm) { const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK; - clr_intercept(svm, INTERCEPT_VINTR); + svm_clr_intercept(svm, INTERCEPT_VINTR); /* Drop int_ctl fields related to VINTR injection. */ svm->vmcb->control.int_ctl &= mask; @@ -1385,7 +1400,7 @@ static void svm_clear_vintr(struct vcpu_svm *svm) svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask; } - mark_dirty(svm->vmcb, VMCB_INTR); + vmcb_mark_dirty(svm->vmcb, VMCB_INTR); } static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) @@ -1503,7 +1518,7 @@ static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) svm->vmcb->save.idtr.limit = dt->size; svm->vmcb->save.idtr.base = dt->address ; - mark_dirty(svm->vmcb, VMCB_DT); + vmcb_mark_dirty(svm->vmcb, VMCB_DT); } static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) @@ -1520,7 +1535,7 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) svm->vmcb->save.gdtr.limit = dt->size; svm->vmcb->save.gdtr.base = dt->address ; - mark_dirty(svm->vmcb, VMCB_DT); + vmcb_mark_dirty(svm->vmcb, VMCB_DT); } static void update_cr0_intercept(struct vcpu_svm *svm) @@ -1531,7 +1546,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm) *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | (gcr0 & SVM_CR0_SELECTIVE_MASK); - mark_dirty(svm->vmcb, VMCB_CR); + vmcb_mark_dirty(svm->vmcb, VMCB_CR); if (gcr0 == *hcr0) { clr_cr_intercept(svm, INTERCEPT_CR0_READ); @@ -1572,7 +1587,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; - mark_dirty(svm->vmcb, VMCB_CR); + vmcb_mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); } @@ -1592,7 +1607,7 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) cr4 |= X86_CR4_PAE; cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; - mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); + vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); return 0; } @@ -1624,10 +1639,10 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, /* This is symmetric with svm_get_segment() */ svm->vmcb->save.cpl = (var->dpl & 3); - mark_dirty(svm->vmcb, VMCB_SEG); + vmcb_mark_dirty(svm->vmcb, VMCB_SEG); } -static void update_bp_intercept(struct kvm_vcpu *vcpu) +static void update_exception_bitmap(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1636,8 +1651,7 @@ static void update_bp_intercept(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) set_exception_intercept(svm, BP_VECTOR); - } else - vcpu->guest_debug = 0; + } } static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) @@ -1651,7 +1665,7 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) svm->asid_generation = sd->asid_generation; svm->vmcb->control.asid = sd->next_asid++; - mark_dirty(svm->vmcb, VMCB_ASID); + vmcb_mark_dirty(svm->vmcb, VMCB_ASID); } static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) @@ -1660,7 +1674,7 @@ static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) if (unlikely(value != vmcb->save.dr6)) { vmcb->save.dr6 = value; - mark_dirty(vmcb, VMCB_DR); + vmcb_mark_dirty(vmcb, VMCB_DR); } } @@ -1687,7 +1701,7 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->save.dr7 = value; - mark_dirty(svm->vmcb, VMCB_DR); + vmcb_mark_dirty(svm->vmcb, VMCB_DR); } static int pf_interception(struct vcpu_svm *svm) @@ -2000,8 +2014,8 @@ void svm_set_gif(struct vcpu_svm *svm, bool value) * again while processing KVM_REQ_EVENT if needed. */ if (vgif_enabled(svm)) - clr_intercept(svm, INTERCEPT_STGI); - if (is_intercept(svm, INTERCEPT_VINTR)) + svm_clr_intercept(svm, INTERCEPT_STGI); + if (svm_is_intercept(svm, INTERCEPT_VINTR)) svm_clear_vintr(svm); enable_gif(svm); @@ -2162,7 +2176,7 @@ static int cpuid_interception(struct vcpu_svm *svm) static int iret_interception(struct vcpu_svm *svm) { ++svm->vcpu.stat.nmi_window_exits; - clr_intercept(svm, INTERCEPT_IRET); + svm_clr_intercept(svm, INTERCEPT_IRET); svm->vcpu.arch.hflags |= HF_IRET_MASK; svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); @@ -2358,8 +2372,10 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; break; + case MSR_IA32_PERF_CAPABILITIES: + return 0; default: - return 1; + return KVM_MSR_RET_INVALID; } return 0; @@ -2512,7 +2528,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) return 1; vcpu->arch.pat = data; svm->vmcb->save.g_pat = data; - mark_dirty(svm->vmcb, VMCB_NPT); + vmcb_mark_dirty(svm->vmcb, VMCB_NPT); break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && @@ -2522,7 +2538,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; - if (data & ~kvm_spec_ctrl_valid_bits(vcpu)) + if (kvm_spec_ctrl_test_value(data)) return 1; svm->spec_ctrl = data; @@ -2617,7 +2633,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) return 1; svm->vmcb->save.dbgctl = data; - mark_dirty(svm->vmcb, VMCB_LBR); + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); if (data & (1ULL<<0)) svm_enable_lbrv(svm); else @@ -2947,6 +2963,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = svm->vmcb->control.exit_code; + kvm_run->fail_entry.cpu = vcpu->arch.last_vmentry_cpu; dump_vmcb(vcpu); return 0; } @@ -2970,8 +2987,9 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 1; + vcpu->run->internal.ndata = 2; vcpu->run->internal.data[0] = exit_code; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; return 0; } @@ -2992,21 +3010,18 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) static void reload_tss(struct kvm_vcpu *vcpu) { - int cpu = raw_smp_processor_id(); + struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); sd->tss_desc->type = 9; /* available 32/64-bit TSS */ load_TR_desc(); } static void pre_svm_run(struct vcpu_svm *svm) { - int cpu = raw_smp_processor_id(); - - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu(svm_data, svm->vcpu.cpu); if (sev_guest(svm->vcpu.kvm)) - return pre_sev_run(svm, cpu); + return pre_sev_run(svm, svm->vcpu.cpu); /* FIXME: handle wraparound of asid_generation */ if (svm->asid_generation != sd->asid_generation) @@ -3019,7 +3034,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; vcpu->arch.hflags |= HF_NMI_MASK; - set_intercept(svm, INTERCEPT_IRET); + svm_set_intercept(svm, INTERCEPT_IRET); ++vcpu->stat.nmi_injections; } @@ -3040,7 +3055,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm_nested_virtualize_tpr(vcpu)) + if (nested_svm_virtualize_tpr(vcpu)) return; clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); @@ -3096,10 +3111,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) if (masked) { svm->vcpu.arch.hflags |= HF_NMI_MASK; - set_intercept(svm, INTERCEPT_IRET); + svm_set_intercept(svm, INTERCEPT_IRET); } else { svm->vcpu.arch.hflags &= ~HF_NMI_MASK; - clr_intercept(svm, INTERCEPT_IRET); + svm_clr_intercept(svm, INTERCEPT_IRET); } } @@ -3179,7 +3194,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) if (!gif_set(svm)) { if (vgif_enabled(svm)) - set_intercept(svm, INTERCEPT_STGI); + svm_set_intercept(svm, INTERCEPT_STGI); return; /* STGI will cause a vm exit */ } @@ -3234,7 +3249,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm_nested_virtualize_tpr(vcpu)) + if (nested_svm_virtualize_tpr(vcpu)) return; if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { @@ -3248,7 +3263,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); u64 cr8; - if (svm_nested_virtualize_tpr(vcpu) || + if (nested_svm_virtualize_tpr(vcpu) || kvm_vcpu_apicv_active(vcpu)) return; @@ -3344,6 +3359,60 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); +static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, + struct vcpu_svm *svm) +{ + /* + * VMENTER enables interrupts (host state), but the kernel state is + * interrupts disabled when this is invoked. Also tell RCU about + * it. This is the same logic as for exit_to_user_mode(). + * + * This ensures that e.g. latency analysis on the host observes + * guest mode as interrupt enabled. + * + * guest_enter_irqoff() informs context tracking about the + * transition to guest mode and if enabled adjusts RCU state + * accordingly. + */ + instrumentation_begin(); + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + instrumentation_end(); + + guest_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); + + __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs); + +#ifdef CONFIG_X86_64 + native_wrmsrl(MSR_GS_BASE, svm->host.gs_base); +#else + loadsegment(fs, svm->host.fs); +#ifndef CONFIG_X86_32_LAZY_GS + loadsegment(gs, svm->host.gs); +#endif +#endif + + /* + * VMEXIT disables interrupts (host state), but tracing and lockdep + * have them in state 'on' as recorded before entering guest mode. + * Same as enter_from_user_mode(). + * + * guest_exit_irqoff() restores host context and reinstates RCU if + * enabled and required. + * + * This needs to be done before the below as native_read_msr() + * contains a tracepoint and x86_spec_ctrl_restore_host() calls + * into world and some more. + */ + lockdep_hardirqs_off(CALLER_ADDR0); + guest_exit_irqoff(); + + instrumentation_begin(); + trace_hardirqs_off_finish(); + instrumentation_end(); +} + static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { fastpath_t exit_fastpath; @@ -3399,16 +3468,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) */ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); - __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs); - -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm->host.gs_base); -#else - loadsegment(fs, svm->host.fs); -#ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); -#endif -#endif + svm_vcpu_enter_exit(vcpu, svm); /* * We do not use IBRS in the kernel. If this vCPU has used the @@ -3477,11 +3537,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) SVM_EXIT_EXCP_BASE + MC_VECTOR)) svm_handle_mce(svm); - mark_all_clean(svm->vmcb); + vmcb_mark_all_clean(svm->vmcb); return exit_fastpath; } -static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) +static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root, + int root_level) { struct vcpu_svm *svm = to_svm(vcpu); unsigned long cr3; @@ -3489,7 +3550,7 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) cr3 = __sme_set(root); if (npt_enabled) { svm->vmcb->control.nested_cr3 = cr3; - mark_dirty(svm->vmcb, VMCB_NPT); + vmcb_mark_dirty(svm->vmcb, VMCB_NPT); /* Loading L2's CR3 is handled by enter_svm_guest_mode. */ if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) @@ -3498,7 +3559,7 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) } svm->vmcb->save.cr3 = cr3; - mark_dirty(svm->vmcb, VMCB_CR); + vmcb_mark_dirty(svm->vmcb, VMCB_CR); } static int is_disabled(void) @@ -3551,7 +3612,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) return 0; } -static void svm_cpuid_update(struct kvm_vcpu *vcpu) +static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3843,6 +3904,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) struct kvm_host_map map; u64 guest; u64 vmcb; + int ret = 0; guest = GET_SMSTATE(u64, smstate, 0x7ed8); vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); @@ -3851,10 +3913,11 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL) return 1; nested_vmcb = map.hva; - enter_svm_guest_mode(svm, vmcb, nested_vmcb); + ret = enter_svm_guest_mode(svm, vmcb, nested_vmcb); kvm_vcpu_unmap(&svm->vcpu, &map, true); } - return 0; + + return ret; } static void enable_smi_window(struct kvm_vcpu *vcpu) @@ -3863,7 +3926,7 @@ static void enable_smi_window(struct kvm_vcpu *vcpu) if (!gif_set(svm)) { if (vgif_enabled(svm)) - set_intercept(svm, INTERCEPT_STGI); + svm_set_intercept(svm, INTERCEPT_STGI); /* STGI will cause a vm exit */ } else { /* We must be in SMM; RSM will cause a vmexit anyway. */ @@ -3992,7 +4055,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .vcpu_blocking = svm_vcpu_blocking, .vcpu_unblocking = svm_vcpu_unblocking, - .update_bp_intercept = update_bp_intercept, + .update_exception_bitmap = update_exception_bitmap, .get_msr_feature = svm_get_msr_feature, .get_msr = svm_get_msr, .set_msr = svm_set_msr, @@ -4049,12 +4112,11 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_tss_addr = svm_set_tss_addr, .set_identity_map_addr = svm_set_identity_map_addr, - .get_tdp_level = get_npt_level, .get_mt_mask = svm_get_mt_mask, .get_exit_info = svm_get_exit_info, - .cpuid_update = svm_cpuid_update, + .vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid, .has_wbinvd_exit = svm_has_wbinvd_exit, |