diff options
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r-- | arch/x86/kvm/svm/avic.c | 66 | ||||
-rw-r--r-- | arch/x86/kvm/svm/nested.c | 10 | ||||
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 32 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 78 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/vmenter.S | 16 |
6 files changed, 138 insertions, 66 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 4b74ea91f4e6..63dea8ecd7ef 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -820,7 +820,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * Allocating new amd_iommu_pi_data, which will get * add to the per-vcpu ir_list. */ - ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); + ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT); if (!ir) { ret = -ENOMEM; goto out; @@ -896,6 +896,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; int idx, ret = 0; if (!kvm_arch_has_assigned_device(kvm) || @@ -933,6 +934,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_vcpu_apicv_active(&svm->vcpu)) { struct amd_iommu_pi_data pi; + enable_remapped_mode = false; + /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); @@ -951,33 +954,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, */ if (!ret && pi.is_guest_mode) svm_ir_list_add(svm, &pi); - } else { - /* Use legacy mode in IRTE */ - struct amd_iommu_pi_data pi; - - /** - * Here, pi is used to: - * - Tell IOMMU to use legacy mode for this interrupt. - * - Retrieve ga_tag of prior interrupt remapping data. - */ - pi.prev_ga_tag = 0; - pi.is_guest_mode = false; - ret = irq_set_vcpu_affinity(host_irq, &pi); - - /** - * Check if the posted interrupt was previously - * setup with the guest_mode by checking if the ga_tag - * was cached. If so, we need to clean up the per-vcpu - * ir_list. - */ - if (!ret && pi.prev_ga_tag) { - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu_by_id(kvm, id); - if (vcpu) - svm_ir_list_del(to_svm(vcpu), &pi); - } } if (!ret && svm) { @@ -993,6 +969,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } ret = 0; + if (enable_remapped_mode) { + /* Use legacy mode in IRTE */ + struct amd_iommu_pi_data pi; + + /** + * Here, pi is used to: + * - Tell IOMMU to use legacy mode for this interrupt. + * - Retrieve ga_tag of prior interrupt remapping data. + */ + pi.prev_ga_tag = 0; + pi.is_guest_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &pi); + + /** + * Check if the posted interrupt was previously + * setup with the guest_mode by checking if the ga_tag + * was cached. If so, we need to clean up the per-vcpu + * ir_list. + */ + if (!ret && pi.prev_ga_tag) { + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, id); + if (vcpu) + svm_ir_list_del(to_svm(vcpu), &pi); + } + } out: srcu_read_unlock(&kvm->irq_srcu, idx); return ret; @@ -1199,6 +1203,12 @@ bool avic_hardware_setup(void) return false; } + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP) && + !boot_cpu_has(X86_FEATURE_HV_INUSE_WR_ALLOWED)) { + pr_warn("AVIC disabled: missing HvInUseWrAllowed on SNP-enabled system\n"); + return false; + } + if (boot_cpu_has(X86_FEATURE_AVIC)) { pr_info("AVIC enabled\n"); } else if (force_avic) { diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index cf84103ce38b..2dcb9c870d5a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -646,6 +646,11 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, u32 pause_count12; u32 pause_thresh12; + nested_svm_transition_tlb_flush(vcpu); + + /* Enter Guest-Mode */ + enter_guest_mode(vcpu); + /* * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes. @@ -762,11 +767,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, } } - nested_svm_transition_tlb_flush(vcpu); - - /* Enter Guest-Mode */ - enter_guest_mode(vcpu); - /* * Merge guest and host intercepts - must be called with vcpu in * guest-mode to take effect. diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index fb854cf20ac3..c4ae73541fc5 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2058,6 +2058,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) struct kvm_vcpu *src_vcpu; unsigned long i; + if (src->created_vcpus != atomic_read(&src->online_vcpus) || + dst->created_vcpus != atomic_read(&dst->online_vcpus)) + return -EBUSY; + if (!sev_es_guest(src)) return 0; @@ -3833,7 +3837,7 @@ next_range: goto next_range; } - unreachable(); + BUG(); } static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu) @@ -3957,16 +3961,12 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm) /* * The target vCPU is valid, so the vCPU will be kicked unless the - * request is for CREATE_ON_INIT. For any errors at this stage, the - * kick will place the vCPU in an non-runnable state. + * request is for CREATE_ON_INIT. */ kick = true; mutex_lock(&target_svm->sev_es.snp_vmsa_mutex); - target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE; - target_svm->sev_es.snp_ap_waiting_for_reset = true; - /* Interrupt injection mode shouldn't change for AP creation */ if (request < SVM_VMGEXIT_AP_DESTROY) { u64 sev_features; @@ -4012,20 +4012,23 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm) target_svm->sev_es.snp_vmsa_gpa = svm->vmcb->control.exit_info_2; break; case SVM_VMGEXIT_AP_DESTROY: + target_svm->sev_es.snp_vmsa_gpa = INVALID_PAGE; break; default: vcpu_unimpl(vcpu, "vmgexit: invalid AP creation request [%#x] from guest\n", request); ret = -EINVAL; - break; + goto out; } -out: + target_svm->sev_es.snp_ap_waiting_for_reset = true; + if (kick) { kvm_make_request(KVM_REQ_UPDATE_PROTECTED_GUEST_STATE, target_vcpu); kvm_vcpu_kick(target_vcpu); } +out: mutex_unlock(&target_svm->sev_es.snp_vmsa_mutex); return ret; @@ -4579,6 +4582,8 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm) void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa) { + struct kvm *kvm = svm->vcpu.kvm; + /* * All host state for SEV-ES guests is categorized into three swap types * based on how it is handled by hardware during a world switch: @@ -4602,10 +4607,15 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are /* * If DebugSwap is enabled, debug registers are loaded but NOT saved by - * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both - * saves and loads debug registers (Type-A). + * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU does + * not save or load debug registers. Sadly, on CPUs without + * ALLOWED_SEV_FEATURES, KVM can't prevent SNP guests from enabling + * DebugSwap on secondary vCPUs without KVM's knowledge via "AP Create". + * Save all registers if DebugSwap is supported to prevent host state + * from being clobbered by a misbehaving guest. */ - if (sev_vcpu_has_debug_swap(svm)) { + if (sev_vcpu_has_debug_swap(svm) || + (sev_snp_guest(kvm) && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP))) { hostsa->dr0 = native_get_debugreg(0); hostsa->dr1 = native_get_debugreg(1); hostsa->dr2 = native_get_debugreg(2); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4543dd6bcab2..800f781475c0 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1483,7 +1483,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb) { int i; - for_each_online_cpu(i) + for_each_possible_cpu(i) cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL); } @@ -1993,11 +1993,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) svm->asid = sd->next_asid++; } -static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) { - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = to_svm(vcpu)->vmcb; - if (svm->vcpu.arch.guest_state_protected) + if (vcpu->arch.guest_state_protected) return; if (unlikely(value != vmcb->save.dr6)) { @@ -2222,6 +2222,10 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) */ if (!sev_es_guest(vcpu->kvm)) { clear_page(svm->vmcb); +#ifdef CONFIG_KVM_SMM + if (is_smm(vcpu)) + kvm_smm_changed(vcpu, false); +#endif kvm_vcpu_reset(vcpu, true); } @@ -3167,6 +3171,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break; } + + /* + * AMD changed the architectural behavior of bits 5:2. On CPUs + * without BusLockTrap, bits 5:2 control "external pins", but + * on CPUs that support BusLockDetect, bit 2 enables BusLockTrap + * and bits 5:3 are reserved-to-zero. Sadly, old KVM allowed + * the guest to set bits 5:2 despite not actually virtualizing + * Performance-Monitoring/Breakpoint external pins. Drop bits + * 5:2 for backwards compatibility. + */ + data &= ~GENMASK(5, 2); + + /* + * Suppress BTF as KVM doesn't virtualize BTF, but there's no + * way to communicate lack of support to the guest. + */ + if (data & DEBUGCTLMSR_BTF) { + kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data); + data &= ~DEBUGCTLMSR_BTF; + } + if (data & DEBUGCTL_RESERVED_BITS) return 1; @@ -4176,6 +4201,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in guest_state_enter_irqoff(); + /* + * Set RFLAGS.IF prior to VMRUN, as the host's RFLAGS.IF at the time of + * VMRUN controls whether or not physical IRQs are masked (KVM always + * runs with V_INTR_MASKING_MASK). Toggle RFLAGS.IF here to avoid the + * temptation to do STI+VMRUN+CLI, as AMD CPUs bleed the STI shadow + * into guest state if delivery of an event during VMRUN triggers a + * #VMEXIT, and the guest_state transitions already tell lockdep that + * IRQs are being enabled/disabled. Note! GIF=0 for the entirety of + * this path, so IRQs aren't actually unmasked while running host code. + */ + raw_local_irq_enable(); + amd_clear_divider(); if (sev_es_guest(vcpu->kvm)) @@ -4184,12 +4221,14 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in else __svm_vcpu_run(svm, spec_ctrl_intercepted); + raw_local_irq_disable(); + guest_state_exit_irqoff(); } -static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, - bool force_immediate_exit) +static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) { + bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT; struct vcpu_svm *svm = to_svm(vcpu); bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL); @@ -4231,17 +4270,28 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, svm_hv_update_vp_id(svm->vmcb, vcpu); /* - * Run with all-zero DR6 unless needed, so that we can get the exact cause - * of a #DB. + * Run with all-zero DR6 unless the guest can write DR6 freely, so that + * KVM can get the exact cause of a #DB. Note, loading guest DR6 from + * KVM's snapshot is only necessary when DR accesses won't exit. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - svm_set_dr6(svm, vcpu->arch.dr6); - else - svm_set_dr6(svm, DR6_ACTIVE_LOW); + if (unlikely(run_flags & KVM_RUN_LOAD_GUEST_DR6)) + svm_set_dr6(vcpu, vcpu->arch.dr6); + else if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))) + svm_set_dr6(vcpu, DR6_ACTIVE_LOW); clgi(); kvm_load_guest_xsave_state(vcpu); + /* + * Hardware only context switches DEBUGCTL if LBR virtualization is + * enabled. Manually load DEBUGCTL if necessary (and restore it after + * VM-Exit), as running with the host's DEBUGCTL can negatively affect + * guest state and can even be fatal, e.g. due to Bus Lock Detect. + */ + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(svm->vmcb->save.dbgctl); + kvm_wait_lapic_expire(vcpu); /* @@ -4269,6 +4319,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(vcpu, KVM_HANDLING_NMI); + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(vcpu->arch.host_debugctl); + kvm_load_host_xsave_state(vcpu); stgi(); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 43fa6a16eb19..d114efac7af7 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -591,7 +591,7 @@ static inline bool is_vnmi_enabled(struct vcpu_svm *svm) /* svm.c */ #define MSR_INVALID 0xffffffffU -#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +#define DEBUGCTL_RESERVED_BITS (~DEBUGCTLMSR_LBR) extern bool dump_invalid_vmcb; diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 2ed80aea3bb1..235c4af6b692 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -169,13 +169,12 @@ SYM_FUNC_START(__svm_vcpu_run) #endif mov VCPU_RDI(%_ASM_DI), %_ASM_DI - /* Enter guest mode */ - sti + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 3: vmrun %_ASM_AX 4: - cli - /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX @@ -339,13 +338,12 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov SVM_current_vmcb(%rdi), %rax mov KVM_VMCB_pa(%rax), %rax - /* Enter guest mode */ - sti + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 1: vmrun %rax - -2: cli - +2: /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT |