summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h8
-rw-r--r--arch/x86/include/asm/svm.h6
-rw-r--r--arch/x86/kvm/cpuid.c9
-rw-r--r--arch/x86/kvm/svm/nested.c18
-rw-r--r--arch/x86/kvm/svm/svm.c25
-rw-r--r--arch/x86/kvm/svm/svm.h1
-rw-r--r--arch/x86/kvm/x86.c12
8 files changed, 77 insertions, 3 deletions
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c3b53beb1300..81f7b3b91986 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -472,6 +472,7 @@
#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
+#define X86_FEATURE_ERAPS (20*32+24) /* Enhanced Return Address Predictor Security */
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5a3bfa293e8b..0353d8b6988c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -195,7 +195,15 @@ enum kvm_reg {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
VCPU_EXREG_CR0,
+ /*
+ * Alias AMD's ERAPS (not a real register) to CR3 so that common code
+ * can trigger emulation of the RAP (Return Address Predictor) with
+ * minimal support required in common code. Piggyback CR3 as the RAP
+ * is cleared on writes to CR3, i.e. marking CR3 dirty will naturally
+ * mark ERAPS dirty as well.
+ */
VCPU_EXREG_CR3,
+ VCPU_EXREG_ERAPS = VCPU_EXREG_CR3,
VCPU_EXREG_CR4,
VCPU_EXREG_RFLAGS,
VCPU_EXREG_SEGMENTS,
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 56aa99503dc4..50ece197c98a 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -131,7 +131,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u64 tsc_offset;
u32 asid;
u8 tlb_ctl;
- u8 reserved_2[3];
+ u8 erap_ctl;
+ u8 reserved_2[2];
u32 int_ctl;
u32 int_vector;
u32 int_state;
@@ -182,6 +183,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define TLB_CONTROL_FLUSH_ASID 3
#define TLB_CONTROL_FLUSH_ASID_LOCAL 7
+#define ERAP_CONTROL_ALLOW_LARGER_RAP BIT(0)
+#define ERAP_CONTROL_CLEAR_RAP BIT(1)
+
#define V_TPR_MASK 0x0f
#define V_IRQ_SHIFT 8
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 88a5426674a1..c590a5bd3196 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1223,6 +1223,7 @@ void kvm_set_cpu_caps(void)
/* PrefetchCtlMsr */
/* GpOnUserCpuid */
/* EPSF */
+ F(ERAPS),
SYNTHESIZED_F(SBPB),
SYNTHESIZED_F(IBPB_BRTYPE),
SYNTHESIZED_F(SRSO_NO),
@@ -1803,8 +1804,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x80000021:
- entry->ebx = entry->edx = 0;
+ entry->edx = 0;
cpuid_entry_override(entry, CPUID_8000_0021_EAX);
+
+ if (kvm_cpu_cap_has(X86_FEATURE_ERAPS))
+ entry->ebx &= GENMASK(23, 16);
+ else
+ entry->ebx = 0;
+
cpuid_entry_override(entry, CPUID_8000_0021_ECX);
break;
/* AMD Extended Performance Monitoring and Debug */
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index aa1bea134ace..5a1e1164c197 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -436,6 +436,7 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
to->msrpm_base_pa = from->msrpm_base_pa;
to->tsc_offset = from->tsc_offset;
to->tlb_ctl = from->tlb_ctl;
+ to->erap_ctl = from->erap_ctl;
to->int_ctl = from->int_ctl;
to->int_vector = from->int_vector;
to->int_state = from->int_state;
@@ -886,6 +887,19 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
}
/*
+ * Take ALLOW_LARGER_RAP from vmcb12 even though it should be safe to
+ * let L2 use a larger RAP since KVM will emulate the necessary clears,
+ * as it's possible L1 deliberately wants to restrict L2 to the legacy
+ * RAP size. Unconditionally clear the RAP on nested VMRUN, as KVM is
+ * responsible for emulating the host vs. guest tags (L1 is the "host",
+ * L2 is the "guest").
+ */
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
+ vmcb02->control.erap_ctl = (svm->nested.ctl.erap_ctl &
+ ERAP_CONTROL_ALLOW_LARGER_RAP) |
+ ERAP_CONTROL_CLEAR_RAP;
+
+ /*
* Merge guest and host intercepts - must be called with vcpu in
* guest-mode to take effect.
*/
@@ -1180,6 +1194,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_nested_vmexit_handle_ibrs(vcpu);
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
+ vmcb01->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
+
svm_switch_vmcb(svm, &svm->vmcb01);
/*
@@ -1686,6 +1703,7 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
dst->tsc_offset = from->tsc_offset;
dst->asid = from->asid;
dst->tlb_ctl = from->tlb_ctl;
+ dst->erap_ctl = from->erap_ctl;
dst->int_ctl = from->int_ctl;
dst->int_vector = from->int_vector;
dst->int_state = from->int_state;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d1ff23e02ecd..34c8a94b1b81 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1141,6 +1141,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu, bool init_event)
svm_clr_intercept(svm, INTERCEPT_PAUSE);
}
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
+ svm->vmcb->control.erap_ctl |= ERAP_CONTROL_ALLOW_LARGER_RAP;
+
if (kvm_vcpu_apicv_active(vcpu))
avic_init_vmcb(svm, vmcb);
@@ -3293,6 +3296,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
pr_err("%-20s%d\n", "asid:", control->asid);
pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
+ pr_err("%-20s%d\n", "erap_ctl:", control->erap_ctl);
pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
pr_err("%-20s%08x\n", "int_state:", control->int_state);
@@ -4004,6 +4008,13 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
invlpga(gva, svm->vmcb->control.asid);
}
+static void svm_flush_tlb_guest(struct kvm_vcpu *vcpu)
+{
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_ERAPS);
+
+ svm_flush_tlb_asid(vcpu);
+}
+
static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4262,6 +4273,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
}
svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS) &&
+ kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS))
+ svm->vmcb->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
+
svm_hv_update_vp_id(svm->vmcb, vcpu);
/*
@@ -4339,6 +4354,14 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
}
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+
+ /*
+ * Unconditionally mask off the CLEAR_RAP bit, the AND is just as cheap
+ * as the TEST+Jcc to avoid it.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_ERAPS))
+ svm->vmcb->control.erap_ctl &= ~ERAP_CONTROL_CLEAR_RAP;
+
vmcb_mark_all_clean(svm->vmcb);
/* if exit due to PF check for async PF */
@@ -5094,7 +5117,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = {
.flush_tlb_all = svm_flush_tlb_all,
.flush_tlb_current = svm_flush_tlb_current,
.flush_tlb_gva = svm_flush_tlb_gva,
- .flush_tlb_guest = svm_flush_tlb_asid,
+ .flush_tlb_guest = svm_flush_tlb_guest,
.vcpu_pre_run = svm_vcpu_pre_run,
.vcpu_run = svm_vcpu_run,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 806e68ba821b..7d28a739865f 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -156,6 +156,7 @@ struct vmcb_ctrl_area_cached {
u64 tsc_offset;
u32 asid;
u8 tlb_ctl;
+ u8 erap_ctl;
u32 int_ctl;
u32 int_vector;
u32 int_state;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ff8812f3a129..e013392fe20c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -14130,6 +14130,13 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
return 1;
}
+ /*
+ * When ERAPS is supported, invalidating a specific PCID clears
+ * the RAP (Return Address Predictor).
+ */
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
+ kvm_register_mark_dirty(vcpu, VCPU_EXREG_ERAPS);
+
kvm_invalidate_pcid(vcpu, operand.pcid);
return kvm_skip_emulated_instruction(vcpu);
@@ -14143,6 +14150,11 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
fallthrough;
case INVPCID_TYPE_ALL_INCL_GLOBAL:
+ /*
+ * Don't bother marking VCPU_EXREG_ERAPS dirty, SVM will take
+ * care of doing so when emulating the full guest TLB flush
+ * (the RAP is cleared on all implicit TLB flushes).
+ */
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
return kvm_skip_emulated_instruction(vcpu);