summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/include/asm/assembler.h10
-rw-r--r--arch/arm64/include/asm/cache.h4
-rw-r--r--arch/arm64/include/asm/cpufeature.h66
-rw-r--r--arch/arm64/include/asm/el2_setup.h18
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h4
-rw-r--r--arch/arm64/include/asm/kvm_host.h4
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h6
-rw-r--r--arch/arm64/include/asm/sysreg.h211
-rw-r--r--arch/arm64/kernel/cpufeature.c238
-rw-r--r--arch/arm64/kernel/debug-monitors.c2
-rw-r--r--arch/arm64/kernel/head.S10
-rw-r--r--arch/arm64/kernel/hyp-stub.S8
-rw-r--r--arch/arm64/kernel/idreg-override.c10
-rw-r--r--arch/arm64/kernel/perf_event.c8
-rw-r--r--arch/arm64/kernel/proton-pack.c4
-rw-r--r--arch/arm64/kvm/arm.c16
-rw-r--r--arch/arm64/kvm/debug.c38
-rw-r--r--arch/arm64/kvm/guest.c1
-rw-r--r--arch/arm64/kvm/handle_exit.c8
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/fixed_config.h60
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c38
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c10
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c2
-rw-r--r--arch/arm64/kvm/mmu.c36
-rw-r--r--arch/arm64/kvm/pmu-emul.c16
-rw-r--r--arch/arm64/kvm/reset.c12
-rw-r--r--arch/arm64/kvm/sys_regs.c198
-rw-r--r--arch/arm64/kvm/sys_regs.h24
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c2
-rw-r--r--arch/arm64/mm/context.c6
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/arm64/mm/mmu.c2
-rw-r--r--arch/arm64/mm/proc.S4
-rw-r--r--arch/arm64/tools/sysreg449
-rw-r--r--arch/mips/kvm/emulate.c6
-rw-r--r--arch/powerpc/kvm/book3s_pr.c1
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c1
-rw-r--r--arch/powerpc/kvm/booke.c1
-rw-r--r--arch/powerpc/kvm/powerpc.c1
-rw-r--r--arch/riscv/kvm/vcpu_insn.c1
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h22
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h51
-rw-r--r--arch/x86/include/asm/vmx.h2
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--arch/x86/kvm/cpuid.c20
-rw-r--r--arch/x86/kvm/emulate.c31
-rw-r--r--arch/x86/kvm/hyperv.c70
-rw-r--r--arch/x86/kvm/hyperv.h6
-rw-r--r--arch/x86/kvm/lapic.c38
-rw-r--r--arch/x86/kvm/lapic.h9
-rw-r--r--arch/x86/kvm/mmu/mmu.c22
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c12
-rw-r--r--arch/x86/kvm/pmu.c20
-rw-r--r--arch/x86/kvm/svm/nested.c124
-rw-r--r--arch/x86/kvm/svm/pmu.c117
-rw-r--r--arch/x86/kvm/svm/svm.c35
-rw-r--r--arch/x86/kvm/trace.h53
-rw-r--r--arch/x86/kvm/vmx/capabilities.h14
-rw-r--r--arch/x86/kvm/vmx/evmcs.c192
-rw-r--r--arch/x86/kvm/vmx/evmcs.h10
-rw-r--r--arch/x86/kvm/vmx/nested.c468
-rw-r--r--arch/x86/kvm/vmx/nested.h2
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c29
-rw-r--r--arch/x86/kvm/vmx/sgx.c2
-rw-r--r--arch/x86/kvm/vmx/vmenter.S24
-rw-r--r--arch/x86/kvm/vmx/vmx.c321
-rw-r--r--arch/x86/kvm/vmx/vmx.h172
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h2
-rw-r--r--arch/x86/kvm/x86.c576
-rw-r--r--arch/x86/kvm/x86.h16
-rw-r--r--arch/x86/kvm/xen.c1
75 files changed, 2446 insertions, 1564 deletions
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 5846145be523..cf8e72e733de 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -384,8 +384,8 @@ alternative_cb_end
.macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1
mrs \tmp0, ID_AA64MMFR0_EL1
// Narrow PARange to fit the PS field in TCR_ELx
- ubfx \tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
- mov \tmp1, #ID_AA64MMFR0_PARANGE_MAX
+ ubfx \tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3
+ mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX
cmp \tmp0, \tmp1
csel \tmp0, \tmp1, \tmp0, hi
bfi \tcr, \tmp0, \pos, #3
@@ -512,7 +512,7 @@ alternative_endif
*/
.macro reset_pmuserenr_el0, tmpreg
mrs \tmpreg, id_aa64dfr0_el1
- sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
+ sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
cmp \tmpreg, #1 // Skip if no PMU present
b.lt 9000f
msr pmuserenr_el0, xzr // Disable PMU access from EL0
@@ -524,7 +524,7 @@ alternative_endif
*/
.macro reset_amuserenr_el0, tmpreg
mrs \tmpreg, id_aa64pfr0_el1 // Check ID_AA64PFR0_EL1
- ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_AMU_SHIFT, #4
+ ubfx \tmpreg, \tmpreg, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
cbz \tmpreg, .Lskip_\@ // Skip if no AMU present
msr_s SYS_AMUSERENR_EL0, xzr // Disable AMU access from EL0
.Lskip_\@:
@@ -612,7 +612,7 @@ alternative_endif
.macro offset_ttbr1, ttbr, tmp
#ifdef CONFIG_ARM64_VA_BITS_52
mrs_s \tmp, SYS_ID_AA64MMFR2_EL1
- and \tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+ and \tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
cbnz \tmp, .Lskipoffs_\@
orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
.Lskipoffs_\@ :
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 34256bda0da9..c0b178d1bb4f 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -45,10 +45,6 @@ static inline unsigned int arch_slab_minalign(void)
#define arch_slab_minalign() arch_slab_minalign()
#endif
-#define CTR_CACHE_MINLINE_MASK \
- (0xf << CTR_EL0_DMINLINE_SHIFT | \
- CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT)
-
#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
#define ICACHEF_ALIASING 0
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index fd7d75a275f6..ff06e6fb5939 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -553,7 +553,7 @@ cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap)
u64 mask = GENMASK_ULL(field + 3, field);
/* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
- if (val == ID_AA64DFR0_PMUVER_IMP_DEF)
+ if (val == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
val = 0;
if (val > cap) {
@@ -597,43 +597,43 @@ static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val)
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{
- return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
- cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
+ return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGEND_SHIFT) == 0x1 ||
+ cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT) == 0x1;
}
static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT);
- return val == ID_AA64PFR0_ELx_32BIT_64BIT;
+ return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
}
static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT);
- return val == ID_AA64PFR0_ELx_32BIT_64BIT;
+ return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
}
static inline bool id_aa64pfr0_sve(u64 pfr0)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SVE_SHIFT);
return val > 0;
}
static inline bool id_aa64pfr1_sme(u64 pfr1)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_SME_SHIFT);
return val > 0;
}
static inline bool id_aa64pfr1_mte(u64 pfr1)
{
- u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT);
- return val >= ID_AA64PFR1_MTE;
+ return val >= ID_AA64PFR1_EL1_MTE_MTE2;
}
void __init setup_cpu_features(void);
@@ -659,7 +659,7 @@ static inline bool supports_csv2p3(int scope)
pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
csv2_val = cpuid_feature_extract_unsigned_field(pfr0,
- ID_AA64PFR0_CSV2_SHIFT);
+ ID_AA64PFR0_EL1_CSV2_SHIFT);
return csv2_val == 3;
}
@@ -694,10 +694,10 @@ static inline bool system_supports_4kb_granule(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_TGRAN4_SHIFT);
+ ID_AA64MMFR0_EL1_TGRAN4_SHIFT);
- return (val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN) &&
- (val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX);
+ return (val >= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX);
}
static inline bool system_supports_64kb_granule(void)
@@ -707,10 +707,10 @@ static inline bool system_supports_64kb_granule(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_TGRAN64_SHIFT);
+ ID_AA64MMFR0_EL1_TGRAN64_SHIFT);
- return (val >= ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN) &&
- (val <= ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX);
+ return (val >= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX);
}
static inline bool system_supports_16kb_granule(void)
@@ -720,10 +720,10 @@ static inline bool system_supports_16kb_granule(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_TGRAN16_SHIFT);
+ ID_AA64MMFR0_EL1_TGRAN16_SHIFT);
- return (val >= ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN) &&
- (val <= ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX);
+ return (val >= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN) &&
+ (val <= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX);
}
static inline bool system_supports_mixed_endian_el0(void)
@@ -738,7 +738,7 @@ static inline bool system_supports_mixed_endian(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
val = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_BIGENDEL_SHIFT);
+ ID_AA64MMFR0_EL1_BIGEND_SHIFT);
return val == 0x1;
}
@@ -840,13 +840,13 @@ extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
{
switch (parange) {
- case ID_AA64MMFR0_PARANGE_32: return 32;
- case ID_AA64MMFR0_PARANGE_36: return 36;
- case ID_AA64MMFR0_PARANGE_40: return 40;
- case ID_AA64MMFR0_PARANGE_42: return 42;
- case ID_AA64MMFR0_PARANGE_44: return 44;
- case ID_AA64MMFR0_PARANGE_48: return 48;
- case ID_AA64MMFR0_PARANGE_52: return 52;
+ case ID_AA64MMFR0_EL1_PARANGE_32: return 32;
+ case ID_AA64MMFR0_EL1_PARANGE_36: return 36;
+ case ID_AA64MMFR0_EL1_PARANGE_40: return 40;
+ case ID_AA64MMFR0_EL1_PARANGE_42: return 42;
+ case ID_AA64MMFR0_EL1_PARANGE_44: return 44;
+ case ID_AA64MMFR0_EL1_PARANGE_48: return 48;
+ case ID_AA64MMFR0_EL1_PARANGE_52: return 52;
/*
* A future PE could use a value unknown to the kernel.
* However, by the "D10.1.4 Principles of the ID scheme
@@ -868,14 +868,14 @@ static inline bool cpu_has_hw_af(void)
mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
return cpuid_feature_extract_unsigned_field(mmfr1,
- ID_AA64MMFR1_HADBS_SHIFT);
+ ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
}
static inline bool cpu_has_pan(void)
{
u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
return cpuid_feature_extract_unsigned_field(mmfr1,
- ID_AA64MMFR1_PAN_SHIFT);
+ ID_AA64MMFR1_EL1_PAN_SHIFT);
}
#ifdef CONFIG_ARM64_AMU_EXTN
@@ -896,8 +896,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
int vmid_bits;
vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1,
- ID_AA64MMFR1_VMIDBITS_SHIFT);
- if (vmid_bits == ID_AA64MMFR1_VMIDBITS_16)
+ ID_AA64MMFR1_EL1_VMIDBits_SHIFT);
+ if (vmid_bits == ID_AA64MMFR1_EL1_VMIDBits_16)
return 16;
/*
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 2630faa5bc08..668569adf4d3 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -40,7 +40,7 @@
.macro __init_el2_debug
mrs x1, id_aa64dfr0_el1
- sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
+ sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
cmp x0, #1
b.lt .Lskip_pmu_\@ // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
@@ -49,7 +49,7 @@
csel x2, xzr, x0, lt // all PMU counters from EL1
/* Statistical profiling */
- ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
+ ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
cbz x0, .Lskip_spe_\@ // Skip if SPE not present
mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2,
@@ -65,7 +65,7 @@
.Lskip_spe_\@:
/* Trace buffer */
- ubfx x0, x1, #ID_AA64DFR0_TRBE_SHIFT, #4
+ ubfx x0, x1, #ID_AA64DFR0_EL1_TraceBuffer_SHIFT, #4
cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present
mrs_s x0, SYS_TRBIDR_EL1
@@ -83,7 +83,7 @@
/* LORegions */
.macro __init_el2_lor
mrs x1, id_aa64mmfr1_el1
- ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+ ubfx x0, x1, #ID_AA64MMFR1_EL1_LO_SHIFT, 4
cbz x0, .Lskip_lor_\@
msr_s SYS_LORC_EL1, xzr
.Lskip_lor_\@:
@@ -97,7 +97,7 @@
/* GICv3 system register access */
.macro __init_el2_gicv3
mrs x0, id_aa64pfr0_el1
- ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
+ ubfx x0, x0, #ID_AA64PFR0_EL1_GIC_SHIFT, #4
cbz x0, .Lskip_gicv3_\@
mrs_s x0, SYS_ICC_SRE_EL2
@@ -132,12 +132,12 @@
/* Disable any fine grained traps */
.macro __init_el2_fgt
mrs x1, id_aa64mmfr0_el1
- ubfx x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4
+ ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4
cbz x1, .Lskip_fgt_\@
mov x0, xzr
mrs x1, id_aa64dfr0_el1
- ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
+ ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
cmp x1, #3
b.lt .Lset_debug_fgt_\@
/* Disable PMSNEVFR_EL1 read and write traps */
@@ -149,7 +149,7 @@
mov x0, xzr
mrs x1, id_aa64pfr1_el1
- ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+ ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
cbz x1, .Lset_fgt_\@
/* Disable nVHE traps of TPIDR2 and SMPRI */
@@ -162,7 +162,7 @@
msr_s SYS_HFGITR_EL2, xzr
mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
- ubfx x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4
+ ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
cbz x1, .Lskip_fgt_\@
msr_s SYS_HAFGRTR_EL2, xzr
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index bc7aaed4b34e..fa4c6ff3aa9b 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -142,7 +142,7 @@ static inline int get_num_brps(void)
u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
return 1 +
cpuid_feature_extract_unsigned_field(dfr0,
- ID_AA64DFR0_BRPS_SHIFT);
+ ID_AA64DFR0_EL1_BRPs_SHIFT);
}
/* Determine number of WRP registers available. */
@@ -151,7 +151,7 @@ static inline int get_num_wrps(void)
u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
return 1 +
cpuid_feature_extract_unsigned_field(dfr0,
- ID_AA64DFR0_WRPS_SHIFT);
+ ID_AA64DFR0_EL1_WRPs_SHIFT);
}
#endif /* __ASM_BREAKPOINT_H */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e9c9388ccc02..45e2136322ba 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -393,6 +393,7 @@ struct kvm_vcpu_arch {
*/
struct {
u32 mdscr_el1;
+ bool pstate_ss;
} guest_debug_preserved;
/* vcpu power state */
@@ -535,6 +536,9 @@ struct kvm_vcpu_arch {
#define IN_WFIT __vcpu_single_flag(sflags, BIT(3))
/* vcpu system registers loaded on physical CPU */
#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
+/* Software step state is Active-pending */
+#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
+
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 9f339dffbc1a..1b098bd4cd37 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -16,9 +16,9 @@
static inline u64 kvm_get_parange(u64 mmfr0)
{
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_PARANGE_SHIFT);
- if (parange > ID_AA64MMFR0_PARANGE_MAX)
- parange = ID_AA64MMFR0_PARANGE_MAX;
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+ if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX)
+ parange = ID_AA64MMFR0_EL1_PARANGE_MAX;
return parange;
}
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 818df938a7ad..debc1c0b2b7f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -190,19 +190,6 @@
#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1)
#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2)
-#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
-#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
-
-#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
-#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
-
-#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4)
-#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5)
-
-#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
-#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
-#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2)
-
#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1)
#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
@@ -436,19 +423,11 @@
#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
-#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4)
-
-#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7)
-
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
-#define SMIDR_EL1_IMPLEMENTER_SHIFT 24
-#define SMIDR_EL1_SMPS_SHIFT 15
-#define SMIDR_EL1_AFFINITY_SHIFT 0
-
#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0)
#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1)
@@ -537,7 +516,6 @@
#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5)
#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
-#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2)
#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
@@ -690,164 +668,30 @@
#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8))
/* id_aa64pfr0 */
-#define ID_AA64PFR0_CSV3_SHIFT 60
-#define ID_AA64PFR0_CSV2_SHIFT 56
-#define ID_AA64PFR0_DIT_SHIFT 48
-#define ID_AA64PFR0_AMU_SHIFT 44
-#define ID_AA64PFR0_MPAM_SHIFT 40
-#define ID_AA64PFR0_SEL2_SHIFT 36
-#define ID_AA64PFR0_SVE_SHIFT 32
-#define ID_AA64PFR0_RAS_SHIFT 28
-#define ID_AA64PFR0_GIC_SHIFT 24
-#define ID_AA64PFR0_ASIMD_SHIFT 20
-#define ID_AA64PFR0_FP_SHIFT 16
-#define ID_AA64PFR0_EL3_SHIFT 12
-#define ID_AA64PFR0_EL2_SHIFT 8
-#define ID_AA64PFR0_EL1_SHIFT 4
-#define ID_AA64PFR0_EL0_SHIFT 0
-
-#define ID_AA64PFR0_AMU 0x1
-#define ID_AA64PFR0_SVE 0x1
-#define ID_AA64PFR0_RAS_V1 0x1
-#define ID_AA64PFR0_RAS_V1P1 0x2
-#define ID_AA64PFR0_FP_NI 0xf
-#define ID_AA64PFR0_FP_SUPPORTED 0x0
-#define ID_AA64PFR0_ASIMD_NI 0xf
-#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
-#define ID_AA64PFR0_ELx_64BIT_ONLY 0x1
-#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2
-
-/* id_aa64pfr1 */
-#define ID_AA64PFR1_SME_SHIFT 24
-#define ID_AA64PFR1_MPAMFRAC_SHIFT 16
-#define ID_AA64PFR1_RASFRAC_SHIFT 12
-#define ID_AA64PFR1_MTE_SHIFT 8
-#define ID_AA64PFR1_SSBS_SHIFT 4
-#define ID_AA64PFR1_BT_SHIFT 0
-
-#define ID_AA64PFR1_SSBS_PSTATE_NI 0
-#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1
-#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2
-#define ID_AA64PFR1_BT_BTI 0x1
-#define ID_AA64PFR1_SME 1
-
-#define ID_AA64PFR1_MTE_NI 0x0
-#define ID_AA64PFR1_MTE_EL0 0x1
-#define ID_AA64PFR1_MTE 0x2
-#define ID_AA64PFR1_MTE_ASYMM 0x3
+#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1
+#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2
/* id_aa64mmfr0 */
-#define ID_AA64MMFR0_ECV_SHIFT 60
-#define ID_AA64MMFR0_FGT_SHIFT 56
-#define ID_AA64MMFR0_EXS_SHIFT 44
-#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40
-#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36
-#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32
-#define ID_AA64MMFR0_TGRAN4_SHIFT 28
-#define ID_AA64MMFR0_TGRAN64_SHIFT 24
-#define ID_AA64MMFR0_TGRAN16_SHIFT 20
-#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
-#define ID_AA64MMFR0_SNSMEM_SHIFT 12
-#define ID_AA64MMFR0_BIGENDEL_SHIFT 8
-#define ID_AA64MMFR0_ASID_SHIFT 4
-#define ID_AA64MMFR0_PARANGE_SHIFT 0
-
-#define ID_AA64MMFR0_ASID_8 0x0
-#define ID_AA64MMFR0_ASID_16 0x2
-
-#define ID_AA64MMFR0_TGRAN4_NI 0xf
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7
-#define ID_AA64MMFR0_TGRAN64_NI 0xf
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN 0x0
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX 0x7
-#define ID_AA64MMFR0_TGRAN16_NI 0x0
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf
-
-#define ID_AA64MMFR0_PARANGE_32 0x0
-#define ID_AA64MMFR0_PARANGE_36 0x1
-#define ID_AA64MMFR0_PARANGE_40 0x2
-#define ID_AA64MMFR0_PARANGE_42 0x3
-#define ID_AA64MMFR0_PARANGE_44 0x4
-#define ID_AA64MMFR0_PARANGE_48 0x5
-#define ID_AA64MMFR0_PARANGE_52 0x6
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
#define ARM64_MIN_PARANGE_BITS 32
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
#ifdef CONFIG_ARM64_PA_BITS_52
-#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52
+#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_52
#else
-#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48
+#define ID_AA64MMFR0_EL1_PARANGE_MAX ID_AA64MMFR0_EL1_PARANGE_48
#endif
-/* id_aa64mmfr1 */
-#define ID_AA64MMFR1_ECBHB_SHIFT 60
-#define ID_AA64MMFR1_TIDCP1_SHIFT 52
-#define ID_AA64MMFR1_HCX_SHIFT 40
-#define ID_AA64MMFR1_AFP_SHIFT 44
-#define ID_AA64MMFR1_ETS_SHIFT 36
-#define ID_AA64MMFR1_TWED_SHIFT 32
-#define ID_AA64MMFR1_XNX_SHIFT 28
-#define ID_AA64MMFR1_SPECSEI_SHIFT 24
-#define ID_AA64MMFR1_PAN_SHIFT 20
-#define ID_AA64MMFR1_LOR_SHIFT 16
-#define ID_AA64MMFR1_HPD_SHIFT 12
-#define ID_AA64MMFR1_VHE_SHIFT 8
-#define ID_AA64MMFR1_VMIDBITS_SHIFT 4
-#define ID_AA64MMFR1_HADBS_SHIFT 0
-
-#define ID_AA64MMFR1_VMIDBITS_8 0
-#define ID_AA64MMFR1_VMIDBITS_16 2
-
-#define ID_AA64MMFR1_TIDCP1_NI 0
-#define ID_AA64MMFR1_TIDCP1_IMP 1
-
-/* id_aa64mmfr2 */
-#define ID_AA64MMFR2_E0PD_SHIFT 60
-#define ID_AA64MMFR2_EVT_SHIFT 56
-#define ID_AA64MMFR2_BBM_SHIFT 52
-#define ID_AA64MMFR2_TTL_SHIFT 48
-#define ID_AA64MMFR2_FWB_SHIFT 40
-#define ID_AA64MMFR2_IDS_SHIFT 36
-#define ID_AA64MMFR2_AT_SHIFT 32
-#define ID_AA64MMFR2_ST_SHIFT 28
-#define ID_AA64MMFR2_NV_SHIFT 24
-#define ID_AA64MMFR2_CCIDX_SHIFT 20
-#define ID_AA64MMFR2_LVA_SHIFT 16
-#define ID_AA64MMFR2_IESB_SHIFT 12
-#define ID_AA64MMFR2_LSM_SHIFT 8
-#define ID_AA64MMFR2_UAO_SHIFT 4
-#define ID_AA64MMFR2_CNP_SHIFT 0
-
-/* id_aa64dfr0 */
-#define ID_AA64DFR0_MTPMU_SHIFT 48
-#define ID_AA64DFR0_TRBE_SHIFT 44
-#define ID_AA64DFR0_TRACE_FILT_SHIFT 40
-#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36
-#define ID_AA64DFR0_PMSVER_SHIFT 32
-#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
-#define ID_AA64DFR0_WRPS_SHIFT 20
-#define ID_AA64DFR0_BRPS_SHIFT 12
-#define ID_AA64DFR0_PMUVER_SHIFT 8
-#define ID_AA64DFR0_TRACEVER_SHIFT 4
-#define ID_AA64DFR0_DEBUGVER_SHIFT 0
-
-#define ID_AA64DFR0_PMUVER_8_0 0x1
-#define ID_AA64DFR0_PMUVER_8_1 0x4
-#define ID_AA64DFR0_PMUVER_8_4 0x5
-#define ID_AA64DFR0_PMUVER_8_5 0x6
-#define ID_AA64DFR0_PMUVER_8_7 0x7
-#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf
-
-#define ID_AA64DFR0_PMSVER_8_2 0x1
-#define ID_AA64DFR0_PMSVER_8_3 0x2
-
#define ID_DFR0_PERFMON_SHIFT 24
#define ID_DFR0_PERFMON_8_0 0x3
@@ -955,20 +799,20 @@
#define ID_PFR1_PROGMOD_SHIFT 0
#if defined(CONFIG_ARM64_4K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX
-#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX
-#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
#elif defined(CONFIG_ARM64_64K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX
-#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN64_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT
#endif
#define MVFR2_FPMISC_SHIFT 4
@@ -1028,9 +872,6 @@
#define TRFCR_ELx_ExTRE BIT(1)
#define TRFCR_ELx_E0TRE BIT(0)
-/* HCRX_EL2 definitions */
-#define HCRX_EL2_SMPME_MASK (1 << 5)
-
/* GIC Hypervisor interface registers */
/* ICH_MISR_EL2 bit definitions */
#define ICH_MISR_EOI (1 << 0)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index af4de817d712..c22732a6908b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -243,35 +243,35 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_MPAM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SEL2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_DIT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AMU_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_MPAM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SEL2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
- S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
- S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SVE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_RAS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_GIC_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, ID_AA64PFR0_EL1_AdvSIMD_NI),
+ S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_RAS_frac_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_SHIFT, 4, ID_AA64PFR1_EL1_MTE_NI),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, ID_AA64PFR1_EL1_SSBS_NI),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI),
- FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_BT_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -316,9 +316,9 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_FGT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_EXS_SHIFT, 4, 0),
/*
* Page size not being supported at Stage-2 is not fatal. You
* just give up KVM if PAGE_SIZE isn't supported there. Go fix
@@ -334,9 +334,9 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
* fields are inconsistent across vCPUs, then it isn't worth
* trying to bring KVM up.
*/
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_2_SHIFT, 4, 1),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_2_SHIFT, 4, 1),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_2_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT, 4, 1),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT, 4, 1),
/*
* We already refuse to boot CPUs that don't support our configured
* page size, so we can only detect mismatches for a page size other
@@ -344,55 +344,55 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
* exist in the wild so, even though we don't like it, we'll have to go
* along with it and treat them as non-strict.
*/
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN4_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN4_NI),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN64_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN64_NI),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN16_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN16_NI),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT, 4, 0),
/* Linux shouldn't care about secure memory */
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_SNSMEM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGEND_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ASIDBITS_SHIFT, 4, 0),
/*
* Differing PARange is fine as long as all peripherals and memory are mapped
* within the minimum PARange of all CPUs
*/
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_PARANGE_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TIDCP1_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_SPECSEI_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1_SpecSEI_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_PAN_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_LO_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HPDS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VH_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VMIDBits_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, 4, 0),
ARM64_FTR_END,
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EVT_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_BBM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IDS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_ST_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_NV_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CCIDX_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_E0PD_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_EVT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_BBM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_TTL_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_FWB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IDS_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_AT_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_ST_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_NV_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CCIDX_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_VARange_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IESB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_LSM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_UAO_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CnP_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -434,17 +434,17 @@ static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DOUBLELOCK_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_DoubleLock_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_PMSVer_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_CTX_CMPs_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_WRPs_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_BRPs_SHIFT, 4, 0),
/*
* We can instantiate multiple PMU instances with different levels
* of support.
*/
- S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_EL1_PMUVer_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_EL1_DebugVer_SHIFT, 4, 0x6),
ARM64_FTR_END,
};
@@ -1492,7 +1492,7 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
return cpuid_feature_extract_signed_field(pfr0,
- ID_AA64PFR0_FP_SHIFT) < 0;
+ ID_AA64PFR0_EL1_FP_SHIFT) < 0;
}
static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
@@ -1571,7 +1571,7 @@ bool kaslr_requires_kpti(void)
if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
if (cpuid_feature_extract_unsigned_field(mmfr2,
- ID_AA64MMFR2_E0PD_SHIFT))
+ ID_AA64MMFR2_EL1_E0PD_SHIFT))
return false;
}
@@ -2093,7 +2093,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_useable_gicv3_cpuif,
.sys_reg = SYS_ID_AA64PFR0_EL1,
- .field_pos = ID_AA64PFR0_GIC_SHIFT,
+ .field_pos = ID_AA64PFR0_EL1_GIC_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
@@ -2104,7 +2104,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR0_EL1,
- .field_pos = ID_AA64MMFR0_ECV_SHIFT,
+ .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
@@ -2116,7 +2116,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
- .field_pos = ID_AA64MMFR1_PAN_SHIFT,
+ .field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
@@ -2130,7 +2130,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
- .field_pos = ID_AA64MMFR1_PAN_SHIFT,
+ .field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 3,
@@ -2168,9 +2168,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_32bit_el0,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64PFR0_EL0_SHIFT,
+ .field_pos = ID_AA64PFR0_EL1_EL0_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
+ .min_field_value = ID_AA64PFR0_EL1_ELx_32BIT_64BIT,
},
#ifdef CONFIG_KVM
{
@@ -2180,9 +2180,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64PFR0_EL1_SHIFT,
+ .field_pos = ID_AA64PFR0_EL1_EL1_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
+ .min_field_value = ID_AA64PFR0_EL1_ELx_32BIT_64BIT,
},
{
.desc = "Protected KVM",
@@ -2201,7 +2201,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
* more details.
*/
.sys_reg = SYS_ID_AA64PFR0_EL1,
- .field_pos = ID_AA64PFR0_CSV3_SHIFT,
+ .field_pos = ID_AA64PFR0_EL1_CSV3_SHIFT,
.field_width = 4,
.min_field_value = 1,
.matches = unmap_kernel_at_el0,
@@ -2244,9 +2244,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_SVE,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64PFR0_SVE_SHIFT,
+ .field_pos = ID_AA64PFR0_EL1_SVE_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64PFR0_SVE,
+ .min_field_value = ID_AA64PFR0_EL1_SVE_IMP,
.matches = has_cpuid_feature,
.cpu_enable = sve_kernel_enable,
},
@@ -2259,9 +2259,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64PFR0_RAS_SHIFT,
+ .field_pos = ID_AA64PFR0_EL1_RAS_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64PFR0_RAS_V1,
+ .min_field_value = ID_AA64PFR0_EL1_RAS_IMP,
.cpu_enable = cpu_clear_disr,
},
#endif /* CONFIG_ARM64_RAS_EXTN */
@@ -2278,9 +2278,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_amu,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64PFR0_AMU_SHIFT,
+ .field_pos = ID_AA64PFR0_EL1_AMU_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64PFR0_AMU,
+ .min_field_value = ID_AA64PFR0_EL1_AMU_IMP,
.cpu_enable = cpu_amu_enable,
},
#endif /* CONFIG_ARM64_AMU_EXTN */
@@ -2303,7 +2303,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_STAGE2_FWB,
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64MMFR2_FWB_SHIFT,
+ .field_pos = ID_AA64MMFR2_EL1_FWB_SHIFT,
.field_width = 4,
.min_field_value = 1,
.matches = has_cpuid_feature,
@@ -2314,7 +2314,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_ARMv8_4_TTL,
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64MMFR2_TTL_SHIFT,
+ .field_pos = ID_AA64MMFR2_EL1_TTL_SHIFT,
.field_width = 4,
.min_field_value = 1,
.matches = has_cpuid_feature,
@@ -2344,7 +2344,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HW_DBM,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64MMFR1_HADBS_SHIFT,
+ .field_pos = ID_AA64MMFR1_EL1_HAFDBS_SHIFT,
.field_width = 4,
.min_field_value = 2,
.matches = has_hw_dbm,
@@ -2367,10 +2367,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR1_EL1,
- .field_pos = ID_AA64PFR1_SSBS_SHIFT,
+ .field_pos = ID_AA64PFR1_EL1_SSBS_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
- .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
+ .min_field_value = ID_AA64PFR1_EL1_SSBS_IMP,
},
#ifdef CONFIG_ARM64_CNP
{
@@ -2380,7 +2380,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_useable_cnp,
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64MMFR2_CNP_SHIFT,
+ .field_pos = ID_AA64MMFR2_EL1_CnP_SHIFT,
.field_width = 4,
.min_field_value = 1,
.cpu_enable = cpu_enable_cnp,
@@ -2485,7 +2485,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = can_use_gic_priorities,
.sys_reg = SYS_ID_AA64PFR0_EL1,
- .field_pos = ID_AA64PFR0_GIC_SHIFT,
+ .field_pos = ID_AA64PFR0_EL1_GIC_SHIFT,
.field_width = 4,
.sign = FTR_UNSIGNED,
.min_field_value = 1,
@@ -2499,7 +2499,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
.field_width = 4,
- .field_pos = ID_AA64MMFR2_E0PD_SHIFT,
+ .field_pos = ID_AA64MMFR2_EL1_E0PD_SHIFT,
.matches = has_cpuid_feature,
.min_field_value = 1,
.cpu_enable = cpu_enable_e0pd,
@@ -2528,9 +2528,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
.cpu_enable = bti_enable,
.sys_reg = SYS_ID_AA64PFR1_EL1,
- .field_pos = ID_AA64PFR1_BT_SHIFT,
+ .field_pos = ID_AA64PFR1_EL1_BT_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64PFR1_BT_BTI,
+ .min_field_value = ID_AA64PFR1_EL1_BT_IMP,
.sign = FTR_UNSIGNED,
},
#endif
@@ -2541,9 +2541,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR1_EL1,
- .field_pos = ID_AA64PFR1_MTE_SHIFT,
+ .field_pos = ID_AA64PFR1_EL1_MTE_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64PFR1_MTE,
+ .min_field_value = ID_AA64PFR1_EL1_MTE_MTE2,
.sign = FTR_UNSIGNED,
.cpu_enable = cpu_enable_mte,
},
@@ -2553,9 +2553,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64PFR1_EL1,
- .field_pos = ID_AA64PFR1_MTE_SHIFT,
+ .field_pos = ID_AA64PFR1_EL1_MTE_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64PFR1_MTE_ASYMM,
+ .min_field_value = ID_AA64PFR1_EL1_MTE_MTE3,
.sign = FTR_UNSIGNED,
},
#endif /* CONFIG_ARM64_MTE */
@@ -2577,9 +2577,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_SME,
.sys_reg = SYS_ID_AA64PFR1_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64PFR1_SME_SHIFT,
+ .field_pos = ID_AA64PFR1_EL1_SME_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64PFR1_SME,
+ .min_field_value = ID_AA64PFR1_EL1_SME_IMP,
.matches = has_cpuid_feature,
.cpu_enable = sme_kernel_enable,
},
@@ -2614,9 +2614,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
.sign = FTR_UNSIGNED,
- .field_pos = ID_AA64MMFR1_TIDCP1_SHIFT,
+ .field_pos = ID_AA64MMFR1_EL1_TIDCP1_SHIFT,
.field_width = 4,
- .min_field_value = ID_AA64MMFR1_TIDCP1_IMP,
+ .min_field_value = ID_AA64MMFR1_EL1_TIDCP1_IMP,
.matches = has_cpuid_feature,
.cpu_enable = cpu_trap_el0_impdef,
},
@@ -2708,11 +2708,11 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
@@ -2725,9 +2725,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_EBF16),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
- HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
+ HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_EL1_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_EL1_SVE_IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
@@ -2739,24 +2739,24 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
#endif
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SSBS_SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_BTI
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_BT_IMP, CAP_HWCAP, KERNEL_HWCAP_BTI),
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
#endif
#ifdef CONFIG_ARM64_MTE
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3),
#endif /* CONFIG_ARM64_MTE */
- HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
- HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
+ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
+ HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
#ifdef CONFIG_ARM64_SME
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
@@ -3102,7 +3102,7 @@ static void verify_hyp_capabilities(void)
/* Verify IPA range */
parange = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_PARANGE_SHIFT);
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
if (ipa_max < get_kvm_ipa_limit()) {
pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id());
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index bf9fe71589bc..3da09778267e 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -28,7 +28,7 @@
u8 debug_monitors_arch(void)
{
return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1),
- ID_AA64DFR0_DEBUGVER_SHIFT);
+ ID_AA64DFR0_EL1_DebugVer_SHIFT);
}
/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 814b6587ccb7..2196aad7b55b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -99,7 +99,7 @@ SYM_CODE_START(primary_entry)
*/
#if VA_BITS > 48
mrs_s x0, SYS_ID_AA64MMFR2_EL1
- tst x0, #0xf << ID_AA64MMFR2_LVA_SHIFT
+ tst x0, #0xf << ID_AA64MMFR2_EL1_VARange_SHIFT
mov x0, #VA_BITS
mov x25, #VA_BITS_MIN
csel x25, x25, x0, eq
@@ -658,10 +658,10 @@ SYM_FUNC_END(__secondary_too_slow)
*/
SYM_FUNC_START(__enable_mmu)
mrs x3, ID_AA64MMFR0_EL1
- ubfx x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4
- cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
+ ubfx x3, x3, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
+ cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN
b.lt __no_granule_support
- cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
+ cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX
b.gt __no_granule_support
phys_to_ttbr x2, x2
msr ttbr0_el1, x2 // load TTBR0
@@ -679,7 +679,7 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
b.ne 2f
mrs_s x0, SYS_ID_AA64MMFR2_EL1
- and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+ and x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
cbnz x0, 2f
update_early_cpu_boot_status \
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 12c7fad02ae5..2ee18c860f2a 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -98,7 +98,7 @@ SYM_CODE_START_LOCAL(elx_sync)
SYM_CODE_END(elx_sync)
SYM_CODE_START_LOCAL(__finalise_el2)
- check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve
+ check_override id_aa64pfr0 ID_AA64PFR0_EL1_SVE_SHIFT .Linit_sve .Lskip_sve
.Linit_sve: /* SVE register access */
mrs x0, cptr_el2 // Disable SVE traps
@@ -109,7 +109,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
msr_s SYS_ZCR_EL2, x1 // length for EL1.
.Lskip_sve:
- check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme
+ check_override id_aa64pfr1 ID_AA64PFR1_EL1_SME_SHIFT .Linit_sme .Lskip_sme
.Linit_sme: /* SME register access and priority mapping */
mrs x0, cptr_el2 // Disable SME traps
@@ -142,7 +142,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
- ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
+ ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
cbz x1, .Lskip_sme
mrs_s x1, SYS_HCRX_EL2
@@ -157,7 +157,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
tbnz x1, #0, 1f
// Needs to be VHE capable, obviously
- check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f
+ check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 2f 1f
1: mov_q x0, HVC_STUB_ERR
eret
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 1b0542c69738..95133765ed29 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -50,7 +50,7 @@ static const struct ftr_set_desc mmfr1 __initconst = {
.name = "id_aa64mmfr1",
.override = &id_aa64mmfr1_override,
.fields = {
- FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter),
+ FIELD("vh", ID_AA64MMFR1_EL1_VH_SHIFT, mmfr1_vh_filter),
{}
},
};
@@ -74,7 +74,7 @@ static const struct ftr_set_desc pfr0 __initconst = {
.name = "id_aa64pfr0",
.override = &id_aa64pfr0_override,
.fields = {
- FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter),
+ FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
{}
},
};
@@ -98,9 +98,9 @@ static const struct ftr_set_desc pfr1 __initconst = {
.name = "id_aa64pfr1",
.override = &id_aa64pfr1_override,
.fields = {
- FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ),
- FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL),
- FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter),
+ FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ),
+ FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
+ FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
{}
},
};
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index cb69ff1e6138..7b0643fe2f13 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -390,7 +390,7 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = {
*/
static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
{
- return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5);
+ return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5);
}
static inline bool armv8pmu_event_has_user_read(struct perf_event *event)
@@ -1145,8 +1145,8 @@ static void __armv8pmu_probe_pmu(void *info)
dfr0 = read_sysreg(id_aa64dfr0_el1);
pmuver = cpuid_feature_extract_unsigned_field(dfr0,
- ID_AA64DFR0_PMUVER_SHIFT);
- if (pmuver == ID_AA64DFR0_PMUVER_IMP_DEF || pmuver == 0)
+ ID_AA64DFR0_EL1_PMUVer_SHIFT);
+ if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || pmuver == 0)
return;
cpu_pmu->pmuver = pmuver;
@@ -1172,7 +1172,7 @@ static void __armv8pmu_probe_pmu(void *info)
pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
/* store PMMIR_EL1 register for sysfs */
- if (pmuver >= ID_AA64DFR0_PMUVER_8_4 && (pmceid_raw[1] & BIT(31)))
+ if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31)))
cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1);
else
cpu_pmu->reg_pmmir = 0;
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 40be3a7c2c53..fe3bc4c1c5ac 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -168,7 +168,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)
/* If the CPU has CSV2 set, we're safe */
pfr0 = read_cpuid(ID_AA64PFR0_EL1);
- if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
+ if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_CSV2_SHIFT))
return SPECTRE_UNAFFECTED;
/* Alternatively, we have a list of unaffected CPUs */
@@ -945,7 +945,7 @@ static bool supports_ecbhb(int scope)
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
return cpuid_feature_extract_unsigned_field(mmfr1,
- ID_AA64MMFR1_ECBHB_SHIFT);
+ ID_AA64MMFR1_EL1_ECBHB_SHIFT);
}
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 917086be5c6b..94d33e296e10 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -666,7 +666,6 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
kvm_vcpu_halt(vcpu);
vcpu_clear_flag(vcpu, IN_WFIT);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
preempt_disable();
vgic_v4_load(vcpu);
@@ -2270,6 +2269,16 @@ static int __init early_kvm_mode_cfg(char *arg)
if (!arg)
return -EINVAL;
+ if (strcmp(arg, "none") == 0) {
+ kvm_mode = KVM_MODE_NONE;
+ return 0;
+ }
+
+ if (!is_hyp_mode_available()) {
+ pr_warn_once("KVM is not available. Ignoring kvm-arm.mode\n");
+ return 0;
+ }
+
if (strcmp(arg, "protected") == 0) {
if (!is_kernel_in_hyp_mode())
kvm_mode = KVM_MODE_PROTECTED;
@@ -2284,11 +2293,6 @@ static int __init early_kvm_mode_cfg(char *arg)
return 0;
}
- if (strcmp(arg, "none") == 0) {
- kvm_mode = KVM_MODE_NONE;
- return 0;
- }
-
return -EINVAL;
}
early_param("kvm-arm.mode", early_kvm_mode_cfg);
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 0b28d7db7c76..fccf9ec01813 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -32,6 +32,10 @@ static DEFINE_PER_CPU(u64, mdcr_el2);
*
* Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
* after we have restored the preserved value to the main context.
+ *
+ * When single-step is enabled by userspace, we tweak PSTATE.SS on every
+ * guest entry. Preserve PSTATE.SS so we can restore the original value
+ * for the vcpu after the single-step is disabled.
*/
static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
{
@@ -41,6 +45,9 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
vcpu->arch.guest_debug_preserved.mdscr_el1);
+
+ vcpu->arch.guest_debug_preserved.pstate_ss =
+ (*vcpu_cpsr(vcpu) & DBG_SPSR_SS);
}
static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
@@ -51,6 +58,11 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
vcpu_read_sys_reg(vcpu, MDSCR_EL1));
+
+ if (vcpu->arch.guest_debug_preserved.pstate_ss)
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+ else
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
}
/**
@@ -188,7 +200,18 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
* debugging the system.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+ /*
+ * If the software step state at the last guest exit
+ * was Active-pending, we don't set DBG_SPSR_SS so
+ * that the state is maintained (to not run another
+ * single-step until the pending Software Step
+ * exception is taken).
+ */
+ if (!vcpu_get_flag(vcpu, DBG_SS_ACTIVE_PENDING))
+ *vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+ else
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+
mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
mdscr |= DBG_MDSCR_SS;
vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
@@ -262,6 +285,15 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
* Restore the guest's debug registers if we were using them.
*/
if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS))
+ /*
+ * Mark the vcpu as ACTIVE_PENDING
+ * until Software Step exception is taken.
+ */
+ vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+ }
+
restore_guest_debug_regs(vcpu);
/*
@@ -295,12 +327,12 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
* If SPE is present on this CPU and is available at current EL,
* we may need to check if the host state needs to be saved.
*/
- if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) &&
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
!(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT)))
vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
/* Check if we have TRBE implemented and available at the host */
- if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) &&
+ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
!(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index f802a3b3f8db..2ff13a3f8479 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -937,6 +937,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
} else {
/* If not enabled clear all flags */
vcpu->guest_debug = 0;
+ vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
}
out:
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index bbe5b393d689..e778eefcf214 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -152,8 +152,14 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
run->debug.arch.hsr_high = upper_32_bits(esr);
run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID;
- if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_WATCHPT_LOW:
run->debug.arch.far = vcpu->arch.fault.far_el2;
+ break;
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+ break;
+ }
return 0;
}
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index fa6e466ed57f..07edfc7524c9 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -35,9 +35,9 @@
* - Data Independent Timing
*/
#define PVM_ID_AA64PFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
- ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) \
)
/*
@@ -49,11 +49,11 @@
* Supported by KVM
*/
#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \
)
/*
@@ -62,8 +62,8 @@
* - Speculative Store Bypassing
*/
#define PVM_ID_AA64PFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
- ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
+ ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
)
/*
@@ -74,10 +74,10 @@
* - Non-context synchronizing exception entry and exit
*/
#define PVM_ID_AA64MMFR0_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \
)
/*
@@ -86,8 +86,8 @@
* - 16-bit ASID
*/
#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
- FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
)
/*
@@ -100,12 +100,12 @@
* - Enhanced Translation Synchronization
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
)
/*
@@ -120,14 +120,14 @@
* - E0PDx mechanism
*/
#define PVM_ID_AA64MMFR2_ALLOW (\
- ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
)
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 99c8d8b73e70..85d3b7ae720f 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -20,35 +20,35 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
u64 cptr_set = 0;
/* Protected KVM does not support AArch32 guests. */
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
/*
* Linux guests assume support for floating-point and Advanced SIMD. Do
* not change the trapping behavior for these from the KVM default.
*/
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP),
PVM_ID_AA64PFR0_ALLOW));
- BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
PVM_ID_AA64PFR0_ALLOW));
/* Trap RAS unless all current versions are supported */
- if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
- ID_AA64PFR0_RAS_V1P1) {
+ if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
+ ID_AA64PFR0_EL1_RAS_V1P1) {
hcr_set |= HCR_TERR | HCR_TEA;
hcr_clear |= HCR_FIEN;
}
/* Trap AMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
hcr_clear |= HCR_AMVOFFEN;
cptr_set |= CPTR_EL2_TAM;
}
/* Trap SVE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids))
cptr_set |= CPTR_EL2_TZ;
vcpu->arch.hcr_el2 |= hcr_set;
@@ -66,7 +66,7 @@ static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
u64 hcr_clear = 0;
/* Memory Tagging: Trap and Treat as Untagged if not supported. */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) {
hcr_set |= HCR_TID5;
hcr_clear |= HCR_DCT | HCR_ATA;
}
@@ -86,32 +86,32 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
u64 cptr_set = 0;
/* Trap/constrain PMU */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
MDCR_EL2_HPMN_MASK;
}
/* Trap Debug */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids))
mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
/* Trap OS Double Lock */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids))
mdcr_set |= MDCR_EL2_TDOSA;
/* Trap SPE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) {
mdcr_set |= MDCR_EL2_TPMS;
mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
}
/* Trap Trace Filter */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
mdcr_set |= MDCR_EL2_TTRF;
/* Trap Trace */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids))
cptr_set |= CPTR_EL2_TTA;
vcpu->arch.mdcr_el2 |= mdcr_set;
@@ -128,7 +128,7 @@ static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
u64 mdcr_set = 0;
/* Trap Debug Communications Channel registers */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids))
mdcr_set |= MDCR_EL2_TDCC;
vcpu->arch.mdcr_el2 |= mdcr_set;
@@ -143,7 +143,7 @@ static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
u64 hcr_set = 0;
/* Trap LOR */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids))
hcr_set |= HCR_TLOR;
vcpu->arch.hcr_el2 |= hcr_set;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 9f6385702061..8e9d49a964be 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -143,7 +143,7 @@ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
}
}
-/* Restore VGICv3 state on non_VEH systems */
+/* Restore VGICv3 state on non-VHE systems */
static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
{
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index e20fa4475dac..0f9ac25afdf4 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -92,9 +92,9 @@ static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
/* Spectre and Meltdown mitigation in KVM */
- set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2),
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2),
(u64)kvm->arch.pfr0_csv2);
- set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3),
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3),
(u64)kvm->arch.pfr0_csv3);
return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
@@ -106,7 +106,7 @@ static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
if (!kvm_has_mte(kvm))
- allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+ allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
return id_aa64pfr1_el1_sys_val & allow_mask;
}
@@ -281,8 +281,8 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
* No support for AArch32 guests, therefore, pKVM has no sanitized copy
* of AArch32 feature id registers.
*/
- BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
- PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY);
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
return pvm_access_raz_wi(vcpu, p, r);
}
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 2cb3867eb7c2..cdf8e76b0be1 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -61,7 +61,7 @@ struct kvm_pgtable_walk_data {
static bool kvm_phys_is_valid(u64 phys)
{
- return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
+ return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
}
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c9a13e487187..34c5feed9dc1 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -92,9 +92,13 @@ static bool kvm_is_device_pfn(unsigned long pfn)
static void *stage2_memcache_zalloc_page(void *arg)
{
struct kvm_mmu_memory_cache *mc = arg;
+ void *virt;
/* Allocated with __GFP_ZERO, so no need to zero */
- return kvm_mmu_memory_cache_alloc(mc);
+ virt = kvm_mmu_memory_cache_alloc(mc);
+ if (virt)
+ kvm_account_pgtable_pages(virt, 1);
+ return virt;
}
static void *kvm_host_zalloc_pages_exact(size_t size)
@@ -102,6 +106,21 @@ static void *kvm_host_zalloc_pages_exact(size_t size)
return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}
+static void *kvm_s2_zalloc_pages_exact(size_t size)
+{
+ void *virt = kvm_host_zalloc_pages_exact(size);
+
+ if (virt)
+ kvm_account_pgtable_pages(virt, (size >> PAGE_SHIFT));
+ return virt;
+}
+
+static void kvm_s2_free_pages_exact(void *virt, size_t size)
+{
+ kvm_account_pgtable_pages(virt, -(size >> PAGE_SHIFT));
+ free_pages_exact(virt, size);
+}
+
static void kvm_host_get_page(void *addr)
{
get_page(virt_to_page(addr));
@@ -112,6 +131,15 @@ static void kvm_host_put_page(void *addr)
put_page(virt_to_page(addr));
}
+static void kvm_s2_put_page(void *addr)
+{
+ struct page *p = virt_to_page(addr);
+ /* Dropping last refcount, the page will be freed */
+ if (page_count(p) == 1)
+ kvm_account_pgtable_pages(addr, -1);
+ put_page(p);
+}
+
static int kvm_host_page_count(void *addr)
{
return page_count(virt_to_page(addr));
@@ -625,10 +653,10 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.zalloc_page = stage2_memcache_zalloc_page,
- .zalloc_pages_exact = kvm_host_zalloc_pages_exact,
- .free_pages_exact = free_pages_exact,
+ .zalloc_pages_exact = kvm_s2_zalloc_pages_exact,
+ .free_pages_exact = kvm_s2_free_pages_exact,
.get_page = kvm_host_get_page,
- .put_page = kvm_host_put_page,
+ .put_page = kvm_s2_put_page,
.page_count = kvm_host_page_count,
.phys_to_virt = kvm_host_va,
.virt_to_phys = kvm_host_pa,
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 11c43bed5f97..0003c7d37533 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -33,12 +33,12 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
pmuver = kvm->arch.arm_pmu->pmuver;
switch (pmuver) {
- case ID_AA64DFR0_PMUVER_8_0:
+ case ID_AA64DFR0_EL1_PMUVer_IMP:
return GENMASK(9, 0);
- case ID_AA64DFR0_PMUVER_8_1:
- case ID_AA64DFR0_PMUVER_8_4:
- case ID_AA64DFR0_PMUVER_8_5:
- case ID_AA64DFR0_PMUVER_8_7:
+ case ID_AA64DFR0_EL1_PMUVer_V3P1:
+ case ID_AA64DFR0_EL1_PMUVer_V3P4:
+ case ID_AA64DFR0_EL1_PMUVer_V3P5:
+ case ID_AA64DFR0_EL1_PMUVer_V3P7:
return GENMASK(15, 0);
default: /* Shouldn't be here, just for sanity */
WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
@@ -774,7 +774,7 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
{
struct arm_pmu_entry *entry;
- if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
+ if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
return;
mutex_lock(&arm_pmus_lock);
@@ -828,7 +828,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
if (event->pmu) {
pmu = to_arm_pmu(event->pmu);
if (pmu->pmuver == 0 ||
- pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
+ pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
pmu = NULL;
}
@@ -856,7 +856,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
* Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
* as RAZ
*/
- if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4)
+ if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
base = 32;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 0e08fbe68715..5ae18472205a 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -359,7 +359,7 @@ int kvm_set_ipa_limit(void)
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
parange = cpuid_feature_extract_unsigned_field(mmfr0,
- ID_AA64MMFR0_PARANGE_SHIFT);
+ ID_AA64MMFR0_EL1_PARANGE_SHIFT);
/*
* IPA size beyond 48 bits could not be supported
* on either 4K or 16K page size. Hence let's cap
@@ -367,20 +367,20 @@ int kvm_set_ipa_limit(void)
* on the system.
*/
if (PAGE_SIZE != SZ_64K)
- parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);
+ parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48);
/*
* Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
* Stage-2. If not, things will stop very quickly.
*/
- switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) {
- case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
+ switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_TGRAN_2_SHIFT)) {
+ case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE:
kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
return -EINVAL;
- case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT:
+ case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT:
kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n");
break;
- case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX:
+ case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX:
kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n");
break;
default:
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 3234f50b8c4b..f4a7c5abcbca 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -273,7 +273,7 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
u32 sr = reg_to_encoding(r);
- if (!(val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))) {
+ if (!(val & (0xfUL << ID_AA64MMFR1_EL1_LO_SHIFT))) {
kvm_inject_undefined(vcpu);
return false;
}
@@ -1063,13 +1063,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
}
/* Read a sanitised cpufeature ID register by sys_reg_desc */
-static u64 read_id_reg(const struct kvm_vcpu *vcpu,
- struct sys_reg_desc const *r, bool raz)
+static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r)
{
u32 id = reg_to_encoding(r);
u64 val;
- if (raz)
+ if (sysreg_visible_as_raz(vcpu, r))
return 0;
val = read_sanitised_ftr_reg(id);
@@ -1077,22 +1076,22 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
switch (id) {
case SYS_ID_AA64PFR0_EL1:
if (!vcpu_has_sve(vcpu))
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_AMU);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
if (kvm_vgic_global_state.type == VGIC_V3) {
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1);
}
break;
case SYS_ID_AA64PFR1_EL1:
if (!kvm_has_mte(vcpu->kvm))
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_SME);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1110,14 +1109,14 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
break;
case SYS_ID_AA64DFR0_EL1:
/* Limit debug to ARMv8.0 */
- val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), 6);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6);
/* Limit guests to PMUv3 for ARMv8.4 */
val = cpuid_feature_cap_perfmon_field(val,
- ID_AA64DFR0_PMUVER_SHIFT,
- kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0);
+ ID_AA64DFR0_EL1_PMUVer_SHIFT,
+ kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0);
/* Hide SPE from guests */
- val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer);
break;
case SYS_ID_DFR0_EL1:
/* Limit guests to PMUv3 for ARMv8.4 */
@@ -1145,34 +1144,37 @@ static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
return 0;
}
-/* cpufeature ID register access trap handlers */
-
-static bool __access_id_reg(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r,
- bool raz)
+static unsigned int aa32_id_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
{
- if (p->is_write)
- return write_to_read_only(vcpu, p, r);
+ /*
+ * AArch32 ID registers are UNKNOWN if AArch32 isn't implemented at any
+ * EL. Promote to RAZ/WI in order to guarantee consistency between
+ * systems.
+ */
+ if (!kvm_supports_32bit_el0())
+ return REG_RAZ | REG_USER_WI;
- p->regval = read_id_reg(vcpu, r, raz);
- return true;
+ return id_visibility(vcpu, r);
}
+static unsigned int raz_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ return REG_RAZ;
+}
+
+/* cpufeature ID register access trap handlers */
+
static bool access_id_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- bool raz = sysreg_visible_as_raz(vcpu, r);
-
- return __access_id_reg(vcpu, p, r, raz);
-}
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
-static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- return __access_id_reg(vcpu, p, r, true);
+ p->regval = read_id_reg(vcpu, r);
+ return true;
}
/* Visibility overrides for SVE-specific control registers */
@@ -1196,21 +1198,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
* it doesn't promise more than what is actually provided (the
* guest could otherwise be covered in ectoplasmic residue).
*/
- csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV2_SHIFT);
+ csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV2_SHIFT);
if (csv2 > 1 ||
(csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED))
return -EINVAL;
/* Same thing for CSV3 */
- csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT);
+ csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV3_SHIFT);
if (csv3 > 1 ||
(csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED))
return -EINVAL;
/* We can only differ with CSV[23], and anything else is an error */
- val ^= read_id_reg(vcpu, rd, false);
- val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) |
- (0xFUL << ID_AA64PFR0_CSV3_SHIFT));
+ val ^= read_id_reg(vcpu, rd);
+ val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) |
+ ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3));
if (val)
return -EINVAL;
@@ -1227,45 +1229,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
* are stored, and for set_id_reg() we don't allow the effective value
* to be changed.
*/
-static int __get_id_reg(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd, u64 *val,
- bool raz)
-{
- *val = read_id_reg(vcpu, rd, raz);
- return 0;
-}
-
-static int __set_id_reg(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd, u64 val,
- bool raz)
-{
- /* This is what we mean by invariant: you can't change it. */
- if (val != read_id_reg(vcpu, rd, raz))
- return -EINVAL;
-
- return 0;
-}
-
static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
u64 *val)
{
- bool raz = sysreg_visible_as_raz(vcpu, rd);
-
- return __get_id_reg(vcpu, rd, val, raz);
+ *val = read_id_reg(vcpu, rd);
+ return 0;
}
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
u64 val)
{
- bool raz = sysreg_visible_as_raz(vcpu, rd);
-
- return __set_id_reg(vcpu, rd, val, raz);
-}
+ /* This is what we mean by invariant: you can't change it. */
+ if (val != read_id_reg(vcpu, rd))
+ return -EINVAL;
-static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- u64 val)
-{
- return __set_id_reg(vcpu, rd, val, true);
+ return 0;
}
static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
@@ -1367,6 +1345,15 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
.visibility = id_visibility, \
}
+/* sys_reg_desc initialiser for known cpufeature ID registers */
+#define AA32_ID_SANITISED(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = access_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_id_reg, \
+ .visibility = aa32_id_visibility, \
+}
+
/*
* sys_reg_desc initialiser for architecturally unallocated cpufeature ID
* register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
@@ -1374,9 +1361,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
*/
#define ID_UNALLOCATED(crm, op2) { \
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
- .access = access_raz_id_reg, \
- .get_user = get_raz_reg, \
- .set_user = set_raz_id_reg, \
+ .access = access_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_id_reg, \
+ .visibility = raz_visibility \
}
/*
@@ -1386,9 +1374,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
*/
#define ID_HIDDEN(name) { \
SYS_DESC(SYS_##name), \
- .access = access_raz_id_reg, \
- .get_user = get_raz_reg, \
- .set_user = set_raz_id_reg, \
+ .access = access_id_reg, \
+ .get_user = get_id_reg, \
+ .set_user = set_id_reg, \
+ .visibility = raz_visibility, \
}
/*
@@ -1452,33 +1441,33 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* AArch64 mappings of the AArch32 ID registers */
/* CRm=1 */
- ID_SANITISED(ID_PFR0_EL1),
- ID_SANITISED(ID_PFR1_EL1),
- ID_SANITISED(ID_DFR0_EL1),
+ AA32_ID_SANITISED(ID_PFR0_EL1),
+ AA32_ID_SANITISED(ID_PFR1_EL1),
+ AA32_ID_SANITISED(ID_DFR0_EL1),
ID_HIDDEN(ID_AFR0_EL1),
- ID_SANITISED(ID_MMFR0_EL1),
- ID_SANITISED(ID_MMFR1_EL1),
- ID_SANITISED(ID_MMFR2_EL1),
- ID_SANITISED(ID_MMFR3_EL1),
+ AA32_ID_SANITISED(ID_MMFR0_EL1),
+ AA32_ID_SANITISED(ID_MMFR1_EL1),
+ AA32_ID_SANITISED(ID_MMFR2_EL1),
+ AA32_ID_SANITISED(ID_MMFR3_EL1),
/* CRm=2 */
- ID_SANITISED(ID_ISAR0_EL1),
- ID_SANITISED(ID_ISAR1_EL1),
- ID_SANITISED(ID_ISAR2_EL1),
- ID_SANITISED(ID_ISAR3_EL1),
- ID_SANITISED(ID_ISAR4_EL1),
- ID_SANITISED(ID_ISAR5_EL1),
- ID_SANITISED(ID_MMFR4_EL1),
- ID_SANITISED(ID_ISAR6_EL1),
+ AA32_ID_SANITISED(ID_ISAR0_EL1),
+ AA32_ID_SANITISED(ID_ISAR1_EL1),
+ AA32_ID_SANITISED(ID_ISAR2_EL1),
+ AA32_ID_SANITISED(ID_ISAR3_EL1),
+ AA32_ID_SANITISED(ID_ISAR4_EL1),
+ AA32_ID_SANITISED(ID_ISAR5_EL1),
+ AA32_ID_SANITISED(ID_MMFR4_EL1),
+ AA32_ID_SANITISED(ID_ISAR6_EL1),
/* CRm=3 */
- ID_SANITISED(MVFR0_EL1),
- ID_SANITISED(MVFR1_EL1),
- ID_SANITISED(MVFR2_EL1),
+ AA32_ID_SANITISED(MVFR0_EL1),
+ AA32_ID_SANITISED(MVFR1_EL1),
+ AA32_ID_SANITISED(MVFR2_EL1),
ID_UNALLOCATED(3,3),
- ID_SANITISED(ID_PFR2_EL1),
+ AA32_ID_SANITISED(ID_PFR2_EL1),
ID_HIDDEN(ID_DFR1_EL1),
- ID_SANITISED(ID_MMFR5_EL1),
+ AA32_ID_SANITISED(ID_MMFR5_EL1),
ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
@@ -1825,11 +1814,11 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
} else {
u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
- u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT);
+ u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL1_EL3_SHIFT);
- p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
- (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
- (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20)
+ p->regval = ((((dfr >> ID_AA64DFR0_EL1_WRPs_SHIFT) & 0xf) << 28) |
+ (((dfr >> ID_AA64DFR0_EL1_BRPs_SHIFT) & 0xf) << 24) |
+ (((dfr >> ID_AA64DFR0_EL1_CTX_CMPs_SHIFT) & 0xf) << 20)
| (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12));
return true;
}
@@ -2809,6 +2798,9 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
if (!r)
return -ENOENT;
+ if (sysreg_user_write_ignore(vcpu, r))
+ return 0;
+
if (r->set_user) {
ret = (r->set_user)(vcpu, r, val);
} else {
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index a8c4cc32eb9a..e4ebb3a379fd 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -86,6 +86,7 @@ struct sys_reg_desc {
#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */
#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */
+#define REG_USER_WI (1 << 2) /* WI from userspace only */
static __printf(2, 3)
inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -136,22 +137,31 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
__vcpu_sys_reg(vcpu, r->reg) = r->val;
}
-static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *r)
+static inline unsigned int sysreg_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
{
if (likely(!r->visibility))
- return false;
+ return 0;
- return r->visibility(vcpu, r) & REG_HIDDEN;
+ return r->visibility(vcpu, r);
+}
+
+static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ return sysreg_visibility(vcpu, r) & REG_HIDDEN;
}
static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
- if (likely(!r->visibility))
- return false;
+ return sysreg_visibility(vcpu, r) & REG_RAZ;
+}
- return r->visibility(vcpu, r) & REG_RAZ;
+static inline bool sysreg_user_write_ignore(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ return sysreg_visibility(vcpu, r) & REG_USER_WI;
}
static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 9d3299a70242..24d7778d1ce6 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -406,7 +406,7 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its,
struct its_ite *ite;
for_each_lpi_its(device, ite, its) {
- if (!ite->collection || coll != ite->collection)
+ if (ite->collection != coll)
continue;
update_affinity_ite(kvm, ite);
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b8b4cf0bcf39..e1e0dca01839 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -43,17 +43,17 @@ static u32 get_cpu_asid_bits(void)
{
u32 asid;
int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1),
- ID_AA64MMFR0_ASID_SHIFT);
+ ID_AA64MMFR0_EL1_ASIDBITS_SHIFT);
switch (fld) {
default:
pr_warn("CPU%d: Unknown ASID size (%d); assuming 8-bit\n",
smp_processor_id(), fld);
fallthrough;
- case ID_AA64MMFR0_ASID_8:
+ case ID_AA64MMFR0_EL1_ASIDBITS_8:
asid = 8;
break;
- case ID_AA64MMFR0_ASID_16:
+ case ID_AA64MMFR0_EL1_ASIDBITS_16:
asid = 16;
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index b9af30be813e..4b4651ee47f2 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -360,7 +360,7 @@ void __init arm64_memblock_init(void)
extern u16 memstart_offset_seed;
u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
int parange = cpuid_feature_extract_unsigned_field(
- mmfr0, ID_AA64MMFR0_PARANGE_SHIFT);
+ mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT);
s64 range = linear_region_size -
BIT(id_aa64mmfr0_parange_to_phys_shift(parange));
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index eb489302c28a..ddfb3fb18b30 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -690,7 +690,7 @@ static bool arm64_early_this_cpu_has_bti(void)
pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1);
return cpuid_feature_extract_unsigned_field(pfr1,
- ID_AA64PFR1_BT_SHIFT);
+ ID_AA64PFR1_EL1_BT_SHIFT);
}
/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 7837a69524c5..5f7784ee6044 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -434,8 +434,8 @@ SYM_FUNC_START(__cpu_setup)
* (ID_AA64PFR1_EL1[11:8] > 1).
*/
mrs x10, ID_AA64PFR1_EL1
- ubfx x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4
- cmp x10, #ID_AA64PFR1_MTE
+ ubfx x10, x10, #ID_AA64PFR1_EL1_MTE_SHIFT, #4
+ cmp x10, #ID_AA64PFR1_EL1_MTE_MTE2
b.lt 1f
/* Normal Tagged memory type at the corresponding MAIR index */
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 9ae483ec1e56..7f1fb36f208c 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -46,6 +46,127 @@
# feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration
# item ACCDATA) though it may be more taseful to do something else.
+Sysreg ID_AA64PFR0_EL1 3 0 0 4 0
+Enum 63:60 CSV3
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 59:56 CSV2
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 CSV2_2
+ 0b0011 CSV2_3
+EndEnum
+Enum 55:52 RME
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 51:48 DIT
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 47:44 AMU
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 V1P1
+EndEnum
+Enum 43:40 MPAM
+ 0b0000 0
+ 0b0001 1
+EndEnum
+Enum 39:36 SEL2
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 35:32 SVE
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 31:28 RAS
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 V1P1
+EndEnum
+Enum 27:24 GIC
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 V4P1
+EndEnum
+Enum 23:20 AdvSIMD
+ 0b0000 IMP
+ 0b0001 FP16
+ 0b1111 NI
+EndEnum
+Enum 19:16 FP
+ 0b0000 IMP
+ 0b0001 FP16
+ 0b1111 NI
+EndEnum
+Enum 15:12 EL3
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 AARCH32
+EndEnum
+Enum 11:8 EL2
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 AARCH32
+EndEnum
+Enum 7:4 EL1
+ 0b0001 IMP
+ 0b0010 AARCH32
+EndEnum
+Enum 3:0 EL0
+ 0b0001 IMP
+ 0b0010 AARCH32
+EndEnum
+EndSysreg
+
+Sysreg ID_AA64PFR1_EL1 3 0 0 4 1
+Res0 63:40
+Enum 39:36 NMI
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 35:32 CSV2_frac
+ 0b0000 NI
+ 0b0001 CSV2_1p1
+ 0b0010 CSV2_1p2
+EndEnum
+Enum 31:28 RNDR_trap
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 27:24 SME
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 23:20
+Enum 19:16 MPAM_frac
+ 0b0000 MINOR_0
+ 0b0001 MINOR_1
+EndEnum
+Enum 15:12 RAS_frac
+ 0b0000 NI
+ 0b0001 RASv1p1
+EndEnum
+Enum 11:8 MTE
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 MTE2
+ 0b0011 MTE3
+EndEnum
+Enum 7:4 SSBS
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 SSBS2
+EndEnum
+Enum 3:0 BT
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+EndSysreg
+
Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4
Res0 63:60
Enum 59:56 F64MM
@@ -98,7 +219,9 @@ Enum 63 FA64
0b1 IMP
EndEnum
Res0 62:60
-Field 59:56 SMEver
+Enum 59:56 SMEver
+ 0b0000 IMP
+EndEnum
Enum 55:52 I16I64
0b0000 NI
0b1111 IMP
@@ -129,6 +252,89 @@ EndEnum
Res0 31:0
EndSysreg
+Sysreg ID_AA64DFR0_EL1 3 0 0 5 0
+Enum 63:60 HPMN0
+ 0b0000 UNPREDICTABLE
+ 0b0001 DEF
+EndEnum
+Res0 59:56
+Enum 55:52 BRBE
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 BRBE_V1P1
+EndEnum
+Enum 51:48 MTPMU
+ 0b0000 NI_IMPDEF
+ 0b0001 IMP
+ 0b1111 NI
+EndEnum
+Enum 47:44 TraceBuffer
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 43:40 TraceFilt
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 39:36 DoubleLock
+ 0b0000 IMP
+ 0b1111 NI
+EndEnum
+Enum 35:32 PMSVer
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 V1P1
+ 0b0011 V1P2
+ 0b0100 V1P3
+EndEnum
+Field 31:28 CTX_CMPs
+Res0 27:24
+Field 23:20 WRPs
+Res0 19:16
+Field 15:12 BRPs
+Enum 11:8 PMUVer
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0100 V3P1
+ 0b0101 V3P4
+ 0b0110 V3P5
+ 0b0111 V3P7
+ 0b1000 V3P8
+ 0b1111 IMP_DEF
+EndEnum
+Enum 7:4 TraceVer
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 3:0 DebugVer
+ 0b0110 IMP
+ 0b0111 VHE
+ 0b1000 V8P2
+ 0b1001 V8P4
+ 0b1010 V8P8
+EndEnum
+EndSysreg
+
+Sysreg ID_AA64DFR1_EL1 3 0 0 5 1
+Res0 63:0
+EndSysreg
+
+Sysreg ID_AA64AFR0_EL1 3 0 0 5 4
+Res0 63:32
+Field 31:28 IMPDEF7
+Field 27:24 IMPDEF6
+Field 23:20 IMPDEF5
+Field 19:16 IMPDEF4
+Field 15:12 IMPDEF3
+Field 11:8 IMPDEF2
+Field 7:4 IMPDEF1
+Field 3:0 IMPDEF0
+EndSysreg
+
+Sysreg ID_AA64AFR1_EL1 3 0 0 5 5
+Res0 63:0
+EndSysreg
+
Sysreg ID_AA64ISAR0_EL1 3 0 0 6 0
Enum 63:60 RNDR
0b0000 NI
@@ -313,6 +519,217 @@ Enum 3:0 WFxT
EndEnum
EndSysreg
+Sysreg ID_AA64MMFR0_EL1 3 0 0 7 0
+Enum 63:60 ECV
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 CNTPOFF
+EndEnum
+Enum 59:56 FGT
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 55:48
+Enum 47:44 EXS
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 43:40 TGRAN4_2
+ 0b0000 TGRAN4
+ 0b0001 NI
+ 0b0010 IMP
+ 0b0011 52_BIT
+EndEnum
+Enum 39:36 TGRAN64_2
+ 0b0000 TGRAN64
+ 0b0001 NI
+ 0b0010 IMP
+EndEnum
+Enum 35:32 TGRAN16_2
+ 0b0000 TGRAN16
+ 0b0001 NI
+ 0b0010 IMP
+ 0b0011 52_BIT
+EndEnum
+Enum 31:28 TGRAN4
+ 0b0000 IMP
+ 0b0001 52_BIT
+ 0b1111 NI
+EndEnum
+Enum 27:24 TGRAN64
+ 0b0000 IMP
+ 0b1111 NI
+EndEnum
+Enum 23:20 TGRAN16
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 52_BIT
+EndEnum
+Enum 19:16 BIGENDEL0
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 15:12 SNSMEM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 11:8 BIGEND
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 7:4 ASIDBITS
+ 0b0000 8
+ 0b0010 16
+EndEnum
+Enum 3:0 PARANGE
+ 0b0000 32
+ 0b0001 36
+ 0b0010 40
+ 0b0011 42
+ 0b0100 44
+ 0b0101 48
+ 0b0110 52
+EndEnum
+EndSysreg
+
+Sysreg ID_AA64MMFR1_EL1 3 0 0 7 1
+Enum 63:60 ECBHB
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 59:56 CMOW
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 55:52 TIDCP1
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 51:48 nTLBPA
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 47:44 AFP
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 43:40 HCX
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 39:36 ETS
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 35:32 TWED
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 31:28 XNX
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 27:24 SpecSEI
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 23:20 PAN
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 PAN2
+ 0b0011 PAN3
+EndEnum
+Enum 19:16 LO
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 15:12 HPDS
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 HPDS2
+EndEnum
+Enum 11:8 VH
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 7:4 VMIDBits
+ 0b0000 8
+ 0b0010 16
+EndEnum
+Enum 3:0 HAFDBS
+ 0b0000 NI
+ 0b0001 AF
+ 0b0010 DBM
+EndEnum
+EndSysreg
+
+Sysreg ID_AA64MMFR2_EL1 3 0 0 7 2
+Enum 63:60 E0PD
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 59:56 EVT
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 TTLBxS
+EndEnum
+Enum 55:52 BBM
+ 0b0000 0
+ 0b0001 1
+ 0b0010 2
+EndEnum
+Enum 51:48 TTL
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 47:44
+Enum 43:40 FWB
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 39:36 IDS
+ 0b0000 0x0
+ 0b0001 0x18
+EndEnum
+Enum 35:32 AT
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 31:28 ST
+ 0b0000 39
+ 0b0001 48_47
+EndEnum
+Enum 27:24 NV
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 NV2
+EndEnum
+Enum 23:20 CCIDX
+ 0b0000 32
+ 0b0001 64
+EndEnum
+Enum 19:16 VARange
+ 0b0000 48
+ 0b0001 52
+EndEnum
+Enum 15:12 IESB
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 11:8 LSM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 7:4 UAO
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Enum 3:0 CnP
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+EndSysreg
+
Sysreg SCTLR_EL1 3 0 1 0 0
Field 63 TIDCP
Field 62 SPINMASK
@@ -427,6 +844,12 @@ Sysreg SMCR_EL1 3 0 1 2 6
Fields SMCR_ELx
EndSysreg
+Sysreg ALLINT 3 0 4 3 0
+Res0 63:14
+Field 13 ALLINT
+Res0 12:0
+EndSysreg
+
Sysreg FAR_EL1 3 0 6 0 0
Field 63:0 ADDR
EndSysreg
@@ -440,6 +863,14 @@ Sysreg CONTEXTIDR_EL1 3 0 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
+Sysreg TPIDR_EL1 3 0 13 0 4
+Field 63:0 ThreadID
+EndSysreg
+
+Sysreg SCXTNUM_EL1 3 0 13 0 7
+Field 63:0 SoftwareContextNumber
+EndSysreg
+
Sysreg CLIDR_EL1 3 1 0 0 1
Res0 63:47
Field 46:33 Ttypen
@@ -514,6 +945,22 @@ Sysreg ZCR_EL2 3 4 1 2 0
Fields ZCR_ELx
EndSysreg
+Sysreg HCRX_EL2 3 4 1 2 2
+Res0 63:12
+Field 11 MSCEn
+Field 10 MCE2
+Field 9 CMOW
+Field 8 VFNMI
+Field 7 VINMI
+Field 6 TALLINT
+Field 5 SMPME
+Field 4 FGTnXS
+Field 3 FnXS
+Field 2 EnASR
+Field 1 EnALS
+Field 0 EnAS0
+EndSysreg
+
Sysreg SMPRIMAP_EL2 3 4 1 2 5
Field 63:60 P15
Field 59:56 P14
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index b494d8d39290..edaec93a1a1f 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -955,13 +955,11 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
kvm_vcpu_halt(vcpu);
/*
- * We we are runnable, then definitely go off to user space to
+ * We are runnable, then definitely go off to user space to
* check if any I/O interrupts are pending.
*/
- if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ if (kvm_arch_vcpu_runnable(vcpu))
vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
- }
}
return EMULATE_DONE;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d6abed6e51e6..9fc4dd8f66eb 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -499,7 +499,6 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
if (msr & MSR_POW) {
if (!vcpu->arch.pending_exceptions) {
kvm_vcpu_halt(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
vcpu->stat.generic.halt_wakeup++;
/* Unset POW bit after we woke up */
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index a1f2978b2a86..b2c89e850d7a 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -393,7 +393,6 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_halt(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
vcpu->stat.generic.halt_wakeup++;
return EMULATE_DONE;
case H_LOGICAL_CI_LOAD:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 06c5830a93f9..7b4920e9fd26 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -719,7 +719,6 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_WE) {
local_irq_enable();
kvm_vcpu_halt(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
hard_irq_disable();
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index fb1490761c87..ec9c1e3c2ff4 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -239,7 +239,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
case EV_HCALL_TOKEN(EV_IDLE):
r = EV_SUCCESS;
kvm_vcpu_halt(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
break;
default:
r = EV_UNIMPLEMENTED;
diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c
index 7eb90a47b571..0bb52761a3f7 100644
--- a/arch/riscv/kvm/vcpu_insn.c
+++ b/arch/riscv/kvm/vcpu_insn.c
@@ -191,7 +191,6 @@ void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
kvm_vcpu_srcu_read_unlock(vcpu);
kvm_vcpu_halt(vcpu);
kvm_vcpu_srcu_read_lock(vcpu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b7ef0b71014d..45d4b8182b07 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4343,8 +4343,6 @@ retry:
goto retry;
}
- /* nothing to do, just clear the request */
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/* we left the vsie handler, nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 0a9407dc0859..3089ec352743 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -138,6 +138,9 @@
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
+/* Nested features #2. These are HYPERV_CPUID_NESTED_FEATURES.EBX bits. */
+#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL BIT(0)
+
/*
* This is specific to AMD and specifies that enlightened TLB flush is
* supported. If guest opts in to this feature, ASID invalidations only
@@ -546,7 +549,7 @@ struct hv_enlightened_vmcs {
u64 guest_rip;
u32 hv_clean_fields;
- u32 hv_padding_32;
+ u32 padding32_1;
u32 hv_synthetic_controls;
struct {
u32 nested_flush_hypercall:1;
@@ -554,14 +557,25 @@ struct hv_enlightened_vmcs {
u32 reserved:30;
} __packed hv_enlightenments_control;
u32 hv_vp_id;
-
+ u32 padding32_2;
u64 hv_vm_id;
u64 partition_assist_page;
u64 padding64_4[4];
u64 guest_bndcfgs;
- u64 padding64_5[7];
+ u64 guest_ia32_perf_global_ctrl;
+ u64 guest_ia32_s_cet;
+ u64 guest_ssp;
+ u64 guest_ia32_int_ssp_table_addr;
+ u64 guest_ia32_lbr_ctl;
+ u64 padding64_5[2];
u64 xss_exit_bitmap;
- u64 padding64_6[7];
+ u64 encls_exiting_bitmap;
+ u64 host_ia32_perf_global_ctrl;
+ u64 tsc_multiplier;
+ u64 host_ia32_s_cet;
+ u64 host_ssp;
+ u64 host_ia32_int_ssp_table_addr;
+ u64 padding64_6;
} __packed;
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 51f777071584..82ba4a564e58 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -67,7 +67,7 @@ KVM_X86_OP(get_interrupt_shadow)
KVM_X86_OP(patch_hypercall)
KVM_X86_OP(inject_irq)
KVM_X86_OP(inject_nmi)
-KVM_X86_OP(queue_exception)
+KVM_X86_OP(inject_exception)
KVM_X86_OP(cancel_injection)
KVM_X86_OP(interrupt_allowed)
KVM_X86_OP(nmi_allowed)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aa381ab69a19..7551b6f9c31c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -615,6 +615,8 @@ struct kvm_vcpu_hv {
u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+ u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */
+ u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
} cpuid_cache;
};
@@ -639,6 +641,16 @@ struct kvm_vcpu_xen {
struct timer_list poll_timer;
};
+struct kvm_queued_exception {
+ bool pending;
+ bool injected;
+ bool has_error_code;
+ u8 vector;
+ u32 error_code;
+ unsigned long payload;
+ bool has_payload;
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -738,16 +750,12 @@ struct kvm_vcpu_arch {
u8 event_exit_inst_len;
- struct kvm_queued_exception {
- bool pending;
- bool injected;
- bool has_error_code;
- u8 nr;
- u32 error_code;
- unsigned long payload;
- bool has_payload;
- u8 nested_apf;
- } exception;
+ bool exception_from_userspace;
+
+ /* Exceptions to be injected to the guest. */
+ struct kvm_queued_exception exception;
+ /* Exception VM-Exits to be synthesized to L1. */
+ struct kvm_queued_exception exception_vmexit;
struct kvm_queued_interrupt {
bool injected;
@@ -858,7 +866,6 @@ struct kvm_vcpu_arch {
u32 id;
bool send_user_only;
u32 host_apf_flags;
- unsigned long nested_apf_token;
bool delivery_as_pf_vmexit;
bool pageready_pending;
} apf;
@@ -1273,8 +1280,8 @@ struct kvm_arch {
bool tdp_mmu_enabled;
/*
- * List of struct kvm_mmu_pages being used as roots.
- * All struct kvm_mmu_pages in the list should have
+ * List of kvm_mmu_page structs being used as roots.
+ * All kvm_mmu_page structs in the list should have
* tdp_mmu_page set.
*
* For reads, this list is protected by:
@@ -1293,8 +1300,8 @@ struct kvm_arch {
struct list_head tdp_mmu_roots;
/*
- * List of struct kvmp_mmu_pages not being used as roots.
- * All struct kvm_mmu_pages in the list should have
+ * List of kvm_mmu_page structs not being used as roots.
+ * All kvm_mmu_page structs in the list should have
* tdp_mmu_page set and a tdp_mmu_root_count of 0.
*/
struct list_head tdp_mmu_pages;
@@ -1304,9 +1311,9 @@ struct kvm_arch {
* is held in read mode:
* - tdp_mmu_roots (above)
* - tdp_mmu_pages (above)
- * - the link field of struct kvm_mmu_pages used by the TDP MMU
+ * - the link field of kvm_mmu_page structs used by the TDP MMU
* - lpage_disallowed_mmu_pages
- * - the lpage_disallowed_link field of struct kvm_mmu_pages used
+ * - the lpage_disallowed_link field of kvm_mmu_page structs used
* by the TDP MMU
* It is acceptable, but not necessary, to acquire this lock when
* the thread holds the MMU lock in write mode.
@@ -1524,7 +1531,7 @@ struct kvm_x86_ops {
unsigned char *hypercall_addr);
void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected);
void (*inject_nmi)(struct kvm_vcpu *vcpu);
- void (*queue_exception)(struct kvm_vcpu *vcpu);
+ void (*inject_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
@@ -1634,10 +1641,10 @@ struct kvm_x86_ops {
struct kvm_x86_nested_ops {
void (*leave_nested)(struct kvm_vcpu *vcpu);
+ bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector,
+ u32 error_code);
int (*check_events)(struct kvm_vcpu *vcpu);
- bool (*handle_page_fault_workaround)(struct kvm_vcpu *vcpu,
- struct x86_exception *fault);
- bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+ bool (*has_events)(struct kvm_vcpu *vcpu);
void (*triple_fault)(struct kvm_vcpu *vcpu);
int (*get_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
@@ -1863,7 +1870,7 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long pay
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
-bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index c371ef695fcc..498dc600bd5c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -309,7 +309,7 @@ enum vmcs_field {
GUEST_LDTR_AR_BYTES = 0x00004820,
GUEST_TR_AR_BYTES = 0x00004822,
GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
- GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_ACTIVITY_STATE = 0x00004826,
GUEST_SYSENTER_CS = 0x0000482A,
VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
HOST_IA32_SYSENTER_CS = 0x00004c00,
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index e3cbd7706136..67be7f217e37 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -28,7 +28,8 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_PFNCACHE
select HAVE_KVM_IRQFD
- select HAVE_KVM_DIRTY_RING
+ select HAVE_KVM_DIRTY_RING_TSO
+ select HAVE_KVM_DIRTY_RING_ACQ_REL
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 4c1c2c06e96b..7065462378e2 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -311,6 +311,15 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
+static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE,
+ KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX;
+}
+
static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -346,7 +355,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.cr4_guest_rsvd_bits =
__cr4_reserved_bits(guest_cpuid_has, vcpu);
- kvm_hv_set_cpuid(vcpu);
+ kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent));
/* Invoke the vendor callback only after the above state is updated. */
static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
@@ -409,6 +419,12 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
return 0;
}
+ if (kvm_cpuid_has_hyperv(e2, nent)) {
+ r = kvm_hv_vcpu_init(vcpu);
+ if (r)
+ return r;
+ }
+
r = kvm_check_cpuid(vcpu, e2, nent);
if (r)
return r;
@@ -902,8 +918,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->edx = 0;
}
break;
- case 9:
- break;
case 0xa: { /* Architectural Performance Monitoring */
union cpuid10_eax eax;
union cpuid10_edx edx;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index aacb28c83e43..3b27622d4642 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1137,9 +1137,11 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
struct operand *op)
{
- unsigned reg = ctxt->modrm_reg;
+ unsigned int reg;
- if (!(ctxt->d & ModRM))
+ if (ctxt->d & ModRM)
+ reg = ctxt->modrm_reg;
+ else
reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
if (ctxt->d & Sse) {
@@ -1953,7 +1955,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- if (ctxt->modrm_reg == VCPU_SREG_SS)
+ if (seg == VCPU_SREG_SS)
ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
if (ctxt->op_bytes > 2)
rsp_increment(ctxt, ctxt->op_bytes - 2);
@@ -3645,13 +3647,10 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
- if (r == X86EMUL_IO_NEEDED)
- return r;
-
- if (r > 0)
+ if (r == X86EMUL_PROPAGATE_FAULT)
return emulate_gp(ctxt, 0);
- return r < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
+ return r;
}
static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
@@ -3662,15 +3661,14 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
- if (r == X86EMUL_IO_NEEDED)
- return r;
-
- if (r)
+ if (r == X86EMUL_PROPAGATE_FAULT)
return emulate_gp(ctxt, 0);
- *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
- *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
- return X86EMUL_CONTINUE;
+ if (r == X86EMUL_CONTINUE) {
+ *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
+ *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
+ }
+ return r;
}
static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
@@ -4171,8 +4169,7 @@ static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
ctxt->ops->get_dr(ctxt, 7, &dr7);
- /* Check if DR7.Global_Enable is set */
- return dr7 & (1 << 13);
+ return dr7 & DR7_GD;
}
static int check_dr_read(struct x86_emulate_ctxt *ctxt)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ed804447589c..0adf4a437e85 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -38,9 +38,6 @@
#include "irq.h"
#include "fpu.h"
-/* "Hv#1" signature */
-#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
-
#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
@@ -934,11 +931,14 @@ static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
stimer_prepare_msg(stimer);
}
-static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
+int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu_hv *hv_vcpu;
+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
int i;
+ if (hv_vcpu)
+ return 0;
+
hv_vcpu = kzalloc(sizeof(struct kvm_vcpu_hv), GFP_KERNEL_ACCOUNT);
if (!hv_vcpu)
return -ENOMEM;
@@ -962,11 +962,9 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
struct kvm_vcpu_hv_synic *synic;
int r;
- if (!to_hv_vcpu(vcpu)) {
- r = kvm_hv_vcpu_init(vcpu);
- if (r)
- return r;
- }
+ r = kvm_hv_vcpu_init(vcpu);
+ if (r)
+ return r;
synic = to_hv_synic(vcpu);
@@ -1660,10 +1658,8 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
if (!host && !vcpu->arch.hyperv_enabled)
return 1;
- if (!to_hv_vcpu(vcpu)) {
- if (kvm_hv_vcpu_init(vcpu))
- return 1;
- }
+ if (kvm_hv_vcpu_init(vcpu))
+ return 1;
if (kvm_hv_msr_partition_wide(msr)) {
int r;
@@ -1683,10 +1679,8 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
if (!host && !vcpu->arch.hyperv_enabled)
return 1;
- if (!to_hv_vcpu(vcpu)) {
- if (kvm_hv_vcpu_init(vcpu))
- return 1;
- }
+ if (kvm_hv_vcpu_init(vcpu))
+ return 1;
if (kvm_hv_msr_partition_wide(msr)) {
int r;
@@ -1987,49 +1981,49 @@ ret_success:
return HV_STATUS_SUCCESS;
}
-void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
+void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled)
{
+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct kvm_cpuid_entry2 *entry;
- struct kvm_vcpu_hv *hv_vcpu;
- entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE);
- if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) {
- vcpu->arch.hyperv_enabled = true;
- } else {
- vcpu->arch.hyperv_enabled = false;
+ vcpu->arch.hyperv_enabled = hyperv_enabled;
+
+ if (!hv_vcpu) {
+ /*
+ * KVM should have already allocated kvm_vcpu_hv if Hyper-V is
+ * enabled in CPUID.
+ */
+ WARN_ON_ONCE(vcpu->arch.hyperv_enabled);
return;
}
- if (!to_hv_vcpu(vcpu) && kvm_hv_vcpu_init(vcpu))
- return;
+ memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache));
- hv_vcpu = to_hv_vcpu(vcpu);
+ if (!vcpu->arch.hyperv_enabled)
+ return;
entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
if (entry) {
hv_vcpu->cpuid_cache.features_eax = entry->eax;
hv_vcpu->cpuid_cache.features_ebx = entry->ebx;
hv_vcpu->cpuid_cache.features_edx = entry->edx;
- } else {
- hv_vcpu->cpuid_cache.features_eax = 0;
- hv_vcpu->cpuid_cache.features_ebx = 0;
- hv_vcpu->cpuid_cache.features_edx = 0;
}
entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
if (entry) {
hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax;
hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx;
- } else {
- hv_vcpu->cpuid_cache.enlightenments_eax = 0;
- hv_vcpu->cpuid_cache.enlightenments_ebx = 0;
}
entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
if (entry)
hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax;
- else
- hv_vcpu->cpuid_cache.syndbg_cap_eax = 0;
+
+ entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_NESTED_FEATURES);
+ if (entry) {
+ hv_vcpu->cpuid_cache.nested_eax = entry->eax;
+ hv_vcpu->cpuid_cache.nested_ebx = entry->ebx;
+ }
}
int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce)
@@ -2552,7 +2546,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
case HYPERV_CPUID_NESTED_FEATURES:
ent->eax = evmcs_ver;
ent->eax |= HV_X64_NESTED_MSR_BITMAP;
-
+ ent->ebx |= HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL;
break;
case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index da2737f2a956..1030b1b50552 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -23,6 +23,9 @@
#include <linux/kvm_host.h>
+/* "Hv#1" signature */
+#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
+
/*
* The #defines related to the synthetic debugger are required by KDNet, but
* they are not documented in the Hyper-V TLFS because the synthetic debugger
@@ -141,7 +144,8 @@ void kvm_hv_request_tsc_page_update(struct kvm *kvm);
void kvm_hv_init_vm(struct kvm *kvm);
void kvm_hv_destroy_vm(struct kvm *kvm);
-void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu);
+int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled);
int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce);
int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9dda989a1cf0..d7639d126e6c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -3025,17 +3025,8 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
u8 sipi_vector;
int r;
- unsigned long pe;
- if (!lapic_in_kernel(vcpu))
- return 0;
-
- /*
- * Read pending events before calling the check_events
- * callback.
- */
- pe = smp_load_acquire(&apic->pending_events);
- if (!pe)
+ if (!kvm_apic_has_pending_init_or_sipi(vcpu))
return 0;
if (is_guest_mode(vcpu)) {
@@ -3043,38 +3034,31 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
if (r < 0)
return r == -EBUSY ? 0 : r;
/*
- * If an event has happened and caused a vmexit,
- * we know INITs are latched and therefore
- * we will not incorrectly deliver an APIC
- * event instead of a vmexit.
+ * Continue processing INIT/SIPI even if a nested VM-Exit
+ * occurred, e.g. pending SIPIs should be dropped if INIT+SIPI
+ * are blocked as a result of transitioning to VMX root mode.
*/
}
/*
- * INITs are latched while CPU is in specific states
- * (SMM, VMX root mode, SVM with GIF=0).
- * Because a CPU cannot be in these states immediately
- * after it has processed an INIT signal (and thus in
- * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
- * and leave the INIT pending.
+ * INITs are blocked while CPU is in specific states (SMM, VMX root
+ * mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in
+ * wait-for-SIPI (WFS).
*/
- if (kvm_vcpu_latch_init(vcpu)) {
+ if (!kvm_apic_init_sipi_allowed(vcpu)) {
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
- if (test_bit(KVM_APIC_SIPI, &pe))
- clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+ clear_bit(KVM_APIC_SIPI, &apic->pending_events);
return 0;
}
- if (test_bit(KVM_APIC_INIT, &pe)) {
- clear_bit(KVM_APIC_INIT, &apic->pending_events);
+ if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
}
- if (test_bit(KVM_APIC_SIPI, &pe)) {
- clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+ if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
/* evaluate pending_events before reading the vector */
smp_rmb();
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 117a46df5cc1..a5ac4a5a5179 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -7,6 +7,7 @@
#include <linux/kvm_host.h>
#include "hyperv.h"
+#include "kvm_cache_regs.h"
#define KVM_APIC_INIT 0
#define KVM_APIC_SIPI 1
@@ -223,11 +224,17 @@ static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu)
return lapic_in_kernel(vcpu) && vcpu->arch.apic->apicv_active;
}
-static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
+static inline bool kvm_apic_has_pending_init_or_sipi(struct kvm_vcpu *vcpu)
{
return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events;
}
+static inline bool kvm_apic_init_sipi_allowed(struct kvm_vcpu *vcpu)
+{
+ return !is_smm(vcpu) &&
+ !static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
+}
+
static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
{
return (irq->delivery_mode == APIC_DM_LOWEST ||
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3552e6af3684..6f81539061d6 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1667,6 +1667,18 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
}
+static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ kvm_mod_used_mmu_pages(kvm, +1);
+ kvm_account_pgtable_pages((void *)sp->spt, +1);
+}
+
+static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ kvm_mod_used_mmu_pages(kvm, -1);
+ kvm_account_pgtable_pages((void *)sp->spt, -1);
+}
+
static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
{
MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
@@ -2124,7 +2136,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
*/
sp->mmu_valid_gen = kvm->arch.mmu_valid_gen;
list_add(&sp->link, &kvm->arch.active_mmu_pages);
- kvm_mod_used_mmu_pages(kvm, +1);
+ kvm_account_mmu_page(kvm, sp);
sp->gfn = gfn;
sp->role = role;
@@ -2458,7 +2470,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
list_add(&sp->link, invalid_list);
else
list_move(&sp->link, invalid_list);
- kvm_mod_used_mmu_pages(kvm, -1);
+ kvm_unaccount_mmu_page(kvm, sp);
} else {
/*
* Remove the active root from the active page list, the root
@@ -4292,7 +4304,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
vcpu->arch.l1tf_flush_l1d = true;
if (!flags) {
- trace_kvm_page_fault(fault_address, error_code);
+ trace_kvm_page_fault(vcpu, fault_address, error_code);
if (kvm_event_needs_reinjection(vcpu))
kvm_mmu_unprotect_page_virt(vcpu, fault_address);
@@ -6704,10 +6716,12 @@ int kvm_mmu_vendor_module_init(void)
ret = register_shrinker(&mmu_shrinker, "x86-mmu");
if (ret)
- goto out;
+ goto out_shrinker;
return 0;
+out_shrinker:
+ percpu_counter_destroy(&kvm_total_used_mmu_pages);
out:
mmu_destroy_caches();
return ret;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 39e0205e7300..5ab5f94dcb6f 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -472,7 +472,7 @@ error:
#if PTTYPE == PTTYPE_EPT
/*
- * Use PFERR_RSVD_MASK in error_code to to tell if EPT
+ * Use PFERR_RSVD_MASK in error_code to tell if EPT
* misconfiguration requires to be injected. The detection is
* done by is_rsvd_bits_set() above.
*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index bf2ccf9debca..672f0432d777 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -372,6 +372,16 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
}
}
+static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ kvm_account_pgtable_pages((void *)sp->spt, +1);
+}
+
+static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ kvm_account_pgtable_pages((void *)sp->spt, -1);
+}
+
/**
* tdp_mmu_unlink_sp() - Remove a shadow page from the list of used pages
*
@@ -384,6 +394,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
bool shared)
{
+ tdp_unaccount_mmu_page(kvm, sp);
if (shared)
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
else
@@ -1132,6 +1143,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
if (account_nx)
account_huge_nx_page(kvm, sp);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+ tdp_account_mmu_page(kvm, sp);
return 0;
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 02f9e4f245bd..d9b9a0f0db17 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -106,9 +106,19 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
return;
if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
- /* Indicate PEBS overflow PMI to guest. */
- skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
- (unsigned long *)&pmu->global_status);
+ if (!in_pmi) {
+ /*
+ * TODO: KVM is currently _choosing_ to not generate records
+ * for emulated instructions, avoiding BUFFER_OVF PMI when
+ * there are no records. Strictly speaking, it should be done
+ * as well in the right context to improve sampling accuracy.
+ */
+ skip_pmi = true;
+ } else {
+ /* Indicate PEBS overflow PMI to guest. */
+ skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
+ (unsigned long *)&pmu->global_status);
+ }
} else {
__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
}
@@ -227,8 +237,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
get_sample_period(pmc, pmc->counter)))
return false;
- if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
- pmc->perf_event->attr.precise_ip)
+ if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) !=
+ (!!pmc->perf_event->attr.precise_ip))
return false;
/* reuse perf_event to serve as pmc_reprogram_counter() does*/
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 76dcc8a3e849..4c620999d230 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -55,28 +55,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
nested_svm_vmexit(svm);
}
-static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb;
-
- WARN_ON(!is_guest_mode(vcpu));
-
- if (vmcb12_is_intercept(&svm->nested.ctl,
- INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
- !WARN_ON_ONCE(svm->nested.nested_run_pending)) {
- vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
- vmcb->control.exit_code_hi = 0;
- vmcb->control.exit_info_1 = fault->error_code;
- vmcb->control.exit_info_2 = fault->address;
- nested_svm_vmexit(svm);
- return true;
- }
-
- return false;
-}
-
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -468,7 +446,7 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
unsigned int nr;
if (vcpu->arch.exception.injected) {
- nr = vcpu->arch.exception.nr;
+ nr = vcpu->arch.exception.vector;
exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
if (vcpu->arch.exception.has_error_code) {
@@ -781,11 +759,15 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
struct vcpu_svm *svm = to_svm(vcpu);
int ret;
- trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
- vmcb12->save.rip,
- vmcb12->control.int_ctl,
- vmcb12->control.event_inj,
- vmcb12->control.nested_ctl);
+ trace_kvm_nested_vmenter(svm->vmcb->save.rip,
+ vmcb12_gpa,
+ vmcb12->save.rip,
+ vmcb12->control.int_ctl,
+ vmcb12->control.event_inj,
+ vmcb12->control.nested_ctl,
+ vmcb12->control.nested_cr3,
+ vmcb12->save.cr3,
+ KVM_ISA_SVM);
trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
@@ -1304,44 +1286,46 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
return 0;
}
-static bool nested_exit_on_exception(struct vcpu_svm *svm)
+static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
+ u32 error_code)
{
- unsigned int nr = svm->vcpu.arch.exception.nr;
+ struct vcpu_svm *svm = to_svm(vcpu);
- return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr));
+ return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector));
}
-static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
+static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
{
- unsigned int nr = svm->vcpu.arch.exception.nr;
+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
+ struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
+ vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector;
vmcb->control.exit_code_hi = 0;
- if (svm->vcpu.arch.exception.has_error_code)
- vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code;
+ if (ex->has_error_code)
+ vmcb->control.exit_info_1 = ex->error_code;
/*
* EXITINFO2 is undefined for all exception intercepts other
* than #PF.
*/
- if (nr == PF_VECTOR) {
- if (svm->vcpu.arch.exception.nested_apf)
- vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
- else if (svm->vcpu.arch.exception.has_payload)
- vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
+ if (ex->vector == PF_VECTOR) {
+ if (ex->has_payload)
+ vmcb->control.exit_info_2 = ex->payload;
else
- vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
- } else if (nr == DB_VECTOR) {
- /* See inject_pending_event. */
- kvm_deliver_exception_payload(&svm->vcpu);
- if (svm->vcpu.arch.dr7 & DR7_GD) {
- svm->vcpu.arch.dr7 &= ~DR7_GD;
- kvm_update_dr7(&svm->vcpu);
+ vmcb->control.exit_info_2 = vcpu->arch.cr2;
+ } else if (ex->vector == DB_VECTOR) {
+ /* See kvm_check_and_inject_events(). */
+ kvm_deliver_exception_payload(vcpu, ex);
+
+ if (vcpu->arch.dr7 & DR7_GD) {
+ vcpu->arch.dr7 &= ~DR7_GD;
+ kvm_update_dr7(vcpu);
}
- } else
- WARN_ON(svm->vcpu.arch.exception.has_payload);
+ } else {
+ WARN_ON(ex->has_payload);
+ }
nested_svm_vmexit(svm);
}
@@ -1353,10 +1337,22 @@ static inline bool nested_exit_on_init(struct vcpu_svm *svm)
static int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
- struct vcpu_svm *svm = to_svm(vcpu);
- bool block_nested_events =
- kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending;
struct kvm_lapic *apic = vcpu->arch.apic;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ /*
+ * Only a pending nested run blocks a pending exception. If there is a
+ * previously injected event, the pending exception occurred while said
+ * event was being delivered and thus needs to be handled.
+ */
+ bool block_nested_exceptions = svm->nested.nested_run_pending;
+ /*
+ * New events (not exceptions) are only recognized at instruction
+ * boundaries. If an event needs reinjection, then KVM is handling a
+ * VM-Exit that occurred _during_ instruction execution; new events are
+ * blocked until the instruction completes.
+ */
+ bool block_nested_events = block_nested_exceptions ||
+ kvm_event_needs_reinjection(vcpu);
if (lapic_in_kernel(vcpu) &&
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
@@ -1368,18 +1364,16 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
return 0;
}
- if (vcpu->arch.exception.pending) {
- /*
- * Only a pending nested run can block a pending exception.
- * Otherwise an injected NMI/interrupt should either be
- * lost or delivered to the nested hypervisor in the EXITINTINFO
- * vmcb field, while delivering the pending exception.
- */
- if (svm->nested.nested_run_pending)
+ if (vcpu->arch.exception_vmexit.pending) {
+ if (block_nested_exceptions)
return -EBUSY;
- if (!nested_exit_on_exception(svm))
- return 0;
- nested_svm_inject_exception_vmexit(svm);
+ nested_svm_inject_exception_vmexit(vcpu);
+ return 0;
+ }
+
+ if (vcpu->arch.exception.pending) {
+ if (block_nested_exceptions)
+ return -EBUSY;
return 0;
}
@@ -1720,8 +1714,8 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
struct kvm_x86_nested_ops svm_nested_ops = {
.leave_nested = svm_leave_nested,
+ .is_exception_vmexit = nested_svm_is_exception_vmexit,
.check_events = svm_check_nested_events,
- .handle_page_fault_workaround = nested_svm_handle_page_fault_workaround,
.triple_fault = nested_svm_triple_fault,
.get_nested_state_pages = svm_get_nested_state_pages,
.get_state = svm_get_nested_state,
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index f24613a108c5..b68956299fa8 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -23,107 +23,52 @@ enum pmu_type {
PMU_TYPE_EVNTSEL,
};
-enum index {
- INDEX_ZERO = 0,
- INDEX_ONE,
- INDEX_TWO,
- INDEX_THREE,
- INDEX_FOUR,
- INDEX_FIVE,
- INDEX_ERROR,
-};
-
-static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
+static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
- struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+ unsigned int num_counters = pmu->nr_arch_gp_counters;
- if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
- if (type == PMU_TYPE_COUNTER)
- return MSR_F15H_PERF_CTR;
- else
- return MSR_F15H_PERF_CTL;
- } else {
- if (type == PMU_TYPE_COUNTER)
- return MSR_K7_PERFCTR0;
- else
- return MSR_K7_EVNTSEL0;
- }
-}
+ if (pmc_idx >= num_counters)
+ return NULL;
-static enum index msr_to_index(u32 msr)
-{
- switch (msr) {
- case MSR_F15H_PERF_CTL0:
- case MSR_F15H_PERF_CTR0:
- case MSR_K7_EVNTSEL0:
- case MSR_K7_PERFCTR0:
- return INDEX_ZERO;
- case MSR_F15H_PERF_CTL1:
- case MSR_F15H_PERF_CTR1:
- case MSR_K7_EVNTSEL1:
- case MSR_K7_PERFCTR1:
- return INDEX_ONE;
- case MSR_F15H_PERF_CTL2:
- case MSR_F15H_PERF_CTR2:
- case MSR_K7_EVNTSEL2:
- case MSR_K7_PERFCTR2:
- return INDEX_TWO;
- case MSR_F15H_PERF_CTL3:
- case MSR_F15H_PERF_CTR3:
- case MSR_K7_EVNTSEL3:
- case MSR_K7_PERFCTR3:
- return INDEX_THREE;
- case MSR_F15H_PERF_CTL4:
- case MSR_F15H_PERF_CTR4:
- return INDEX_FOUR;
- case MSR_F15H_PERF_CTL5:
- case MSR_F15H_PERF_CTR5:
- return INDEX_FIVE;
- default:
- return INDEX_ERROR;
- }
+ return &pmu->gp_counters[array_index_nospec(pmc_idx, num_counters)];
}
static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
enum pmu_type type)
{
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+ unsigned int idx;
if (!vcpu->kvm->arch.enable_pmu)
return NULL;
switch (msr) {
- case MSR_F15H_PERF_CTL0:
- case MSR_F15H_PERF_CTL1:
- case MSR_F15H_PERF_CTL2:
- case MSR_F15H_PERF_CTL3:
- case MSR_F15H_PERF_CTL4:
- case MSR_F15H_PERF_CTL5:
+ case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
return NULL;
- fallthrough;
+ /*
+ * Each PMU counter has a pair of CTL and CTR MSRs. CTLn
+ * MSRs (accessed via EVNTSEL) are even, CTRn MSRs are odd.
+ */
+ idx = (unsigned int)((msr - MSR_F15H_PERF_CTL0) / 2);
+ if (!(msr & 0x1) != (type == PMU_TYPE_EVNTSEL))
+ return NULL;
+ break;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
if (type != PMU_TYPE_EVNTSEL)
return NULL;
+ idx = msr - MSR_K7_EVNTSEL0;
break;
- case MSR_F15H_PERF_CTR0:
- case MSR_F15H_PERF_CTR1:
- case MSR_F15H_PERF_CTR2:
- case MSR_F15H_PERF_CTR3:
- case MSR_F15H_PERF_CTR4:
- case MSR_F15H_PERF_CTR5:
- if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
- return NULL;
- fallthrough;
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
if (type != PMU_TYPE_COUNTER)
return NULL;
+ idx = msr - MSR_K7_PERFCTR0;
break;
default:
return NULL;
}
- return &pmu->gp_counters[msr_to_index(msr)];
+ return amd_pmc_idx_to_pmc(pmu, idx);
}
static bool amd_hw_event_available(struct kvm_pmc *pmc)
@@ -139,22 +84,6 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc)
return true;
}
-static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
-{
- unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER);
- struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
-
- if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
- /*
- * The idx is contiguous. The MSRs are not. The counter MSRs
- * are interleaved with the event select MSRs.
- */
- pmc_idx *= 2;
- }
-
- return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
-}
-
static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -168,15 +97,7 @@ static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
unsigned int idx, u64 *mask)
{
- struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- struct kvm_pmc *counters;
-
- idx &= ~(3u << 30);
- if (idx >= pmu->nr_arch_gp_counters)
- return NULL;
- counters = pmu->gp_counters;
-
- return &counters[idx];
+ return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
}
static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f3813dbacb9f..58f0077d9357 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -461,24 +461,22 @@ static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
return 0;
}
-static void svm_queue_exception(struct kvm_vcpu *vcpu)
+static void svm_inject_exception(struct kvm_vcpu *vcpu)
{
+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
struct vcpu_svm *svm = to_svm(vcpu);
- unsigned nr = vcpu->arch.exception.nr;
- bool has_error_code = vcpu->arch.exception.has_error_code;
- u32 error_code = vcpu->arch.exception.error_code;
- kvm_deliver_exception_payload(vcpu);
+ kvm_deliver_exception_payload(vcpu, ex);
- if (kvm_exception_is_soft(nr) &&
+ if (kvm_exception_is_soft(ex->vector) &&
svm_update_soft_interrupt_rip(vcpu))
return;
- svm->vmcb->control.event_inj = nr
+ svm->vmcb->control.event_inj = ex->vector
| SVM_EVTINJ_VALID
- | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
+ | (ex->has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
| SVM_EVTINJ_TYPE_EXEPT;
- svm->vmcb->control.event_inj_err = error_code;
+ svm->vmcb->control.event_inj_err = ex->error_code;
}
static void svm_init_erratum_383(void)
@@ -1975,7 +1973,7 @@ static int npf_interception(struct kvm_vcpu *vcpu)
u64 fault_address = svm->vmcb->control.exit_info_2;
u64 error_code = svm->vmcb->control.exit_info_1;
- trace_kvm_page_fault(fault_address, error_code);
+ trace_kvm_page_fault(vcpu, fault_address, error_code);
return kvm_mmu_page_fault(vcpu, fault_address, error_code,
static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
svm->vmcb->control.insn_bytes : NULL,
@@ -2341,7 +2339,8 @@ void svm_set_gif(struct vcpu_svm *svm, bool value)
enable_gif(svm);
if (svm->vcpu.arch.smi_pending ||
svm->vcpu.arch.nmi_pending ||
- kvm_cpu_has_injectable_intr(&svm->vcpu))
+ kvm_cpu_has_injectable_intr(&svm->vcpu) ||
+ kvm_apic_has_pending_init_or_sipi(&svm->vcpu))
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
} else {
disable_gif(svm);
@@ -3522,7 +3521,7 @@ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
/* Note, this is called iff the local APIC is in-kernel. */
if (!READ_ONCE(vcpu->arch.apic->apicv_active)) {
- /* Process the interrupt via inject_pending_event */
+ /* Process the interrupt via kvm_check_and_inject_events(). */
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
return;
@@ -4697,15 +4696,7 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- /*
- * TODO: Last condition latch INIT signals on vCPU when
- * vCPU is in guest-mode and vmcb12 defines intercept on INIT.
- * To properly emulate the INIT intercept,
- * svm_check_nested_events() should call nested_svm_vmexit()
- * if an INIT signal is pending.
- */
- return !gif_set(svm) ||
- (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
+ return !gif_set(svm);
}
static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
@@ -4798,7 +4789,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.patch_hypercall = svm_patch_hypercall,
.inject_irq = svm_inject_irq,
.inject_nmi = svm_inject_nmi,
- .queue_exception = svm_queue_exception,
+ .inject_exception = svm_inject_exception,
.cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
.nmi_allowed = svm_nmi_allowed,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 2120d7c060a9..bc25589ad588 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -394,20 +394,25 @@ TRACE_EVENT(kvm_inj_exception,
* Tracepoint for page fault.
*/
TRACE_EVENT(kvm_page_fault,
- TP_PROTO(unsigned long fault_address, unsigned int error_code),
- TP_ARGS(fault_address, error_code),
+ TP_PROTO(struct kvm_vcpu *vcpu, u64 fault_address, u64 error_code),
+ TP_ARGS(vcpu, fault_address, error_code),
TP_STRUCT__entry(
- __field( unsigned long, fault_address )
- __field( unsigned int, error_code )
+ __field( unsigned int, vcpu_id )
+ __field( unsigned long, guest_rip )
+ __field( u64, fault_address )
+ __field( u64, error_code )
),
TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->guest_rip = kvm_rip_read(vcpu);
__entry->fault_address = fault_address;
__entry->error_code = error_code;
),
- TP_printk("address %lx error_code %x",
+ TP_printk("vcpu %u rip 0x%lx address 0x%016llx error_code 0x%llx",
+ __entry->vcpu_id, __entry->guest_rip,
__entry->fault_address, __entry->error_code)
);
@@ -589,10 +594,12 @@ TRACE_EVENT(kvm_pv_eoi,
/*
* Tracepoint for nested VMRUN
*/
-TRACE_EVENT(kvm_nested_vmrun,
+TRACE_EVENT(kvm_nested_vmenter,
TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
- __u32 event_inj, bool npt),
- TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt),
+ __u32 event_inj, bool tdp_enabled, __u64 guest_tdp_pgd,
+ __u64 guest_cr3, __u32 isa),
+ TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, tdp_enabled,
+ guest_tdp_pgd, guest_cr3, isa),
TP_STRUCT__entry(
__field( __u64, rip )
@@ -600,7 +607,9 @@ TRACE_EVENT(kvm_nested_vmrun,
__field( __u64, nested_rip )
__field( __u32, int_ctl )
__field( __u32, event_inj )
- __field( bool, npt )
+ __field( bool, tdp_enabled )
+ __field( __u64, guest_pgd )
+ __field( __u32, isa )
),
TP_fast_assign(
@@ -609,14 +618,24 @@ TRACE_EVENT(kvm_nested_vmrun,
__entry->nested_rip = nested_rip;
__entry->int_ctl = int_ctl;
__entry->event_inj = event_inj;
- __entry->npt = npt;
- ),
-
- TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x "
- "event_inj: 0x%08x npt: %s",
- __entry->rip, __entry->vmcb, __entry->nested_rip,
- __entry->int_ctl, __entry->event_inj,
- __entry->npt ? "on" : "off")
+ __entry->tdp_enabled = tdp_enabled;
+ __entry->guest_pgd = tdp_enabled ? guest_tdp_pgd : guest_cr3;
+ __entry->isa = isa;
+ ),
+
+ TP_printk("rip: 0x%016llx %s: 0x%016llx nested_rip: 0x%016llx "
+ "int_ctl: 0x%08x event_inj: 0x%08x nested_%s=%s %s: 0x%016llx",
+ __entry->rip,
+ __entry->isa == KVM_ISA_VMX ? "vmcs" : "vmcb",
+ __entry->vmcb,
+ __entry->nested_rip,
+ __entry->int_ctl,
+ __entry->event_inj,
+ __entry->isa == KVM_ISA_VMX ? "ept" : "npt",
+ __entry->tdp_enabled ? "y" : "n",
+ !__entry->tdp_enabled ? "guest_cr3" :
+ __entry->isa == KVM_ISA_VMX ? "nested_eptp" : "nested_cr3",
+ __entry->guest_pgd)
);
TRACE_EVENT(kvm_nested_intercepts,
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index c5e5dfef69c7..87c4e46daf37 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -65,6 +65,7 @@ struct vmcs_config {
u64 cpu_based_3rd_exec_ctrl;
u32 vmexit_ctrl;
u32 vmentry_ctrl;
+ u64 misc;
struct nested_vmx_msrs nested;
};
extern struct vmcs_config vmcs_config;
@@ -82,7 +83,8 @@ static inline bool cpu_has_vmx_basic_inout(void)
static inline bool cpu_has_virtual_nmis(void)
{
- return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
+ vmcs_config.cpu_based_exec_ctrl & CPU_BASED_NMI_WINDOW_EXITING;
}
static inline bool cpu_has_vmx_preemption_timer(void)
@@ -224,11 +226,8 @@ static inline bool cpu_has_vmx_vmfunc(void)
static inline bool cpu_has_vmx_shadow_vmcs(void)
{
- u64 vmx_msr;
-
/* check if the cpu supports writing r/o exit information fields */
- rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
- if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
+ if (!(vmcs_config.misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS))
return false;
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -370,10 +369,7 @@ static inline bool cpu_has_vmx_invvpid_global(void)
static inline bool cpu_has_vmx_intel_pt(void)
{
- u64 vmx_msr;
-
- rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
- return (vmx_msr & MSR_IA32_VMX_MISC_INTEL_PT) &&
+ return (vmcs_config.misc & MSR_IA32_VMX_MISC_INTEL_PT) &&
(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) &&
(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL);
}
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 6a61b1ae7942..d8b23c96d627 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -10,6 +10,8 @@
#include "vmx.h"
#include "trace.h"
+#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
+
DEFINE_STATIC_KEY_FALSE(enable_evmcs);
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
@@ -28,6 +30,8 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer,
HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
EVMCS1_FIELD(HOST_CR0, host_cr0,
HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
EVMCS1_FIELD(HOST_CR3, host_cr3,
@@ -78,6 +82,8 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer,
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0,
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1,
@@ -126,6 +132,28 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap,
HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(ENCLS_EXITING_BITMAP, encls_exiting_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(TSC_MULTIPLIER, tsc_multiplier,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ /*
+ * Not used by KVM:
+ *
+ * EVMCS1_FIELD(0x00006828, guest_ia32_s_cet,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ * EVMCS1_FIELD(0x0000682A, guest_ssp,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ * EVMCS1_FIELD(0x0000682C, guest_ia32_int_ssp_table_addr,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ * EVMCS1_FIELD(0x00002816, guest_ia32_lbr_ctl,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ * EVMCS1_FIELD(0x00006C18, host_ia32_s_cet,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ * EVMCS1_FIELD(0x00006C1A, host_ssp,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ * EVMCS1_FIELD(0x00006C1C, host_ia32_int_ssp_table_addr,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ */
/* 64 bit read only */
EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address,
@@ -294,19 +322,6 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
};
const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
-#if IS_ENABLED(CONFIG_HYPERV)
-__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
-{
- vmcs_conf->cpu_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_EXEC_CTRL;
- vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
- vmcs_conf->cpu_based_3rd_exec_ctrl = 0;
-
- vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
- vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
-}
-#endif
-
bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
{
struct hv_vp_assist_page assist_page;
@@ -334,6 +349,9 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
* versions: lower 8 bits is the minimal version, higher 8 bits is the
* maximum supported version. KVM supports versions from 1 to
* KVM_EVMCS_VERSION.
+ *
+ * Note, do not check the Hyper-V is fully enabled in guest CPUID, this
+ * helper is used to _get_ the vCPU's supported CPUID.
*/
if (kvm_cpu_cap_get(X86_FEATURE_VMX) &&
(!vcpu || to_vmx(vcpu)->nested.enlightened_vmcs_enabled))
@@ -342,10 +360,67 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
return 0;
}
-void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
+enum evmcs_revision {
+ EVMCSv1_LEGACY,
+ NR_EVMCS_REVISIONS,
+};
+
+enum evmcs_ctrl_type {
+ EVMCS_EXIT_CTRLS,
+ EVMCS_ENTRY_CTRLS,
+ EVMCS_2NDEXEC,
+ EVMCS_PINCTRL,
+ EVMCS_VMFUNC,
+ NR_EVMCS_CTRLS,
+};
+
+static const u32 evmcs_unsupported_ctrls[NR_EVMCS_CTRLS][NR_EVMCS_REVISIONS] = {
+ [EVMCS_EXIT_CTRLS] = {
+ [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL,
+ },
+ [EVMCS_ENTRY_CTRLS] = {
+ [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMENTRY_CTRL,
+ },
+ [EVMCS_2NDEXEC] = {
+ [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_2NDEXEC,
+ },
+ [EVMCS_PINCTRL] = {
+ [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_PINCTRL,
+ },
+ [EVMCS_VMFUNC] = {
+ [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMFUNC,
+ },
+};
+
+static u32 evmcs_get_unsupported_ctls(enum evmcs_ctrl_type ctrl_type)
+{
+ enum evmcs_revision evmcs_rev = EVMCSv1_LEGACY;
+
+ return evmcs_unsupported_ctrls[ctrl_type][evmcs_rev];
+}
+
+static bool evmcs_has_perf_global_ctrl(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+ /*
+ * PERF_GLOBAL_CTRL has a quirk where some Windows guests may fail to
+ * boot if a PV CPUID feature flag is not also set. Treat the fields
+ * as unsupported if the flag is not set in guest CPUID. This should
+ * be called only for guest accesses, and all guest accesses should be
+ * gated on Hyper-V being enabled and initialized.
+ */
+ if (WARN_ON_ONCE(!hv_vcpu))
+ return false;
+
+ return hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL;
+}
+
+void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
u32 ctl_low = (u32)*pdata;
u32 ctl_high = (u32)(*pdata >> 32);
+ u32 unsupported_ctrls;
/*
* Hyper-V 2016 and 2019 try using these features even when eVMCS
@@ -354,77 +429,70 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
switch (msr_index) {
case MSR_IA32_VMX_EXIT_CTLS:
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
- ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+ unsupported_ctrls = evmcs_get_unsupported_ctls(EVMCS_EXIT_CTRLS);
+ if (!evmcs_has_perf_global_ctrl(vcpu))
+ unsupported_ctrls |= VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+ ctl_high &= ~unsupported_ctrls;
break;
case MSR_IA32_VMX_ENTRY_CTLS:
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
- ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+ unsupported_ctrls = evmcs_get_unsupported_ctls(EVMCS_ENTRY_CTRLS);
+ if (!evmcs_has_perf_global_ctrl(vcpu))
+ unsupported_ctrls |= VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+ ctl_high &= ~unsupported_ctrls;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
- ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+ ctl_high &= ~evmcs_get_unsupported_ctls(EVMCS_2NDEXEC);
break;
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
- ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+ ctl_high &= ~evmcs_get_unsupported_ctls(EVMCS_PINCTRL);
break;
case MSR_IA32_VMX_VMFUNC:
- ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC;
+ ctl_low &= ~evmcs_get_unsupported_ctls(EVMCS_VMFUNC);
break;
}
*pdata = ctl_low | ((u64)ctl_high << 32);
}
+static bool nested_evmcs_is_valid_controls(enum evmcs_ctrl_type ctrl_type,
+ u32 val)
+{
+ return !(val & evmcs_get_unsupported_ctls(ctrl_type));
+}
+
int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
{
- int ret = 0;
- u32 unsupp_ctl;
-
- unsupp_ctl = vmcs12->pin_based_vm_exec_control &
- EVMCS1_UNSUPPORTED_PINCTRL;
- if (unsupp_ctl) {
- trace_kvm_nested_vmenter_failed(
- "eVMCS: unsupported pin-based VM-execution controls",
- unsupp_ctl);
- ret = -EINVAL;
- }
+ if (CC(!nested_evmcs_is_valid_controls(EVMCS_PINCTRL,
+ vmcs12->pin_based_vm_exec_control)))
+ return -EINVAL;
- unsupp_ctl = vmcs12->secondary_vm_exec_control &
- EVMCS1_UNSUPPORTED_2NDEXEC;
- if (unsupp_ctl) {
- trace_kvm_nested_vmenter_failed(
- "eVMCS: unsupported secondary VM-execution controls",
- unsupp_ctl);
- ret = -EINVAL;
- }
+ if (CC(!nested_evmcs_is_valid_controls(EVMCS_2NDEXEC,
+ vmcs12->secondary_vm_exec_control)))
+ return -EINVAL;
- unsupp_ctl = vmcs12->vm_exit_controls &
- EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
- if (unsupp_ctl) {
- trace_kvm_nested_vmenter_failed(
- "eVMCS: unsupported VM-exit controls",
- unsupp_ctl);
- ret = -EINVAL;
- }
+ if (CC(!nested_evmcs_is_valid_controls(EVMCS_EXIT_CTRLS,
+ vmcs12->vm_exit_controls)))
+ return -EINVAL;
- unsupp_ctl = vmcs12->vm_entry_controls &
- EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
- if (unsupp_ctl) {
- trace_kvm_nested_vmenter_failed(
- "eVMCS: unsupported VM-entry controls",
- unsupp_ctl);
- ret = -EINVAL;
- }
+ if (CC(!nested_evmcs_is_valid_controls(EVMCS_ENTRY_CTRLS,
+ vmcs12->vm_entry_controls)))
+ return -EINVAL;
- unsupp_ctl = vmcs12->vm_function_control & EVMCS1_UNSUPPORTED_VMFUNC;
- if (unsupp_ctl) {
- trace_kvm_nested_vmenter_failed(
- "eVMCS: unsupported VM-function controls",
- unsupp_ctl);
- ret = -EINVAL;
- }
+ /*
+ * VM-Func controls are 64-bit, but KVM currently doesn't support any
+ * controls in bits 63:32, i.e. dropping those bits on the consistency
+ * check is intentional.
+ */
+ if (WARN_ON_ONCE(vmcs12->vm_function_control >> 32))
+ return -EINVAL;
- return ret;
+ if (CC(!nested_evmcs_is_valid_controls(EVMCS_VMFUNC,
+ vmcs12->vm_function_control)))
+ return -EINVAL;
+
+ return 0;
}
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index f886a8ff0342..6f746ef3c038 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -42,8 +42,6 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
* PLE_GAP = 0x00004020,
* PLE_WINDOW = 0x00004022,
* VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
- * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
- * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
*
* Currently unsupported in KVM:
* GUEST_IA32_RTIT_CTL = 0x00002814,
@@ -61,9 +59,8 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
SECONDARY_EXEC_TSC_SCALING | \
SECONDARY_EXEC_PAUSE_LOOP_EXITING)
#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \
- (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
- VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
-#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+ (VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (0)
#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
struct evmcs_field {
@@ -212,7 +209,6 @@ static inline void evmcs_load(u64 phys_addr)
vp_ap->enlighten_vmentry = 1;
}
-__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
#else /* !IS_ENABLED(CONFIG_HYPERV) */
static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
static inline void evmcs_write32(unsigned long field, u32 value) {}
@@ -243,7 +239,7 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);
-void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata);
+void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
#endif /* __KVM_X86_VMX_EVMCS_H */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ddd4367d4826..8f67a9c4a287 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -439,61 +439,22 @@ static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
return inequality ^ bit;
}
-
-/*
- * KVM wants to inject page-faults which it got to the guest. This function
- * checks whether in a nested guest, we need to inject them to L1 or L2.
- */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
+static bool nested_vmx_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
+ u32 error_code)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- unsigned int nr = vcpu->arch.exception.nr;
- bool has_payload = vcpu->arch.exception.has_payload;
- unsigned long payload = vcpu->arch.exception.payload;
-
- if (nr == PF_VECTOR) {
- if (vcpu->arch.exception.nested_apf) {
- *exit_qual = vcpu->arch.apf.nested_apf_token;
- return 1;
- }
- if (nested_vmx_is_page_fault_vmexit(vmcs12,
- vcpu->arch.exception.error_code)) {
- *exit_qual = has_payload ? payload : vcpu->arch.cr2;
- return 1;
- }
- } else if (vmcs12->exception_bitmap & (1u << nr)) {
- if (nr == DB_VECTOR) {
- if (!has_payload) {
- payload = vcpu->arch.dr6;
- payload &= ~DR6_BT;
- payload ^= DR6_ACTIVE_LOW;
- }
- *exit_qual = payload;
- } else
- *exit_qual = 0;
- return 1;
- }
- return 0;
-}
-
-static bool nested_vmx_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
-{
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
- WARN_ON(!is_guest_mode(vcpu));
+ /*
+ * Drop bits 31:16 of the error code when performing the #PF mask+match
+ * check. All VMCS fields involved are 32 bits, but Intel CPUs never
+ * set bits 31:16 and VMX disallows setting bits 31:16 in the injected
+ * error code. Including the to-be-dropped bits in the check might
+ * result in an "impossible" or missed exit from L1's perspective.
+ */
+ if (vector == PF_VECTOR)
+ return nested_vmx_is_page_fault_vmexit(vmcs12, (u16)error_code);
- if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
- !WARN_ON_ONCE(to_vmx(vcpu)->nested.nested_run_pending)) {
- vmcs12->vm_exit_intr_error_code = fault->error_code;
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
- PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
- INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
- fault->address);
- return true;
- }
- return false;
+ return (vmcs12->exception_bitmap & (1u << vector));
}
static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
@@ -1607,6 +1568,10 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields
vmcs12->guest_rflags = evmcs->guest_rflags;
vmcs12->guest_interruptibility_info =
evmcs->guest_interruptibility_info;
+ /*
+ * Not present in struct vmcs12:
+ * vmcs12->guest_ssp = evmcs->guest_ssp;
+ */
}
if (unlikely(!(hv_clean_fields &
@@ -1653,6 +1618,13 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields
vmcs12->host_fs_selector = evmcs->host_fs_selector;
vmcs12->host_gs_selector = evmcs->host_gs_selector;
vmcs12->host_tr_selector = evmcs->host_tr_selector;
+ vmcs12->host_ia32_perf_global_ctrl = evmcs->host_ia32_perf_global_ctrl;
+ /*
+ * Not present in struct vmcs12:
+ * vmcs12->host_ia32_s_cet = evmcs->host_ia32_s_cet;
+ * vmcs12->host_ssp = evmcs->host_ssp;
+ * vmcs12->host_ia32_int_ssp_table_addr = evmcs->host_ia32_int_ssp_table_addr;
+ */
}
if (unlikely(!(hv_clean_fields &
@@ -1720,6 +1692,8 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields
vmcs12->tsc_offset = evmcs->tsc_offset;
vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
+ vmcs12->encls_exiting_bitmap = evmcs->encls_exiting_bitmap;
+ vmcs12->tsc_multiplier = evmcs->tsc_multiplier;
}
if (unlikely(!(hv_clean_fields &
@@ -1767,6 +1741,13 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields
vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
vmcs12->guest_activity_state = evmcs->guest_activity_state;
vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
+ vmcs12->guest_ia32_perf_global_ctrl = evmcs->guest_ia32_perf_global_ctrl;
+ /*
+ * Not present in struct vmcs12:
+ * vmcs12->guest_ia32_s_cet = evmcs->guest_ia32_s_cet;
+ * vmcs12->guest_ia32_lbr_ctl = evmcs->guest_ia32_lbr_ctl;
+ * vmcs12->guest_ia32_int_ssp_table_addr = evmcs->guest_ia32_int_ssp_table_addr;
+ */
}
/*
@@ -1869,12 +1850,23 @@ static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
* evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
* evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
* evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
+ * evmcs->guest_ia32_perf_global_ctrl = vmcs12->guest_ia32_perf_global_ctrl;
+ * evmcs->host_ia32_perf_global_ctrl = vmcs12->host_ia32_perf_global_ctrl;
+ * evmcs->encls_exiting_bitmap = vmcs12->encls_exiting_bitmap;
+ * evmcs->tsc_multiplier = vmcs12->tsc_multiplier;
*
* Not present in struct vmcs12:
* evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
* evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
* evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
* evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
+ * evmcs->host_ia32_s_cet = vmcs12->host_ia32_s_cet;
+ * evmcs->host_ssp = vmcs12->host_ssp;
+ * evmcs->host_ia32_int_ssp_table_addr = vmcs12->host_ia32_int_ssp_table_addr;
+ * evmcs->guest_ia32_s_cet = vmcs12->guest_ia32_s_cet;
+ * evmcs->guest_ia32_lbr_ctl = vmcs12->guest_ia32_lbr_ctl;
+ * evmcs->guest_ia32_int_ssp_table_addr = vmcs12->guest_ia32_int_ssp_table_addr;
+ * evmcs->guest_ssp = vmcs12->guest_ssp;
*/
evmcs->guest_es_selector = vmcs12->guest_es_selector;
@@ -1982,7 +1974,7 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
bool evmcs_gpa_changed = false;
u64 evmcs_gpa;
- if (likely(!vmx->nested.enlightened_vmcs_enabled))
+ if (likely(!guest_cpuid_has_evmcs(vcpu)))
return EVMPTRLD_DISABLED;
if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) {
@@ -2328,9 +2320,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
* are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
* on the related bits (if supported by the CPU) in the hope that
* we can avoid VMWrites during vmx_set_efer().
+ *
+ * Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is
+ * loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to
+ * do the same for L2.
*/
exec_control = __vm_entry_controls_get(vmcs01);
- exec_control |= vmcs12->vm_entry_controls;
+ exec_control |= (vmcs12->vm_entry_controls &
+ ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
if (cpu_has_load_ia32_efer()) {
if (guest_efer & EFER_LMA)
@@ -2863,7 +2860,7 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
nested_check_vm_entry_controls(vcpu, vmcs12))
return -EINVAL;
- if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
+ if (guest_cpuid_has_evmcs(vcpu))
return nested_evmcs_check_controls(vmcs12);
return 0;
@@ -3145,7 +3142,7 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
* L2 was running), map it here to make sure vmcs12 changes are
* properly reflected.
*/
- if (vmx->nested.enlightened_vmcs_enabled &&
+ if (guest_cpuid_has_evmcs(vcpu) &&
vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
enum nested_evmptrld_status evmptrld_status =
nested_vmx_handle_enlightened_vmptrld(vcpu, false);
@@ -3364,12 +3361,24 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
};
u32 failed_index;
+ trace_kvm_nested_vmenter(kvm_rip_read(vcpu),
+ vmx->nested.current_vmptr,
+ vmcs12->guest_rip,
+ vmcs12->guest_intr_status,
+ vmcs12->vm_entry_intr_info_field,
+ vmcs12->secondary_vm_exec_control & SECONDARY_EXEC_ENABLE_EPT,
+ vmcs12->ept_pointer,
+ vmcs12->guest_cr3,
+ KVM_ISA_VMX);
+
kvm_service_local_tlb_flush_requests(vcpu);
evaluate_pending_interrupts = exec_controls_get(vmx) &
(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
+ if (!evaluate_pending_interrupts)
+ evaluate_pending_interrupts |= kvm_apic_has_pending_init_or_sipi(vcpu);
if (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -3450,18 +3459,10 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
}
/*
- * If L1 had a pending IRQ/NMI until it executed
- * VMLAUNCH/VMRESUME which wasn't delivered because it was
- * disallowed (e.g. interrupts disabled), L0 needs to
- * evaluate if this pending event should cause an exit from L2
- * to L1 or delivered directly to L2 (e.g. In case L1 don't
- * intercept EXTERNAL_INTERRUPT).
- *
- * Usually this would be handled by the processor noticing an
- * IRQ/NMI window request, or checking RVI during evaluation of
- * pending virtual interrupts. However, this setting was done
- * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
- * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
+ * Re-evaluate pending events if L1 had a pending IRQ/NMI/INIT/SIPI
+ * when it executed VMLAUNCH/VMRESUME, as entering non-root mode can
+ * effectively unblock various events, e.g. INIT/SIPI cause VM-Exit
+ * unconditionally.
*/
if (unlikely(evaluate_pending_interrupts))
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -3718,7 +3719,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
is_double_fault(exit_intr_info))) {
vmcs12->idt_vectoring_info_field = 0;
} else if (vcpu->arch.exception.injected) {
- nr = vcpu->arch.exception.nr;
+ nr = vcpu->arch.exception.vector;
idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
if (kvm_exception_is_soft(nr)) {
@@ -3819,19 +3820,40 @@ mmio_needed:
return -ENXIO;
}
-static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
- unsigned long exit_qual)
+static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
{
+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
+ u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- unsigned int nr = vcpu->arch.exception.nr;
- u32 intr_info = nr | INTR_INFO_VALID_MASK;
+ unsigned long exit_qual;
+
+ if (ex->has_payload) {
+ exit_qual = ex->payload;
+ } else if (ex->vector == PF_VECTOR) {
+ exit_qual = vcpu->arch.cr2;
+ } else if (ex->vector == DB_VECTOR) {
+ exit_qual = vcpu->arch.dr6;
+ exit_qual &= ~DR6_BT;
+ exit_qual ^= DR6_ACTIVE_LOW;
+ } else {
+ exit_qual = 0;
+ }
- if (vcpu->arch.exception.has_error_code) {
- vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
+ if (ex->has_error_code) {
+ /*
+ * Intel CPUs do not generate error codes with bits 31:16 set,
+ * and more importantly VMX disallows setting bits 31:16 in the
+ * injected error code for VM-Entry. Drop the bits to mimic
+ * hardware and avoid inducing failure on nested VM-Entry if L1
+ * chooses to inject the exception back to L2. AMD CPUs _do_
+ * generate "full" 32-bit error codes, so KVM allows userspace
+ * to inject exception error codes with bits 31:16 set.
+ */
+ vmcs12->vm_exit_intr_error_code = (u16)ex->error_code;
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
}
- if (kvm_exception_is_soft(nr))
+ if (kvm_exception_is_soft(ex->vector))
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
else
intr_info |= INTR_TYPE_HARD_EXCEPTION;
@@ -3844,16 +3866,39 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
}
/*
- * Returns true if a debug trap is pending delivery.
+ * Returns true if a debug trap is (likely) pending delivery. Infer the class
+ * of a #DB (trap-like vs. fault-like) from the exception payload (to-be-DR6).
+ * Using the payload is flawed because code breakpoints (fault-like) and data
+ * breakpoints (trap-like) set the same bits in DR6 (breakpoint detected), i.e.
+ * this will return false positives if a to-be-injected code breakpoint #DB is
+ * pending (from KVM's perspective, but not "pending" across an instruction
+ * boundary). ICEBP, a.k.a. INT1, is also not reflected here even though it
+ * too is trap-like.
*
- * In KVM, debug traps bear an exception payload. As such, the class of a #DB
- * exception may be inferred from the presence of an exception payload.
+ * KVM "works" despite these flaws as ICEBP isn't currently supported by the
+ * emulator, Monitor Trap Flag is not marked pending on intercepted #DBs (the
+ * #DB has already happened), and MTF isn't marked pending on code breakpoints
+ * from the emulator (because such #DBs are fault-like and thus don't trigger
+ * actions that fire on instruction retire).
+ */
+static unsigned long vmx_get_pending_dbg_trap(struct kvm_queued_exception *ex)
+{
+ if (!ex->pending || ex->vector != DB_VECTOR)
+ return 0;
+
+ /* General Detect #DBs are always fault-like. */
+ return ex->payload & ~DR6_BD;
+}
+
+/*
+ * Returns true if there's a pending #DB exception that is lower priority than
+ * a pending Monitor Trap Flag VM-Exit. TSS T-flag #DBs are not emulated by
+ * KVM, but could theoretically be injected by userspace. Note, this code is
+ * imperfect, see above.
*/
-static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
+static bool vmx_is_low_priority_db_trap(struct kvm_queued_exception *ex)
{
- return vcpu->arch.exception.pending &&
- vcpu->arch.exception.nr == DB_VECTOR &&
- vcpu->arch.exception.payload;
+ return vmx_get_pending_dbg_trap(ex) & ~DR6_BT;
}
/*
@@ -3865,9 +3910,11 @@ static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
*/
static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
{
- if (vmx_pending_dbg_trap(vcpu))
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
- vcpu->arch.exception.payload);
+ unsigned long pending_dbg;
+
+ pending_dbg = vmx_get_pending_dbg_trap(&vcpu->arch.exception);
+ if (pending_dbg)
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, pending_dbg);
}
static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
@@ -3876,21 +3923,113 @@ static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->nested.preemption_timer_expired;
}
+static bool vmx_has_nested_events(struct kvm_vcpu *vcpu)
+{
+ return nested_vmx_preemption_timer_pending(vcpu) ||
+ to_vmx(vcpu)->nested.mtf_pending;
+}
+
+/*
+ * Per the Intel SDM's table "Priority Among Concurrent Events", with minor
+ * edits to fill in missing examples, e.g. #DB due to split-lock accesses,
+ * and less minor edits to splice in the priority of VMX Non-Root specific
+ * events, e.g. MTF and NMI/INTR-window exiting.
+ *
+ * 1 Hardware Reset and Machine Checks
+ * - RESET
+ * - Machine Check
+ *
+ * 2 Trap on Task Switch
+ * - T flag in TSS is set (on task switch)
+ *
+ * 3 External Hardware Interventions
+ * - FLUSH
+ * - STOPCLK
+ * - SMI
+ * - INIT
+ *
+ * 3.5 Monitor Trap Flag (MTF) VM-exit[1]
+ *
+ * 4 Traps on Previous Instruction
+ * - Breakpoints
+ * - Trap-class Debug Exceptions (#DB due to TF flag set, data/I-O
+ * breakpoint, or #DB due to a split-lock access)
+ *
+ * 4.3 VMX-preemption timer expired VM-exit
+ *
+ * 4.6 NMI-window exiting VM-exit[2]
+ *
+ * 5 Nonmaskable Interrupts (NMI)
+ *
+ * 5.5 Interrupt-window exiting VM-exit and Virtual-interrupt delivery
+ *
+ * 6 Maskable Hardware Interrupts
+ *
+ * 7 Code Breakpoint Fault
+ *
+ * 8 Faults from Fetching Next Instruction
+ * - Code-Segment Limit Violation
+ * - Code Page Fault
+ * - Control protection exception (missing ENDBRANCH at target of indirect
+ * call or jump)
+ *
+ * 9 Faults from Decoding Next Instruction
+ * - Instruction length > 15 bytes
+ * - Invalid Opcode
+ * - Coprocessor Not Available
+ *
+ *10 Faults on Executing Instruction
+ * - Overflow
+ * - Bound error
+ * - Invalid TSS
+ * - Segment Not Present
+ * - Stack fault
+ * - General Protection
+ * - Data Page Fault
+ * - Alignment Check
+ * - x86 FPU Floating-point exception
+ * - SIMD floating-point exception
+ * - Virtualization exception
+ * - Control protection exception
+ *
+ * [1] Per the "Monitor Trap Flag" section: System-management interrupts (SMIs),
+ * INIT signals, and higher priority events take priority over MTF VM exits.
+ * MTF VM exits take priority over debug-trap exceptions and lower priority
+ * events.
+ *
+ * [2] Debug-trap exceptions and higher priority events take priority over VM exits
+ * caused by the VMX-preemption timer. VM exits caused by the VMX-preemption
+ * timer take priority over VM exits caused by the "NMI-window exiting"
+ * VM-execution control and lower priority events.
+ *
+ * [3] Debug-trap exceptions and higher priority events take priority over VM exits
+ * caused by "NMI-window exiting". VM exits caused by this control take
+ * priority over non-maskable interrupts (NMIs) and lower priority events.
+ *
+ * [4] Virtual-interrupt delivery has the same priority as that of VM exits due to
+ * the 1-setting of the "interrupt-window exiting" VM-execution control. Thus,
+ * non-maskable interrupts (NMIs) and higher priority events take priority over
+ * delivery of a virtual interrupt; delivery of a virtual interrupt takes
+ * priority over external interrupts and lower priority events.
+ */
static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long exit_qual;
- bool block_nested_events =
- vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
- bool mtf_pending = vmx->nested.mtf_pending;
struct kvm_lapic *apic = vcpu->arch.apic;
-
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
- * Clear the MTF state. If a higher priority VM-exit is delivered first,
- * this state is discarded.
+ * Only a pending nested run blocks a pending exception. If there is a
+ * previously injected event, the pending exception occurred while said
+ * event was being delivered and thus needs to be handled.
*/
- if (!block_nested_events)
- vmx->nested.mtf_pending = false;
+ bool block_nested_exceptions = vmx->nested.nested_run_pending;
+ /*
+ * New events (not exceptions) are only recognized at instruction
+ * boundaries. If an event needs reinjection, then KVM is handling a
+ * VM-Exit that occurred _during_ instruction execution; new events are
+ * blocked until the instruction completes.
+ */
+ bool block_nested_events = block_nested_exceptions ||
+ kvm_event_needs_reinjection(vcpu);
if (lapic_in_kernel(vcpu) &&
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
@@ -3900,6 +4039,9 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
clear_bit(KVM_APIC_INIT, &apic->pending_events);
if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
+
+ /* MTF is discarded if the vCPU is in WFS. */
+ vmx->nested.mtf_pending = false;
return 0;
}
@@ -3909,31 +4051,41 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
return -EBUSY;
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
- if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
+ if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
apic->sipi_vector & 0xFFUL);
- return 0;
+ return 0;
+ }
+ /* Fallthrough, the SIPI is completely ignored. */
}
/*
- * Process any exceptions that are not debug traps before MTF.
+ * Process exceptions that are higher priority than Monitor Trap Flag:
+ * fault-like exceptions, TSS T flag #DB (not emulated by KVM, but
+ * could theoretically come in from userspace), and ICEBP (INT1).
*
- * Note that only a pending nested run can block a pending exception.
- * Otherwise an injected NMI/interrupt should either be
- * lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO,
- * while delivering the pending exception.
+ * TODO: SMIs have higher priority than MTF and trap-like #DBs (except
+ * for TSS T flag #DBs). KVM also doesn't save/restore pending MTF
+ * across SMI/RSM as it should; that needs to be addressed in order to
+ * prioritize SMI over MTF and trap-like #DBs.
*/
-
- if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
- if (vmx->nested.nested_run_pending)
+ if (vcpu->arch.exception_vmexit.pending &&
+ !vmx_is_low_priority_db_trap(&vcpu->arch.exception_vmexit)) {
+ if (block_nested_exceptions)
return -EBUSY;
- if (!nested_vmx_check_exception(vcpu, &exit_qual))
- goto no_vmexit;
- nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+
+ nested_vmx_inject_exception_vmexit(vcpu);
return 0;
}
- if (mtf_pending) {
+ if (vcpu->arch.exception.pending &&
+ !vmx_is_low_priority_db_trap(&vcpu->arch.exception)) {
+ if (block_nested_exceptions)
+ return -EBUSY;
+ goto no_vmexit;
+ }
+
+ if (vmx->nested.mtf_pending) {
if (block_nested_events)
return -EBUSY;
nested_vmx_update_pending_dbg(vcpu);
@@ -3941,15 +4093,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
return 0;
}
- if (vcpu->arch.exception.pending) {
- if (vmx->nested.nested_run_pending)
+ if (vcpu->arch.exception_vmexit.pending) {
+ if (block_nested_exceptions)
return -EBUSY;
- if (!nested_vmx_check_exception(vcpu, &exit_qual))
- goto no_vmexit;
- nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+
+ nested_vmx_inject_exception_vmexit(vcpu);
return 0;
}
+ if (vcpu->arch.exception.pending) {
+ if (block_nested_exceptions)
+ return -EBUSY;
+ goto no_vmexit;
+ }
+
if (nested_vmx_preemption_timer_pending(vcpu)) {
if (block_nested_events)
return -EBUSY;
@@ -4255,14 +4412,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
nested_vmx_abort(vcpu,
VMX_ABORT_SAVE_GUEST_MSR_FAIL);
}
-
- /*
- * Drop what we picked up for L2 via vmx_complete_interrupts. It is
- * preserved above and would only end up incorrectly in L1.
- */
- vcpu->arch.nmi_injected = false;
- kvm_clear_exception_queue(vcpu);
- kvm_clear_interrupt_queue(vcpu);
}
/*
@@ -4538,6 +4687,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ /* Pending MTF traps are discarded on VM-Exit. */
+ vmx->nested.mtf_pending = false;
+
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
@@ -4602,6 +4754,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
WARN_ON_ONCE(nested_early_check);
}
+ /*
+ * Drop events/exceptions that were queued for re-injection to L2
+ * (picked up via vmx_complete_interrupts()), as well as exceptions
+ * that were pending for L2. Note, this must NOT be hoisted above
+ * prepare_vmcs12(), events/exceptions queued for re-injection need to
+ * be captured in vmcs12 (see vmcs12_save_pending_event()).
+ */
+ vcpu->arch.nmi_injected = false;
+ kvm_clear_exception_queue(vcpu);
+ kvm_clear_interrupt_queue(vcpu);
+
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
/* Update any VMCS fields that might have changed while L2 ran */
@@ -5030,8 +5193,8 @@ static int handle_vmxoff(struct kvm_vcpu *vcpu)
free_nested(vcpu);
- /* Process a latched INIT during time CPU was in VMX operation */
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ if (kvm_apic_has_pending_init_or_sipi(vcpu))
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return nested_vmx_succeed(vcpu);
}
@@ -5067,7 +5230,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
* state. It is possible that the area will stay mapped as
* vmx->nested.hv_evmcs but this shouldn't be a problem.
*/
- if (likely(!vmx->nested.enlightened_vmcs_enabled ||
+ if (likely(!guest_cpuid_has_evmcs(vcpu) ||
!nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vcpu);
@@ -6463,6 +6626,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (ret)
goto error_guest_mode;
+ if (vmx->nested.mtf_pending)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
return 0;
error_guest_mode:
@@ -6522,8 +6688,10 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
+void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
{
+ struct nested_vmx_msrs *msrs = &vmcs_conf->nested;
+
/*
* Note that as a general rule, the high half of the MSRs (bits in
* the control fields which may be 1) should be initialized by the
@@ -6540,11 +6708,10 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
*/
/* pin-based controls */
- rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
- msrs->pinbased_ctls_low,
- msrs->pinbased_ctls_high);
- msrs->pinbased_ctls_low |=
+ msrs->pinbased_ctls_low =
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+
+ msrs->pinbased_ctls_high = vmcs_conf->pin_based_exec_ctrl;
msrs->pinbased_ctls_high &=
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
@@ -6555,50 +6722,47 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
PIN_BASED_VMX_PREEMPTION_TIMER;
/* exit controls */
- rdmsr(MSR_IA32_VMX_EXIT_CTLS,
- msrs->exit_ctls_low,
- msrs->exit_ctls_high);
msrs->exit_ctls_low =
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
+ msrs->exit_ctls_high = vmcs_conf->vmexit_ctrl;
msrs->exit_ctls_high &=
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
- VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+ VM_EXIT_CLEAR_BNDCFGS;
msrs->exit_ctls_high |=
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
- VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT |
+ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
/* We support free control of debug control saving. */
msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
/* entry controls */
- rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
- msrs->entry_ctls_low,
- msrs->entry_ctls_high);
msrs->entry_ctls_low =
VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
+
+ msrs->entry_ctls_high = vmcs_conf->vmentry_ctrl;
msrs->entry_ctls_high &=
#ifdef CONFIG_X86_64
VM_ENTRY_IA32E_MODE |
#endif
- VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
- VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+ VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
msrs->entry_ctls_high |=
- (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
+ (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER |
+ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
/* We support free control of debug control loading. */
msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
/* cpu-based controls */
- rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
- msrs->procbased_ctls_low,
- msrs->procbased_ctls_high);
msrs->procbased_ctls_low =
CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
+
+ msrs->procbased_ctls_high = vmcs_conf->cpu_based_exec_ctrl;
msrs->procbased_ctls_high &=
CPU_BASED_INTR_WINDOW_EXITING |
CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
@@ -6632,12 +6796,9 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
* depend on CPUID bits, they are added later by
* vmx_vcpu_after_set_cpuid.
*/
- if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
- rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
- msrs->secondary_ctls_low,
- msrs->secondary_ctls_high);
-
msrs->secondary_ctls_low = 0;
+
+ msrs->secondary_ctls_high = vmcs_conf->cpu_based_2nd_exec_ctrl;
msrs->secondary_ctls_high &=
SECONDARY_EXEC_DESC |
SECONDARY_EXEC_ENABLE_RDTSCP |
@@ -6717,10 +6878,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING;
/* miscellaneous data */
- rdmsr(MSR_IA32_VMX_MISC,
- msrs->misc_low,
- msrs->misc_high);
- msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
+ msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA;
msrs->misc_low |=
MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
@@ -6814,9 +6972,9 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
struct kvm_x86_nested_ops vmx_nested_ops = {
.leave_nested = vmx_leave_nested,
+ .is_exception_vmexit = nested_vmx_is_exception_vmexit,
.check_events = vmx_check_nested_events,
- .handle_page_fault_workaround = nested_vmx_handle_page_fault_workaround,
- .hv_timer_pending = nested_vmx_preemption_timer_pending,
+ .has_events = vmx_has_nested_events,
.triple_fault = nested_vmx_triple_fault,
.get_state = vmx_get_nested_state,
.set_state = vmx_set_nested_state,
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 88b00a7359e4..6312c9541c3c 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,7 +17,7 @@ enum nvmx_vmentry_status {
};
void vmx_leave_nested(struct kvm_vcpu *vcpu);
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
+void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index c399637a3a79..25b70a85bef5 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -68,15 +68,11 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
}
}
-/* function is called when global control register has been updated. */
-static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
+static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
{
int bit;
- u64 diff = pmu->global_ctrl ^ data;
struct kvm_pmc *pmc;
- pmu->global_ctrl = data;
-
for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
pmc = intel_pmc_idx_to_pmc(pmu, bit);
if (pmc)
@@ -397,7 +393,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
struct kvm_pmc *pmc;
u32 msr = msr_info->index;
u64 data = msr_info->data;
- u64 reserved_bits;
+ u64 reserved_bits, diff;
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -418,7 +414,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (pmu->global_ctrl == data)
return 0;
if (kvm_valid_perf_global_ctrl(pmu, data)) {
- global_ctrl_changed(pmu, data);
+ diff = pmu->global_ctrl ^ data;
+ pmu->global_ctrl = data;
+ reprogram_counters(pmu, diff);
return 0;
}
break;
@@ -433,7 +431,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (pmu->pebs_enable == data)
return 0;
if (!(data & pmu->pebs_enable_mask)) {
+ diff = pmu->pebs_enable ^ data;
pmu->pebs_enable = data;
+ reprogram_counters(pmu, diff);
return 0;
}
break;
@@ -776,20 +776,23 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
{
struct kvm_pmc *pmc = NULL;
- int bit;
+ int bit, hw_idx;
for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
X86_PMC_IDX_MAX) {
pmc = intel_pmc_idx_to_pmc(pmu, bit);
if (!pmc || !pmc_speculative_in_use(pmc) ||
- !intel_pmc_is_enabled(pmc))
+ !intel_pmc_is_enabled(pmc) || !pmc->perf_event)
continue;
- if (pmc->perf_event && pmc->idx != pmc->perf_event->hw.idx) {
- pmu->host_cross_mapped_mask |=
- BIT_ULL(pmc->perf_event->hw.idx);
- }
+ /*
+ * A negative index indicates the event isn't mapped to a
+ * physical counter in the host, e.g. due to contention.
+ */
+ hw_idx = pmc->perf_event->hw.idx;
+ if (hw_idx != pmc->idx && hw_idx > -1)
+ pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx);
}
}
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index aba8cebdc587..8f95c7c01433 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -129,7 +129,7 @@ static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
ex.address = gva;
ex.error_code_valid = true;
ex.nested_page_fault = false;
- kvm_inject_page_fault(vcpu, &ex);
+ kvm_inject_emulated_page_fault(vcpu, &ex);
} else {
kvm_inject_gp(vcpu, 0);
}
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 6de96b943804..8477d8bdd69c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -189,13 +189,16 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
xor %ebx, %ebx
.Lclear_regs:
+ /* Discard @regs. The register is irrelevant, it just can't be RBX. */
+ pop %_ASM_AX
+
/*
* Clear all general purpose registers except RSP and RBX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
* could lead to speculative execution with the guest's values.
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
- * free. RSP and RAX are exempt as RSP is restored by hardware during
+ * free. RSP and RBX are exempt as RSP is restored by hardware during
* VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
* value.
*/
@@ -216,9 +219,6 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
xor %r15d, %r15d
#endif
- /* "POP" @regs. */
- add $WORD_SIZE, %_ASM_SP
-
/*
* IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
* the first unbalanced RET after vmexit!
@@ -234,7 +234,6 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
X86_FEATURE_RSB_VMEXIT_LITE
-
pop %_ASM_ARG2 /* @flags */
pop %_ASM_ARG1 /* @vmx */
@@ -293,22 +292,13 @@ SYM_FUNC_START(vmread_error_trampoline)
push %r10
push %r11
#endif
-#ifdef CONFIG_X86_64
+
/* Load @field and @fault to arg1 and arg2 respectively. */
- mov 3*WORD_SIZE(%rbp), %_ASM_ARG2
- mov 2*WORD_SIZE(%rbp), %_ASM_ARG1
-#else
- /* Parameters are passed on the stack for 32-bit (see asmlinkage). */
- push 3*WORD_SIZE(%ebp)
- push 2*WORD_SIZE(%ebp)
-#endif
+ mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
+ mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1
call vmread_error
-#ifndef CONFIG_X86_64
- add $8, %esp
-#endif
-
/* Zero out @fault, which will be popped into the result register. */
_ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c9b49a09e6b5..9dba04b6b019 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -439,7 +439,7 @@ do { \
pr_warn_ratelimited(fmt); \
} while (0)
-asmlinkage void vmread_error(unsigned long field, bool fault)
+void vmread_error(unsigned long field, bool fault)
{
if (fault)
kvm_spurious_fault();
@@ -864,7 +864,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
return flags;
}
-static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+static __always_inline void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit)
{
vm_entry_controls_clearbit(vmx, entry);
@@ -922,7 +922,7 @@ skip_guest:
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
}
-static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+static __always_inline void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit,
unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
u64 guest_val, u64 host_val)
@@ -1652,17 +1652,25 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
/*
* Per the SDM, MTF takes priority over debug-trap exceptions besides
- * T-bit traps. As instruction emulation is completed (i.e. at the
- * instruction boundary), any #DB exception pending delivery must be a
- * debug-trap. Record the pending MTF state to be delivered in
+ * TSS T-bit traps and ICEBP (INT1). KVM doesn't emulate T-bit traps
+ * or ICEBP (in the emulator proper), and skipping of ICEBP after an
+ * intercepted #DB deliberately avoids single-step #DB and MTF updates
+ * as ICEBP is higher priority than both. As instruction emulation is
+ * completed at this point (i.e. KVM is at the instruction boundary),
+ * any #DB exception pending delivery must be a debug-trap of lower
+ * priority than MTF. Record the pending MTF state to be delivered in
* vmx_check_nested_events().
*/
if (nested_cpu_has_mtf(vmcs12) &&
(!vcpu->arch.exception.pending ||
- vcpu->arch.exception.nr == DB_VECTOR))
+ vcpu->arch.exception.vector == DB_VECTOR) &&
+ (!vcpu->arch.exception_vmexit.pending ||
+ vcpu->arch.exception_vmexit.vector == DB_VECTOR)) {
vmx->nested.mtf_pending = true;
- else
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ } else {
vmx->nested.mtf_pending = false;
+ }
}
static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
@@ -1684,32 +1692,40 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
}
-static void vmx_queue_exception(struct kvm_vcpu *vcpu)
+static void vmx_inject_exception(struct kvm_vcpu *vcpu)
{
+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
+ u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned nr = vcpu->arch.exception.nr;
- bool has_error_code = vcpu->arch.exception.has_error_code;
- u32 error_code = vcpu->arch.exception.error_code;
- u32 intr_info = nr | INTR_INFO_VALID_MASK;
- kvm_deliver_exception_payload(vcpu);
+ kvm_deliver_exception_payload(vcpu, ex);
- if (has_error_code) {
- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+ if (ex->has_error_code) {
+ /*
+ * Despite the error code being architecturally defined as 32
+ * bits, and the VMCS field being 32 bits, Intel CPUs and thus
+ * VMX don't actually supporting setting bits 31:16. Hardware
+ * will (should) never provide a bogus error code, but AMD CPUs
+ * do generate error codes with bits 31:16 set, and so KVM's
+ * ABI lets userspace shove in arbitrary 32-bit values. Drop
+ * the upper bits to avoid VM-Fail, losing information that
+ * does't really exist is preferable to killing the VM.
+ */
+ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)ex->error_code);
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
}
if (vmx->rmode.vm86_active) {
int inc_eip = 0;
- if (kvm_exception_is_soft(nr))
+ if (kvm_exception_is_soft(ex->vector))
inc_eip = vcpu->arch.event_exit_inst_len;
- kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
+ kvm_inject_realmode_interrupt(vcpu, ex->vector, inc_eip);
return;
}
WARN_ON_ONCE(vmx->emulation_required);
- if (kvm_exception_is_soft(nr)) {
+ if (kvm_exception_is_soft(ex->vector)) {
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
vmx->vcpu.arch.event_exit_inst_len);
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
@@ -1930,9 +1946,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* sanity checking and refuse to boot. Filter all unsupported
* features out.
*/
- if (!msr_info->host_initiated &&
- vmx->nested.enlightened_vmcs_enabled)
- nested_evmcs_filter_control_msr(msr_info->index,
+ if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu))
+ nested_evmcs_filter_control_msr(vcpu, msr_info->index,
&msr_info->data);
break;
case MSR_IA32_RTIT_CTL:
@@ -2494,6 +2509,30 @@ static bool cpu_has_sgx(void)
return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0));
}
+/*
+ * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
+ * can't be used due to errata where VM Exit may incorrectly clear
+ * IA32_PERF_GLOBAL_CTRL[34:32]. Work around the errata by using the
+ * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
+ */
+static bool cpu_has_perf_global_ctrl_bug(void)
+{
+ if (boot_cpu_data.x86 == 0x6) {
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_NEHALEM_EP: /* AAK155 */
+ case INTEL_FAM6_NEHALEM: /* AAP115 */
+ case INTEL_FAM6_WESTMERE: /* AAT100 */
+ case INTEL_FAM6_WESTMERE_EP: /* BC86,AAY89,BD102 */
+ case INTEL_FAM6_NEHALEM_EX: /* BA97 */
+ return true;
+ default:
+ break;
+ }
+ }
+
+ return false;
+}
+
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
u32 msr, u32 *result)
{
@@ -2526,13 +2565,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
struct vmx_capability *vmx_cap)
{
u32 vmx_msr_low, vmx_msr_high;
- u32 min, opt, min2, opt2;
u32 _pin_based_exec_control = 0;
u32 _cpu_based_exec_control = 0;
u32 _cpu_based_2nd_exec_control = 0;
u64 _cpu_based_3rd_exec_control = 0;
u32 _vmexit_control = 0;
u32 _vmentry_control = 0;
+ u64 misc_msr;
int i;
/*
@@ -2552,64 +2591,17 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
};
memset(vmcs_conf, 0, sizeof(*vmcs_conf));
- min = CPU_BASED_HLT_EXITING |
-#ifdef CONFIG_X86_64
- CPU_BASED_CR8_LOAD_EXITING |
- CPU_BASED_CR8_STORE_EXITING |
-#endif
- CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_UNCOND_IO_EXITING |
- CPU_BASED_MOV_DR_EXITING |
- CPU_BASED_USE_TSC_OFFSETTING |
- CPU_BASED_MWAIT_EXITING |
- CPU_BASED_MONITOR_EXITING |
- CPU_BASED_INVLPG_EXITING |
- CPU_BASED_RDPMC_EXITING;
-
- opt = CPU_BASED_TPR_SHADOW |
- CPU_BASED_USE_MSR_BITMAPS |
- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |
- CPU_BASED_ACTIVATE_TERTIARY_CONTROLS;
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
- &_cpu_based_exec_control) < 0)
+
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL,
+ KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL,
+ MSR_IA32_VMX_PROCBASED_CTLS,
+ &_cpu_based_exec_control))
return -EIO;
-#ifdef CONFIG_X86_64
- if (_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)
- _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
- ~CPU_BASED_CR8_STORE_EXITING;
-#endif
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
- min2 = 0;
- opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
- SECONDARY_EXEC_WBINVD_EXITING |
- SECONDARY_EXEC_ENABLE_VPID |
- SECONDARY_EXEC_ENABLE_EPT |
- SECONDARY_EXEC_UNRESTRICTED_GUEST |
- SECONDARY_EXEC_PAUSE_LOOP_EXITING |
- SECONDARY_EXEC_DESC |
- SECONDARY_EXEC_ENABLE_RDTSCP |
- SECONDARY_EXEC_ENABLE_INVPCID |
- SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_SHADOW_VMCS |
- SECONDARY_EXEC_XSAVES |
- SECONDARY_EXEC_RDSEED_EXITING |
- SECONDARY_EXEC_RDRAND_EXITING |
- SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_TSC_SCALING |
- SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
- SECONDARY_EXEC_PT_USE_GPA |
- SECONDARY_EXEC_PT_CONCEAL_VMX |
- SECONDARY_EXEC_ENABLE_VMFUNC |
- SECONDARY_EXEC_BUS_LOCK_DETECTION |
- SECONDARY_EXEC_NOTIFY_VM_EXITING;
- if (cpu_has_sgx())
- opt2 |= SECONDARY_EXEC_ENCLS_EXITING;
- if (adjust_vmx_controls(min2, opt2,
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL,
+ KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL,
MSR_IA32_VMX_PROCBASED_CTLS2,
- &_cpu_based_2nd_exec_control) < 0)
+ &_cpu_based_2nd_exec_control))
return -EIO;
}
#ifndef CONFIG_X86_64
@@ -2627,13 +2619,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
&vmx_cap->ept, &vmx_cap->vpid);
- if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
- /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
- enabled */
- _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_INVLPG_EXITING);
- } else if (vmx_cap->ept) {
+ if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
+ vmx_cap->ept) {
pr_warn_once("EPT CAP should not exist if not support "
"1-setting enable EPT VM-execution control\n");
@@ -2653,32 +2640,24 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
vmx_cap->vpid = 0;
}
- if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) {
- u64 opt3 = TERTIARY_EXEC_IPI_VIRT;
+ if (!cpu_has_sgx())
+ _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_ENCLS_EXITING;
- _cpu_based_3rd_exec_control = adjust_vmx_controls64(opt3,
+ if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)
+ _cpu_based_3rd_exec_control =
+ adjust_vmx_controls64(KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL,
MSR_IA32_VMX_PROCBASED_CTLS3);
- }
- min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
-#ifdef CONFIG_X86_64
- min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
-#endif
- opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
- VM_EXIT_LOAD_IA32_PAT |
- VM_EXIT_LOAD_IA32_EFER |
- VM_EXIT_CLEAR_BNDCFGS |
- VM_EXIT_PT_CONCEAL_PIP |
- VM_EXIT_CLEAR_IA32_RTIT_CTL;
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
- &_vmexit_control) < 0)
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_EXIT_CONTROLS,
+ KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS,
+ MSR_IA32_VMX_EXIT_CTLS,
+ &_vmexit_control))
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
- PIN_BASED_VMX_PREEMPTION_TIMER;
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
- &_pin_based_exec_control) < 0)
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL,
+ KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL,
+ MSR_IA32_VMX_PINBASED_CTLS,
+ &_pin_based_exec_control))
return -EIO;
if (cpu_has_broken_vmx_preemption_timer())
@@ -2687,15 +2666,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
- min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
- opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
- VM_ENTRY_LOAD_IA32_PAT |
- VM_ENTRY_LOAD_IA32_EFER |
- VM_ENTRY_LOAD_BNDCFGS |
- VM_ENTRY_PT_CONCEAL_PIP |
- VM_ENTRY_LOAD_IA32_RTIT_CTL;
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
- &_vmentry_control) < 0)
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS,
+ KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS,
+ MSR_IA32_VMX_ENTRY_CTLS,
+ &_vmentry_control))
return -EIO;
for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
@@ -2715,30 +2689,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
_vmexit_control &= ~x_ctrl;
}
- /*
- * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
- * can't be used due to an errata where VM Exit may incorrectly clear
- * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the
- * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
- */
- if (boot_cpu_data.x86 == 0x6) {
- switch (boot_cpu_data.x86_model) {
- case 26: /* AAK155 */
- case 30: /* AAP115 */
- case 37: /* AAT100 */
- case 44: /* BC86,AAY89,BD102 */
- case 46: /* BA97 */
- _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
- _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
- pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
- "does not work properly. Using workaround\n");
- break;
- default:
- break;
- }
- }
-
-
rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
@@ -2755,6 +2705,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
if (((vmx_msr_high >> 18) & 15) != 6)
return -EIO;
+ rdmsrl(MSR_IA32_VMX_MISC, misc_msr);
+
vmcs_conf->size = vmx_msr_high & 0x1fff;
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
@@ -2766,11 +2718,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
vmcs_conf->cpu_based_3rd_exec_ctrl = _cpu_based_3rd_exec_control;
vmcs_conf->vmexit_ctrl = _vmexit_control;
vmcs_conf->vmentry_ctrl = _vmentry_control;
-
-#if IS_ENABLED(CONFIG_HYPERV)
- if (enlightened_vmcs)
- evmcs_sanitize_exec_ctrls(vmcs_conf);
-#endif
+ vmcs_conf->misc = misc_msr;
return 0;
}
@@ -3037,10 +2985,15 @@ int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
return 0;
vcpu->arch.efer = efer;
+#ifdef CONFIG_X86_64
if (efer & EFER_LMA)
vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE);
else
vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE);
+#else
+ if (KVM_BUG_ON(efer & EFER_LMA, vcpu->kvm))
+ return 1;
+#endif
vmx_setup_uret_msrs(vmx);
return 0;
@@ -4327,18 +4280,37 @@ static u32 vmx_vmentry_ctrl(void)
if (vmx_pt_mode_is_system())
vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
VM_ENTRY_LOAD_IA32_RTIT_CTL);
- /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
- return vmentry_ctrl &
- ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
+ /*
+ * IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically.
+ */
+ vmentry_ctrl &= ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
+ VM_ENTRY_LOAD_IA32_EFER |
+ VM_ENTRY_IA32E_MODE);
+
+ if (cpu_has_perf_global_ctrl_bug())
+ vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+
+ return vmentry_ctrl;
}
static u32 vmx_vmexit_ctrl(void)
{
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
+ /*
+ * Not used by KVM and never set in vmcs01 or vmcs02, but emulated for
+ * nested virtualization and thus allowed to be set in vmcs12.
+ */
+ vmexit_ctrl &= ~(VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER |
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
+
if (vmx_pt_mode_is_system())
vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
VM_EXIT_CLEAR_IA32_RTIT_CTL);
+
+ if (cpu_has_perf_global_ctrl_bug())
+ vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
return vmexit_ctrl &
~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
@@ -4376,20 +4348,38 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
{
u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
+ /*
+ * Not used by KVM, but fully supported for nesting, i.e. are allowed in
+ * vmcs12 and propagated to vmcs02 when set in vmcs12.
+ */
+ exec_control &= ~(CPU_BASED_RDTSC_EXITING |
+ CPU_BASED_USE_IO_BITMAPS |
+ CPU_BASED_MONITOR_TRAP_FLAG |
+ CPU_BASED_PAUSE_EXITING);
+
+ /* INTR_WINDOW_EXITING and NMI_WINDOW_EXITING are toggled dynamically */
+ exec_control &= ~(CPU_BASED_INTR_WINDOW_EXITING |
+ CPU_BASED_NMI_WINDOW_EXITING);
+
if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
exec_control &= ~CPU_BASED_MOV_DR_EXITING;
- if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
+ if (!cpu_need_tpr_shadow(&vmx->vcpu))
exec_control &= ~CPU_BASED_TPR_SHADOW;
+
#ifdef CONFIG_X86_64
+ if (exec_control & CPU_BASED_TPR_SHADOW)
+ exec_control &= ~(CPU_BASED_CR8_LOAD_EXITING |
+ CPU_BASED_CR8_STORE_EXITING);
+ else
exec_control |= CPU_BASED_CR8_STORE_EXITING |
CPU_BASED_CR8_LOAD_EXITING;
#endif
- }
- if (!enable_ept)
- exec_control |= CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_INVLPG_EXITING;
+ /* No need to intercept CR3 access or INVPLG when using EPT. */
+ if (enable_ept)
+ exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING |
+ CPU_BASED_INVLPG_EXITING);
if (kvm_mwait_in_guest(vmx->vcpu.kvm))
exec_control &= ~(CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING);
@@ -5155,8 +5145,10 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
* instruction. ICEBP generates a trap-like #DB, but
* despite its interception control being tied to #DB,
* is an instruction intercept, i.e. the VM-Exit occurs
- * on the ICEBP itself. Note, skipping ICEBP also
- * clears STI and MOVSS blocking.
+ * on the ICEBP itself. Use the inner "skip" helper to
+ * avoid single-step #DB and MTF updates, as ICEBP is
+ * higher priority. Note, skipping ICEBP still clears
+ * STI and MOVSS blocking.
*
* For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
* if single-step is enabled in RFLAGS and STI or MOVSS
@@ -5638,7 +5630,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- trace_kvm_page_fault(gpa, exit_qualification);
+ trace_kvm_page_fault(vcpu, gpa, exit_qualification);
/* Is it a read fault? */
error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
@@ -5710,7 +5702,7 @@ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
return vmx->emulation_required && !vmx->rmode.vm86_active &&
- (vcpu->arch.exception.pending || vcpu->arch.exception.injected);
+ (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
}
static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
@@ -7430,7 +7422,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
+ nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -8070,7 +8062,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.patch_hypercall = vmx_patch_hypercall,
.inject_irq = vmx_inject_irq,
.inject_nmi = vmx_inject_nmi,
- .queue_exception = vmx_queue_exception,
+ .inject_exception = vmx_inject_exception,
.cancel_injection = vmx_cancel_injection,
.interrupt_allowed = vmx_interrupt_allowed,
.nmi_allowed = vmx_nmi_allowed,
@@ -8227,6 +8219,10 @@ static __init int hardware_setup(void)
if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
return -EIO;
+ if (cpu_has_perf_global_ctrl_bug())
+ pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
+ "does not work properly. Using workaround\n");
+
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
@@ -8341,11 +8337,9 @@ static __init int hardware_setup(void)
if (enable_preemption_timer) {
u64 use_timer_freq = 5000ULL * 1000 * 1000;
- u64 vmx_msr;
- rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
cpu_preemption_timer_multi =
- vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+ vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
if (tsc_khz)
use_timer_freq = (u64)tsc_khz * 1000;
@@ -8381,8 +8375,7 @@ static __init int hardware_setup(void)
setup_default_sgx_lepubkeyhash();
if (nested) {
- nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
- vmx_capability.ept);
+ nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 24d58c2ffaa3..a3da84f4ea45 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -477,29 +477,145 @@ static inline u8 vmx_get_rvi(void)
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
}
-#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \
-static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \
-{ \
- if (vmx->loaded_vmcs->controls_shadow.lname != val) { \
- vmcs_write##bits(uname, val); \
- vmx->loaded_vmcs->controls_shadow.lname = val; \
- } \
-} \
-static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs) \
-{ \
- return vmcs->controls_shadow.lname; \
-} \
-static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx) \
-{ \
- return __##lname##_controls_get(vmx->loaded_vmcs); \
-} \
-static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \
-{ \
- lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \
-} \
-static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \
-{ \
- lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
+#define __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \
+ (VM_ENTRY_LOAD_DEBUG_CONTROLS)
+#ifdef CONFIG_X86_64
+ #define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \
+ (__KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS | \
+ VM_ENTRY_IA32E_MODE)
+#else
+ #define KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS \
+ __KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS
+#endif
+#define KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS \
+ (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_ENTRY_LOAD_IA32_PAT | \
+ VM_ENTRY_LOAD_IA32_EFER | \
+ VM_ENTRY_LOAD_BNDCFGS | \
+ VM_ENTRY_PT_CONCEAL_PIP | \
+ VM_ENTRY_LOAD_IA32_RTIT_CTL)
+
+#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
+ (VM_EXIT_SAVE_DEBUG_CONTROLS | \
+ VM_EXIT_ACK_INTR_ON_EXIT)
+#ifdef CONFIG_X86_64
+ #define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
+ (__KVM_REQUIRED_VMX_VM_EXIT_CONTROLS | \
+ VM_EXIT_HOST_ADDR_SPACE_SIZE)
+#else
+ #define KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
+ __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS
+#endif
+#define KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS \
+ (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_EXIT_SAVE_IA32_PAT | \
+ VM_EXIT_LOAD_IA32_PAT | \
+ VM_EXIT_SAVE_IA32_EFER | \
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | \
+ VM_EXIT_LOAD_IA32_EFER | \
+ VM_EXIT_CLEAR_BNDCFGS | \
+ VM_EXIT_PT_CONCEAL_PIP | \
+ VM_EXIT_CLEAR_IA32_RTIT_CTL)
+
+#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
+ (PIN_BASED_EXT_INTR_MASK | \
+ PIN_BASED_NMI_EXITING)
+#define KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL \
+ (PIN_BASED_VIRTUAL_NMIS | \
+ PIN_BASED_POSTED_INTR | \
+ PIN_BASED_VMX_PREEMPTION_TIMER)
+
+#define __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \
+ (CPU_BASED_HLT_EXITING | \
+ CPU_BASED_CR3_LOAD_EXITING | \
+ CPU_BASED_CR3_STORE_EXITING | \
+ CPU_BASED_UNCOND_IO_EXITING | \
+ CPU_BASED_MOV_DR_EXITING | \
+ CPU_BASED_USE_TSC_OFFSETTING | \
+ CPU_BASED_MWAIT_EXITING | \
+ CPU_BASED_MONITOR_EXITING | \
+ CPU_BASED_INVLPG_EXITING | \
+ CPU_BASED_RDPMC_EXITING | \
+ CPU_BASED_INTR_WINDOW_EXITING)
+
+#ifdef CONFIG_X86_64
+ #define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \
+ (__KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL | \
+ CPU_BASED_CR8_LOAD_EXITING | \
+ CPU_BASED_CR8_STORE_EXITING)
+#else
+ #define KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL \
+ __KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL
+#endif
+
+#define KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL \
+ (CPU_BASED_RDTSC_EXITING | \
+ CPU_BASED_TPR_SHADOW | \
+ CPU_BASED_USE_IO_BITMAPS | \
+ CPU_BASED_MONITOR_TRAP_FLAG | \
+ CPU_BASED_USE_MSR_BITMAPS | \
+ CPU_BASED_NMI_WINDOW_EXITING | \
+ CPU_BASED_PAUSE_EXITING | \
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | \
+ CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)
+
+#define KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL 0
+#define KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL \
+ (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
+ SECONDARY_EXEC_WBINVD_EXITING | \
+ SECONDARY_EXEC_ENABLE_VPID | \
+ SECONDARY_EXEC_ENABLE_EPT | \
+ SECONDARY_EXEC_UNRESTRICTED_GUEST | \
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING | \
+ SECONDARY_EXEC_DESC | \
+ SECONDARY_EXEC_ENABLE_RDTSCP | \
+ SECONDARY_EXEC_ENABLE_INVPCID | \
+ SECONDARY_EXEC_APIC_REGISTER_VIRT | \
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
+ SECONDARY_EXEC_SHADOW_VMCS | \
+ SECONDARY_EXEC_XSAVES | \
+ SECONDARY_EXEC_RDSEED_EXITING | \
+ SECONDARY_EXEC_RDRAND_EXITING | \
+ SECONDARY_EXEC_ENABLE_PML | \
+ SECONDARY_EXEC_TSC_SCALING | \
+ SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
+ SECONDARY_EXEC_PT_USE_GPA | \
+ SECONDARY_EXEC_PT_CONCEAL_VMX | \
+ SECONDARY_EXEC_ENABLE_VMFUNC | \
+ SECONDARY_EXEC_BUS_LOCK_DETECTION | \
+ SECONDARY_EXEC_NOTIFY_VM_EXITING | \
+ SECONDARY_EXEC_ENCLS_EXITING)
+
+#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0
+#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \
+ (TERTIARY_EXEC_IPI_VIRT)
+
+#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \
+static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \
+{ \
+ if (vmx->loaded_vmcs->controls_shadow.lname != val) { \
+ vmcs_write##bits(uname, val); \
+ vmx->loaded_vmcs->controls_shadow.lname = val; \
+ } \
+} \
+static inline u##bits __##lname##_controls_get(struct loaded_vmcs *vmcs) \
+{ \
+ return vmcs->controls_shadow.lname; \
+} \
+static inline u##bits lname##_controls_get(struct vcpu_vmx *vmx) \
+{ \
+ return __##lname##_controls_get(vmx->loaded_vmcs); \
+} \
+static __always_inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u##bits val) \
+{ \
+ BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \
+ lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \
+} \
+static __always_inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u##bits val) \
+{ \
+ BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \
+ lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
}
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32)
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32)
@@ -626,4 +742,14 @@ static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
return lapic_in_kernel(vcpu) && enable_ipiv;
}
+static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu)
+{
+ /*
+ * eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and
+ * eVMCS has been explicitly enabled by userspace.
+ */
+ return vcpu->arch.hyperv_enabled &&
+ to_vmx(vcpu)->nested.enlightened_vmcs_enabled;
+}
+
#endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 5cfc49ddb1b4..ec268df83ed6 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -10,7 +10,7 @@
#include "vmcs.h"
#include "../x86.h"
-asmlinkage void vmread_error(unsigned long field, bool fault);
+void vmread_error(unsigned long field, bool fault);
__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
bool fault);
void vmwrite_error(unsigned long field, unsigned long value);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b0c47b41c264..4bd5f8a751de 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -173,8 +173,13 @@ bool __read_mostly enable_vmware_backdoor = false;
module_param(enable_vmware_backdoor, bool, S_IRUGO);
EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
-static bool __read_mostly force_emulation_prefix = false;
-module_param(force_emulation_prefix, bool, S_IRUGO);
+/*
+ * Flags to manipulate forced emulation behavior (any non-zero value will
+ * enable forced emulation).
+ */
+#define KVM_FEP_CLEAR_RFLAGS_RF BIT(1)
+static int __read_mostly force_emulation_prefix;
+module_param(force_emulation_prefix, int, 0644);
int __read_mostly pi_inject_timer = -1;
module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
@@ -528,6 +533,7 @@ static int exception_class(int vector)
#define EXCPT_TRAP 1
#define EXCPT_ABORT 2
#define EXCPT_INTERRUPT 3
+#define EXCPT_DB 4
static int exception_type(int vector)
{
@@ -538,8 +544,14 @@ static int exception_type(int vector)
mask = 1 << vector;
- /* #DB is trap, as instruction watchpoints are handled elsewhere */
- if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
+ /*
+ * #DBs can be trap-like or fault-like, the caller must check other CPU
+ * state, e.g. DR6, to determine whether a #DB is a trap or fault.
+ */
+ if (mask & (1 << DB_VECTOR))
+ return EXCPT_DB;
+
+ if (mask & ((1 << BP_VECTOR) | (1 << OF_VECTOR)))
return EXCPT_TRAP;
if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
@@ -549,16 +561,13 @@ static int exception_type(int vector)
return EXCPT_FAULT;
}
-void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
+ struct kvm_queued_exception *ex)
{
- unsigned nr = vcpu->arch.exception.nr;
- bool has_payload = vcpu->arch.exception.has_payload;
- unsigned long payload = vcpu->arch.exception.payload;
-
- if (!has_payload)
+ if (!ex->has_payload)
return;
- switch (nr) {
+ switch (ex->vector) {
case DB_VECTOR:
/*
* "Certain debug exceptions may clear bit 0-3. The
@@ -583,8 +592,8 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
* So they need to be flipped for DR6.
*/
vcpu->arch.dr6 |= DR6_ACTIVE_LOW;
- vcpu->arch.dr6 |= payload;
- vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW;
+ vcpu->arch.dr6 |= ex->payload;
+ vcpu->arch.dr6 ^= ex->payload & DR6_ACTIVE_LOW;
/*
* The #DB payload is defined as compatible with the 'pending
@@ -595,15 +604,30 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
vcpu->arch.dr6 &= ~BIT(12);
break;
case PF_VECTOR:
- vcpu->arch.cr2 = payload;
+ vcpu->arch.cr2 = ex->payload;
break;
}
- vcpu->arch.exception.has_payload = false;
- vcpu->arch.exception.payload = 0;
+ ex->has_payload = false;
+ ex->payload = 0;
}
EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
+static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vector,
+ bool has_error_code, u32 error_code,
+ bool has_payload, unsigned long payload)
+{
+ struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
+
+ ex->vector = vector;
+ ex->injected = false;
+ ex->pending = true;
+ ex->has_error_code = has_error_code;
+ ex->error_code = error_code;
+ ex->has_payload = has_payload;
+ ex->payload = payload;
+}
+
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
bool has_payload, unsigned long payload, bool reinject)
@@ -613,18 +637,31 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ /*
+ * If the exception is destined for L2 and isn't being reinjected,
+ * morph it to a VM-Exit if L1 wants to intercept the exception. A
+ * previously injected exception is not checked because it was checked
+ * when it was original queued, and re-checking is incorrect if _L1_
+ * injected the exception, in which case it's exempt from interception.
+ */
+ if (!reinject && is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, nr, error_code)) {
+ kvm_queue_exception_vmexit(vcpu, nr, has_error, error_code,
+ has_payload, payload);
+ return;
+ }
+
if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
queue:
if (reinject) {
/*
- * On vmentry, vcpu->arch.exception.pending is only
- * true if an event injection was blocked by
- * nested_run_pending. In that case, however,
- * vcpu_enter_guest requests an immediate exit,
- * and the guest shouldn't proceed far enough to
- * need reinjection.
+ * On VM-Entry, an exception can be pending if and only
+ * if event injection was blocked by nested_run_pending.
+ * In that case, however, vcpu_enter_guest() requests an
+ * immediate exit, and the guest shouldn't proceed far
+ * enough to need reinjection.
*/
- WARN_ON_ONCE(vcpu->arch.exception.pending);
+ WARN_ON_ONCE(kvm_is_exception_pending(vcpu));
vcpu->arch.exception.injected = true;
if (WARN_ON_ONCE(has_payload)) {
/*
@@ -639,17 +676,18 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.injected = false;
}
vcpu->arch.exception.has_error_code = has_error;
- vcpu->arch.exception.nr = nr;
+ vcpu->arch.exception.vector = nr;
vcpu->arch.exception.error_code = error_code;
vcpu->arch.exception.has_payload = has_payload;
vcpu->arch.exception.payload = payload;
if (!is_guest_mode(vcpu))
- kvm_deliver_exception_payload(vcpu);
+ kvm_deliver_exception_payload(vcpu,
+ &vcpu->arch.exception);
return;
}
/* to check exception */
- prev_nr = vcpu->arch.exception.nr;
+ prev_nr = vcpu->arch.exception.vector;
if (prev_nr == DF_VECTOR) {
/* triple fault -> shutdown */
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
@@ -657,25 +695,22 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
}
class1 = exception_class(prev_nr);
class2 = exception_class(nr);
- if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
- || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+ if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) ||
+ (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
/*
- * Generate double fault per SDM Table 5-5. Set
- * exception.pending = true so that the double fault
- * can trigger a nested vmexit.
+ * Synthesize #DF. Clear the previously injected or pending
+ * exception so as not to incorrectly trigger shutdown.
*/
- vcpu->arch.exception.pending = true;
vcpu->arch.exception.injected = false;
- vcpu->arch.exception.has_error_code = true;
- vcpu->arch.exception.nr = DF_VECTOR;
- vcpu->arch.exception.error_code = 0;
- vcpu->arch.exception.has_payload = false;
- vcpu->arch.exception.payload = 0;
- } else
+ vcpu->arch.exception.pending = false;
+
+ kvm_queue_exception_e(vcpu, DF_VECTOR, 0);
+ } else {
/* replace previous exception with a new one in a hope
that instruction re-execution will regenerate lost
exception */
goto queue;
+ }
}
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
@@ -729,20 +764,22 @@ static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err)
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
++vcpu->stat.pf_guest;
- vcpu->arch.exception.nested_apf =
- is_guest_mode(vcpu) && fault->async_page_fault;
- if (vcpu->arch.exception.nested_apf) {
- vcpu->arch.apf.nested_apf_token = fault->address;
- kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
- } else {
+
+ /*
+ * Async #PF in L2 is always forwarded to L1 as a VM-Exit regardless of
+ * whether or not L1 wants to intercept "regular" #PF.
+ */
+ if (is_guest_mode(vcpu) && fault->async_page_fault)
+ kvm_queue_exception_vmexit(vcpu, PF_VECTOR,
+ true, fault->error_code,
+ true, fault->address);
+ else
kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
fault->address);
- }
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
-/* Returns true if the page fault was immediately morphed into a VM-Exit. */
-bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
+void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
struct kvm_mmu *fault_mmu;
@@ -760,26 +797,7 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
fault_mmu->root.hpa);
- /*
- * A workaround for KVM's bad exception handling. If KVM injected an
- * exception into L2, and L2 encountered a #PF while vectoring the
- * injected exception, manually check to see if L1 wants to intercept
- * #PF, otherwise queuing the #PF will lead to #DF or a lost exception.
- * In all other cases, defer the check to nested_ops->check_events(),
- * which will correctly handle priority (this does not). Note, other
- * exceptions, e.g. #GP, are theoretically affected, #PF is simply the
- * most problematic, e.g. when L0 and L1 are both intercepting #PF for
- * shadow paging.
- *
- * TODO: Rewrite exception handling to track injected and pending
- * (VM-Exit) exceptions separately.
- */
- if (unlikely(vcpu->arch.exception.injected && is_guest_mode(vcpu)) &&
- kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault))
- return true;
-
fault_mmu->inject_page_fault(vcpu, fault);
- return false;
}
EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
@@ -4841,7 +4859,7 @@ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
return (kvm_arch_interrupt_allowed(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu) &&
!kvm_event_needs_reinjection(vcpu) &&
- !vcpu->arch.exception.pending);
+ !kvm_is_exception_pending(vcpu));
}
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
@@ -5016,25 +5034,38 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
+ struct kvm_queued_exception *ex;
+
process_nmi(vcpu);
if (kvm_check_request(KVM_REQ_SMI, vcpu))
process_smi(vcpu);
/*
- * In guest mode, payload delivery should be deferred,
- * so that the L1 hypervisor can intercept #PF before
- * CR2 is modified (or intercept #DB before DR6 is
- * modified under nVMX). Unless the per-VM capability,
- * KVM_CAP_EXCEPTION_PAYLOAD, is set, we may not defer the delivery of
- * an exception payload and handle after a KVM_GET_VCPU_EVENTS. Since we
- * opportunistically defer the exception payload, deliver it if the
- * capability hasn't been requested before processing a
- * KVM_GET_VCPU_EVENTS.
+ * KVM's ABI only allows for one exception to be migrated. Luckily,
+ * the only time there can be two queued exceptions is if there's a
+ * non-exiting _injected_ exception, and a pending exiting exception.
+ * In that case, ignore the VM-Exiting exception as it's an extension
+ * of the injected exception.
+ */
+ if (vcpu->arch.exception_vmexit.pending &&
+ !vcpu->arch.exception.pending &&
+ !vcpu->arch.exception.injected)
+ ex = &vcpu->arch.exception_vmexit;
+ else
+ ex = &vcpu->arch.exception;
+
+ /*
+ * In guest mode, payload delivery should be deferred if the exception
+ * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1
+ * intercepts #PF, ditto for DR6 and #DBs. If the per-VM capability,
+ * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not
+ * propagate the payload and so it cannot be safely deferred. Deliver
+ * the payload if the capability hasn't been requested.
*/
if (!vcpu->kvm->arch.exception_payload_enabled &&
- vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
- kvm_deliver_exception_payload(vcpu);
+ ex->pending && ex->has_payload)
+ kvm_deliver_exception_payload(vcpu, ex);
/*
* The API doesn't provide the instruction length for software
@@ -5042,26 +5073,25 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
* isn't advanced, we should expect to encounter the exception
* again.
*/
- if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
+ if (kvm_exception_is_soft(ex->vector)) {
events->exception.injected = 0;
events->exception.pending = 0;
} else {
- events->exception.injected = vcpu->arch.exception.injected;
- events->exception.pending = vcpu->arch.exception.pending;
+ events->exception.injected = ex->injected;
+ events->exception.pending = ex->pending;
/*
* For ABI compatibility, deliberately conflate
* pending and injected exceptions when
* KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
*/
if (!vcpu->kvm->arch.exception_payload_enabled)
- events->exception.injected |=
- vcpu->arch.exception.pending;
+ events->exception.injected |= ex->pending;
}
- events->exception.nr = vcpu->arch.exception.nr;
- events->exception.has_error_code = vcpu->arch.exception.has_error_code;
- events->exception.error_code = vcpu->arch.exception.error_code;
- events->exception_has_payload = vcpu->arch.exception.has_payload;
- events->exception_payload = vcpu->arch.exception.payload;
+ events->exception.nr = ex->vector;
+ events->exception.has_error_code = ex->has_error_code;
+ events->exception.error_code = ex->error_code;
+ events->exception_has_payload = ex->has_payload;
+ events->exception_payload = ex->payload;
events->interrupt.injected =
vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
@@ -5131,9 +5161,22 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return -EINVAL;
process_nmi(vcpu);
+
+ /*
+ * Flag that userspace is stuffing an exception, the next KVM_RUN will
+ * morph the exception to a VM-Exit if appropriate. Do this only for
+ * pending exceptions, already-injected exceptions are not subject to
+ * intercpetion. Note, userspace that conflates pending and injected
+ * is hosed, and will incorrectly convert an injected exception into a
+ * pending exception, which in turn may cause a spurious VM-Exit.
+ */
+ vcpu->arch.exception_from_userspace = events->exception.pending;
+
+ vcpu->arch.exception_vmexit.pending = false;
+
vcpu->arch.exception.injected = events->exception.injected;
vcpu->arch.exception.pending = events->exception.pending;
- vcpu->arch.exception.nr = events->exception.nr;
+ vcpu->arch.exception.vector = events->exception.nr;
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
vcpu->arch.exception.error_code = events->exception.error_code;
vcpu->arch.exception.has_payload = events->exception_has_payload;
@@ -7257,6 +7300,7 @@ static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type,
int handle_ud(struct kvm_vcpu *vcpu)
{
static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
+ int fep_flags = READ_ONCE(force_emulation_prefix);
int emul_type = EMULTYPE_TRAP_UD;
char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e;
@@ -7264,10 +7308,12 @@ int handle_ud(struct kvm_vcpu *vcpu)
if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0)))
return 1;
- if (force_emulation_prefix &&
+ if (fep_flags &&
kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
sig, sizeof(sig), &e) == 0 &&
memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
+ if (fep_flags & KVM_FEP_CLEAR_RFLAGS_RF)
+ kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) & ~X86_EFLAGS_RF);
kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
emul_type = EMULTYPE_TRAP_UD_FORCED;
}
@@ -7933,14 +7979,20 @@ static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt,
int r;
r = kvm_get_msr_with_filter(vcpu, msr_index, pdata);
+ if (r < 0)
+ return X86EMUL_UNHANDLEABLE;
- if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0,
- complete_emulated_rdmsr, r)) {
- /* Bounce to user space */
- return X86EMUL_IO_NEEDED;
+ if (r) {
+ if (kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0,
+ complete_emulated_rdmsr, r))
+ return X86EMUL_IO_NEEDED;
+
+ trace_kvm_msr_read_ex(msr_index);
+ return X86EMUL_PROPAGATE_FAULT;
}
- return r;
+ trace_kvm_msr_read(msr_index, *pdata);
+ return X86EMUL_CONTINUE;
}
static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt,
@@ -7950,14 +8002,20 @@ static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt,
int r;
r = kvm_set_msr_with_filter(vcpu, msr_index, data);
+ if (r < 0)
+ return X86EMUL_UNHANDLEABLE;
- if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data,
- complete_emulated_msr_access, r)) {
- /* Bounce to user space */
- return X86EMUL_IO_NEEDED;
+ if (r) {
+ if (kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data,
+ complete_emulated_msr_access, r))
+ return X86EMUL_IO_NEEDED;
+
+ trace_kvm_msr_write_ex(msr_index, data);
+ return X86EMUL_PROPAGATE_FAULT;
}
- return r;
+ trace_kvm_msr_write(msr_index, data);
+ return X86EMUL_CONTINUE;
}
static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
@@ -8161,18 +8219,17 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
}
}
-static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
+static void inject_emulated_exception(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
- if (ctxt->exception.vector == PF_VECTOR)
- return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
- if (ctxt->exception.error_code_valid)
+ if (ctxt->exception.vector == PF_VECTOR)
+ kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
+ else if (ctxt->exception.error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception.vector,
ctxt->exception.error_code);
else
kvm_queue_exception(vcpu, ctxt->exception.vector);
- return false;
}
static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
@@ -8548,8 +8605,46 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
-static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r)
+static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu)
{
+ u32 shadow;
+
+ if (kvm_get_rflags(vcpu) & X86_EFLAGS_RF)
+ return true;
+
+ /*
+ * Intel CPUs inhibit code #DBs when MOV/POP SS blocking is active,
+ * but AMD CPUs do not. MOV/POP SS blocking is rare, check that first
+ * to avoid the relatively expensive CPUID lookup.
+ */
+ shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
+ return (shadow & KVM_X86_SHADOW_INT_MOV_SS) &&
+ guest_cpuid_is_intel(vcpu);
+}
+
+static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu,
+ int emulation_type, int *r)
+{
+ WARN_ON_ONCE(emulation_type & EMULTYPE_NO_DECODE);
+
+ /*
+ * Do not check for code breakpoints if hardware has already done the
+ * checks, as inferred from the emulation type. On NO_DECODE and SKIP,
+ * the instruction has passed all exception checks, and all intercepted
+ * exceptions that trigger emulation have lower priority than code
+ * breakpoints, i.e. the fact that the intercepted exception occurred
+ * means any code breakpoints have already been serviced.
+ *
+ * Note, KVM needs to check for code #DBs on EMULTYPE_TRAP_UD_FORCED as
+ * hardware has checked the RIP of the magic prefix, but not the RIP of
+ * the instruction being emulated. The intent of forced emulation is
+ * to behave as if KVM intercepted the instruction without an exception
+ * and without a prefix.
+ */
+ if (emulation_type & (EMULTYPE_NO_DECODE | EMULTYPE_SKIP |
+ EMULTYPE_TRAP_UD | EMULTYPE_VMWARE_GP | EMULTYPE_PF))
+ return false;
+
if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
(vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
struct kvm_run *kvm_run = vcpu->run;
@@ -8569,7 +8664,7 @@ static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r)
}
if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
- !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
+ !kvm_is_code_breakpoint_inhibited(vcpu)) {
unsigned long eip = kvm_get_linear_rip(vcpu);
u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
vcpu->arch.dr7,
@@ -8671,8 +8766,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* are fault-like and are higher priority than any faults on
* the code fetch itself.
*/
- if (!(emulation_type & EMULTYPE_SKIP) &&
- kvm_vcpu_check_code_breakpoint(vcpu, &r))
+ if (kvm_vcpu_check_code_breakpoint(vcpu, emulation_type, &r))
return r;
r = x86_decode_emulated_instruction(vcpu, emulation_type,
@@ -8770,8 +8864,7 @@ restart:
if (ctxt->have_exception) {
r = 1;
- if (inject_emulated_exception(vcpu))
- return r;
+ inject_emulated_exception(vcpu);
} else if (vcpu->arch.pio.count) {
if (!vcpu->arch.pio.in) {
/* FIXME: return into emulator if single-stepping. */
@@ -8801,6 +8894,12 @@ writeback:
unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
+
+ /*
+ * Note, EXCPT_DB is assumed to be fault-like as the emulator
+ * only supports code breakpoints and general detect #DB, both
+ * of which are fault-like.
+ */
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
@@ -9662,74 +9761,155 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
static void kvm_inject_exception(struct kvm_vcpu *vcpu)
{
- trace_kvm_inj_exception(vcpu->arch.exception.nr,
+ trace_kvm_inj_exception(vcpu->arch.exception.vector,
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code,
vcpu->arch.exception.injected);
if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
vcpu->arch.exception.error_code = false;
- static_call(kvm_x86_queue_exception)(vcpu);
+ static_call(kvm_x86_inject_exception)(vcpu);
}
-static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
+/*
+ * Check for any event (interrupt or exception) that is ready to be injected,
+ * and if there is at least one event, inject the event with the highest
+ * priority. This handles both "pending" events, i.e. events that have never
+ * been injected into the guest, and "injected" events, i.e. events that were
+ * injected as part of a previous VM-Enter, but weren't successfully delivered
+ * and need to be re-injected.
+ *
+ * Note, this is not guaranteed to be invoked on a guest instruction boundary,
+ * i.e. doesn't guarantee that there's an event window in the guest. KVM must
+ * be able to inject exceptions in the "middle" of an instruction, and so must
+ * also be able to re-inject NMIs and IRQs in the middle of an instruction.
+ * I.e. for exceptions and re-injected events, NOT invoking this on instruction
+ * boundaries is necessary and correct.
+ *
+ * For simplicity, KVM uses a single path to inject all events (except events
+ * that are injected directly from L1 to L2) and doesn't explicitly track
+ * instruction boundaries for asynchronous events. However, because VM-Exits
+ * that can occur during instruction execution typically result in KVM skipping
+ * the instruction or injecting an exception, e.g. instruction and exception
+ * intercepts, and because pending exceptions have higher priority than pending
+ * interrupts, KVM still honors instruction boundaries in most scenarios.
+ *
+ * But, if a VM-Exit occurs during instruction execution, and KVM does NOT skip
+ * the instruction or inject an exception, then KVM can incorrecty inject a new
+ * asynchrounous event if the event became pending after the CPU fetched the
+ * instruction (in the guest). E.g. if a page fault (#PF, #NPF, EPT violation)
+ * occurs and is resolved by KVM, a coincident NMI, SMI, IRQ, etc... can be
+ * injected on the restarted instruction instead of being deferred until the
+ * instruction completes.
+ *
+ * In practice, this virtualization hole is unlikely to be observed by the
+ * guest, and even less likely to cause functional problems. To detect the
+ * hole, the guest would have to trigger an event on a side effect of an early
+ * phase of instruction execution, e.g. on the instruction fetch from memory.
+ * And for it to be a functional problem, the guest would need to depend on the
+ * ordering between that side effect, the instruction completing, _and_ the
+ * delivery of the asynchronous event.
+ */
+static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
+ bool *req_immediate_exit)
{
+ bool can_inject;
int r;
- bool can_inject = true;
- /* try to reinject previous events if any */
+ /*
+ * Process nested events first, as nested VM-Exit supercedes event
+ * re-injection. If there's an event queued for re-injection, it will
+ * be saved into the appropriate vmc{b,s}12 fields on nested VM-Exit.
+ */
+ if (is_guest_mode(vcpu))
+ r = kvm_check_nested_events(vcpu);
+ else
+ r = 0;
- if (vcpu->arch.exception.injected) {
- kvm_inject_exception(vcpu);
- can_inject = false;
- }
/*
- * Do not inject an NMI or interrupt if there is a pending
- * exception. Exceptions and interrupts are recognized at
- * instruction boundaries, i.e. the start of an instruction.
- * Trap-like exceptions, e.g. #DB, have higher priority than
- * NMIs and interrupts, i.e. traps are recognized before an
- * NMI/interrupt that's pending on the same instruction.
- * Fault-like exceptions, e.g. #GP and #PF, are the lowest
- * priority, but are only generated (pended) during instruction
- * execution, i.e. a pending fault-like exception means the
- * fault occurred on the *previous* instruction and must be
- * serviced prior to recognizing any new events in order to
- * fully complete the previous instruction.
+ * Re-inject exceptions and events *especially* if immediate entry+exit
+ * to/from L2 is needed, as any event that has already been injected
+ * into L2 needs to complete its lifecycle before injecting a new event.
+ *
+ * Don't re-inject an NMI or interrupt if there is a pending exception.
+ * This collision arises if an exception occurred while vectoring the
+ * injected event, KVM intercepted said exception, and KVM ultimately
+ * determined the fault belongs to the guest and queues the exception
+ * for injection back into the guest.
+ *
+ * "Injected" interrupts can also collide with pending exceptions if
+ * userspace ignores the "ready for injection" flag and blindly queues
+ * an interrupt. In that case, prioritizing the exception is correct,
+ * as the exception "occurred" before the exit to userspace. Trap-like
+ * exceptions, e.g. most #DBs, have higher priority than interrupts.
+ * And while fault-like exceptions, e.g. #GP and #PF, are the lowest
+ * priority, they're only generated (pended) during instruction
+ * execution, and interrupts are recognized at instruction boundaries.
+ * Thus a pending fault-like exception means the fault occurred on the
+ * *previous* instruction and must be serviced prior to recognizing any
+ * new events in order to fully complete the previous instruction.
*/
- else if (!vcpu->arch.exception.pending) {
- if (vcpu->arch.nmi_injected) {
- static_call(kvm_x86_inject_nmi)(vcpu);
- can_inject = false;
- } else if (vcpu->arch.interrupt.injected) {
- static_call(kvm_x86_inject_irq)(vcpu, true);
- can_inject = false;
- }
- }
+ if (vcpu->arch.exception.injected)
+ kvm_inject_exception(vcpu);
+ else if (kvm_is_exception_pending(vcpu))
+ ; /* see above */
+ else if (vcpu->arch.nmi_injected)
+ static_call(kvm_x86_inject_nmi)(vcpu);
+ else if (vcpu->arch.interrupt.injected)
+ static_call(kvm_x86_inject_irq)(vcpu, true);
+ /*
+ * Exceptions that morph to VM-Exits are handled above, and pending
+ * exceptions on top of injected exceptions that do not VM-Exit should
+ * either morph to #DF or, sadly, override the injected exception.
+ */
WARN_ON_ONCE(vcpu->arch.exception.injected &&
vcpu->arch.exception.pending);
/*
- * Call check_nested_events() even if we reinjected a previous event
- * in order for caller to determine if it should require immediate-exit
- * from L2 to L1 due to pending L1 events which require exit
- * from L2 to L1.
+ * Bail if immediate entry+exit to/from the guest is needed to complete
+ * nested VM-Enter or event re-injection so that a different pending
+ * event can be serviced (or if KVM needs to exit to userspace).
+ *
+ * Otherwise, continue processing events even if VM-Exit occurred. The
+ * VM-Exit will have cleared exceptions that were meant for L2, but
+ * there may now be events that can be injected into L1.
*/
- if (is_guest_mode(vcpu)) {
- r = kvm_check_nested_events(vcpu);
- if (r < 0)
- goto out;
- }
+ if (r < 0)
+ goto out;
+
+ /*
+ * A pending exception VM-Exit should either result in nested VM-Exit
+ * or force an immediate re-entry and exit to/from L2, and exception
+ * VM-Exits cannot be injected (flag should _never_ be set).
+ */
+ WARN_ON_ONCE(vcpu->arch.exception_vmexit.injected ||
+ vcpu->arch.exception_vmexit.pending);
+
+ /*
+ * New events, other than exceptions, cannot be injected if KVM needs
+ * to re-inject a previous event. See above comments on re-injecting
+ * for why pending exceptions get priority.
+ */
+ can_inject = !kvm_event_needs_reinjection(vcpu);
- /* try to inject new event if pending */
if (vcpu->arch.exception.pending) {
- if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
+ /*
+ * Fault-class exceptions, except #DBs, set RF=1 in the RFLAGS
+ * value pushed on the stack. Trap-like exception and all #DBs
+ * leave RF as-is (KVM follows Intel's behavior in this regard;
+ * AMD states that code breakpoint #DBs excplitly clear RF=0).
+ *
+ * Note, most versions of Intel's SDM and AMD's APM incorrectly
+ * describe the behavior of General Detect #DBs, which are
+ * fault-like. They do _not_ set RF, a la code breakpoints.
+ */
+ if (exception_type(vcpu->arch.exception.vector) == EXCPT_FAULT)
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
X86_EFLAGS_RF);
- if (vcpu->arch.exception.nr == DB_VECTOR) {
- kvm_deliver_exception_payload(vcpu);
+ if (vcpu->arch.exception.vector == DB_VECTOR) {
+ kvm_deliver_exception_payload(vcpu, &vcpu->arch.exception);
if (vcpu->arch.dr7 & DR7_GD) {
vcpu->arch.dr7 &= ~DR7_GD;
kvm_update_dr7(vcpu);
@@ -9801,11 +9981,11 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
}
if (is_guest_mode(vcpu) &&
- kvm_x86_ops.nested_ops->hv_timer_pending &&
- kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
+ kvm_x86_ops.nested_ops->has_events &&
+ kvm_x86_ops.nested_ops->has_events(vcpu))
*req_immediate_exit = true;
- WARN_ON(vcpu->arch.exception.pending);
+ WARN_ON(kvm_is_exception_pending(vcpu));
return 0;
out:
@@ -10110,7 +10290,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
* When APICv gets disabled, we may still have injected interrupts
* pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
* still active when the interrupt got accepted. Make sure
- * inject_pending_event() is called to check for that.
+ * kvm_check_and_inject_events() is called to check for that.
*/
if (!apic->apicv_active)
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -10407,7 +10587,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
- r = inject_pending_event(vcpu, &req_immediate_exit);
+ r = kvm_check_and_inject_events(vcpu, &req_immediate_exit);
if (r < 0) {
r = 0;
goto out;
@@ -10646,10 +10826,26 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
if (hv_timer)
kvm_lapic_switch_to_hv_timer(vcpu);
- if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+ /*
+ * If the vCPU is not runnable, a signal or another host event
+ * of some kind is pending; service it without changing the
+ * vCPU's activity state.
+ */
+ if (!kvm_arch_vcpu_runnable(vcpu))
return 1;
}
+ /*
+ * Evaluate nested events before exiting the halted state. This allows
+ * the halt state to be recorded properly in the VMCS12's activity
+ * state field (AMD does not have a similar field and a VM-Exit always
+ * causes a spurious wakeup from HLT).
+ */
+ if (is_guest_mode(vcpu)) {
+ if (kvm_check_nested_events(vcpu) < 0)
+ return 0;
+ }
+
if (kvm_apic_accept_events(vcpu) < 0)
return 0;
switch(vcpu->arch.mp_state) {
@@ -10673,9 +10869,6 @@ static inline int vcpu_block(struct kvm_vcpu *vcpu)
static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu))
- kvm_check_nested_events(vcpu);
-
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted);
}
@@ -10824,6 +11017,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
struct kvm_run *kvm_run = vcpu->run;
int r;
@@ -10852,7 +11046,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
r = 0;
goto out;
}
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
r = -EAGAIN;
if (signal_pending(current)) {
r = -EINTR;
@@ -10882,6 +11075,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
}
}
+ /*
+ * If userspace set a pending exception and L2 is active, convert it to
+ * a pending VM-Exit if L1 wants to intercept the exception.
+ */
+ if (vcpu->arch.exception_from_userspace && is_guest_mode(vcpu) &&
+ kvm_x86_ops.nested_ops->is_exception_vmexit(vcpu, ex->vector,
+ ex->error_code)) {
+ kvm_queue_exception_vmexit(vcpu, ex->vector,
+ ex->has_error_code, ex->error_code,
+ ex->has_payload, ex->payload);
+ ex->injected = false;
+ ex->pending = false;
+ }
+ vcpu->arch.exception_from_userspace = false;
+
if (unlikely(vcpu->arch.complete_userspace_io)) {
int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
vcpu->arch.complete_userspace_io = NULL;
@@ -10988,6 +11196,7 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
vcpu->arch.exception.pending = false;
+ vcpu->arch.exception_vmexit.pending = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
@@ -11125,11 +11334,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
}
/*
- * KVM_MP_STATE_INIT_RECEIVED means the processor is in
- * INIT state; latched init should be reported using
- * KVM_SET_VCPU_EVENTS, so reject it here.
+ * Pending INITs are reported using KVM_SET_VCPU_EVENTS, disallow
+ * forcing the guest into INIT/SIPI if those events are supposed to be
+ * blocked. KVM prioritizes SMI over INIT, so reject INIT/SIPI state
+ * if an SMI is pending as well.
*/
- if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
+ if ((!kvm_apic_init_sipi_allowed(vcpu) || vcpu->arch.smi_pending) &&
(mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
goto out;
@@ -11368,7 +11578,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
- if (vcpu->arch.exception.pending)
+ if (kvm_is_exception_pending(vcpu))
goto out;
if (dbg->control & KVM_GUESTDBG_INJECT_DB)
kvm_queue_exception(vcpu, DB_VECTOR);
@@ -11750,8 +11960,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
/*
- * To avoid have the INIT path from kvm_apic_has_events() that be
- * called with loaded FPU and does not let userspace fix the state.
+ * All paths that lead to INIT are required to load the guest's
+ * FPU state (because most paths are buried in KVM_RUN).
*/
if (init_event)
kvm_put_guest_fpu(vcpu);
@@ -12080,6 +12290,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_page_track;
+ ret = static_call(kvm_x86_vm_init)(kvm);
+ if (ret)
+ goto out_uninit_mmu;
+
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
atomic_set(&kvm->arch.noncoherent_dma_count, 0);
@@ -12115,8 +12329,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_hv_init_vm(kvm);
kvm_xen_init_vm(kvm);
- return static_call(kvm_x86_vm_init)(kvm);
+ return 0;
+out_uninit_mmu:
+ kvm_mmu_uninit_vm(kvm);
out_page_track:
kvm_page_track_cleanup(kvm);
out:
@@ -12589,13 +12805,14 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (!list_empty_careful(&vcpu->async_pf.done))
return true;
- if (kvm_apic_has_events(vcpu))
+ if (kvm_apic_has_pending_init_or_sipi(vcpu) &&
+ kvm_apic_init_sipi_allowed(vcpu))
return true;
if (vcpu->arch.pv.pv_unhalted)
return true;
- if (vcpu->arch.exception.pending)
+ if (kvm_is_exception_pending(vcpu))
return true;
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
@@ -12617,16 +12834,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return true;
if (is_guest_mode(vcpu) &&
- kvm_x86_ops.nested_ops->hv_timer_pending &&
- kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
+ kvm_x86_ops.nested_ops->has_events &&
+ kvm_x86_ops.nested_ops->has_events(vcpu))
return true;
if (kvm_xen_has_pending_events(vcpu))
return true;
- if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu))
- return true;
-
return false;
}
@@ -12850,7 +13064,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!lapic_in_kernel(vcpu) ||
kvm_event_needs_reinjection(vcpu) ||
- vcpu->arch.exception.pending))
+ kvm_is_exception_pending(vcpu)))
return false;
if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
@@ -13401,7 +13615,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 1926d2cb8e79..829d3134c1eb 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -82,10 +82,18 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
int kvm_check_nested_events(struct kvm_vcpu *vcpu);
+static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.exception.pending ||
+ vcpu->arch.exception_vmexit.pending ||
+ kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+}
+
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
vcpu->arch.exception.pending = false;
vcpu->arch.exception.injected = false;
+ vcpu->arch.exception_vmexit.pending = false;
}
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
@@ -267,11 +275,6 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
return !(kvm->arch.disabled_quirks & quirk);
}
-static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
-{
- return is_smm(vcpu) || static_call(kvm_x86_apic_init_signal_blocked)(vcpu);
-}
-
void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
u64 get_kvmclock_ns(struct kvm *kvm);
@@ -286,7 +289,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu,
int handle_ud(struct kvm_vcpu *vcpu);
-void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu);
+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
+ struct kvm_queued_exception *ex);
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 280cb5dc7341..93c628d3e3a9 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1065,7 +1065,6 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
del_timer(&vcpu->arch.xen.poll_timer);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
vcpu->arch.xen.poll_evtchn = 0;