summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-11-06 21:46:59 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-11-06 21:46:59 +0300
commit089d1c31224e6b266ece3ee555a3ea2c9acbe5c2 (patch)
treed66c1d1883293a70c747981790f10611d576938f
parent6e8c78d32b5c60737bcd71346130f0bf91d6e066 (diff)
parentf4298cac2bfcced49ab308756dc8fef684f3da81 (diff)
downloadlinux-089d1c31224e6b266ece3ee555a3ea2c9acbe5c2.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "ARM: - Fix the pKVM stage-1 walker erronously using the stage-2 accessor - Correctly convert vcpu->kvm to a hyp pointer when generating an exception in a nVHE+MTE configuration - Check that KVM_CAP_DIRTY_LOG_* are valid before enabling them - Fix SMPRI_EL1/TPIDR2_EL0 trapping on VHE - Document the boot requirements for FGT when entering the kernel at EL1 x86: - Use SRCU to protect zap in __kvm_set_or_clear_apicv_inhibit() - Make argument order consistent for kvcalloc() - Userspace API fixes for DEBUGCTL and LBRs" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: Fix a typo about the usage of kvcalloc() KVM: x86: Use SRCU to protect zap in __kvm_set_or_clear_apicv_inhibit() KVM: VMX: Ignore guest CPUID for host userspace writes to DEBUGCTL KVM: VMX: Fold vmx_supported_debugctl() into vcpu_supported_debugctl() KVM: VMX: Advertise PMU LBRs if and only if perf supports LBRs arm64: booting: Document our requirements for fine grained traps with SME KVM: arm64: Fix SMPRI_EL1/TPIDR2_EL0 trapping on VHE KVM: Check KVM_CAP_DIRTY_LOG_{RING, RING_ACQ_REL} prior to enabling them KVM: arm64: Fix bad dereference on MTE-enabled systems KVM: arm64: Use correct accessor to parse stage-1 PTEs
-rw-r--r--Documentation/arm64/booting.rst8
-rw-r--r--arch/arm64/kvm/hyp/exception.c3
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h20
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c26
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c8
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/vmx/capabilities.h19
-rw-r--r--arch/x86/kvm/vmx/vmx.c18
-rw-r--r--arch/x86/kvm/x86.c3
-rw-r--r--virt/kvm/kvm_main.c3
11 files changed, 52 insertions, 60 deletions
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 8aefa1001ae5..8c324ad638de 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -340,6 +340,14 @@ Before jumping into the kernel, the following conditions must be met:
- SMCR_EL2.LEN must be initialised to the same value for all CPUs the
kernel will execute on.
+ - HWFGRTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01.
+
+ - HWFGWTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01.
+
+ - HWFGRTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01.
+
+ - HWFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01.
+
For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64)
- If EL3 is present:
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index b7557b25ed56..791d3de76771 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -13,6 +13,7 @@
#include <hyp/adjust_pc.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
+#include <asm/kvm_mmu.h>
#if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__)
#error Hypervisor code only!
@@ -115,7 +116,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= (old & PSR_C_BIT);
new |= (old & PSR_V_BIT);
- if (kvm_has_mte(vcpu->kvm))
+ if (kvm_has_mte(kern_hyp_va(vcpu->kvm)))
new |= PSR_TCO_BIT;
new |= (old & PSR_DIT_BIT);
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 6cbbb6c02f66..3330d1b76bdd 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -87,6 +87,17 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+
+ if (cpus_have_final_cap(ARM64_SME)) {
+ sysreg_clear_set_s(SYS_HFGRTR_EL2,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK,
+ 0);
+ sysreg_clear_set_s(SYS_HFGWTR_EL2,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK,
+ 0);
+ }
}
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -96,6 +107,15 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(0, hstr_el2);
if (kvm_arm_support_pmu_v3())
write_sysreg(0, pmuserenr_el0);
+
+ if (cpus_have_final_cap(ARM64_SME)) {
+ sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK);
+ sysreg_clear_set_s(SYS_HFGWTR_EL2, 0,
+ HFGxTR_EL2_nSMPRI_EL1_MASK |
+ HFGxTR_EL2_nTPIDR2_EL0_MASK);
+ }
}
static inline void ___activate_traps(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 1e78acf9662e..07f9dc9848ef 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -516,7 +516,7 @@ static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
if (!kvm_pte_valid(pte))
return PKVM_NOPAGE;
- return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
+ return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
}
static int __hyp_check_page_state_range(u64 addr, u64 size,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 8e9d49a964be..c2cb46ca4fb6 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -55,18 +55,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(val, cptr_el2);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
- if (cpus_have_final_cap(ARM64_SME)) {
- val = read_sysreg_s(SYS_HFGRTR_EL2);
- val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK |
- HFGxTR_EL2_nSMPRI_EL1_MASK);
- write_sysreg_s(val, SYS_HFGRTR_EL2);
-
- val = read_sysreg_s(SYS_HFGWTR_EL2);
- val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK |
- HFGxTR_EL2_nSMPRI_EL1_MASK);
- write_sysreg_s(val, SYS_HFGWTR_EL2);
- }
-
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
@@ -110,20 +98,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
- if (cpus_have_final_cap(ARM64_SME)) {
- u64 val;
-
- val = read_sysreg_s(SYS_HFGRTR_EL2);
- val |= HFGxTR_EL2_nTPIDR2_EL0_MASK |
- HFGxTR_EL2_nSMPRI_EL1_MASK;
- write_sysreg_s(val, SYS_HFGRTR_EL2);
-
- val = read_sysreg_s(SYS_HFGWTR_EL2);
- val |= HFGxTR_EL2_nTPIDR2_EL0_MASK |
- HFGxTR_EL2_nSMPRI_EL1_MASK;
- write_sysreg_s(val, SYS_HFGWTR_EL2);
- }
-
cptr = CPTR_EL2_DEFAULT;
if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
cptr |= CPTR_EL2_TZ;
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 7acb87eaa092..1a97391fedd2 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -63,10 +63,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
__activate_traps_fpsimd32(vcpu);
}
- if (cpus_have_final_cap(ARM64_SME))
- write_sysreg(read_sysreg(sctlr_el2) & ~SCTLR_ELx_ENTP2,
- sctlr_el2);
-
write_sysreg(val, cpacr_el1);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1);
@@ -88,10 +84,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
*/
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
- if (cpus_have_final_cap(ARM64_SME))
- write_sysreg(read_sysreg(sctlr_el2) | SCTLR_ELx_ENTP2,
- sctlr_el2);
-
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
if (!arm64_kernel_unmapped_at_el0())
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0810e93cbedc..62bc7a01cecc 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1338,7 +1338,7 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
if (sanity_check_entries(entries, cpuid->nent, type))
return -EINVAL;
- array.entries = kvcalloc(sizeof(struct kvm_cpuid_entry2), cpuid->nent, GFP_KERNEL);
+ array.entries = kvcalloc(cpuid->nent, sizeof(struct kvm_cpuid_entry2), GFP_KERNEL);
if (!array.entries)
return -ENOMEM;
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 87c4e46daf37..07254314f3dd 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -24,8 +24,6 @@ extern int __read_mostly pt_mode;
#define PMU_CAP_FW_WRITES (1ULL << 13)
#define PMU_CAP_LBR_FMT 0x3f
-#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI)
-
struct nested_vmx_msrs {
/*
* We only store the "true" versions of the VMX capability MSRs. We
@@ -400,6 +398,7 @@ static inline bool vmx_pebs_supported(void)
static inline u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = PMU_CAP_FW_WRITES;
+ struct x86_pmu_lbr lbr;
u64 host_perf_cap = 0;
if (!enable_pmu)
@@ -408,7 +407,8 @@ static inline u64 vmx_get_perf_capabilities(void)
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
- perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ if (x86_perf_get_lbr(&lbr) >= 0 && lbr.nr)
+ perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
@@ -419,19 +419,6 @@ static inline u64 vmx_get_perf_capabilities(void)
return perf_cap;
}
-static inline u64 vmx_supported_debugctl(void)
-{
- u64 debugctl = 0;
-
- if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
- debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
-
- if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
- debugctl |= DEBUGCTLMSR_LBR_MASK;
-
- return debugctl;
-}
-
static inline bool cpu_has_notify_vmexit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 65f092e4a81b..63247c57c72c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2021,15 +2021,17 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
return (unsigned long)data;
}
-static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
+static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
{
- u64 debugctl = vmx_supported_debugctl();
+ u64 debugctl = 0;
- if (!intel_pmu_lbr_is_enabled(vcpu))
- debugctl &= ~DEBUGCTLMSR_LBR_MASK;
+ if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
+ (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
+ debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
- debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+ if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) &&
+ (host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
+ debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
return debugctl;
}
@@ -2103,7 +2105,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_DEBUGCTLMSR: {
- u64 invalid = data & ~vcpu_supported_debugctl(vcpu);
+ u64 invalid;
+
+ invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
if (report_ignored_msrs)
vcpu_unimpl(vcpu, "%s: BTF|LBR in IA32_DEBUGCTLMSR 0x%llx, nop\n",
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 521b433f978c..5f5eb577d583 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10404,7 +10404,10 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
kvm->arch.apicv_inhibit_reasons = new;
if (new) {
unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
+ int idx = srcu_read_lock(&kvm->srcu);
+
kvm_zap_gfn_range(kvm, gfn, gfn+1);
+ srcu_read_unlock(&kvm->srcu, idx);
}
} else {
kvm->arch.apicv_inhibit_reasons = new;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f1df24c2bc84..25d7872b29c1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4585,6 +4585,9 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
}
case KVM_CAP_DIRTY_LOG_RING:
case KVM_CAP_DIRTY_LOG_RING_ACQ_REL:
+ if (!kvm_vm_ioctl_check_extension_generic(kvm, cap->cap))
+ return -EINVAL;
+
return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
default:
return kvm_vm_ioctl_enable_cap(kvm, cap);