summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/hyp/nvhe
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-09-07 23:52:20 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-09-07 23:52:20 +0300
commit0c02183427b4d2002992f26d4917c1263c5d4a7f (patch)
tree426a0b282af3f309934cf0ff813b02c385e7ea04 /arch/arm64/kvm/hyp/nvhe
parent4a0fc73da97efd23a383ca839e6fe86410268f6b (diff)
parentd011151616e73de20c139580b73fa4c7042bd861 (diff)
downloadlinux-0c02183427b4d2002992f26d4917c1263c5d4a7f.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM: - Clean up vCPU targets, always returning generic v8 as the preferred target - Trap forwarding infrastructure for nested virtualization (used for traps that are taken from an L2 guest and are needed by the L1 hypervisor) - FEAT_TLBIRANGE support to only invalidate specific ranges of addresses when collapsing a table PTE to a block PTE. This avoids that the guest refills the TLBs again for addresses that aren't covered by the table PTE. - Fix vPMU issues related to handling of PMUver. - Don't unnecessary align non-stack allocations in the EL2 VA space - Drop HCR_VIRT_EXCP_MASK, which was never used... - Don't use smp_processor_id() in kvm_arch_vcpu_load(), but the cpu parameter instead - Drop redundant call to kvm_set_pfn_accessed() in user_mem_abort() - Remove prototypes without implementations RISC-V: - Zba, Zbs, Zicntr, Zicsr, Zifencei, and Zihpm support for guest - Added ONE_REG interface for SATP mode - Added ONE_REG interface to enable/disable multiple ISA extensions - Improved error codes returned by ONE_REG interfaces - Added KVM_GET_REG_LIST ioctl() implementation for KVM RISC-V - Added get-reg-list selftest for KVM RISC-V s390: - PV crypto passthrough enablement (Tony, Steffen, Viktor, Janosch) Allows a PV guest to use crypto cards. Card access is governed by the firmware and once a crypto queue is "bound" to a PV VM every other entity (PV or not) looses access until it is not bound anymore. Enablement is done via flags when creating the PV VM. - Guest debug fixes (Ilya) x86: - Clean up KVM's handling of Intel architectural events - Intel bugfixes - Add support for SEV-ES DebugSwap, allowing SEV-ES guests to use debug registers and generate/handle #DBs - Clean up LBR virtualization code - Fix a bug where KVM fails to set the target pCPU during an IRTE update - Fix fatal bugs in SEV-ES intrahost migration - Fix a bug where the recent (architecturally correct) change to reinject #BP and skip INT3 broke SEV guests (can't decode INT3 to skip it) - Retry APIC map recalculation if a vCPU is added/enabled - Overhaul emergency reboot code to bring SVM up to par with VMX, tie the "emergency disabling" behavior to KVM actually being loaded, and move all of the logic within KVM - Fix user triggerable WARNs in SVM where KVM incorrectly assumes the TSC ratio MSR cannot diverge from the default when TSC scaling is disabled up related code - Add a framework to allow "caching" feature flags so that KVM can check if the guest can use a feature without needing to search guest CPUID - Rip out the ancient MMU_DEBUG crud and replace the useful bits with CONFIG_KVM_PROVE_MMU - Fix KVM's handling of !visible guest roots to avoid premature triple fault injection - Overhaul KVM's page-track APIs, and KVMGT's usage, to reduce the API surface that is needed by external users (currently only KVMGT), and fix a variety of issues in the process Generic: - Wrap kvm_{gfn,hva}_range.pte in a union to allow mmu_notifier events to pass action specific data without needing to constantly update the main handlers. - Drop unused function declarations Selftests: - Add testcases to x86's sync_regs_test for detecting KVM TOCTOU bugs - Add support for printf() in guest code and covert all guest asserts to use printf-based reporting - Clean up the PMU event filter test and add new testcases - Include x86 selftests in the KVM x86 MAINTAINERS entry" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (279 commits) KVM: x86/mmu: Include mmu.h in spte.h KVM: x86/mmu: Use dummy root, backed by zero page, for !visible guest roots KVM: x86/mmu: Disallow guest from using !visible slots for page tables KVM: x86/mmu: Harden TDP MMU iteration against root w/o shadow page KVM: x86/mmu: Harden new PGD against roots without shadow pages KVM: x86/mmu: Add helper to convert root hpa to shadow page drm/i915/gvt: Drop final dependencies on KVM internal details KVM: x86/mmu: Handle KVM bookkeeping in page-track APIs, not callers KVM: x86/mmu: Drop @slot param from exported/external page-track APIs KVM: x86/mmu: Bug the VM if write-tracking is used but not enabled KVM: x86/mmu: Assert that correct locks are held for page write-tracking KVM: x86/mmu: Rename page-track APIs to reflect the new reality KVM: x86/mmu: Drop infrastructure for multiple page-track modes KVM: x86/mmu: Use page-track notifiers iff there are external users KVM: x86/mmu: Move KVM-only page-track declarations to internal header KVM: x86: Remove the unused page-track hook track_flush_slot() drm/i915/gvt: switch from ->track_flush_slot() to ->track_remove_region() KVM: x86: Add a new page-track hook to handle memslot deletion drm/i915/gvt: Don't bother removing write-protection on to-be-deleted slot KVM: x86: Reject memslot MOVE operations if KVMGT is attached ...
Diffstat (limited to 'arch/arm64/kvm/hyp/nvhe')
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c11
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c83
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c30
5 files changed, 109 insertions, 44 deletions
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index a169c619db60..857d9bc04fd4 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
}
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+ DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+ DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+ __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
+ HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_read_vmcr),
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 318298eb3d6b..65a7a186d7b2 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
return err;
}
+static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
+{
+ unsigned long cur;
+
+ hyp_assert_lock_held(&pkvm_pgd_lock);
+
+ if (!start || start < __io_map_base)
+ return -EINVAL;
+
+ /* The allocated size is always a multiple of PAGE_SIZE */
+ cur = start + PAGE_ALIGN(size);
+
+ /* Are we overflowing on the vmemmap ? */
+ if (cur > __hyp_vmemmap)
+ return -ENOMEM;
+
+ __io_map_base = cur;
+
+ return 0;
+}
+
/**
* pkvm_alloc_private_va_range - Allocates a private VA range.
* @size: The size of the VA range to reserve.
@@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
*/
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
- unsigned long base, addr;
- int ret = 0;
+ unsigned long addr;
+ int ret;
hyp_spin_lock(&pkvm_pgd_lock);
-
- /* Align the allocation based on the order of its size */
- addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
-
- /* The allocated size is always a multiple of PAGE_SIZE */
- base = addr + PAGE_ALIGN(size);
-
- /* Are we overflowing on the vmemmap ? */
- if (!addr || base > __hyp_vmemmap)
- ret = -ENOMEM;
- else {
- __io_map_base = base;
- *haddr = addr;
- }
-
+ addr = __io_map_base;
+ ret = __pkvm_alloc_private_va_range(addr, size);
hyp_spin_unlock(&pkvm_pgd_lock);
+ *haddr = addr;
+
return ret;
}
@@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
}
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
+{
+ unsigned long addr, prev_base;
+ size_t size;
+ int ret;
+
+ hyp_spin_lock(&pkvm_pgd_lock);
+
+ prev_base = __io_map_base;
+ /*
+ * Efficient stack verification using the PAGE_SHIFT bit implies
+ * an alignment of our allocation on the order of the size.
+ */
+ size = PAGE_SIZE * 2;
+ addr = ALIGN(__io_map_base, size);
+
+ ret = __pkvm_alloc_private_va_range(addr, size);
+ if (!ret) {
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
+ PAGE_SIZE, phys, PAGE_HYP);
+ if (ret)
+ __io_map_base = prev_base;
+ }
+ hyp_spin_unlock(&pkvm_pgd_lock);
+
+ *haddr = addr + size;
+
+ return ret;
+}
+
static void *admit_host_page(void *arg)
{
struct kvm_hyp_memcache *host_mc = arg;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index bb98630dfeaf..0d5e0a89ddce 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
for (i = 0; i < hyp_nr_cpus; i++) {
struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
- unsigned long hyp_addr;
start = (void *)kern_hyp_va(per_cpu_base[i]);
end = start + PAGE_ALIGN(hyp_percpu_size);
@@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
if (ret)
return ret;
- /*
- * Allocate a contiguous HYP private VA range for the stack
- * and guard page. The allocation is also aligned based on
- * the order of its size.
- */
- ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
if (ret)
return ret;
-
- /*
- * Since the stack grows downwards, map the stack to the page
- * at the higher address and leave the lower guard page
- * unbacked.
- *
- * Any valid stack address now has the PAGE_SHIFT bit as 1
- * and addresses corresponding to the guard page have the
- * PAGE_SHIFT bit as 0 - this is used for overflow detection.
- */
- hyp_spin_lock(&pkvm_pgd_lock);
- ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
- PAGE_SIZE, params->stack_pa, PAGE_HYP);
- hyp_spin_unlock(&pkvm_pgd_lock);
- if (ret)
- return ret;
-
- /* Update stack_hyp_va to end of the stack's private VA range */
- params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index e89a23153e85..c353a06ee7e6 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -236,7 +236,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
* KVM_ARM_VCPU_INIT, however, this is likely not possible for
* protected VMs.
*/
- vcpu->arch.target = -1;
+ vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index b9991bbd8e3f..1b265713d6be 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages)
+{
+ struct tlb_inv_context cxt;
+ unsigned long stride;
+
+ /*
+ * Since the range of addresses may not be mapped at
+ * the same level, assume the worst case as PAGE_SIZE
+ */
+ stride = PAGE_SIZE;
+ start = round_down(start, stride);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt, false);
+
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ /* See the comment in __kvm_tlb_flush_vmid_ipa() */
+ if (icache_is_vpipt())
+ icache_inval_all_pou();
+
+ __tlb_switch_to_host(&cxt);
+}
+
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;