diff options
Diffstat (limited to 'arch/arm64/kvm/hyp/nvhe')
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/Makefile | 8 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/debug-sr.c | 32 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/ffa.c | 9 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/host.S | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-init.S | 14 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-main.c | 68 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mem_protect.c | 593 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mm.c | 97 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/pkvm.c | 126 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/setup.c | 27 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/switch.c | 108 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/sysreg-sr.c | 4 |
14 files changed, 786 insertions, 307 deletions
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index b43426a493df..0b0a68b663d4 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -12,7 +12,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__ ccflags-y += -fno-stack-protector \ -DDISABLE_BRANCH_PROFILING \ - $(DISABLE_STACKLEAK_PLUGIN) + $(DISABLE_KSTACK_ERASE) hostprogs := gen-hyprel HOST_EXTRACFLAGS += -I$(objtree)/include @@ -99,3 +99,9 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAG # causes a build failure. Remove profile optimization flags. KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS)) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables + +ifeq ($(CONFIG_UBSAN_KVM_EL2),y) +UBSAN_SANITIZE := y +# Always use brk and not hooks +ccflags-y += $(CFLAGS_UBSAN_TRAP) +endif diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 2f4a4f5036bb..2a1c0f49792b 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -92,12 +92,42 @@ static void __trace_switch_to_host(void) *host_data_ptr(host_debug_state.trfcr_el1)); } +static void __debug_save_brbe(u64 *brbcr_el1) +{ + *brbcr_el1 = 0; + + /* Check if the BRBE is enabled */ + if (!(read_sysreg_el1(SYS_BRBCR) & (BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE))) + return; + + /* + * Prohibit branch record generation while we are in guest. + * Since access to BRBCR_EL1 is trapped, the guest can't + * modify the filtering set by the host. + */ + *brbcr_el1 = read_sysreg_el1(SYS_BRBCR); + write_sysreg_el1(0, SYS_BRBCR); +} + +static void __debug_restore_brbe(u64 brbcr_el1) +{ + if (!brbcr_el1) + return; + + /* Restore BRBE controls */ + write_sysreg_el1(brbcr_el1, SYS_BRBCR); +} + void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ if (host_data_test_flag(HAS_SPE)) __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); + /* Disable BRBE branch records */ + if (host_data_test_flag(HAS_BRBE)) + __debug_save_brbe(host_data_ptr(host_debug_state.brbcr_el1)); + if (__trace_needs_switch()) __trace_switch_to_guest(); } @@ -111,6 +141,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { if (host_data_test_flag(HAS_SPE)) __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); + if (host_data_test_flag(HAS_BRBE)) + __debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1)); if (__trace_needs_switch()) __trace_switch_to_host(); } diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index e433dfab882a..3369dd0c4009 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res, hyp_ffa_version = ffa_req_version; } - if (hyp_ffa_post_init()) + if (hyp_ffa_post_init()) { res->a0 = FFA_RET_NOT_SUPPORTED; - else { - has_version_negotiated = true; + } else { + smp_store_release(&has_version_negotiated, true); res->a0 = hyp_ffa_version; } unlock: @@ -809,7 +809,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) if (!is_ffa_call(func_id)) return false; - if (!has_version_negotiated && func_id != FFA_VERSION) { + if (func_id != FFA_VERSION && + !smp_load_acquire(&has_version_negotiated)) { ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS); goto out_handled; } diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 58f0cb2298cc..eef15b374abb 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -124,7 +124,7 @@ SYM_FUNC_START(__hyp_do_panic) /* Ensure host stage-2 is disabled */ mrs x0, hcr_el2 bic x0, x0, #HCR_VM - msr hcr_el2, x0 + msr_hcr_el2 x0 isb tlbi vmalls12e1 dsb nsh diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index fc1866226067..aada42522e7b 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -73,8 +73,12 @@ __do_hyp_init: eret SYM_CODE_END(__kvm_hyp_init) +/* + * Initialize EL2 CPU state to sane values. + * + * HCR_EL2.E2H must have been initialized already. + */ SYM_CODE_START_LOCAL(__kvm_init_el2_state) - /* Initialize EL2 CPU state to sane values. */ init_el2_state // Clobbers x0..x2 finalise_el2_state ret @@ -96,7 +100,7 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init) msr mair_el2, x1 ldr x1, [x0, #NVHE_INIT_HCR_EL2] - msr hcr_el2, x1 + msr_hcr_el2 x1 mov x2, #HCR_E2H and x2, x1, x2 @@ -206,9 +210,9 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) 2: msr SPsel, #1 // We want to use SP_EL{1,2} - bl __kvm_init_el2_state + init_el2_hcr 0 - __init_el2_nvhe_prepare_eret + bl __kvm_init_el2_state /* Enable MMU, set vectors and stack. */ mov x0, x28 @@ -258,7 +262,7 @@ reset: alternative_if ARM64_KVM_PROTECTED_MODE mov_q x5, HCR_HOST_NVHE_FLAGS - msr hcr_el2, x5 + msr_hcr_el2 x5 alternative_else_nop_endif /* Install stub vectors */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 5c134520e180..3206b2c07f82 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -5,6 +5,7 @@ */ #include <hyp/adjust_pc.h> +#include <hyp/switch.h> #include <asm/pgtable-types.h> #include <asm/kvm_asm.h> @@ -25,7 +26,7 @@ void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) { - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + __vcpu_assign_sys_reg(vcpu, ZCR_EL1, read_sysreg_el1(SYS_ZCR)); /* * On saving/restoring guest sve state, always use the maximum VL for * the guest. The layout of the data when saving the sve state depends @@ -68,7 +69,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (!guest_owns_fp_regs()) return; - cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); + /* + * Traps have been disabled by __deactivate_cptr_traps(), but there + * hasn't necessarily been a context synchronization event yet. + */ isb(); if (vcpu_has_sve(vcpu)) @@ -78,12 +82,12 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm)); if (has_fpmr) - __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR); + __vcpu_assign_sys_reg(vcpu, FPMR, read_sysreg_s(SYS_FPMR)); if (system_supports_sve()) __hyp_sve_restore_host(); else - __fpsimd_restore_state(*host_data_ptr(fpsimd_state)); + __fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs)); if (has_fpmr) write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR); @@ -91,18 +95,37 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; } +static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state; +} + +static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state; +} + static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) { struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; fpsimd_sve_flush(); + flush_debug_state(hyp_vcpu); hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt; - hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state); - /* Limit guest vector length to the maximum supported by the host. */ - hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl); - hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE); hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) & @@ -123,6 +146,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) unsigned int i; fpsimd_sve_sync(&hyp_vcpu->vcpu); + sync_debug_state(hyp_vcpu); host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt; @@ -200,8 +224,12 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) sync_hyp_vcpu(hyp_vcpu); } else { + struct kvm_vcpu *vcpu = kern_hyp_va(host_vcpu); + /* The host is fully trusted, run its vCPU directly. */ - ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu)); + fpsimd_lazy_switch_to_guest(vcpu); + ret = __kvm_vcpu_run(vcpu); + fpsimd_lazy_switch_to_host(vcpu); } out: cpu_reg(host_ctxt, 1) = ret; @@ -220,7 +248,8 @@ static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(u64, pfn, host_ctxt, 1); DECLARE_REG(u64, gfn, host_ctxt, 2); - DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3); + DECLARE_REG(u64, nr_pages, host_ctxt, 3); + DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4); struct pkvm_hyp_vcpu *hyp_vcpu; int ret = -EINVAL; @@ -235,7 +264,7 @@ static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt) if (ret) goto out; - ret = __pkvm_host_share_guest(pfn, gfn, hyp_vcpu, prot); + ret = __pkvm_host_share_guest(pfn, gfn, nr_pages, hyp_vcpu, prot); out: cpu_reg(host_ctxt, 1) = ret; } @@ -244,6 +273,7 @@ static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); DECLARE_REG(u64, gfn, host_ctxt, 2); + DECLARE_REG(u64, nr_pages, host_ctxt, 3); struct pkvm_hyp_vm *hyp_vm; int ret = -EINVAL; @@ -254,7 +284,7 @@ static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt) if (!hyp_vm) goto out; - ret = __pkvm_host_unshare_guest(gfn, hyp_vm); + ret = __pkvm_host_unshare_guest(gfn, nr_pages, hyp_vm); put_pkvm_hyp_vm(hyp_vm); out: cpu_reg(host_ctxt, 1) = ret; @@ -283,6 +313,7 @@ static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt { DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); DECLARE_REG(u64, gfn, host_ctxt, 2); + DECLARE_REG(u64, nr_pages, host_ctxt, 3); struct pkvm_hyp_vm *hyp_vm; int ret = -EINVAL; @@ -293,7 +324,7 @@ static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt if (!hyp_vm) goto out; - ret = __pkvm_host_wrprotect_guest(gfn, hyp_vm); + ret = __pkvm_host_wrprotect_guest(gfn, nr_pages, hyp_vm); put_pkvm_hyp_vm(hyp_vm); out: cpu_reg(host_ctxt, 1) = ret; @@ -303,7 +334,8 @@ static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *ho { DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); DECLARE_REG(u64, gfn, host_ctxt, 2); - DECLARE_REG(bool, mkold, host_ctxt, 3); + DECLARE_REG(u64, nr_pages, host_ctxt, 3); + DECLARE_REG(bool, mkold, host_ctxt, 4); struct pkvm_hyp_vm *hyp_vm; int ret = -EINVAL; @@ -314,7 +346,7 @@ static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *ho if (!hyp_vm) goto out; - ret = __pkvm_host_test_clear_young_guest(gfn, mkold, hyp_vm); + ret = __pkvm_host_test_clear_young_guest(gfn, nr_pages, mkold, hyp_vm); put_pkvm_hyp_vm(hyp_vm); out: cpu_reg(host_ctxt, 1) = ret; @@ -651,12 +683,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_SMC64: handle_host_smc(host_ctxt); break; - case ESR_ELx_EC_SVE: - cpacr_clear_set(0, CPACR_EL1_ZEN); - isb(); - sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, - SYS_ZCR_EL2); - break; case ESR_ELx_EC_IABT_LOW: case ESR_ELx_EC_DABT_LOW: handle_host_mem_abort(host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index f4562f417d3f..d724f6d69302 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -25,5 +25,7 @@ SECTIONS { BEGIN_HYP_SECTION(.data..percpu) PERCPU_INPUT(L1_CACHE_BYTES) END_HYP_SECTION + HYP_SECTION(.bss) + HYP_SECTION(.data) } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 7ad7b133b81a..8957734d6183 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -60,6 +60,11 @@ static void hyp_unlock_component(void) hyp_spin_unlock(&pkvm_pgd_lock); } +#define for_each_hyp_page(__p, __st, __sz) \ + for (struct hyp_page *__p = hyp_phys_to_page(__st), \ + *__e = __p + ((__sz) >> PAGE_SHIFT); \ + __p < __e; __p++) + static void *host_s2_zalloc_pages_exact(size_t size) { void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size)); @@ -161,12 +166,6 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) return 0; } -static bool guest_stage2_force_pte_cb(u64 addr, u64 end, - enum kvm_pgtable_prot prot) -{ - return true; -} - static void *guest_s2_zalloc_pages_exact(size_t size) { void *addr = hyp_alloc_pages(¤t_vm->pool, get_order(size)); @@ -217,16 +216,42 @@ static void guest_s2_put_page(void *addr) hyp_put_page(¤t_vm->pool, addr); } +static void __apply_guest_page(void *va, size_t size, + void (*func)(void *addr, size_t size)) +{ + size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE); + va = PTR_ALIGN_DOWN(va, PAGE_SIZE); + size = PAGE_ALIGN(size); + + while (size) { + size_t map_size = PAGE_SIZE; + void *map; + + if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE) + map = hyp_fixblock_map(__hyp_pa(va), &map_size); + else + map = hyp_fixmap_map(__hyp_pa(va)); + + func(map, map_size); + + if (map_size == PMD_SIZE) + hyp_fixblock_unmap(); + else + hyp_fixmap_unmap(); + + size -= map_size; + va += map_size; + } +} + static void clean_dcache_guest_page(void *va, size_t size) { - __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); - hyp_fixmap_unmap(); + __apply_guest_page(va, size, __clean_dcache_guest_page); } static void invalidate_icache_guest_page(void *va, size_t size) { - __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); - hyp_fixmap_unmap(); + __apply_guest_page(va, size, __invalidate_icache_guest_page); } int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) @@ -255,8 +280,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) }; guest_lock_component(vm); - ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, - guest_stage2_force_pte_cb); + ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, NULL); guest_unlock_component(vm); if (ret) return ret; @@ -266,7 +290,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) return 0; } -void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) +void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) { struct hyp_page *page; void *addr; @@ -309,7 +333,7 @@ int __pkvm_prot_finalize(void) */ kvm_flush_dcache_to_poc(params, sizeof(*params)); - write_sysreg(params->hcr_el2, hcr_el2); + write_sysreg_hcr(params->hcr_el2); __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); /* @@ -455,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; + u64 granule; s8 level; int ret; @@ -467,22 +492,26 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return -EAGAIN; if (pte) { - WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE); + WARN_ON(addr_is_memory(addr) && + get_host_state(hyp_phys_to_page(addr)) != PKVM_NOPAGE); return -EPERM; } - do { - u64 granule = kvm_granule_size(level); + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) { + if (!kvm_level_supports_block_mapping(level)) + continue; + granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - level++; - } while ((level <= KVM_PGTABLE_LAST_LEVEL) && - !(kvm_level_supports_block_mapping(level) && - range_included(&cur, range))); + if (!range_included(&cur, range)) + continue; + *range = cur; + return 0; + } - *range = cur; + WARN_ON(1); - return 0; + return -EINVAL; } int host_stage2_idmap_locked(phys_addr_t addr, u64 size, @@ -493,17 +522,15 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size, static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state) { - phys_addr_t end = addr + size; - - for (; addr < end; addr += PAGE_SIZE) - hyp_phys_to_page(addr)->host_state = state; + for_each_hyp_page(page, addr, size) + set_host_state(page, state); } int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { int ret; - if (!addr_is_memory(addr)) + if (!range_is_memory(addr, addr + size)) return -EPERM; ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, @@ -578,7 +605,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) return; } - addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; + + /* + * Yikes, we couldn't resolve the fault IPA. This should reinject an + * abort into the host when we figure out how to do that. + */ + BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS)); + addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; + ret = host_stage2_idmap(addr); BUG_ON(ret && ret != -EAGAIN); } @@ -611,16 +645,16 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, static int __host_check_page_state_range(u64 addr, u64 size, enum pkvm_page_state state) { - u64 end = addr + size; int ret; - ret = check_range_allowed_memory(addr, end); + ret = check_range_allowed_memory(addr, addr + size); if (ret) return ret; hyp_assert_lock_held(&host_mmu.lock); - for (; addr < end; addr += PAGE_SIZE) { - if (hyp_phys_to_page(addr)->host_state != state) + + for_each_hyp_page(page, addr, size) { + if (get_host_state(page) != state) return -EPERM; } @@ -630,7 +664,7 @@ static int __host_check_page_state_range(u64 addr, u64 size, static int __host_set_page_state_range(u64 addr, u64 size, enum pkvm_page_state state) { - if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) { + if (get_host_state(hyp_phys_to_page(addr)) == PKVM_NOPAGE) { int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT); if (ret) @@ -642,24 +676,20 @@ static int __host_set_page_state_range(u64 addr, u64 size, return 0; } -static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr) +static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state) { - if (!kvm_pte_valid(pte)) - return PKVM_NOPAGE; - - return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); + for_each_hyp_page(page, phys, size) + set_hyp_state(page, state); } -static int __hyp_check_page_state_range(u64 addr, u64 size, - enum pkvm_page_state state) +static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state) { - struct check_walk_data d = { - .desired = state, - .get_page_state = hyp_get_page_state, - }; + for_each_hyp_page(page, phys, size) { + if (get_hyp_state(page) != state) + return -EPERM; + } - hyp_assert_lock_held(&pkvm_pgd_lock); - return check_page_state_range(&pkvm_pgtable, addr, size, &d); + return 0; } static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr) @@ -670,10 +700,9 @@ static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr) return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); } -static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr, +static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr, u64 size, enum pkvm_page_state state) { - struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); struct check_walk_data d = { .desired = state, .get_page_state = guest_get_page_state, @@ -686,8 +715,6 @@ static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr, int __pkvm_host_share_hyp(u64 pfn) { u64 phys = hyp_pfn_to_phys(pfn); - void *virt = __hyp_va(phys); - enum kvm_pgtable_prot prot; u64 size = PAGE_SIZE; int ret; @@ -697,14 +724,11 @@ int __pkvm_host_share_hyp(u64 pfn) ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); if (ret) goto unlock; - if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { - ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE); - if (ret) - goto unlock; - } + ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE); + if (ret) + goto unlock; - prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); - WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot)); + __hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED); WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED)); unlock: @@ -727,7 +751,7 @@ int __pkvm_host_unshare_hyp(u64 pfn) ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); if (ret) goto unlock; - ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED); + ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED); if (ret) goto unlock; if (hyp_page_count((void *)virt)) { @@ -735,7 +759,7 @@ int __pkvm_host_unshare_hyp(u64 pfn) goto unlock; } - WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size); + __hyp_set_page_state_range(phys, size, PKVM_NOPAGE); WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED)); unlock: @@ -750,7 +774,6 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) u64 phys = hyp_pfn_to_phys(pfn); u64 size = PAGE_SIZE * nr_pages; void *virt = __hyp_va(phys); - enum kvm_pgtable_prot prot; int ret; host_lock_component(); @@ -759,14 +782,12 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); if (ret) goto unlock; - if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { - ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE); - if (ret) - goto unlock; - } + ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE); + if (ret) + goto unlock; - prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); - WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot)); + __hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED); + WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP)); WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP)); unlock: @@ -786,15 +807,14 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) host_lock_component(); hyp_lock_component(); - ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED); + ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED); + if (ret) + goto unlock; + ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE); if (ret) goto unlock; - if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { - ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE); - if (ret) - goto unlock; - } + __hyp_set_page_state_range(phys, size, PKVM_NOPAGE); WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size); WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST)); @@ -809,24 +829,30 @@ int hyp_pin_shared_mem(void *from, void *to) { u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); u64 end = PAGE_ALIGN((u64)to); + u64 phys = __hyp_pa(start); u64 size = end - start; + struct hyp_page *p; int ret; host_lock_component(); hyp_lock_component(); - ret = __host_check_page_state_range(__hyp_pa(start), size, - PKVM_PAGE_SHARED_OWNED); + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); if (ret) goto unlock; - ret = __hyp_check_page_state_range(start, size, - PKVM_PAGE_SHARED_BORROWED); + ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED); if (ret) goto unlock; - for (cur = start; cur < end; cur += PAGE_SIZE) - hyp_page_ref_inc(hyp_virt_to_page(cur)); + for (cur = start; cur < end; cur += PAGE_SIZE) { + p = hyp_virt_to_page(cur); + hyp_page_ref_inc(p); + if (p->refcount == 1) + WARN_ON(pkvm_create_mappings_locked((void *)cur, + (void *)cur + PAGE_SIZE, + PAGE_HYP)); + } unlock: hyp_unlock_component(); @@ -839,12 +865,17 @@ void hyp_unpin_shared_mem(void *from, void *to) { u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); u64 end = PAGE_ALIGN((u64)to); + struct hyp_page *p; host_lock_component(); hyp_lock_component(); - for (cur = start; cur < end; cur += PAGE_SIZE) - hyp_page_ref_dec(hyp_virt_to_page(cur)); + for (cur = start; cur < end; cur += PAGE_SIZE) { + p = hyp_virt_to_page(cur); + if (p->refcount == 1) + WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE); + hyp_page_ref_dec(p); + } hyp_unlock_component(); host_unlock_component(); @@ -880,49 +911,84 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) return ret; } -int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu, +static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size) +{ + size_t block_size; + + if (nr_pages == 1) { + *size = PAGE_SIZE; + return 0; + } + + /* We solely support second to last level huge mapping */ + block_size = kvm_granule_size(KVM_PGTABLE_LAST_LEVEL - 1); + + if (nr_pages != block_size >> PAGE_SHIFT) + return -EINVAL; + + if (!IS_ALIGNED(phys | ipa, block_size)) + return -EINVAL; + + *size = block_size; + return 0; +} + +int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) { struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); u64 phys = hyp_pfn_to_phys(pfn); u64 ipa = hyp_pfn_to_phys(gfn); - struct hyp_page *page; + u64 size; int ret; if (prot & ~KVM_PGTABLE_PROT_RWX) return -EINVAL; - ret = check_range_allowed_memory(phys, phys + PAGE_SIZE); + ret = __guest_check_transition_size(phys, ipa, nr_pages, &size); + if (ret) + return ret; + + ret = check_range_allowed_memory(phys, phys + size); if (ret) return ret; host_lock_component(); guest_lock_component(vm); - ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE); + ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE); if (ret) goto unlock; - page = hyp_phys_to_page(phys); - switch (page->host_state) { - case PKVM_PAGE_OWNED: - WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED)); - break; - case PKVM_PAGE_SHARED_OWNED: - if (page->host_share_guest_count) - break; - /* Only host to np-guest multi-sharing is tolerated */ - WARN_ON(1); - fallthrough; - default: - ret = -EPERM; - goto unlock; + for_each_hyp_page(page, phys, size) { + switch (get_host_state(page)) { + case PKVM_PAGE_OWNED: + continue; + case PKVM_PAGE_SHARED_OWNED: + if (page->host_share_guest_count == U32_MAX) { + ret = -EBUSY; + goto unlock; + } + + /* Only host to np-guest multi-sharing is tolerated */ + if (page->host_share_guest_count) + continue; + + fallthrough; + default: + ret = -EPERM; + goto unlock; + } + } + + for_each_hyp_page(page, phys, size) { + set_host_state(page, PKVM_PAGE_SHARED_OWNED); + page->host_share_guest_count++; } - WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys, + WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED), &vcpu->vcpu.arch.pkvm_memcache, 0)); - page->host_share_guest_count++; unlock: guest_unlock_component(vm); @@ -931,10 +997,9 @@ unlock: return ret; } -static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa) +static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, u64 size) { enum pkvm_page_state state; - struct hyp_page *page; kvm_pte_t pte; u64 phys; s8 level; @@ -943,53 +1008,59 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); if (ret) return ret; - if (level != KVM_PGTABLE_LAST_LEVEL) - return -E2BIG; if (!kvm_pte_valid(pte)) return -ENOENT; + if (kvm_granule_size(level) != size) + return -E2BIG; state = guest_get_page_state(pte, ipa); if (state != PKVM_PAGE_SHARED_BORROWED) return -EPERM; phys = kvm_pte_to_phys(pte); - ret = check_range_allowed_memory(phys, phys + PAGE_SIZE); + ret = check_range_allowed_memory(phys, phys + size); if (WARN_ON(ret)) return ret; - page = hyp_phys_to_page(phys); - if (page->host_state != PKVM_PAGE_SHARED_OWNED) - return -EPERM; - if (WARN_ON(!page->host_share_guest_count)) - return -EINVAL; + for_each_hyp_page(page, phys, size) { + if (get_host_state(page) != PKVM_PAGE_SHARED_OWNED) + return -EPERM; + if (WARN_ON(!page->host_share_guest_count)) + return -EINVAL; + } *__phys = phys; return 0; } -int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm) +int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm) { u64 ipa = hyp_pfn_to_phys(gfn); - struct hyp_page *page; - u64 phys; + u64 size, phys; int ret; + ret = __guest_check_transition_size(0, ipa, nr_pages, &size); + if (ret) + return ret; + host_lock_component(); guest_lock_component(vm); - ret = __check_host_shared_guest(vm, &phys, ipa); + ret = __check_host_shared_guest(vm, &phys, ipa, size); if (ret) goto unlock; - ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE); + ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size); if (ret) goto unlock; - page = hyp_phys_to_page(phys); - page->host_share_guest_count--; - if (!page->host_share_guest_count) - WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED)); + for_each_hyp_page(page, phys, size) { + /* __check_host_shared_guest() protects against underflow */ + page->host_share_guest_count--; + if (!page->host_share_guest_count) + set_host_state(page, PKVM_PAGE_OWNED); + } unlock: guest_unlock_component(vm); @@ -998,63 +1069,81 @@ unlock: return ret; } -int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) +static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa, u64 size) { - struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); - u64 ipa = hyp_pfn_to_phys(gfn); u64 phys; int ret; - if (prot & ~KVM_PGTABLE_PROT_RWX) - return -EINVAL; + if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) + return; host_lock_component(); guest_lock_component(vm); - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); + ret = __check_host_shared_guest(vm, &phys, ipa, size); guest_unlock_component(vm); host_unlock_component(); - return ret; + WARN_ON(ret && ret != -ENOENT); } -int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm) +int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) { + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); u64 ipa = hyp_pfn_to_phys(gfn); - u64 phys; int ret; - host_lock_component(); - guest_lock_component(vm); + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE); + if (prot & ~KVM_PGTABLE_PROT_RWX) + return -EINVAL; + assert_host_shared_guest(vm, ipa, PAGE_SIZE); + guest_lock_component(vm); + ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); guest_unlock_component(vm); - host_unlock_component(); return ret; } -int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm) +int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm) { - u64 ipa = hyp_pfn_to_phys(gfn); - u64 phys; + u64 size, ipa = hyp_pfn_to_phys(gfn); int ret; - host_lock_component(); + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; + + ret = __guest_check_transition_size(0, ipa, nr_pages, &size); + if (ret) + return ret; + + assert_host_shared_guest(vm, ipa, size); guest_lock_component(vm); + ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size); + guest_unlock_component(vm); - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold); + return ret; +} + +int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm) +{ + u64 size, ipa = hyp_pfn_to_phys(gfn); + int ret; + + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; + ret = __guest_check_transition_size(0, ipa, nr_pages, &size); + if (ret) + return ret; + + assert_host_shared_guest(vm, ipa, size); + guest_lock_component(vm); + ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold); guest_unlock_component(vm); - host_unlock_component(); return ret; } @@ -1063,18 +1152,214 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) { struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); u64 ipa = hyp_pfn_to_phys(gfn); - u64 phys; - int ret; - host_lock_component(); + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; + + assert_host_shared_guest(vm, ipa, PAGE_SIZE); guest_lock_component(vm); + kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); + guest_unlock_component(vm); - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); + return 0; +} - guest_unlock_component(vm); +#ifdef CONFIG_NVHE_EL2_DEBUG +struct pkvm_expected_state { + enum pkvm_page_state host; + enum pkvm_page_state hyp; + enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */ +}; + +static struct pkvm_expected_state selftest_state; +static struct hyp_page *selftest_page; + +static struct pkvm_hyp_vm selftest_vm = { + .kvm = { + .arch = { + .mmu = { + .arch = &selftest_vm.kvm.arch, + .pgt = &selftest_vm.pgt, + }, + }, + }, +}; + +static struct pkvm_hyp_vcpu selftest_vcpu = { + .vcpu = { + .arch = { + .hw_mmu = &selftest_vm.kvm.arch.mmu, + }, + .kvm = &selftest_vm.kvm, + }, +}; + +static void init_selftest_vm(void *virt) +{ + struct hyp_page *p = hyp_virt_to_page(virt); + int i; + + selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; + WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt)); + + for (i = 0; i < pkvm_selftest_pages(); i++) { + if (p[i].refcount) + continue; + p[i].refcount = 1; + hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i])); + } +} + +static u64 selftest_ipa(void) +{ + return BIT(selftest_vm.pgt.ia_bits - 1); +} + +static void assert_page_state(void) +{ + void *virt = hyp_page_to_virt(selftest_page); + u64 size = PAGE_SIZE << selftest_page->order; + struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu; + u64 phys = hyp_virt_to_phys(virt); + u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE }; + struct pkvm_hyp_vm *vm; + + vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); + + host_lock_component(); + WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host)); host_unlock_component(); - return ret; + hyp_lock_component(); + WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp)); + hyp_unlock_component(); + + guest_lock_component(&selftest_vm); + WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0])); + WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1])); + guest_unlock_component(&selftest_vm); +} + +#define assert_transition_res(res, fn, ...) \ + do { \ + WARN_ON(fn(__VA_ARGS__) != res); \ + assert_page_state(); \ + } while (0) + +void pkvm_ownership_selftest(void *base) +{ + enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX; + void *virt = hyp_alloc_pages(&host_s2_pool, 0); + struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu; + struct pkvm_hyp_vm *vm = &selftest_vm; + u64 phys, size, pfn, gfn; + + WARN_ON(!virt); + selftest_page = hyp_virt_to_page(virt); + selftest_page->refcount = 0; + init_selftest_vm(base); + + size = PAGE_SIZE << selftest_page->order; + phys = hyp_virt_to_phys(virt); + pfn = hyp_phys_to_pfn(phys); + gfn = hyp_phys_to_pfn(selftest_ipa()); + + selftest_state.host = PKVM_NOPAGE; + selftest_state.hyp = PKVM_PAGE_OWNED; + selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE; + assert_page_state(); + assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1); + assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size); + assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); + assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); + + selftest_state.host = PKVM_PAGE_OWNED; + selftest_state.hyp = PKVM_NOPAGE; + assert_transition_res(0, __pkvm_hyp_donate_host, pfn, 1); + assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1); + assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); + assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size); + + selftest_state.host = PKVM_PAGE_SHARED_OWNED; + selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED; + assert_transition_res(0, __pkvm_host_share_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); + assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); + assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); + + assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size); + assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size); + hyp_unpin_shared_mem(virt, virt + size); + WARN_ON(hyp_page_count(virt) != 1); + assert_transition_res(-EBUSY, __pkvm_host_unshare_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); + assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); + assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); + + hyp_unpin_shared_mem(virt, virt + size); + assert_page_state(); + WARN_ON(hyp_page_count(virt)); + + selftest_state.host = PKVM_PAGE_OWNED; + selftest_state.hyp = PKVM_NOPAGE; + assert_transition_res(0, __pkvm_host_unshare_hyp, pfn); + + selftest_state.host = PKVM_PAGE_SHARED_OWNED; + selftest_state.hyp = PKVM_NOPAGE; + assert_transition_res(0, __pkvm_host_share_ffa, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); + assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm); + assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size); + + selftest_state.host = PKVM_PAGE_OWNED; + selftest_state.hyp = PKVM_NOPAGE; + assert_transition_res(0, __pkvm_host_unshare_ffa, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1); + + selftest_state.host = PKVM_PAGE_SHARED_OWNED; + selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED; + assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); + assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot); + assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1); + assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn); + assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1); + assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size); + + selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED; + assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot); + WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2); + + selftest_state.guest[0] = PKVM_NOPAGE; + assert_transition_res(0, __pkvm_host_unshare_guest, gfn, 1, vm); + + selftest_state.guest[1] = PKVM_NOPAGE; + selftest_state.host = PKVM_PAGE_OWNED; + assert_transition_res(0, __pkvm_host_unshare_guest, gfn + 1, 1, vm); + + selftest_state.host = PKVM_NOPAGE; + selftest_state.hyp = PKVM_PAGE_OWNED; + assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1); + + selftest_page->refcount = 1; + hyp_put_page(&host_s2_pool, virt); } +#endif diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index f41c7440b34b..ae8391baebc3 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -229,9 +229,8 @@ int hyp_map_vectors(void) return 0; } -void *hyp_fixmap_map(phys_addr_t phys) +static void *fixmap_map_slot(struct hyp_fixmap_slot *slot, phys_addr_t phys) { - struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots); kvm_pte_t pte, *ptep = slot->ptep; pte = *ptep; @@ -243,10 +242,21 @@ void *hyp_fixmap_map(phys_addr_t phys) return (void *)slot->addr; } +void *hyp_fixmap_map(phys_addr_t phys) +{ + return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys); +} + static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) { kvm_pte_t *ptep = slot->ptep; u64 addr = slot->addr; + u32 level; + + if (FIELD_GET(KVM_PTE_TYPE, *ptep) == KVM_PTE_TYPE_PAGE) + level = KVM_PGTABLE_LAST_LEVEL; + else + level = KVM_PGTABLE_LAST_LEVEL - 1; /* create_fixblock() guarantees PMD level */ WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID); @@ -260,7 +270,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03 */ dsb(ishst); - __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); dsb(ish); isb(); } @@ -273,9 +283,9 @@ void hyp_fixmap_unmap(void) static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { - struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg); + struct hyp_fixmap_slot *slot = (struct hyp_fixmap_slot *)ctx->arg; - if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL) + if (!kvm_pte_valid(ctx->old) || (ctx->end - ctx->start) != kvm_granule_size(ctx->level)) return -EINVAL; slot->addr = ctx->addr; @@ -296,13 +306,84 @@ static int create_fixmap_slot(u64 addr, u64 cpu) struct kvm_pgtable_walker walker = { .cb = __create_fixmap_slot_cb, .flags = KVM_PGTABLE_WALK_LEAF, - .arg = (void *)cpu, + .arg = per_cpu_ptr(&fixmap_slots, cpu), }; return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker); } -int hyp_create_pcpu_fixmap(void) +#if PAGE_SHIFT < 16 +#define HAS_FIXBLOCK +static struct hyp_fixmap_slot hyp_fixblock_slot; +static DEFINE_HYP_SPINLOCK(hyp_fixblock_lock); +#endif + +static int create_fixblock(void) +{ +#ifdef HAS_FIXBLOCK + struct kvm_pgtable_walker walker = { + .cb = __create_fixmap_slot_cb, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = &hyp_fixblock_slot, + }; + unsigned long addr; + phys_addr_t phys; + int ret, i; + + /* Find a RAM phys address, PMD aligned */ + for (i = 0; i < hyp_memblock_nr; i++) { + phys = ALIGN(hyp_memory[i].base, PMD_SIZE); + if (phys + PMD_SIZE < (hyp_memory[i].base + hyp_memory[i].size)) + break; + } + + if (i >= hyp_memblock_nr) + return -EINVAL; + + hyp_spin_lock(&pkvm_pgd_lock); + addr = ALIGN(__io_map_base, PMD_SIZE); + ret = __pkvm_alloc_private_va_range(addr, PMD_SIZE); + if (ret) + goto unlock; + + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PMD_SIZE, phys, PAGE_HYP); + if (ret) + goto unlock; + + ret = kvm_pgtable_walk(&pkvm_pgtable, addr, PMD_SIZE, &walker); + +unlock: + hyp_spin_unlock(&pkvm_pgd_lock); + + return ret; +#else + return 0; +#endif +} + +void *hyp_fixblock_map(phys_addr_t phys, size_t *size) +{ +#ifdef HAS_FIXBLOCK + *size = PMD_SIZE; + hyp_spin_lock(&hyp_fixblock_lock); + return fixmap_map_slot(&hyp_fixblock_slot, phys); +#else + *size = PAGE_SIZE; + return hyp_fixmap_map(phys); +#endif +} + +void hyp_fixblock_unmap(void) +{ +#ifdef HAS_FIXBLOCK + fixmap_clear_slot(&hyp_fixblock_slot); + hyp_spin_unlock(&hyp_fixblock_lock); +#else + hyp_fixmap_unmap(); +#endif +} + +int hyp_create_fixmap(void) { unsigned long addr, i; int ret; @@ -322,7 +403,7 @@ int hyp_create_pcpu_fixmap(void) return ret; } - return 0; + return create_fixblock(); } int hyp_create_idmap(u32 hyp_va_bits) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 3927fe52a3dd..338505cb0171 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -46,7 +46,8 @@ static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_FWB; if (cpus_have_final_cap(ARM64_HAS_EVT) && - !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE)) + !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE) && + kvm_read_vm_id_reg(vcpu->kvm, SYS_CTR_EL0) == read_cpuid(CTR_EL0)) vcpu->arch.hcr_el2 |= HCR_TID4; else vcpu->arch.hcr_el2 |= HCR_TID2; @@ -166,8 +167,13 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) pkvm_vcpu_reset_hcr(vcpu); - if ((!pkvm_hyp_vcpu_is_protected(hyp_vcpu))) + if ((!pkvm_hyp_vcpu_is_protected(hyp_vcpu))) { + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + /* Trust the host for non-protected vcpu features. */ + vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2; return 0; + } ret = pkvm_check_pvm_cpu_features(vcpu); if (ret) @@ -175,6 +181,7 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) pvm_init_traps_hcr(vcpu); pvm_init_traps_mdcr(vcpu); + vcpu_set_hcrx(vcpu); return 0; } @@ -239,10 +246,12 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, hyp_spin_lock(&vm_table_lock); hyp_vm = get_vm_by_handle(handle); - if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx) + if (!hyp_vm || hyp_vm->kvm.created_vcpus <= vcpu_idx) goto unlock; hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; + if (!hyp_vcpu) + goto unlock; /* Ensure vcpu isn't loaded on more than one cpu simultaneously. */ if (unlikely(hyp_vcpu->loaded_hyp_vcpu)) { @@ -315,6 +324,9 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags); DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES); + /* CTR_EL0 is always under host control, even for protected VMs. */ + hyp_vm->kvm.arch.ctr_el0 = host_kvm->arch.ctr_el0; + if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags)) set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); @@ -325,6 +337,10 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc bitmap_copy(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features, KVM_VCPU_MAX_FEATURES); + + if (test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &host_arch_flags)) + hyp_vm->kvm.arch.midr_el1 = host_kvm->arch.midr_el1; + return; } @@ -356,13 +372,32 @@ static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu) hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1); } +static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + void *sve_state; + + if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE)) + return; + + sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state); + hyp_unpin_shared_mem(sve_state, + sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu)); +} + static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[], unsigned int nr_vcpus) { int i; - for (i = 0; i < nr_vcpus; i++) - unpin_host_vcpu(hyp_vcpus[i]->host_vcpu); + for (i = 0; i < nr_vcpus; i++) { + struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vcpus[i]; + + if (!hyp_vcpu) + continue; + + unpin_host_vcpu(hyp_vcpu->host_vcpu); + unpin_host_sve_state(hyp_vcpu); + } } static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, @@ -376,34 +411,56 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, pkvm_init_features_from_host(hyp_vm, host_kvm); } -static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu) +static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu) { struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; + unsigned int sve_max_vl; + size_t sve_state_size; + void *sve_state; + int ret = 0; - if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) + if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) { vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED); + return 0; + } + + /* Limit guest vector length to the maximum supported by the host. */ + sve_max_vl = min(READ_ONCE(host_vcpu->arch.sve_max_vl), kvm_host_sve_max_vl); + sve_state_size = sve_state_size_from_vl(sve_max_vl); + sve_state = kern_hyp_va(READ_ONCE(host_vcpu->arch.sve_state)); + + if (!sve_state || !sve_state_size) { + ret = -EINVAL; + goto err; + } + + ret = hyp_pin_shared_mem(sve_state, sve_state + sve_state_size); + if (ret) + goto err; + + vcpu->arch.sve_state = sve_state; + vcpu->arch.sve_max_vl = sve_max_vl; + + return 0; +err: + clear_bit(KVM_ARM_VCPU_SVE, vcpu->kvm->arch.vcpu_features); + return ret; } static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, struct pkvm_hyp_vm *hyp_vm, - struct kvm_vcpu *host_vcpu, - unsigned int vcpu_idx) + struct kvm_vcpu *host_vcpu) { int ret = 0; if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1)) return -EBUSY; - if (host_vcpu->vcpu_idx != vcpu_idx) { - ret = -EINVAL; - goto done; - } - hyp_vcpu->host_vcpu = host_vcpu; hyp_vcpu->vcpu.kvm = &hyp_vm->kvm; hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id); - hyp_vcpu->vcpu.vcpu_idx = vcpu_idx; + hyp_vcpu->vcpu.vcpu_idx = READ_ONCE(host_vcpu->vcpu_idx); hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu; hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags); @@ -416,7 +473,7 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, if (ret) goto done; - pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu); + ret = pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu); done: if (ret) unpin_host_vcpu(host_vcpu); @@ -641,27 +698,28 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, goto unlock; } - idx = hyp_vm->nr_vcpus; + ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu); + if (ret) + goto unlock; + + idx = hyp_vcpu->vcpu.vcpu_idx; if (idx >= hyp_vm->kvm.created_vcpus) { ret = -EINVAL; goto unlock; } - ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx); - if (ret) + if (hyp_vm->vcpus[idx]) { + ret = -EINVAL; goto unlock; + } hyp_vm->vcpus[idx] = hyp_vcpu; - hyp_vm->nr_vcpus++; unlock: hyp_spin_unlock(&vm_table_lock); - if (ret) { + if (ret) unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); - return ret; - } - - return 0; + return ret; } static void @@ -678,7 +736,7 @@ teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size) int __pkvm_teardown_vm(pkvm_handle_t handle) { - struct kvm_hyp_memcache *mc; + struct kvm_hyp_memcache *mc, *stage2_mc; struct pkvm_hyp_vm *hyp_vm; struct kvm *host_kvm; unsigned int idx; @@ -706,18 +764,24 @@ int __pkvm_teardown_vm(pkvm_handle_t handle) /* Reclaim guest pages (including page-table pages) */ mc = &host_kvm->arch.pkvm.teardown_mc; - reclaim_guest_pages(hyp_vm, mc); - unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus); + stage2_mc = &host_kvm->arch.pkvm.stage2_teardown_mc; + reclaim_pgtable_pages(hyp_vm, stage2_mc); + unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->kvm.created_vcpus); /* Push the metadata pages to the teardown memcache */ - for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) { + for (idx = 0; idx < hyp_vm->kvm.created_vcpus; ++idx) { struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx]; - struct kvm_hyp_memcache *vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache; + struct kvm_hyp_memcache *vcpu_mc; + + if (!hyp_vcpu) + continue; + + vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache; while (vcpu_mc->nr_pages) { void *addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt); - push_hyp_memcache(mc, addr, hyp_virt_to_phys); + push_hyp_memcache(stage2_mc, addr, hyp_virt_to_phys); unmap_donated_memory_noclear(addr, PAGE_SIZE); } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 9c2ce1e0e99a..c3e196fb8b18 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -218,6 +218,9 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) if (is_cpu_on) release_boot_args(boot_args); + write_sysreg_el1(INIT_SCTLR_EL1_MMU_OFF, SYS_SCTLR); + write_sysreg(INIT_PSTATE_EL1, SPSR_EL2); + __host_enter(host_ctxt); } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index d62bcb5634a2..a48d3f5a5afb 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -28,6 +28,7 @@ static void *vmemmap_base; static void *vm_table_base; static void *hyp_pgt_base; static void *host_s2_pgt_base; +static void *selftest_base; static void *ffa_proxy_pages; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; static struct hyp_pool hpool; @@ -38,6 +39,11 @@ static int divide_memory_pool(void *virt, unsigned long size) hyp_early_alloc_init(virt, size); + nr_pages = pkvm_selftest_pages(); + selftest_base = hyp_early_alloc_contig(nr_pages); + if (nr_pages && !selftest_base) + return -ENOMEM; + nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page)); vmemmap_base = hyp_early_alloc_contig(nr_pages); if (!vmemmap_base) @@ -119,6 +125,10 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; + ret = pkvm_create_mappings(__hyp_data_start, __hyp_data_end, PAGE_HYP); + if (ret) + return ret; + ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO); if (ret) return ret; @@ -180,6 +190,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) { enum pkvm_page_state state; + struct hyp_page *page; phys_addr_t phys; if (!kvm_pte_valid(ctx->old)) @@ -192,19 +203,25 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, if (!addr_is_memory(phys)) return -EINVAL; + page = hyp_phys_to_page(phys); + /* * Adjust the host stage-2 mappings to match the ownership attributes - * configured in the hypervisor stage-1. + * configured in the hypervisor stage-1, and make sure to propagate them + * to the hyp_vmemmap state. */ state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old)); switch (state) { case PKVM_PAGE_OWNED: + set_hyp_state(page, PKVM_PAGE_OWNED); return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); case PKVM_PAGE_SHARED_OWNED: - hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED; + set_hyp_state(page, PKVM_PAGE_SHARED_OWNED); + set_host_state(page, PKVM_PAGE_SHARED_BORROWED); break; case PKVM_PAGE_SHARED_BORROWED: - hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED; + set_hyp_state(page, PKVM_PAGE_SHARED_BORROWED); + set_host_state(page, PKVM_PAGE_SHARED_OWNED); break; default: return -EINVAL; @@ -295,7 +312,7 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; - ret = hyp_create_pcpu_fixmap(); + ret = hyp_create_fixmap(); if (ret) goto out; @@ -304,6 +321,8 @@ void __noreturn __pkvm_init_finalise(void) goto out; pkvm_hyp_vm_table_init(vm_table_base); + + pkvm_ownership_selftest(selftest_base); out: /* * We tail-called to here from handle___pkvm_init() and will not return, diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6c846d033d24..ccd575d5f6de 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -33,41 +33,19 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); -extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); - -static void __activate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ - - if (has_hvhe()) { - val |= CPACR_EL1_TTA; - - if (guest_owns_fp_regs()) { - val |= CPACR_EL1_FPEN; - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } - } else { - val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; - - /* - * Always trap SME since it's not supported in KVM. - * TSM is RES1 if SME isn't implemented. - */ - val |= CPTR_EL2_TSM; - - if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) - val |= CPTR_EL2_TZ; +struct fgt_masks hfgrtr_masks; +struct fgt_masks hfgwtr_masks; +struct fgt_masks hfgitr_masks; +struct fgt_masks hdfgrtr_masks; +struct fgt_masks hdfgwtr_masks; +struct fgt_masks hafgrtr_masks; +struct fgt_masks hfgrtr2_masks; +struct fgt_masks hfgwtr2_masks; +struct fgt_masks hfgitr2_masks; +struct fgt_masks hdfgrtr2_masks; +struct fgt_masks hdfgwtr2_masks; - if (!guest_owns_fp_regs()) - val |= CPTR_EL2_TFP; - } - - if (!guest_owns_fp_regs()) - __activate_traps_fpsimd32(vcpu); - - kvm_write_cptr_el2(val); -} +extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); static void __activate_traps(struct kvm_vcpu *vcpu) { @@ -117,9 +95,9 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) __deactivate_traps_common(vcpu); - write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); + write_sysreg_hcr(this_cpu_ptr(&kvm_init_params)->hcr_el2); - kvm_reset_cptr_el2(vcpu); + __deactivate_cptr_traps(vcpu); write_sysreg(__kvm_hyp_host_vector, vbar_el2); } @@ -192,34 +170,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) kvm_handle_pvm_sysreg(vcpu, exit_code)); } -static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) -{ - /* - * Non-protected kvm relies on the host restoring its sve state. - * Protected kvm restores the host's sve state as not to reveal that - * fpsimd was used by a guest nor leak upper sve bits. - */ - if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) { - __hyp_sve_save_host(); - - /* Re-enable SVE traps if not supported for the guest vcpu. */ - if (!vcpu_has_sve(vcpu)) - cpacr_clear_set(CPACR_EL1_ZEN, 0); - - } else { - __fpsimd_save_state(*host_data_ptr(fpsimd_state)); - } - - if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) { - u64 val = read_sysreg_s(SYS_FPMR); - - if (unlikely(is_protected_kvm_enabled())) - *host_data_ptr(fpmr) = val; - else - **host_data_ptr(fpmr_ptr) = val; - } -} - static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, @@ -251,19 +201,21 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) return hyp_exit_handlers; } -/* - * Some guests (e.g., protected VMs) are not be allowed to run in AArch32. - * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a - * guest from dropping to AArch32 EL0 if implemented by the CPU. If the - * hypervisor spots a guest in such a state ensure it is handled, and don't - * trust the host to spot or fix it. The check below is based on the one in - * kvm_arch_vcpu_ioctl_run(). - * - * Returns false if the guest ran in AArch32 when it shouldn't have, and - * thus should exit to the host, or true if a the guest run loop can continue. - */ -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { + const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); + + synchronize_vcpu_pstate(vcpu, exit_code); + + /* + * Some guests (e.g., protected VMs) are not be allowed to run in + * AArch32. The ARMv8 architecture does not give the hypervisor a + * mechanism to prevent a guest from dropping to AArch32 EL0 if + * implemented by the CPU. If the hypervisor spots a guest in such a + * state ensure it is handled, and don't trust the host to spot or fix + * it. The check below is based on the one in + * kvm_arch_vcpu_ioctl_run(). + */ if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { /* * As we have caught the guest red-handed, decide that it isn't @@ -276,6 +228,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); *exit_code |= ARM_EXCEPTION_IL; } + + return __fixup_guest_exit(vcpu, exit_code, handlers); } /* Switch to the guest for legacy non-VHE systems */ @@ -318,7 +272,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) * We're about to restore some new MMU state. Make sure * ongoing page-table walks that have started before we * trapped to EL2 have completed. This also synchronises the - * above disabling of SPE and TRBE. + * above disabling of BRBE, SPE and TRBE. * * See DDI0487I.a D8.1.5 "Out-of-context translation regimes", * rule R_LFHQG and subsequent information statements. diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c index dba101565de3..3cc613cce5f5 100644 --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -28,7 +28,9 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) { - __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1)); + u64 midr = ctxt_midr_el1(ctxt); + + __sysreg_restore_el1_state(ctxt, midr, ctxt_sys_reg(ctxt, MPIDR_EL1)); __sysreg_restore_common_state(ctxt); __sysreg_restore_user_state(ctxt); __sysreg_restore_el2_return_state(ctxt); |