diff options
Diffstat (limited to 'arch/arm64/kvm/hyp/include')
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 42 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/fault.h | 70 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/switch.h | 501 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 63 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 223 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/gfp.h | 6 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 47 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/memory.h | 84 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/mm.h | 4 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 29 |
10 files changed, 621 insertions, 448 deletions
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index d00093699aaf..502a5b73ee70 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -88,15 +88,26 @@ default: write_debug(ptr[0], reg, 0); \ } +static struct kvm_guest_debug_arch *__vcpu_debug_regs(struct kvm_vcpu *vcpu) +{ + switch (vcpu->arch.debug_owner) { + case VCPU_DEBUG_FREE: + WARN_ON_ONCE(1); + fallthrough; + case VCPU_DEBUG_GUEST_OWNED: + return &vcpu->arch.vcpu_debug_state; + case VCPU_DEBUG_HOST_OWNED: + return &vcpu->arch.external_debug_state; + } + + return NULL; +} + static void __debug_save_state(struct kvm_guest_debug_arch *dbg, struct kvm_cpu_context *ctxt) { - u64 aa64dfr0; - int brps, wrps; - - aa64dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = (aa64dfr0 >> 12) & 0xf; - wrps = (aa64dfr0 >> 20) & 0xf; + int brps = *host_data_ptr(debug_brps); + int wrps = *host_data_ptr(debug_wrps); save_debug(dbg->dbg_bcr, dbgbcr, brps); save_debug(dbg->dbg_bvr, dbgbvr, brps); @@ -109,13 +120,8 @@ static void __debug_save_state(struct kvm_guest_debug_arch *dbg, static void __debug_restore_state(struct kvm_guest_debug_arch *dbg, struct kvm_cpu_context *ctxt) { - u64 aa64dfr0; - int brps, wrps; - - aa64dfr0 = read_sysreg(id_aa64dfr0_el1); - - brps = (aa64dfr0 >> 12) & 0xf; - wrps = (aa64dfr0 >> 20) & 0xf; + int brps = *host_data_ptr(debug_brps); + int wrps = *host_data_ptr(debug_wrps); restore_debug(dbg->dbg_bcr, dbgbcr, brps); restore_debug(dbg->dbg_bvr, dbgbvr, brps); @@ -132,13 +138,13 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) + if (!kvm_debug_regs_in_use(vcpu)) return; host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; host_dbg = host_data_ptr(host_debug_state.regs); - guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); + guest_dbg = __vcpu_debug_regs(vcpu); __debug_save_state(host_dbg, host_ctxt); __debug_restore_state(guest_dbg, guest_ctxt); @@ -151,18 +157,16 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) + if (!kvm_debug_regs_in_use(vcpu)) return; host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; host_dbg = host_data_ptr(host_debug_state.regs); - guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); + guest_dbg = __vcpu_debug_regs(vcpu); __debug_save_state(guest_dbg, guest_ctxt); __debug_restore_state(host_dbg, host_ctxt); - - vcpu_clear_flag(vcpu, DEBUG_DIRTY); } #endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 17df94570f03..fc573fc767b0 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -12,6 +12,16 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> +static inline bool __fault_safe_to_translate(u64 esr) +{ + u64 fsc = esr & ESR_ELx_FSC; + + if (esr_fsc_is_sea_ttw(esr) || esr_fsc_is_secc_ttw(esr)) + return false; + + return !(fsc == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV)); +} + static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) { int ret; @@ -44,34 +54,50 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) return true; } -static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +/* + * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR. + */ +static inline bool __hpfar_valid(u64 esr) { - u64 hpfar, far; - - far = read_sysreg_el2(SYS_FAR); - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 + * CPUs affected by ARM erratum #834220 may incorrectly report a + * stage-2 translation fault when a stage-1 permission fault occurs. * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. + * Re-walk the page tables to determine if a stage-1 fault actually + * occurred. */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - esr_fsc_is_permission_fault(esr))) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { + if (cpus_have_final_cap(ARM64_WORKAROUND_834220) && + esr_fsc_is_translation_fault(esr)) + return false; + + if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr)) + return true; + + if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr)) + return true; + + return esr_fsc_is_addr_sz_fault(esr); +} + +static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +{ + u64 hpfar; + + fault->far_el2 = read_sysreg_el2(SYS_FAR); + fault->hpfar_el2 = 0; + + if (__hpfar_valid(esr)) hpfar = read_sysreg(hpfar_el2); - } + else if (unlikely(!__fault_safe_to_translate(esr))) + return true; + else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar)) + return false; - fault->far_el2 = far; - fault->hpfar_el2 = hpfar; + /* + * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid + * HPFAR value. + */ + fault->hpfar_el2 = hpfar | HPFAR_EL2_NS; return true; } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 34f53707892d..2ad57b117385 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -45,7 +45,7 @@ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) if (!vcpu_el1_is_32bit(vcpu)) return; - __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); + __vcpu_assign_sys_reg(vcpu, FPEXC32_EL2, read_sysreg(fpexc32_el2)); } static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) @@ -65,12 +65,186 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) } } +static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TAM | CPTR_EL2_TTA; + + /* + * Always trap SME since it's not supported in KVM. + * TSM is RES1 if SME isn't implemented. + */ + val |= CPTR_EL2_TSM; + + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) + val |= CPTR_EL2_TZ; + + if (!guest_owns_fp_regs()) + val |= CPTR_EL2_TFP; + + write_sysreg(val, cptr_el2); +} + +static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. + */ + u64 val = CPTR_EL2_TAM | CPACR_EL1_TTA; + u64 cptr; + + if (guest_owns_fp_regs()) { + val |= CPACR_EL1_FPEN; + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } + + if (!vcpu_has_nv(vcpu)) + goto write; + + /* + * The architecture is a bit crap (what a surprise): an EL2 guest + * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, + * as they are RES0 in the guest's view. To work around it, trap the + * sucker using the very same bit it can't set... + */ + if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) + val |= CPTR_EL2_TCPAC; + + /* + * Layer the guest hypervisor's trap configuration on top of our own if + * we're in a nested context. + */ + if (is_hyp_ctxt(vcpu)) + goto write; + + cptr = vcpu_sanitised_cptr_el2(vcpu); + + /* + * Pay attention, there's some interesting detail here. + * + * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two + * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): + * + * - CPTR_EL2.xEN = x0, traps are enabled + * - CPTR_EL2.xEN = x1, traps are disabled + * + * In other words, bit[0] determines if guest accesses trap or not. In + * the interest of simplicity, clear the entire field if the guest + * hypervisor has traps enabled to dispel any illusion of something more + * complicated taking place. + */ + if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_FPEN; + if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_ZEN; + + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) + val |= cptr & CPACR_EL1_E0POE; + + val |= cptr & CPTR_EL2_TCPAC; + +write: + write_sysreg(val, cpacr_el1); +} + +static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (!guest_owns_fp_regs()) + __activate_traps_fpsimd32(vcpu); + + if (has_vhe() || has_hvhe()) + __activate_cptr_traps_vhe(vcpu); + else + __activate_cptr_traps_nvhe(vcpu); +} + +static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1; + + if (!cpus_have_final_cap(ARM64_SVE)) + val |= CPTR_EL2_TZ; + if (!cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; + + write_sysreg(val, cptr_el2); +} + +static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPACR_EL1_FPEN; + + if (cpus_have_final_cap(ARM64_SVE)) + val |= CPACR_EL1_ZEN; + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN; + + write_sysreg(val, cpacr_el1); +} + +static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (has_vhe() || has_hvhe()) + __deactivate_cptr_traps_vhe(vcpu); + else + __deactivate_cptr_traps_nvhe(vcpu); +} + +#define reg_to_fgt_masks(reg) \ + ({ \ + struct fgt_masks *m; \ + switch(reg) { \ + case HFGRTR_EL2: \ + m = &hfgrtr_masks; \ + break; \ + case HFGWTR_EL2: \ + m = &hfgwtr_masks; \ + break; \ + case HFGITR_EL2: \ + m = &hfgitr_masks; \ + break; \ + case HDFGRTR_EL2: \ + m = &hdfgrtr_masks; \ + break; \ + case HDFGWTR_EL2: \ + m = &hdfgwtr_masks; \ + break; \ + case HAFGRTR_EL2: \ + m = &hafgrtr_masks; \ + break; \ + case HFGRTR2_EL2: \ + m = &hfgrtr2_masks; \ + break; \ + case HFGWTR2_EL2: \ + m = &hfgwtr2_masks; \ + break; \ + case HFGITR2_EL2: \ + m = &hfgitr2_masks; \ + break; \ + case HDFGRTR2_EL2: \ + m = &hdfgrtr2_masks; \ + break; \ + case HDFGWTR2_EL2: \ + m = &hdfgwtr2_masks; \ + break; \ + default: \ + BUILD_BUG_ON(1); \ + } \ + \ + m; \ + }) + #define compute_clr_set(vcpu, reg, clr, set) \ do { \ - u64 hfg; \ - hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0; \ - set |= hfg & __ ## reg ## _MASK; \ - clr |= ~hfg & __ ## reg ## _nMASK; \ + u64 hfg = __vcpu_sys_reg(vcpu, reg); \ + struct fgt_masks *m = reg_to_fgt_masks(reg); \ + set |= hfg & m->mask; \ + clr |= ~hfg & m->nmask; \ } while(0) #define reg_to_fgt_group_id(reg) \ @@ -79,7 +253,7 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) switch(reg) { \ case HFGRTR_EL2: \ case HFGWTR_EL2: \ - id = HFGxTR_GROUP; \ + id = HFGRTR_GROUP; \ break; \ case HFGITR_EL2: \ id = HFGITR_GROUP; \ @@ -91,6 +265,17 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) case HAFGRTR_EL2: \ id = HAFGRTR_GROUP; \ break; \ + case HFGRTR2_EL2: \ + case HFGWTR2_EL2: \ + id = HFGRTR2_GROUP; \ + break; \ + case HFGITR2_EL2: \ + id = HFGITR2_GROUP; \ + break; \ + case HDFGRTR2_EL2: \ + case HDFGWTR2_EL2: \ + id = HDFGRTR2_GROUP; \ + break; \ default: \ BUILD_BUG_ON(1); \ } \ @@ -101,13 +286,16 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) #define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \ do { \ u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \ - set |= hfg & __ ## reg ## _MASK; \ - clr |= hfg & __ ## reg ## _nMASK; \ + struct fgt_masks *m = reg_to_fgt_masks(reg); \ + set |= hfg & m->mask; \ + clr |= hfg & m->nmask; \ } while(0) #define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \ do { \ - u64 c = 0, s = 0; \ + struct fgt_masks *m = reg_to_fgt_masks(reg); \ + u64 c = clr, s = set; \ + u64 val; \ \ ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) \ @@ -115,30 +303,15 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) \ compute_undef_clr_set(vcpu, kvm, reg, c, s); \ \ - s |= set; \ - c |= clr; \ - if (c || s) { \ - u64 val = __ ## reg ## _nMASK; \ - val |= s; \ - val &= ~c; \ - write_sysreg_s(val, SYS_ ## reg); \ - } \ + val = m->nmask; \ + val |= s; \ + val &= ~c; \ + write_sysreg_s(val, SYS_ ## reg); \ } while(0) #define update_fgt_traps(hctxt, vcpu, kvm, reg) \ update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0) -/* - * Validate the fine grain trap masks. - * Check that the masks do not overlap and that all bits are accounted for. - */ -#define CHECK_FGT_MASKS(reg) \ - do { \ - BUILD_BUG_ON((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)); \ - BUILD_BUG_ON(~((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^ \ - (__ ## reg ## _nMASK))); \ - } while(0) - static inline bool cpu_has_amu(void) { u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); @@ -152,56 +325,60 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); struct kvm *kvm = kern_hyp_va(vcpu->kvm); - CHECK_FGT_MASKS(HFGRTR_EL2); - CHECK_FGT_MASKS(HFGWTR_EL2); - CHECK_FGT_MASKS(HFGITR_EL2); - CHECK_FGT_MASKS(HDFGRTR_EL2); - CHECK_FGT_MASKS(HDFGWTR_EL2); - CHECK_FGT_MASKS(HAFGRTR_EL2); - CHECK_FGT_MASKS(HCRX_EL2); - if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2); update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0, cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ? - HFGxTR_EL2_TCR_EL1_MASK : 0); + HFGWTR_EL2_TCR_EL1_MASK : 0); update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2); update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2); update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2); if (cpu_has_amu()) update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2); + + if (!cpus_have_final_cap(ARM64_HAS_FGT2)) + return; + + update_fgt_traps(hctxt, vcpu, kvm, HFGRTR2_EL2); + update_fgt_traps(hctxt, vcpu, kvm, HFGWTR2_EL2); + update_fgt_traps(hctxt, vcpu, kvm, HFGITR2_EL2); + update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR2_EL2); + update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR2_EL2); } -#define __deactivate_fgt(htcxt, vcpu, kvm, reg) \ +#define __deactivate_fgt(htcxt, vcpu, reg) \ do { \ - if ((vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) || \ - kvm->arch.fgu[reg_to_fgt_group_id(reg)]) \ - write_sysreg_s(ctxt_sys_reg(hctxt, reg), \ - SYS_ ## reg); \ + write_sysreg_s(ctxt_sys_reg(hctxt, reg), \ + SYS_ ## reg); \ } while(0) static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); - struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; - __deactivate_fgt(hctxt, vcpu, kvm, HFGRTR_EL2); - if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) - write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2); - else - __deactivate_fgt(hctxt, vcpu, kvm, HFGWTR_EL2); - __deactivate_fgt(hctxt, vcpu, kvm, HFGITR_EL2); - __deactivate_fgt(hctxt, vcpu, kvm, HDFGRTR_EL2); - __deactivate_fgt(hctxt, vcpu, kvm, HDFGWTR_EL2); + __deactivate_fgt(hctxt, vcpu, HFGRTR_EL2); + __deactivate_fgt(hctxt, vcpu, HFGWTR_EL2); + __deactivate_fgt(hctxt, vcpu, HFGITR_EL2); + __deactivate_fgt(hctxt, vcpu, HDFGRTR_EL2); + __deactivate_fgt(hctxt, vcpu, HDFGWTR_EL2); if (cpu_has_amu()) - __deactivate_fgt(hctxt, vcpu, kvm, HAFGRTR_EL2); + __deactivate_fgt(hctxt, vcpu, HAFGRTR_EL2); + + if (!cpus_have_final_cap(ARM64_HAS_FGT2)) + return; + + __deactivate_fgt(hctxt, vcpu, HFGRTR2_EL2); + __deactivate_fgt(hctxt, vcpu, HFGWTR2_EL2); + __deactivate_fgt(hctxt, vcpu, HFGITR2_EL2); + __deactivate_fgt(hctxt, vcpu, HDFGRTR2_EL2); + __deactivate_fgt(hctxt, vcpu, HDFGWTR2_EL2); } static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu) @@ -235,6 +412,8 @@ static inline void __deactivate_traps_mpam(void) static inline void __activate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); @@ -244,12 +423,9 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * counter, which could make a PMXEVCNTR_EL0 access UNDEF at * EL1 instead of being trapped to EL2. */ - if (kvm_arm_support_pmu_v3()) { - struct kvm_cpu_context *hctxt; - + if (system_supports_pmuv3()) { write_sysreg(0, pmselr_el0); - hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -261,14 +437,12 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) if (cpus_have_final_cap(ARM64_HAS_HCX)) { u64 hcrx = vcpu->arch.hcrx_el2; if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { - u64 clr = 0, set = 0; - - compute_clr_set(vcpu, HCRX_EL2, clr, set); - - hcrx |= set; - hcrx &= ~clr; + u64 val = __vcpu_sys_reg(vcpu, HCRX_EL2); + hcrx |= val & __HCRX_EL2_MASK; + hcrx &= ~(~val & __HCRX_EL2_nMASK); } + ctxt_sys_reg(hctxt, HCRX_EL2) = read_sysreg_s(SYS_HCRX_EL2); write_sysreg_s(hcrx, SYS_HCRX_EL2); } @@ -278,19 +452,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); write_sysreg(0, hstr_el2); - if (kvm_arm_support_pmu_v3()) { - struct kvm_cpu_context *hctxt; - - hctxt = host_data_ptr(host_ctxt); + if (system_supports_pmuv3()) { write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } if (cpus_have_final_cap(ARM64_HAS_HCX)) - write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2); __deactivate_traps_hfgxtr(vcpu); __deactivate_traps_mpam(); @@ -301,7 +474,7 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr) if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) hcr |= HCR_TVM; - write_sysreg(hcr, hcr_el2); + write_sysreg_hcr(hcr); if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); @@ -326,7 +499,7 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault); } -static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2); @@ -375,7 +548,82 @@ static inline void __hyp_sve_save_host(void) true); } -static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu); +static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs()) + return; + + if (vcpu_has_sve(vcpu)) { + /* A guest hypervisor may restrict the effective max VL. */ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) + zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2); + else + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + + write_sysreg_el2(zcr_el2, SYS_ZCR); + + zcr_el1 = __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)); + write_sysreg_el1(zcr_el1, SYS_ZCR); + } +} + +static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs()) + return; + + /* + * When the guest owns the FP regs, we know that guest+hyp traps for + * any FPSIMD/SVE/SME features exposed to the guest have been disabled + * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd() + * prior to __guest_entry(). As __guest_entry() guarantees a context + * synchronization event, we don't need an ISB here to avoid taking + * traps for anything that was exposed to the guest. + */ + if (vcpu_has_sve(vcpu)) { + zcr_el1 = read_sysreg_el1(SYS_ZCR); + __vcpu_assign_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu), zcr_el1); + + /* + * The guest's state is always saved using the guest's max VL. + * Ensure that the host has the guest's max VL active such that + * the host can save the guest's state lazily, but don't + * artificially restrict the host to the guest's max VL. + */ + if (has_vhe()) { + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + } else { + zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + + zcr_el1 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el1(zcr_el1, SYS_ZCR); + } + } +} + +static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) +{ + /* + * Non-protected kvm relies on the host restoring its sve state. + * Protected kvm restores the host's sve state as not to reveal that + * fpsimd was used by a guest nor leak upper sve bits. + */ + if (system_supports_sve()) { + __hyp_sve_save_host(); + } else { + __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs)); + } + + if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) + *host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR); +} + /* * We trap the first access to the FP/SIMD to save the host context and @@ -383,7 +631,7 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu); * If FP/SIMD is not implemented, handle the trap and inject an undefined * instruction exception to the guest. Similarly for trapped SVE accesses. */ -static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) { bool sve_guest; u8 esr_ec; @@ -418,14 +666,11 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Valid trap. Switch the context: */ /* First disable enough traps to allow us to update the registers */ - if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve())) - cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN); - else - cpacr_clear_set(0, CPACR_ELx_FPEN); + __deactivate_cptr_traps(vcpu); isb(); /* Write out the host state if it's in the registers */ - if (host_owns_fp_regs()) + if (is_protected_kvm_enabled() && host_owns_fp_regs()) kvm_hyp_save_fpsimd_host(vcpu); /* Restore the guest state */ @@ -443,6 +688,13 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; + /* + * Re-enable traps necessary for the current state of the guest, e.g. + * those enabled by a guest hypervisor. The ERET to the guest will + * provide the necessary context synchronization. + */ + __activate_cptr_traps(vcpu); + return true; } @@ -501,7 +753,25 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) return true; } -static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) +/* Open-coded version of timer_get_offset() to allow for kern_hyp_va() */ +static inline u64 hyp_timer_get_offset(struct arch_timer_context *ctxt) +{ + u64 offset = 0; + + if (ctxt->offset.vm_offset) + offset += *kern_hyp_va(ctxt->offset.vm_offset); + if (ctxt->offset.vcpu_offset) + offset += *kern_hyp_va(ctxt->offset.vcpu_offset); + + return offset; +} + +static inline u64 compute_counter_value(struct arch_timer_context *ctxt) +{ + return arch_timer_read_cntpct_el0() - hyp_timer_get_offset(ctxt); +} + +static bool kvm_handle_cntxct(struct kvm_vcpu *vcpu) { struct arch_timer_context *ctxt; u32 sysreg; @@ -511,18 +781,19 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) * We only get here for 64bit guests, 32bit guests will hit * the long and winding road all the way to the standard * handling. Yes, it sucks to be irrelevant. + * + * Also, we only deal with non-hypervisor context here (either + * an EL1 guest, or a non-HYP context of an EL2 guest). */ + if (is_hyp_ctxt(vcpu)) + return false; + sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); switch (sysreg) { case SYS_CNTPCT_EL0: case SYS_CNTPCTSS_EL0: if (vcpu_has_nv(vcpu)) { - if (is_hyp_ctxt(vcpu)) { - ctxt = vcpu_hptimer(vcpu); - break; - } - /* Check for guest hypervisor trapping */ val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); if (!vcpu_el2_e2h_is_set(vcpu)) @@ -534,16 +805,23 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) ctxt = vcpu_ptimer(vcpu); break; + case SYS_CNTVCT_EL0: + case SYS_CNTVCTSS_EL0: + if (vcpu_has_nv(vcpu)) { + /* Check for guest hypervisor trapping */ + val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + + if (val & CNTHCTL_EL1TVCT) + return false; + } + + ctxt = vcpu_vtimer(vcpu); + break; default: return false; } - val = arch_timer_read_cntpct_el0(); - - if (ctxt->offset.vm_offset) - val -= *kern_hyp_va(ctxt->offset.vm_offset); - if (ctxt->offset.vcpu_offset) - val -= *kern_hyp_va(ctxt->offset.vcpu_offset); + val = compute_counter_value(ctxt); vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val); __kvm_skip_instr(vcpu); @@ -574,7 +852,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu) return true; } -static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) { if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && handle_tx2_tvm(vcpu)) @@ -588,13 +866,13 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) __vgic_v3_perform_cpuif_access(vcpu) == 1) return true; - if (kvm_hyp_handle_cntpct(vcpu)) + if (kvm_handle_cntxct(vcpu)) return true; return false; } -static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) { if (static_branch_unlikely(&vgic_v3_cpuif_trap) && __vgic_v3_perform_cpuif_access(vcpu) == 1) @@ -603,19 +881,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, + u64 *exit_code) { if (!__populate_fault_info(vcpu)) return true; return false; } -static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) - __alias(kvm_hyp_handle_memory_fault); -static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code) - __alias(kvm_hyp_handle_memory_fault); +#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault +#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault -static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) { if (kvm_hyp_handle_memory_fault(vcpu, exit_code)) return true; @@ -645,23 +922,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); - -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code); - /* * Allow the hypervisor to handle the exit with an exit handler if it has one. * * Returns true if the hypervisor handled the exit, and control should go back * to the guest, or false if it hasn't. */ -static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code, + const exit_handler_fn *handlers) { - const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); - exit_handler_fn fn; - - fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; - + exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; if (fn) return fn(vcpu, exit_code); @@ -691,20 +961,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code * the guest, false when we should restore the host state and return to the * main run loop. */ -static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code, + const exit_handler_fn *handlers) { - /* - * Save PSTATE early so that we can evaluate the vcpu mode - * early on. - */ - synchronize_vcpu_pstate(vcpu, exit_code); - - /* - * Check whether we want to repaint the state one way or - * another. - */ - early_exit_filter(vcpu, exit_code); - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); @@ -734,7 +993,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) goto exit; /* Check if there's an exit handler and allow it to handle the exit. */ - if (kvm_hyp_handle_exit(vcpu, exit_code)) + if (kvm_hyp_handle_exit(vcpu, exit_code, handlers)) goto guest; exit: /* Return to the host kernel and handle the exit */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index a651c43ad679..4d0dbea4c56f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -18,9 +18,45 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt); +static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; + + if (!vcpu) + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); + + return vcpu; +} + +static inline bool ctxt_is_guest(struct kvm_cpu_context *ctxt) +{ + return host_data_ptr(host_ctxt) != ctxt; +} + +static inline u64 *ctxt_mdscr_el1(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt); + + if (ctxt_is_guest(ctxt) && kvm_host_owns_debug_regs(vcpu)) + return &vcpu->arch.external_mdscr_el1; + + return &ctxt_sys_reg(ctxt, MDSCR_EL1); +} + +static inline u64 ctxt_midr_el1(struct kvm_cpu_context *ctxt) +{ + struct kvm *kvm = kern_hyp_va(ctxt_to_vcpu(ctxt)->kvm); + + if (!(ctxt_is_guest(ctxt) && + test_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags))) + return read_cpuid_id(); + + return kvm_read_vm_id_reg(kvm, SYS_MIDR_EL1); +} + static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { - ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); + *ctxt_mdscr_el1(ctxt) = read_sysreg(mdscr_el1); // POR_EL0 can affect uaccess, so must be saved/restored early. if (ctxt_has_s1poe(ctxt)) @@ -33,16 +69,6 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); } -static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) -{ - struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; - - if (!vcpu) - vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); - - return vcpu; -} - static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) { struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt); @@ -139,7 +165,7 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) { - write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); + write_sysreg(*ctxt_mdscr_el1(ctxt), mdscr_el1); // POR_EL0 can affect uaccess, so must be saved/restored early. if (ctxt_has_s1poe(ctxt)) @@ -153,8 +179,9 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) } static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt, - u64 mpidr) + u64 midr, u64 mpidr) { + write_sysreg(midr, vpidr_el2); write_sysreg(mpidr, vmpidr_el2); if (has_vhe() || @@ -280,11 +307,11 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); - __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); - __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + __vcpu_assign_sys_reg(vcpu, DACR32_EL2, read_sysreg(dacr32_el2)); + __vcpu_assign_sys_reg(vcpu, IFSR32_EL2, read_sysreg(ifsr32_el2)); - if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY)) - __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); + if (has_vhe() || kvm_debug_regs_in_use(vcpu)) + __vcpu_assign_sys_reg(vcpu, DBGVCR32_EL2, read_sysreg(dbgvcr32_el2)); } static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) @@ -300,7 +327,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); - if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY)) + if (has_vhe() || kvm_debug_regs_in_use(vcpu)) write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); } diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h deleted file mode 100644 index f957890c7e38..000000000000 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ /dev/null @@ -1,223 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2021 Google LLC - * Author: Fuad Tabba <tabba@google.com> - */ - -#ifndef __ARM64_KVM_FIXED_CONFIG_H__ -#define __ARM64_KVM_FIXED_CONFIG_H__ - -#include <asm/sysreg.h> - -/* - * This file contains definitions for features to be allowed or restricted for - * guest virtual machines, depending on the mode KVM is running in and on the - * type of guest that is running. - * - * The ALLOW masks represent a bitmask of feature fields that are allowed - * without any restrictions as long as they are supported by the system. - * - * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for - * features that are restricted to support at most the specified feature. - * - * If a feature field is not present in either, than it is not supported. - * - * The approach taken for protected VMs is to allow features that are: - * - Needed by common Linux distributions (e.g., floating point) - * - Trivial to support, e.g., supporting the feature does not introduce or - * require tracking of additional state in KVM - * - Cannot be trapped or prevent the guest from using anyway - */ - -/* - * Allow for protected VMs: - * - Floating-point and Advanced SIMD - * - Data Independent Timing - * - Spectre/Meltdown Mitigation - */ -#define PVM_ID_AA64PFR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) \ - ) - -/* - * Restrict to the following *unsigned* features for protected VMs: - * - AArch64 guests only (no support for AArch32 guests): - * AArch32 adds complexity in trap handling, emulation, condition codes, - * etc... - * - RAS (v1) - * Supported by KVM - */ -#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP) | \ - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP) | \ - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP) | \ - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP) | \ - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, RAS, IMP) \ - ) - -/* - * Allow for protected VMs: - * - Branch Target Identification - * - Speculative Store Bypassing - */ -#define PVM_ID_AA64PFR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \ - ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \ - ) - -#define PVM_ID_AA64PFR2_ALLOW 0ULL - -/* - * Allow for protected VMs: - * - Mixed-endian - * - Distinction between Secure and Non-secure Memory - * - Mixed-endian at EL0 only - * - Non-context synchronizing exception entry and exit - */ -#define PVM_ID_AA64MMFR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \ - ) - -/* - * Restrict to the following *unsigned* features for protected VMs: - * - 40-bit IPA - * - 16-bit ASID - */ -#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \ - ) - -/* - * Allow for protected VMs: - * - Hardware translation table updates to Access flag and Dirty state - * - Number of VMID bits from CPU - * - Hierarchical Permission Disables - * - Privileged Access Never - * - SError interrupt exceptions from speculative reads - * - Enhanced Translation Synchronization - * - Control for cache maintenance permission - */ -#define PVM_ID_AA64MMFR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \ - ) - -/* - * Allow for protected VMs: - * - Common not Private translations - * - User Access Override - * - IESB bit in the SCTLR_ELx registers - * - Unaligned single-copy atomicity and atomic functions - * - ESR_ELx.EC value on an exception by read access to feature ID space - * - TTL field in address operations. - * - Break-before-make sequences when changing translation block size - * - E0PDx mechanism - */ -#define PVM_ID_AA64MMFR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \ - ) - -#define PVM_ID_AA64MMFR3_ALLOW (0ULL) - -/* - * No support for Scalable Vectors for protected VMs: - * Requires additional support from KVM, e.g., context-switching and - * trapping at EL2 - */ -#define PVM_ID_AA64ZFR0_ALLOW (0ULL) - -/* - * No support for debug, including breakpoints, and watchpoints for protected - * VMs: - * The Arm architecture mandates support for at least the Armv8 debug - * architecture, which would include at least 2 hardware breakpoints and - * watchpoints. Providing that support to protected guests adds - * considerable state and complexity. Therefore, the reserved value of 0 is - * used for debug-related fields. - */ -#define PVM_ID_AA64DFR0_ALLOW (0ULL) -#define PVM_ID_AA64DFR1_ALLOW (0ULL) - -/* - * No support for implementation defined features. - */ -#define PVM_ID_AA64AFR0_ALLOW (0ULL) -#define PVM_ID_AA64AFR1_ALLOW (0ULL) - -/* - * No restrictions on instructions implemented in AArch64. - */ -#define PVM_ID_AA64ISAR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \ - ) - -/* Restrict pointer authentication to the basic version. */ -#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \ - ) - -#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \ - ) - -#define PVM_ID_AA64ISAR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \ - ) - -#define PVM_ID_AA64ISAR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \ - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \ - ) - -u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); -bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); -bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); -int kvm_check_pvm_sysreg_table(void); - -#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index 97c527ef53c2..3766333bace9 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -7,7 +7,7 @@ #include <nvhe/memory.h> #include <nvhe/spinlock.h> -#define HYP_NO_ORDER USHRT_MAX +#define HYP_NO_ORDER ((u8)(~0)) struct hyp_pool { /* @@ -19,11 +19,11 @@ struct hyp_pool { struct list_head free_area[NR_PAGE_ORDERS]; phys_addr_t range_start; phys_addr_t range_end; - unsigned short max_order; + u8 max_order; }; /* Allocation */ -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); +void *hyp_alloc_pages(struct hyp_pool *pool, u8 order); void hyp_split_page(struct hyp_page *page); void hyp_get_page(struct hyp_pool *pool, void *addr); void hyp_put_page(struct hyp_pool *pool, void *addr); diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0972faccc2af..5f9d56754e39 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -11,40 +11,10 @@ #include <asm/kvm_mmu.h> #include <asm/kvm_pgtable.h> #include <asm/virt.h> +#include <nvhe/memory.h> #include <nvhe/pkvm.h> #include <nvhe/spinlock.h> -/* - * SW bits 0-1 are reserved to track the memory ownership state of each page: - * 00: The page is owned exclusively by the page-table owner. - * 01: The page is owned by the page-table owner, but is shared - * with another entity. - * 10: The page is shared with, but not owned by the page-table owner. - * 11: Reserved for future use (lending). - */ -enum pkvm_page_state { - PKVM_PAGE_OWNED = 0ULL, - PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, - PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, - __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 | - KVM_PGTABLE_PROT_SW1, - - /* Meta-states which aren't encoded directly in the PTE's SW bits */ - PKVM_NOPAGE, -}; - -#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) -static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, - enum pkvm_page_state state) -{ - return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state; -} - -static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) -{ - return prot & PKVM_PAGE_STATE_PROT_MASK; -} - struct host_mmu { struct kvm_arch arch; struct kvm_pgtable pgt; @@ -69,6 +39,13 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages); int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages); int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages); int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages); +int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu, + enum kvm_pgtable_prot prot); +int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm); +int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot); +int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *hyp_vm); +int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm); +int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); @@ -79,7 +56,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); int hyp_pin_shared_mem(void *from, void *to); void hyp_unpin_shared_mem(void *from, void *to); -void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc); +void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc); int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, struct kvm_hyp_memcache *host_mc); @@ -90,4 +67,10 @@ static __always_inline void __load_host_stage2(void) else write_sysreg(0, vttbr_el2); } + +#ifdef CONFIG_NVHE_EL2_DEBUG +void pkvm_ownership_selftest(void *base); +#else +static inline void pkvm_ownership_selftest(void *base) { } +#endif #endif /* __KVM_NVHE_MEM_PROTECT__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index ab205c4d6774..dee1a406b0c2 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -7,9 +7,61 @@ #include <linux/types.h> +/* + * Bits 0-1 are used to encode the memory ownership state of each page from the + * point of view of a pKVM "component" (host, hyp, guest, ... see enum + * pkvm_component_id): + * 00: The page is owned and exclusively accessible by the component; + * 01: The page is owned and accessible by the component, but is also + * accessible by another component; + * 10: The page is accessible but not owned by the component; + * The storage of this state depends on the component: either in the + * hyp_vmemmap for the host and hyp states or in PTE software bits for guests. + */ +enum pkvm_page_state { + PKVM_PAGE_OWNED = 0ULL, + PKVM_PAGE_SHARED_OWNED = BIT(0), + PKVM_PAGE_SHARED_BORROWED = BIT(1), + + /* + * 'Meta-states' are not stored directly in PTE SW bits for guest + * states, but inferred from the context (e.g. invalid PTE entries). + * For the host and hyp, meta-states are stored directly in the + * struct hyp_page. + */ + PKVM_NOPAGE = BIT(0) | BIT(1), +}; +#define PKVM_PAGE_STATE_MASK (BIT(0) | BIT(1)) + +#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) +static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, + enum pkvm_page_state state) +{ + prot &= ~PKVM_PAGE_STATE_PROT_MASK; + prot |= FIELD_PREP(PKVM_PAGE_STATE_PROT_MASK, state); + return prot; +} + +static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) +{ + return FIELD_GET(PKVM_PAGE_STATE_PROT_MASK, prot); +} + struct hyp_page { - unsigned short refcount; - unsigned short order; + u16 refcount; + u8 order; + + /* Host state. Guarded by the host stage-2 lock. */ + unsigned __host_state : 4; + + /* + * Complement of the hyp state. Guarded by the hyp stage-1 lock. We use + * the complement so that the initial 0 in __hyp_state_comp (due to the + * entire vmemmap starting off zeroed) encodes PKVM_NOPAGE. + */ + unsigned __hyp_state_comp : 4; + + u32 host_share_guest_count; }; extern u64 __hyp_vmemmap; @@ -29,7 +81,13 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr) #define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) #define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT)) -#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)]) + +static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys) +{ + BUILD_BUG_ON(sizeof(struct hyp_page) != sizeof(u64)); + return &hyp_vmemmap[hyp_phys_to_pfn(phys)]; +} + #define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt)) #define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt)) @@ -38,6 +96,26 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr) #define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page)) #define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool) +static inline enum pkvm_page_state get_host_state(struct hyp_page *p) +{ + return p->__host_state; +} + +static inline void set_host_state(struct hyp_page *p, enum pkvm_page_state state) +{ + p->__host_state = state; +} + +static inline enum pkvm_page_state get_hyp_state(struct hyp_page *p) +{ + return p->__hyp_state_comp ^ PKVM_PAGE_STATE_MASK; +} + +static inline void set_hyp_state(struct hyp_page *p, enum pkvm_page_state state) +{ + p->__hyp_state_comp = state ^ PKVM_PAGE_STATE_MASK; +} + /* * Refcounting for 'struct hyp_page'. * hyp_pool::lock must be held if atomic access to the refcount is required. diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 230e4f2527de..6e83ce35c2f2 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -13,9 +13,11 @@ extern struct kvm_pgtable pkvm_pgtable; extern hyp_spinlock_t pkvm_pgd_lock; -int hyp_create_pcpu_fixmap(void); +int hyp_create_fixmap(void); void *hyp_fixmap_map(phys_addr_t phys); void hyp_fixmap_unmap(void); +void *hyp_fixblock_map(phys_addr_t phys, size_t *size); +void hyp_fixblock_unmap(void); int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 24a9a8330d19..ce31d3b73603 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -20,6 +20,12 @@ struct pkvm_hyp_vcpu { /* Backpointer to the host's (untrusted) vCPU instance. */ struct kvm_vcpu *host_vcpu; + + /* + * If this hyp vCPU is loaded, then this is a backpointer to the + * per-cpu pointer tracking us. Otherwise, NULL if not loaded. + */ + struct pkvm_hyp_vcpu **loaded_hyp_vcpu; }; /* @@ -37,16 +43,12 @@ struct pkvm_hyp_vm { struct hyp_pool pool; hyp_spinlock_t lock; - /* - * The number of vcpus initialized and ready to run. - * Modifying this is protected by 'vm_table_lock'. - */ - unsigned int nr_vcpus; - /* Array of the hyp vCPU structures for this VM. */ struct pkvm_hyp_vcpu *vcpus[]; }; +extern hyp_spinlock_t vm_table_lock; + static inline struct pkvm_hyp_vm * pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) { @@ -58,6 +60,11 @@ static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu) return vcpu_is_protected(&hyp_vcpu->vcpu); } +static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm) +{ + return kvm_vm_is_protected(&hyp_vm->kvm); +} + void pkvm_hyp_vm_table_init(void *tbl); int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, @@ -69,5 +76,15 @@ int __pkvm_teardown_vm(pkvm_handle_t handle); struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, unsigned int vcpu_idx); void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu); +struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void); + +struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle); +struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle); +void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm); + +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); +bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); +void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu); +int kvm_check_pvm_sysreg_table(void); #endif /* __ARM64_KVM_NVHE_PKVM_H__ */ |