diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-09 21:42:31 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-09 21:42:31 +0300 |
commit | d8312a3f61024352f1c7cb967571fd53631b0d6c (patch) | |
tree | be2f2f699e763330b0f0179e9f86009affbc0c7d /arch/arm64/include | |
parent | e9092d0d97961146655ce51f43850907d95f68c3 (diff) | |
parent | e01bca2fc698d7f0626f0214001af523e18ad60b (diff) | |
download | linux-d8312a3f61024352f1c7cb967571fd53631b0d6c.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"ARM:
- VHE optimizations
- EL2 address space randomization
- speculative execution mitigations ("variant 3a", aka execution past
invalid privilege register access)
- bugfixes and cleanups
PPC:
- improvements for the radix page fault handler for HV KVM on POWER9
s390:
- more kvm stat counters
- virtio gpu plumbing
- documentation
- facilities improvements
x86:
- support for VMware magic I/O port and pseudo-PMCs
- AMD pause loop exiting
- support for AMD core performance extensions
- support for synchronous register access
- expose nVMX capabilities to userspace
- support for Hyper-V signaling via eventfd
- use Enlightened VMCS when running on Hyper-V
- allow userspace to disable MWAIT/HLT/PAUSE vmexits
- usual roundup of optimizations and nested virtualization bugfixes
Generic:
- API selftest infrastructure (though the only tests are for x86 as
of now)"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (174 commits)
kvm: x86: fix a prototype warning
kvm: selftests: add sync_regs_test
kvm: selftests: add API testing infrastructure
kvm: x86: fix a compile warning
KVM: X86: Add Force Emulation Prefix for "emulate the next instruction"
KVM: X86: Introduce handle_ud()
KVM: vmx: unify adjacent #ifdefs
x86: kvm: hide the unused 'cpu' variable
KVM: VMX: remove bogus WARN_ON in handle_ept_misconfig
Revert "KVM: X86: Fix SMRAM accessing even if VM is shutdown"
kvm: Add emulation for movups/movupd
KVM: VMX: raise internal error for exception during invalid protected mode state
KVM: nVMX: Optimization: Dont set KVM_REQ_EVENT when VMExit with nested_run_pending
KVM: nVMX: Require immediate-exit when event reinjected to L2 and L1 event pending
KVM: x86: Fix misleading comments on handling pending exceptions
KVM: x86: Rename interrupt.pending to interrupt.injected
KVM: VMX: No need to clear pending NMI/interrupt on inject realmode interrupt
x86/kvm: use Enlightened VMCS when running on Hyper-V
x86/hyper-v: detect nested features
x86/hyper-v: define struct hv_enlightened_vmcs and clean field bits
...
Diffstat (limited to 'arch/arm64/include')
-rw-r--r-- | arch/arm64/include/asm/alternative.h | 41 | ||||
-rw-r--r-- | arch/arm64/include/asm/cpucaps.h | 2 | ||||
-rw-r--r-- | arch/arm64/include/asm/insn.h | 16 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_arm.h | 6 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_asm.h | 19 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_emulate.h | 78 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 53 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_hyp.h | 29 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_mmu.h | 165 | ||||
-rw-r--r-- | arch/arm64/include/asm/mmu.h | 8 | ||||
-rw-r--r-- | arch/arm64/include/asm/sysreg.h | 6 |
11 files changed, 324 insertions, 99 deletions
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 669028172fd6..a91933b1e2e6 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -5,6 +5,8 @@ #include <asm/cpucaps.h> #include <asm/insn.h> +#define ARM64_CB_PATCH ARM64_NCAPS + #ifndef __ASSEMBLY__ #include <linux/init.h> @@ -22,12 +24,19 @@ struct alt_instr { u8 alt_len; /* size of new instruction(s), <= orig_len */ }; +typedef void (*alternative_cb_t)(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); + void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -#define ALTINSTR_ENTRY(feature) \ +#define ALTINSTR_ENTRY(feature,cb) \ " .word 661b - .\n" /* label */ \ + " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ + " .else\n" \ + " .word " __stringify(cb) "- .\n" /* callback */ \ + " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -45,15 +54,18 @@ void apply_alternatives(void *start, size_t length); * but most assemblers die if insn1 or insn2 have a .inst. This should * be fixed in a binutils release posterior to 2.25.51.0.2 (anything * containing commit 4e4d08cf7399b606 or c1baaddf8861). + * + * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature) \ + ALTINSTR_ENTRY(feature,cb) \ ".popsection\n" \ + " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -61,11 +73,17 @@ void apply_alternatives(void *start, size_t length); ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ + ".else\n\t" \ + "663:\n\t" \ + "664:\n\t" \ + ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) +#define ALTERNATIVE_CB(oldinstr, cb) \ + __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) #else #include <asm/assembler.h> @@ -132,6 +150,14 @@ void apply_alternatives(void *start, size_t length); 661: .endm +.macro alternative_cb cb + .set .Lasm_alt_mode, 0 + .pushsection .altinstructions, "a" + altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0 + .popsection +661: +.endm + /* * Provide the other half of the alternative code sequence. */ @@ -158,6 +184,13 @@ void apply_alternatives(void *start, size_t length); .endm /* + * Callback-based alternative epilogue + */ +.macro alternative_cb_end +662: +.endm + +/* * Provides a trivial alternative or default sequence consisting solely * of NOPs. The number of NOPs is chosen automatically to match the * previous case. diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 21bb624e0a7a..a311880feb0f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -32,7 +32,7 @@ #define ARM64_HAS_VIRT_HOST_EXTN 11 #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 -#define ARM64_HYP_OFFSET_LOW 14 +#define ARM64_HARDEN_EL2_VECTORS 14 #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 4214c38d016b..f62c56b1793f 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -70,6 +70,7 @@ enum aarch64_insn_imm_type { AARCH64_INSN_IMM_6, AARCH64_INSN_IMM_S, AARCH64_INSN_IMM_R, + AARCH64_INSN_IMM_N, AARCH64_INSN_IMM_MAX }; @@ -314,6 +315,11 @@ __AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000) __AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000) __AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000) __AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000) +__AARCH64_INSN_FUNCS(and_imm, 0x7F800000, 0x12000000) +__AARCH64_INSN_FUNCS(orr_imm, 0x7F800000, 0x32000000) +__AARCH64_INSN_FUNCS(eor_imm, 0x7F800000, 0x52000000) +__AARCH64_INSN_FUNCS(ands_imm, 0x7F800000, 0x72000000) +__AARCH64_INSN_FUNCS(extr, 0x7FA00000, 0x13800000) __AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000) __AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000) __AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000) @@ -423,6 +429,16 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, int shift, enum aarch64_insn_variant variant, enum aarch64_insn_logic_type type); +u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type, + enum aarch64_insn_variant variant, + enum aarch64_insn_register Rn, + enum aarch64_insn_register Rd, + u64 imm); +u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, + enum aarch64_insn_register Rm, + enum aarch64_insn_register Rn, + enum aarch64_insn_register Rd, + u8 lsb); u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index b0c84171e6a3..6dd285e979c9 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -25,6 +25,7 @@ /* Hyp Configuration Register (HCR) bits */ #define HCR_TEA (UL(1) << 37) #define HCR_TERR (UL(1) << 36) +#define HCR_TLOR (UL(1) << 35) #define HCR_E2H (UL(1) << 34) #define HCR_ID (UL(1) << 33) #define HCR_CD (UL(1) << 32) @@ -64,6 +65,7 @@ /* * The bits we set in HCR: + * TLOR: Trap LORegion register accesses * RW: 64bit by default, can be overridden for 32bit VMs * TAC: Trap ACTLR * TSC: Trap SMC @@ -81,9 +83,9 @@ */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ - HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) + HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ + HCR_FMO | HCR_IMO) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) -#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24961b732e65..d53d40704416 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -33,6 +33,7 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +/* Translate a kernel address of @sym into its equivalent linear mapping */ #define kvm_ksym_ref(sym) \ ({ \ void *val = &sym; \ @@ -57,7 +58,9 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); -extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); +extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); + +extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); @@ -70,6 +73,20 @@ extern u32 __init_stage2_translation(void); extern void __qcom_hyp_sanitize_btac_predictors(void); +#else /* __ASSEMBLY__ */ + +.macro get_host_ctxt reg, tmp + adr_l \reg, kvm_host_cpu_state + mrs \tmp, tpidr_el2 + add \reg, \reg, \tmp +.endm + +.macro get_vcpu_ptr vcpu, ctxt + get_host_ctxt \ctxt, \vcpu + ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] + kern_hyp_va \vcpu +.endm + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 413dc82b1e89..23b33e8ea03a 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -26,13 +26,15 @@ #include <asm/esr.h> #include <asm/kvm_arm.h> +#include <asm/kvm_hyp.h> #include <asm/kvm_mmio.h> #include <asm/ptrace.h> #include <asm/cputype.h> #include <asm/virt.h> unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); -unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); +unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); +void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); @@ -45,6 +47,11 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu); void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.hcr_el2 & HCR_RW); +} + static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; @@ -59,16 +66,19 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) vcpu->arch.hcr_el2 &= ~HCR_RW; -} -static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.hcr_el2; + /* + * TID3: trap feature register accesses that we virtualise. + * For now this is conditional, since no AArch32 feature regs + * are currently virtualised. + */ + if (!vcpu_el1_is_32bit(vcpu)) + vcpu->arch.hcr_el2 |= HCR_TID3; } -static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) +static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = hcr; + return (unsigned long *)&vcpu->arch.hcr_el2; } static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) @@ -81,11 +91,27 @@ static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; } -static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu) +static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; } +static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.sysregs_loaded_on_cpu) + return read_sysreg_el1(elr); + else + return *__vcpu_elr_el1(vcpu); +} + +static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) +{ + if (vcpu->arch.sysregs_loaded_on_cpu) + write_sysreg_el1(v, elr); + else + *__vcpu_elr_el1(vcpu) = v; +} + static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; @@ -135,13 +161,28 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; } -/* Get vcpu SPSR for current mode */ -static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) +static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) - return vcpu_spsr32(vcpu); + return vcpu_read_spsr32(vcpu); - return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; + if (vcpu->arch.sysregs_loaded_on_cpu) + return read_sysreg_el1(spsr); + else + return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; +} + +static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) +{ + if (vcpu_mode_is_32bit(vcpu)) { + vcpu_write_spsr32(vcpu, v); + return; + } + + if (vcpu->arch.sysregs_loaded_on_cpu) + write_sysreg_el1(v, spsr); + else + vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) @@ -282,15 +323,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { - return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; + return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { - if (vcpu_mode_is_32bit(vcpu)) + if (vcpu_mode_is_32bit(vcpu)) { *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; - else - vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25); + } else { + u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + sctlr |= (1 << 25); + vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr); + } } static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) @@ -298,7 +342,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); - return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); + return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); } static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 596f8e414a4c..ab46bc70add6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -272,9 +272,6 @@ struct kvm_vcpu_arch { /* IO related fields */ struct kvm_decode mmio_decode; - /* Interrupt related fields */ - u64 irq_lines; /* IRQ and FIQ levels */ - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -287,10 +284,25 @@ struct kvm_vcpu_arch { /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; + + /* True when deferrable sysregs are loaded on the physical CPU, + * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ + bool sysregs_loaded_on_cpu; }; #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) -#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) + +/* + * Only use __vcpu_sys_reg if you know you want the memory backed version of a + * register, and not the one most recently accessed by a running VCPU. For + * example, for userspace access or for system registers that are never context + * switched, but only emulated. + */ +#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) + +u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg); +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); + /* * CP14 and CP15 live in the same array, as they are backed by the * same system registers. @@ -298,14 +310,6 @@ struct kvm_vcpu_arch { #define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) #define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) -#ifdef CONFIG_CPU_BIG_ENDIAN -#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r)) -#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1) -#else -#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1) -#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r)) -#endif - struct kvm_vm_stat { ulong remote_tlb_flush; }; @@ -358,10 +362,15 @@ int kvm_perf_teardown(void); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); +void __kvm_set_tpidr_el2(u64 tpidr_el2); +DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { + u64 tpidr_el2; + /* * Call initialization code, and switch to the full blown HYP code. * If the cpucaps haven't been finalized yet, something has gone very @@ -370,6 +379,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); + + /* + * Calculate the raw per-cpu offset without a translation from the + * kernel's mapping to the linear mapping, and store it in tpidr_el2 + * so that we can use adr_l to access per-cpu variables in EL2. + */ + tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state) + - (u64)kvm_ksym_ref(kvm_host_cpu_state); + + kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2); } static inline void kvm_arch_hardware_unsetup(void) {} @@ -416,6 +435,13 @@ static inline void kvm_arm_vhe_guest_enter(void) static inline void kvm_arm_vhe_guest_exit(void) { local_daif_restore(DAIF_PROCCTX_NOIRQ); + + /* + * When we exit from the guest we change a number of CPU configuration + * parameters, such as traps. Make sure these changes take effect + * before running the host or additional guests. + */ + isb(); } static inline bool kvm_arm_harden_branch_predictor(void) @@ -423,4 +449,7 @@ static inline bool kvm_arm_harden_branch_predictor(void) return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); } +void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index f26f9cd70c72..384c34397619 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -120,37 +120,38 @@ typeof(orig) * __hyp_text fname(void) \ return val; \ } -void __vgic_v2_save_state(struct kvm_vcpu *vcpu); -void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); +void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); +void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_enable_traps(struct kvm_vcpu *vcpu); void __timer_disable_traps(struct kvm_vcpu *vcpu); -void __sysreg_save_host_state(struct kvm_cpu_context *ctxt); -void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt); -void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt); -void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt); +void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); +void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); +void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); +void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); +void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); +void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); void __sysreg32_save_state(struct kvm_vcpu *vcpu); void __sysreg32_restore_state(struct kvm_vcpu *vcpu); -void __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt); -void __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt); -void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); -void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); +void __debug_switch_to_guest(struct kvm_vcpu *vcpu); +void __debug_switch_to_host(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); bool __fpsimd_enabled(void); +void activate_traps_vhe_load(struct kvm_vcpu *vcpu); +void deactivate_traps_vhe_put(void); + u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); void __noreturn __hyp_do_panic(unsigned long, ...); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7faed6e48b46..082110993647 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -69,9 +69,6 @@ * mappings, and none of this applies in that case. */ -#define HYP_PAGE_OFFSET_HIGH_MASK ((UL(1) << VA_BITS) - 1) -#define HYP_PAGE_OFFSET_LOW_MASK ((UL(1) << (VA_BITS - 1)) - 1) - #ifdef __ASSEMBLY__ #include <asm/alternative.h> @@ -81,28 +78,19 @@ * Convert a kernel VA into a HYP VA. * reg: VA to be converted. * - * This generates the following sequences: - * - High mask: - * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK - * nop - * - Low mask: - * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK - * and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK - * - VHE: - * nop - * nop - * - * The "low mask" version works because the mask is a strict subset of - * the "high mask", hence performing the first mask for nothing. - * Should be completely invisible on any viable CPU. + * The actual code generation takes place in kvm_update_va_mask, and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_update_va_mask uses the + * specific registers encoded in the instructions). */ .macro kern_hyp_va reg -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK -alternative_else_nop_endif -alternative_if ARM64_HYP_OFFSET_LOW - and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK -alternative_else_nop_endif +alternative_cb kvm_update_va_mask + and \reg, \reg, #1 /* mask with va_mask */ + ror \reg, \reg, #1 /* rotate to the first tag bit */ + add \reg, \reg, #0 /* insert the low 12 bits of the tag */ + add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */ + ror \reg, \reg, #63 /* rotate back */ +alternative_cb_end .endm #else @@ -113,24 +101,44 @@ alternative_else_nop_endif #include <asm/mmu_context.h> #include <asm/pgtable.h> +void kvm_update_va_mask(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); + static inline unsigned long __kern_hyp_va(unsigned long v) { - asm volatile(ALTERNATIVE("and %0, %0, %1", - "nop", - ARM64_HAS_VIRT_HOST_EXTN) - : "+r" (v) - : "i" (HYP_PAGE_OFFSET_HIGH_MASK)); - asm volatile(ALTERNATIVE("nop", - "and %0, %0, %1", - ARM64_HYP_OFFSET_LOW) - : "+r" (v) - : "i" (HYP_PAGE_OFFSET_LOW_MASK)); + asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" + "ror %0, %0, #1\n" + "add %0, %0, #0\n" + "add %0, %0, #0, lsl 12\n" + "ror %0, %0, #63\n", + kvm_update_va_mask) + : "+r" (v)); return v; } #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) /* + * Obtain the PC-relative address of a kernel symbol + * s: symbol + * + * The goal of this macro is to return a symbol's address based on a + * PC-relative computation, as opposed to a loading the VA from a + * constant pool or something similar. This works well for HYP, as an + * absolute VA is guaranteed to be wrong. Only use this if trying to + * obtain the address of a symbol (i.e. not something you obtained by + * following a pointer). + */ +#define hyp_symbol_addr(s) \ + ({ \ + typeof(s) *addr; \ + asm("adrp %0, %1\n" \ + "add %0, %0, :lo12:%1\n" \ + : "=r" (addr) : "S" (&s)); \ + addr; \ + }) + +/* * We currently only support a 40bit IPA. */ #define KVM_PHYS_SHIFT (40) @@ -140,7 +148,11 @@ static inline unsigned long __kern_hyp_va(unsigned long v) #include <asm/stage2_pgtable.h> int create_hyp_mappings(void *from, void *to, pgprot_t prot); -int create_hyp_io_mappings(void *from, void *to, phys_addr_t); +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr, + void __iomem **haddr); +int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, + void **haddr); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); @@ -249,7 +261,7 @@ struct kvm; static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { - return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; + return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) @@ -348,36 +360,95 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +#ifdef CONFIG_KVM_INDIRECT_VECTORS +/* + * EL2 vectors can be mapped and rerouted in a number of ways, + * depending on the kernel configuration and CPU present: + * + * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the + * hardening sequence is placed in one of the vector slots, which is + * executed before jumping to the real vectors. + * + * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the + * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the + * hardening sequence is mapped next to the idmap page, and executed + * before jumping to the real vectors. + * + * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an + * empty slot is selected, mapped next to the idmap page, and + * executed before jumping to the real vectors. + * + * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with + * VHE, as we don't have hypervisor-specific mappings. If the system + * is VHE and yet selects this capability, it will be ignored. + */ #include <asm/mmu.h> +extern void *__kvm_bp_vect_base; +extern int __kvm_harden_el2_vector_slot; + static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); - void *vect = kvm_ksym_ref(__kvm_hyp_vector); + void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); + int slot = -1; - if (data->fn) { - vect = __bp_harden_hyp_vecs_start + - data->hyp_vectors_slot * SZ_2K; + if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) { + vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start)); + slot = data->hyp_vectors_slot; + } - if (!has_vhe()) - vect = lm_alias(vect); + if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { + vect = __kvm_bp_vect_base; + if (slot == -1) + slot = __kvm_harden_el2_vector_slot; } + if (slot != -1) + vect += slot * SZ_2K; + return vect; } +/* This is only called on a !VHE system */ static inline int kvm_map_vectors(void) { - return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start), - kvm_ksym_ref(__bp_harden_hyp_vecs_end), - PAGE_HYP_EXEC); -} + /* + * HBP = ARM64_HARDEN_BRANCH_PREDICTOR + * HEL2 = ARM64_HARDEN_EL2_VECTORS + * + * !HBP + !HEL2 -> use direct vectors + * HBP + !HEL2 -> use hardened vectors in place + * !HBP + HEL2 -> allocate one vector slot and use exec mapping + * HBP + HEL2 -> use hardened vertors and use exec mapping + */ + if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) { + __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start); + __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); + } + + if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { + phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs_start); + unsigned long size = (__bp_harden_hyp_vecs_end - + __bp_harden_hyp_vecs_start); + + /* + * Always allocate a spare vector slot, as we don't + * know yet which CPUs have a BP hardening slot that + * we can reuse. + */ + __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); + return create_hyp_exec_mappings(vect_pa, size, + &__kvm_bp_vect_base); + } + return 0; +} #else static inline void *kvm_get_hyp_vector(void) { - return kvm_ksym_ref(__kvm_hyp_vector); + return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); } static inline int kvm_map_vectors(void) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index a050d4f3615d..dd320df0d026 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -21,6 +21,8 @@ #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) #define TTBR_ASID_MASK (UL(0xffff) << 48) +#define BP_HARDEN_EL2_SLOTS 4 + #ifndef __ASSEMBLY__ typedef struct { @@ -49,9 +51,13 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ + defined(CONFIG_HARDEN_EL2_VECTORS)) extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; +extern atomic_t arm64_el2_vector_last_slot; +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */ +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e7b9f154e476..6171178075dc 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -288,6 +288,12 @@ #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) +#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0) +#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1) +#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2) +#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3) +#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7) + #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) #define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) |